switchroom 0.13.52 → 0.13.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -244,6 +244,7 @@ import { handleInjectCommand } from './inject-handler.js'
244
244
  import { type BannerState } from '../slot-banner.js'
245
245
  import { refreshBanner } from '../slot-banner-driver.js'
246
246
  import { loadConfig as loadSwitchroomConfig } from '../../src/config/loader.js'; import { resolveAgentConfig } from '../../src/config/merge.js'
247
+ import { resolveOutboundTopic as resolveOutboundTopicHelper, type TopicRouterConfig as _OutboundRouterConfig } from '../../src/telegram/topic-router.js'
247
248
  import { readTurnUsages } from '../../src/agents/perf.js'
248
249
  import { decideProactiveCompact, initialCompactState, type CompactState } from './proactive-compact.js'
249
250
  import { nextCompactNotify, idleCompactNotifyState, type CompactNotifyState } from './compact-notify.js'
@@ -1108,6 +1109,41 @@ const outboundDedup = new OutboundDedupCache()
1108
1109
  const chatAvailableReactions = new Map<string, Set<string> | null>()
1109
1110
  const chatProbesInFlight = new Set<string>()
1110
1111
  const activeTurnStartedAt = new Map<string, number>()
1112
+ // PR3b parallel-turns: tracks turns claude has ACTUALLY been handed
1113
+ // (set after successful sendToAgent, cleared on turn_end), as opposed
1114
+ // to activeTurnStartedAt which is set eagerly on inbound receipt to
1115
+ // stamp the user-visible turn start time. Under fleet-shared and DM
1116
+ // topologies these are equivalent — every received inbound is delivered.
1117
+ // Under supergroup-owned (one agent owns the whole supergroup, multiple
1118
+ // topics share this gateway process), topic B's inbound that arrives
1119
+ // while topic A is processing gets buffered; without this split, keyB
1120
+ // stays in activeTurnStartedAt forever (no turn_end ever fires for a
1121
+ // turn claude never started), so the fleet-wide "claude is idle" gate
1122
+ // at purgeReactionTracking/releaseTurnBufferGate never re-opens — the
1123
+ // canonical supergroup-mode deadlock. Fleet gates read claudeBusyKeys;
1124
+ // per-key reads (status-query metric, wedge detection, etc.) keep
1125
+ // reading activeTurnStartedAt because they want the receipt timestamp.
1126
+ const claudeBusyKeys = new Set<string>()
1127
+
/**
 * Record that an inbound has been handed to claude by adding its chat key
 * to `claudeBusyKeys`.
 *
 * Thread resolution order:
 *   1. the top-level `threadId`, when it is neither null nor undefined;
 *   2. otherwise `meta.message_thread_id`, converted with `Number()` and
 *      used only when the result is finite (the original note says cron
 *      and vault-synthetic inbounds carry the thread id there);
 *   3. otherwise `null`.
 *
 * The resolved value is passed straight to `chatKey`, which — per the
 * original author's note — canonicalises null/undefined/0 to `_`, so
 * callers do not need to normalise the thread id themselves.
 *
 * @param m Inbound (or the subset of its fields) about to be delivered.
 */
function markClaudeBusyForInbound(m: {
  chatId: string
  threadId?: number
  meta?: Record<string, string>
}): void {
  const rawMetaThread = m.meta?.message_thread_id
  // NaN stands in for "no meta value" so one finiteness check covers both
  // the missing case and the unparsable case.
  const parsedMetaThread = rawMetaThread != null ? Number(rawMetaThread) : Number.NaN
  const threadFromMeta = Number.isFinite(parsedMetaThread) ? parsedMetaThread : null
  claudeBusyKeys.add(chatKey(m.chatId, m.threadId ?? threadFromMeta))
}
1111
1147
  const pendingRestarts = new Map<string, number>() // agentName -> timestamp when restart was requested
1112
1148
 
1113
1149
  // ─── Proactive context compaction (session.max_context_tokens) ──────────
@@ -1351,13 +1387,29 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1351
1387
  activeStatusReactions.delete(key)
1352
1388
  activeReactionMsgIds.delete(key)
1353
1389
  activeTurnStartedAt.delete(key)
1390
+ // PR3b: clear the parallel-turns fleet-gate entry. Symmetric with
1391
+ // the markClaudeBusyForInbound on the delivery path. Safe no-op
1392
+ // when the key was never marked (synthetic purge from a sweep).
1393
+ claudeBusyKeys.delete(key)
1354
1394
  // Human-feel UX: stop the turn-long `typing…` indicator started in
1355
1395
  // the turn-start block. `purgeReactionTracking` is the canonical
1356
1396
  // turn-end, so this is the single owner of the stop. (If an abnormal
1357
1397
  // abort skips purge, the stray loop self-heals: the next turn on this
1358
1398
  // chat calls `startTurnTypingLoop`, which stops the old interval
1359
1399
  // first.)
1360
- stopTurnTypingLoop(chatIdOfChatKey(key as _ChatKey))
1400
+ // PR3 supergroup-mode: stop the per-(chat,thread) typing loop, not
1401
+ // the whole chat's. Prefer the ending-turn's session ids (the
1402
+ // canonical turn ownership); fall back to parsing the chatKey
1403
+ // for sibling-purge / restart-cleanup callers that don't have a
1404
+ // Turn handle.
1405
+ if (endingTurn != null) {
1406
+ stopTurnTypingLoop(endingTurn.sessionChatId, endingTurn.sessionThreadId ?? null)
1407
+ } else {
1408
+ const chatId = chatIdOfChatKey(key as _ChatKey)
1409
+ const threadPart = (key as string).slice(chatId.length + 1)
1410
+ const threadId = threadPart === '_' || threadPart === '' ? null : Number(threadPart)
1411
+ stopTurnTypingLoop(chatId, Number.isFinite(threadId) ? threadId : null)
1412
+ }
1361
1413
  if (msgInfo) {
1362
1414
  const agentDir = resolveAgentDirFromEnv()
1363
1415
  if (agentDir != null) removeActiveReaction(agentDir, msgInfo.chatId, msgInfo.messageId)
@@ -1377,7 +1429,13 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1377
1429
  // survives us getting killed by our own restart. Fire-and-forget;
1378
1430
  // response to the client was already sent when the restart was
1379
1431
  // scheduled, so nobody is waiting on this.
1380
- if (activeTurnStartedAt.size === 0) {
1432
+ //
1433
+ // PR3b: gated on `claudeBusyKeys.size`, not `activeTurnStartedAt.size`,
1434
+ // so a buffered topic-B inbound (which had eagerly set its own
1435
+ // activeTurnStartedAt entry in the fresh-turn branch) doesn't pin this
1436
+ // gate forever while claude is genuinely idle. See the claudeBusyKeys
1437
+ // declaration for the supergroup deadlock this fixes.
1438
+ if (claudeBusyKeys.size === 0) {
1381
1439
  // #1556: the deterministic delivery point. claude has just gone
1382
1440
  // idle — flush any inbound held mid-turn so the channel
1383
1441
  // notification lands at the idle prompt and submits as a fresh
@@ -1390,7 +1448,11 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1390
1448
  const fr = redeliverBufferedInbound(
1391
1449
  pendingInboundBuffer,
1392
1450
  selfAgentForFlush,
1393
- (m) => ipcServer.sendToAgent(selfAgentForFlush, m),
1451
+ (m) => {
1452
+ const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1453
+ if (d) markClaudeBusyForInbound(m)
1454
+ return d
1455
+ },
1394
1456
  inboundSpool,
1395
1457
  )
1396
1458
  if (fr.redelivered > 0) {
@@ -1458,6 +1520,9 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1458
1520
  function releaseTurnBufferGate(key: string): void {
1459
1521
  if (!activeTurnStartedAt.has(key)) return
1460
1522
  activeTurnStartedAt.delete(key)
1523
+ // PR3b: keep claudeBusyKeys in sync — same lifecycle as the
1524
+ // activeTurnStartedAt entry it's mirroring here.
1525
+ claudeBusyKeys.delete(key)
1461
1526
  // Shadow trace so the structural turn-end metric still records.
1462
1527
  // outboundEmitted=true is correct here — we only reach this from
1463
1528
  // executeReply AFTER an outbound landed.
@@ -1468,13 +1533,19 @@ function releaseTurnBufferGate(key: string): void {
1468
1533
  // hits zero-active-turns, drain any held inbound. This is the
1469
1534
  // load-bearing wedge fix: the gate that pinned msg 1874+ in
1470
1535
  // test-harness's 13:02 UAT now opens after the reply.
1471
- if (activeTurnStartedAt.size === 0) {
1536
+ //
1537
+ // PR3b: gated on claudeBusyKeys (see purgeReactionTracking comment).
1538
+ if (claudeBusyKeys.size === 0) {
1472
1539
  const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
1473
1540
  if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
1474
1541
  const fr = redeliverBufferedInbound(
1475
1542
  pendingInboundBuffer,
1476
1543
  selfAgentForFlush,
1477
- (m) => ipcServer.sendToAgent(selfAgentForFlush, m),
1544
+ (m) => {
1545
+ const d = ipcServer.sendToAgent(selfAgentForFlush, m)
1546
+ if (d) markClaudeBusyForInbound(m)
1547
+ return d
1548
+ },
1478
1549
  inboundSpool,
1479
1550
  )
1480
1551
  if (fr.redelivered > 0) {
@@ -1636,7 +1707,15 @@ async function postCompactCard(occ: number, cap: number): Promise<void> {
1636
1707
  try {
1637
1708
  const chatId = loadAccess().allowFrom[0];
1638
1709
  if (!chatId) return;
1639
- const threadId = chatThreadMap.get(chatId);
1710
+ // PR4b-compact: supergroup-owned agents route the compaction card
1711
+ // into the `alerts` alias topic (or default_topic_id fallback) so
1712
+ // operators see lifecycle/system events in a predictable lane
1713
+ // instead of conversation lanes. Fleet/DM agents fall through to
1714
+ // the existing chatThreadMap last-seen-thread fallback (no
1715
+ // observable change).
1716
+ const threadId =
1717
+ resolveAgentOutboundTopic({ kind: 'compact-watchdog' })
1718
+ ?? chatThreadMap.get(chatId);
1640
1719
  const text =
1641
1720
  `🗜️ <b>Context compaction</b>\n` +
1642
1721
  `Working context hit ~${occ.toLocaleString()} tokens ` +
@@ -1875,6 +1954,13 @@ function escapeMarkdownV2(text: string): string {
1875
1954
  }
1876
1955
 
1877
1956
  // ─── Typing indicator ─────────────────────────────────────────────────────
1957
+ // All four state maps re-keyed from `chat_id` to `chatKey(chat, thread)`
1958
+ // in PR3 of the supergroup-mode rollout. In supergroup mode one agent
1959
+ // owns many topics in one chat; chatId-only keying meant topic A's
1960
+ // typing indicator died when topic B's tool call ended (last-stop-wins
1961
+ // on a shared key). Per-(chat,thread) keying preserves independent
1962
+ // typing loops across topics. Callers without thread context pass
1963
+ // `null` and behave exactly as before (chatKey collapses null→`_`).
1878
1964
  const typingIntervals = new Map<string, ReturnType<typeof setInterval>>()
1879
1965
  // Track pending backoff-retry timers so shutdown and stop can cancel them.
1880
1966
  const typingRetryTimers = new Map<string, ReturnType<typeof setTimeout>>()
@@ -1903,34 +1989,41 @@ const CHAT_ACTION_WHITELIST = new Set([
1903
1989
  ] as const)
1904
1990
  type ChatAction = typeof CHAT_ACTION_WHITELIST extends Set<infer T> ? T : never
1905
1991
 
/**
 * Start (or restart) the repeating chat-action indicator for one
 * (chat, thread) pair.
 *
 * Any loop or pending retry already registered under the same
 * `chatKey(chat_id, thread_id)` is stopped first. The action is then sent
 * once immediately and again every 4 seconds.
 *
 * Failure handling: when a send rejects with a message containing `401` or
 * `Unauthorized`, the loop is stopped and a restart is scheduled after a
 * doubling delay (minimum 1000 ms, capped at `TYPING_BACKOFF_MAX`). Any
 * successful send resets the delay to 0. Other failures are ignored and
 * the interval keeps ticking.
 *
 * A rejection can arrive after this loop has been stopped or replaced.
 * Such a stale rejection is dropped: acting on it would stop a newer loop
 * on the same key and schedule a retry that restarts a loop no caller
 * will ever stop.
 *
 * @param chat_id   Chat the action is sent to.
 * @param thread_id Topic to target; `null` sends without `message_thread_id`.
 * @param action    Chat action to send (defaults to `'typing'`).
 */
function startTypingLoop(
  chat_id: string,
  thread_id: number | null = null,
  action: ChatAction = 'typing',
): void {
  stopTypingLoop(chat_id, thread_id)
  const key = chatKey(chat_id, thread_id) as string
  const sendOpts = thread_id != null ? { message_thread_id: thread_id } : undefined
  // Identity token for THIS loop. Assigned synchronously below, before any
  // promise callback can run, so the handler always sees the final value.
  let ownInterval: ReturnType<typeof setInterval> | undefined
  const send = () => {
    bot.api.sendChatAction(chat_id, action, sendOpts).then(
      () => { typingBackoffMs = 0 },
      (err) => {
        // Stale rejection: this loop was stopped or superseded while the
        // request was in flight — do not touch the current state.
        if (typingIntervals.get(key) !== ownInterval) return
        const msg = err instanceof Error ? err.message : String(err)
        if (msg.includes('401') || msg.includes('Unauthorized')) {
          typingBackoffMs = Math.min(Math.max(typingBackoffMs * 2 || 1000, 1000), TYPING_BACKOFF_MAX)
          stopTypingLoop(chat_id, thread_id)
          const retry = setTimeout(() => {
            typingRetryTimers.delete(key)
            startTypingLoop(chat_id, thread_id, action)
          }, typingBackoffMs)
          // Registered so stopTypingLoop (and shutdown) can cancel it.
          typingRetryTimers.set(key, retry)
        }
      },
    )
  }
  send()
  ownInterval = setInterval(send, 4000)
  typingIntervals.set(key, ownInterval)
}
1928
2020
 
/**
 * Stop the chat-action loop for one (chat, thread) pair and cancel any
 * backoff retry queued for it.
 *
 * Both maps are keyed by `chatKey(chat_id, thread_id)`. Calling this when
 * nothing is registered under the key is a no-op.
 *
 * @param chat_id   Chat whose loop should stop.
 * @param thread_id Topic the loop was started for; defaults to `null`.
 */
function stopTypingLoop(chat_id: string, thread_id: number | null = null): void {
  const loopKey = chatKey(chat_id, thread_id) as string

  const activeInterval = typingIntervals.get(loopKey)
  if (activeInterval) {
    clearInterval(activeInterval)
    typingIntervals.delete(loopKey)
  }

  const pendingRetry = typingRetryTimers.get(loopKey)
  if (pendingRetry) {
    clearTimeout(pendingRetry)
    typingRetryTimers.delete(loopKey)
  }
}
1935
2028
 
1936
2029
  // Turn-level `typing…` indicator. Deliberately a SEPARATE interval map
@@ -1945,18 +2038,21 @@ function stopTypingLoop(chat_id: string): void {
1945
2038
  // sendChatAction is cheap.
1946
2039
  const turnTypingIntervals = new Map<string, ReturnType<typeof setInterval>>()
1947
2040
 
/**
 * Start the turn-level `typing` indicator for one (chat, thread) pair.
 *
 * Any turn loop already registered under the same key is stopped first.
 * The `typing` action is sent once immediately and then every 4 seconds.
 * Send failures are swallowed on purpose, so this loop has no
 * backoff or retry.
 *
 * @param chat_id   Chat the indicator is shown in.
 * @param thread_id Topic to target; `null` sends without `message_thread_id`.
 */
function startTurnTypingLoop(chat_id: string, thread_id: number | null = null): void {
  stopTurnTypingLoop(chat_id, thread_id)
  const loopKey = chatKey(chat_id, thread_id) as string
  const threadOpts = thread_id == null ? undefined : { message_thread_id: thread_id }
  const pulse = (): void => {
    // Fire-and-forget: a failed indicator send is not worth surfacing.
    void bot.api.sendChatAction(chat_id, 'typing', threadOpts).catch(() => {})
  }
  pulse()
  const timer = setInterval(pulse, 4000)
  turnTypingIntervals.set(loopKey, timer)
}
1956
2051
 
/**
 * Stop the turn-level `typing` indicator for one (chat, thread) pair.
 *
 * Looks up the interval under `chatKey(chat_id, thread_id)`; does nothing
 * when no loop is registered for that key.
 *
 * @param chat_id   Chat whose turn loop should stop.
 * @param thread_id Topic the loop was started for; defaults to `null`.
 */
function stopTurnTypingLoop(chat_id: string, thread_id: number | null = null): void {
  const loopKey = chatKey(chat_id, thread_id) as string
  const timer = turnTypingIntervals.get(loopKey)
  if (!timer) return
  clearInterval(timer)
  turnTypingIntervals.delete(loopKey)
}
1961
2057
 
1962
2058
  const typingWrapper = createTypingWrapper({
@@ -3405,7 +3501,11 @@ silencePoke.startTimer({
3405
3501
  const fbRedeliver = redeliverBufferedInbound(
3406
3502
  pendingInboundBuffer,
3407
3503
  fbSelfAgent,
3408
- (m) => ipcServer.sendToAgent(fbSelfAgent, m),
3504
+ (m) => {
3505
+ const d = ipcServer.sendToAgent(fbSelfAgent, m)
3506
+ if (d) markClaudeBusyForInbound(m)
3507
+ return d
3508
+ },
3409
3509
  inboundSpool,
3410
3510
  )
3411
3511
  process.stderr.write(
@@ -3733,6 +3833,7 @@ const ipcServer: IpcServer = createIpcServer({
3733
3833
  activeStatusReactions,
3734
3834
  activeReactionMsgIds,
3735
3835
  activeTurnStartedAt,
3836
+ claudeBusyKeys,
3736
3837
  activeDraftStreams,
3737
3838
  activeDraftParseModes,
3738
3839
  clearActiveReactions: () => {
@@ -3928,8 +4029,11 @@ const ipcServer: IpcServer = createIpcServer({
3928
4029
  onScheduleRestart(client: IpcClient, msg: ScheduleRestartMessage) {
3929
4030
  const { agentName } = msg;
3930
4031
 
3931
- // Check if any turn is currently in flight
3932
- const turnInFlight = activeTurnStartedAt.size > 0;
4032
+ // Check if any turn is currently in flight.
4033
+ // PR3b: gated on claudeBusyKeys (actually-handed-to-claude turns)
4034
+ // not activeTurnStartedAt (receipt-eager), so a buffered topic-B
4035
+ // inbound doesn't pin this as turnInFlight=true forever.
4036
+ const turnInFlight = claudeBusyKeys.size > 0;
3933
4037
 
3934
4038
  if (!turnInFlight) {
3935
4039
  // No active turn, restart immediately. Cycle both the agent and
@@ -4204,6 +4308,7 @@ const ipcServer: IpcServer = createIpcServer({
4204
4308
  ? msg.inbound.meta.source
4205
4309
  : 'unknown'
4206
4310
  const delivered = ipcServer.sendToAgent(msg.agentName, msg.inbound)
4311
+ if (delivered) markClaudeBusyForInbound(msg.inbound)
4207
4312
  process.stderr.write(
4208
4313
  `telegram gateway: inject_inbound agent=${msg.agentName} source=${source} prompt_key=${promptKey} delivered=${delivered}\n`,
4209
4314
  )
@@ -4252,11 +4357,16 @@ if (!STATIC) {
4252
4357
  () => {
4253
4358
  // #1556: never drain mid-turn — that re-creates the composer
4254
4359
  // wedge this buffer exists to prevent.
4255
- if (activeTurnStartedAt.size > 0) return false
4360
+ // PR3b: gated on claudeBusyKeys (see purgeReactionTracking).
4361
+ if (claudeBusyKeys.size > 0) return false
4256
4362
  const c = ipcServer.getClient(selfAgent)
4257
4363
  return c != null && c.isAlive()
4258
4364
  },
4259
- (m) => ipcServer.sendToAgent(selfAgent, m),
4365
+ (m) => {
4366
+ const d = ipcServer.sendToAgent(selfAgent, m)
4367
+ if (d) markClaudeBusyForInbound(m)
4368
+ return d
4369
+ },
4260
4370
  inboundSpool,
4261
4371
  )
4262
4372
  if (r != null && r.redelivered > 0) {
@@ -4672,7 +4782,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4672
4782
  previewMessageId = null
4673
4783
  }
4674
4784
 
4675
- startTypingLoop(chat_id)
4785
+ startTypingLoop(chat_id, threadId ?? null)
4676
4786
 
4677
4787
  // #1677 silent-reply auto-edit. Consecutive silent replies within
4678
4788
  // a turn edit a single anchor message instead of stacking new
@@ -4804,7 +4914,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4804
4914
  if (silentAnchorEditDone) {
4805
4915
  // Skip the chunk loop entirely — the anchor edit IS the send.
4806
4916
  // Match the normal exit path: stop typing, then return.
4807
- stopTypingLoop(chat_id)
4917
+ stopTypingLoop(chat_id, threadId ?? null)
4808
4918
  return {
4809
4919
  content: [
4810
4920
  {
@@ -4921,7 +5031,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4921
5031
  const msg = err instanceof Error ? err.message : String(err)
4922
5032
  throw new Error(`reply failed after ${sentIds.length} of ${chunks.length} chunk(s) sent: ${msg}`)
4923
5033
  } finally {
4924
- stopTypingLoop(chat_id)
5034
+ stopTypingLoop(chat_id, threadId ?? null)
4925
5035
  }
4926
5036
 
4927
5037
  // #710: remember per-button agent meta (ack_text / single_use) keyed
@@ -6281,8 +6391,12 @@ async function executeSendTyping(args: Record<string, unknown>): Promise<unknown
6281
6391
  }
6282
6392
  action = rawAction as ChatAction
6283
6393
  }
6284
- startTypingLoop(stChatId, action)
6285
- setTimeout(() => stopTypingLoop(stChatId), 30000)
6394
+ // PR3 supergroup-mode: resolve thread from args or fall back to the
6395
+ // last-seen thread for this chat so the indicator lands in the topic
6396
+ // the agent is working in (rather than the chat root).
6397
+ const stThreadId = resolveThreadId(stChatId, args.message_thread_id as string | number | undefined) ?? null
6398
+ startTypingLoop(stChatId, stThreadId, action)
6399
+ setTimeout(() => stopTypingLoop(stChatId, stThreadId), 30000)
6286
6400
  for (const [key, ctrl] of activeStatusReactions.entries()) {
6287
6401
  if (key.startsWith(`${stChatId}:`)) ctrl.setTool()
6288
6402
  }
@@ -6632,7 +6746,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6632
6746
  if (isTelegramSurfaceTool(name)) return
6633
6747
  ctrl.setTool(name)
6634
6748
  if (ev.toolUseId) {
6635
- typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, name)
6749
+ typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, name, turn.sessionThreadId ?? null)
6636
6750
  }
6637
6751
  return
6638
6752
  }
@@ -6875,7 +6989,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6875
6989
  const turn = currentTurn
6876
6990
  if (turn == null) return
6877
6991
  if (!ev.toolUseId) return
6878
- typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, ev.toolName)
6992
+ typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, ev.toolName, turn.sessionThreadId ?? null)
6879
6993
  return
6880
6994
  }
6881
6995
  case 'sub_agent_tool_result': {
@@ -7870,7 +7984,11 @@ async function handleInboundCoalesced(
7870
7984
  // defensive against future routers that might call this without one).
7871
7985
  maybeEarlyAckReaction(ctx, from)
7872
7986
 
7873
- const key = inboundCoalesceKey(String(ctx.chat!.id), String(from.id))
7987
+ const key = inboundCoalesceKey(
7988
+ String(ctx.chat!.id),
7989
+ ctx.message?.message_thread_id,
7990
+ String(from.id),
7991
+ )
7874
7992
  const result = inboundCoalescer.enqueue(key, { text, ctx, downloadImage, attachment })
7875
7993
  if (result.bypass) return handleInbound(ctx, text, undefined, undefined)
7876
7994
  }
@@ -7988,7 +8106,16 @@ async function handleInbound(
7988
8106
  // an ack on the buffered path). The snapshot is the minimal precise
7989
8107
  // fix. Phase 2b's state-machine extraction will revisit this
7990
8108
  // structurally.
7991
- const turnInFlightAtReceipt = activeTurnStartedAt.size > 0
8109
+ // PR3b: gated on claudeBusyKeys (turns claude has been handed) not
8110
+ // activeTurnStartedAt (eager set on receipt). In supergroup mode,
8111
+ // topic A active + topic B inbound arriving: pre-fix, B saw
8112
+ // turnInFlightAtReceipt=true because A's key was in
8113
+ // activeTurnStartedAt, AND B's fresh-turn branch then eagerly set
8114
+ // its OWN key — wedging the gate forever (claude is idle on B but
8115
+ // no turn_end ever fires). With claudeBusyKeys, B sees true (A is
8116
+ // busy) → B is buffered correctly, AND the gate cleanly reopens
8117
+ // when A's turn_end deletes keyA → flush triggers → B delivered.
8118
+ const turnInFlightAtReceipt = claudeBusyKeys.size > 0
7992
8119
 
7993
8120
  const access = result.access
7994
8121
  const from = ctx.from!
@@ -8167,11 +8294,17 @@ async function handleInbound(
8167
8294
  // hygiene). The add-flow intercept comes first because /auth add
8168
8295
  // creates fresh credentials at the broker layer, vs /reauth which
8169
8296
  // mutates an existing agent's slot — different success paths.
8170
- const pendingAdd = pendingAuthAddFlows.get(chat_id)
8297
+ //
8298
+ // PR3 supergroup-mode: keyed by chatKey(chat, thread) so an OAuth
8299
+ // code pasted into topic A isn't intercepted when topic B has a
8300
+ // separate /auth add flow pending (security: prevents cross-topic
8301
+ // credential mis-attribution).
8302
+ const interceptKey = chatKey(chat_id, messageThreadId) as string
8303
+ const pendingAdd = pendingAuthAddFlows.get(interceptKey)
8171
8304
  if (pendingAdd && looksLikeAuthCode(text)) {
8172
8305
  const elapsed = Date.now() - pendingAdd.startedAt
8173
8306
  if (elapsed < REAUTH_INTERCEPT_TTL_MS) {
8174
- pendingAuthAddFlows.delete(chat_id)
8307
+ pendingAuthAddFlows.delete(interceptKey)
8175
8308
  try {
8176
8309
  const credentials = await submitAccountAuthCode(pendingAdd, text.trim())
8177
8310
  try {
@@ -8212,15 +8345,15 @@ async function handleInbound(
8212
8345
  // Stale — drop the pending entry but let the message fall through
8213
8346
  // to other intercepts (defensively wipe scratch).
8214
8347
  cancelAccountAuthSession(pendingAdd)
8215
- pendingAuthAddFlows.delete(chat_id)
8348
+ pendingAuthAddFlows.delete(interceptKey)
8216
8349
  }
8217
8350
 
8218
8351
  // Auth-code intercept
8219
- const pendingReauth = pendingReauthFlows.get(chat_id)
8352
+ const pendingReauth = pendingReauthFlows.get(interceptKey)
8220
8353
  if (pendingReauth && looksLikeAuthCode(text)) {
8221
8354
  const elapsed = Date.now() - pendingReauth.startedAt
8222
8355
  if (elapsed < REAUTH_INTERCEPT_TTL_MS) {
8223
- pendingReauthFlows.delete(chat_id)
8356
+ pendingReauthFlows.delete(interceptKey)
8224
8357
  const { result, errorText } = execAuthCode(pendingReauth.agent, text.trim())
8225
8358
  if (errorText) {
8226
8359
  await switchroomReply(ctx, `<b>auth code failed:</b>\n${preBlock(formatSwitchroomOutput(errorText))}`, { html: true })
@@ -8242,7 +8375,7 @@ async function handleInbound(
8242
8375
  redactAuthCodeMessage(bot.api as never, chat_id, msgId ?? null, line => process.stderr.write(line))
8243
8376
  return
8244
8377
  }
8245
- pendingReauthFlows.delete(chat_id)
8378
+ pendingReauthFlows.delete(interceptKey)
8246
8379
  }
8247
8380
 
8248
8381
  // Vault intercept
@@ -8740,7 +8873,10 @@ async function handleInbound(
8740
8873
  // turn-end (`purgeReactionTracking → stopTurnTypingLoop`).
8741
8874
  // Deterministic, framework-owned, no prose — the mechanical
8742
8875
  // ambient layer of the pacing contract.
8743
- startTurnTypingLoop(chat_id)
8876
+ // PR3 supergroup-mode: pass thread so the indicator lands in
8877
+ // this turn's topic (otherwise topic A's turn-end would kill
8878
+ // topic B's typing indicator on shared chat_id keying).
8879
+ startTurnTypingLoop(chat_id, messageThreadId ?? null)
8744
8880
  // #1122 KPI: emit turn_started so dashboards can compute funnel
8745
8881
  // start counts + correlate to turn_ended for duration / TTFO.
8746
8882
  emitRuntimeMetric({
@@ -8951,6 +9087,7 @@ async function handleInbound(
8951
9087
  }
8952
9088
 
8953
9089
  const delivered = ipcServer.sendToAgent(selfAgent, inboundMsg)
9090
+ if (delivered) markClaudeBusyForInbound(inboundMsg)
8954
9091
  if (!delivered) {
8955
9092
  pendingInboundBuffer.push(selfAgent, inboundMsg)
8956
9093
  const threadOpts = messageThreadId != null ? { message_thread_id: messageThreadId } : {}
@@ -9194,7 +9331,10 @@ function resolveBootChatId(
9194
9331
  marker: { chat_id: string; thread_id: number | null; ack_message_id: number | null; ts: number } | null,
9195
9332
  ageMs?: number,
9196
9333
  ): { chatId: string; threadId: number | undefined; ackMsgId: number | undefined } | null {
9197
- // 1. Restart marker
9334
+ // 1. Restart marker — operator-initiated; honor where they typed
9335
+ // /restart. The marker carries the exact chat+thread context; no
9336
+ // routing override because the user expects to see the boot card
9337
+ // in the same lane where they invoked the restart.
9198
9338
  if (marker != null && (ageMs == null || ageMs < 5 * 60_000)) {
9199
9339
  return {
9200
9340
  chatId: marker.chat_id,
@@ -9202,9 +9342,19 @@ function resolveBootChatId(
9202
9342
  ackMsgId: marker.ack_message_id ?? undefined,
9203
9343
  }
9204
9344
  }
9345
+
9346
+ // For non-marker paths (spontaneous boot, crash recovery, env var,
9347
+ // history fallback): supergroup-mode agents route the boot card to
9348
+ // the `alerts` alias topic (or default_topic_id fallback) so the
9349
+ // operator sees lifecycle events in a predictable lane instead of
9350
+ // chat-root. For fleet-mode / DM agents the helper returns undefined
9351
+ // → behavior unchanged (lands at chat-root as today). PR4b of
9352
+ // supergroup-mode rollout (docs/rfcs/supergroup-mode.md).
9353
+ const supergroupBootTopic = resolveAgentOutboundTopic({ kind: 'boot' })
9354
+
9205
9355
  // 2. Env var
9206
9356
  const envChat = process.env.SUBAGENT_OWNER_CHAT_ID
9207
- if (envChat) return { chatId: envChat, threadId: undefined, ackMsgId: undefined }
9357
+ if (envChat) return { chatId: envChat, threadId: supergroupBootTopic, ackMsgId: undefined }
9208
9358
  // 3. Most-recent inbound from history
9209
9359
  if (HISTORY_ENABLED) {
9210
9360
  try {
@@ -9212,7 +9362,7 @@ function resolveBootChatId(
9212
9362
  const ownerChatId = access.allowFrom[0]
9213
9363
  if (ownerChatId) {
9214
9364
  const recent = queryHistory({ chat_id: ownerChatId, limit: 1 })
9215
- if (recent.length > 0) return { chatId: ownerChatId, threadId: undefined, ackMsgId: undefined }
9365
+ if (recent.length > 0) return { chatId: ownerChatId, threadId: supergroupBootTopic, ackMsgId: undefined }
9216
9366
  }
9217
9367
  } catch {}
9218
9368
  }
@@ -9220,6 +9370,40 @@ function resolveBootChatId(
9220
9370
  return null
9221
9371
  }
9222
9372
 
/**
 * Best-effort lookup of the topic an outbound event should be posted to
 * for the agent named by `SWITCHROOM_AGENT_NAME`.
 *
 * Returns `undefined` — meaning "use the caller's existing fallback" —
 * when:
 *   - the env var is unset or empty,
 *   - the loaded config has no entry for this agent,
 *   - the resolved agent config has no `channels.telegram` section,
 *   - the topic router itself returns `undefined` (per the original
 *     author's note, it does so for ops-lane events when
 *     `default_topic_id` is absent, i.e. the agent is not
 *     supergroup-owned), or
 *   - anything on this path throws.
 *
 * The config is re-read on every call. The original note says call sites
 * are infrequent (boot/reconnect, compaction edges, approval-card
 * dispatch) rather than per-turn, so the cost is accepted.
 *
 * @param event Outbound event descriptor, forwarded unchanged to the router.
 * @returns The routed topic id, or `undefined` when no routing applies.
 */
function resolveAgentOutboundTopic(
  event: Parameters<typeof resolveOutboundTopicHelper>[1],
): number | undefined {
  const agentName = process.env.SWITCHROOM_AGENT_NAME
  if (!agentName) return undefined
  try {
    const config = loadSwitchroomConfig()
    const agentEntry = config.agents?.[agentName]
    const telegram = agentEntry
      ? resolveAgentConfig(config.defaults, config.profiles, agentEntry).channels?.telegram
      : undefined
    return telegram
      ? resolveOutboundTopicHelper(telegram as _OutboundRouterConfig, event)
      : undefined
  } catch {
    // Deliberate swallow: routing is an enhancement, never a hard failure.
    return undefined
  }
}
9406
+
9223
9407
  /**
9224
9408
  * Stamp a user-facing restart reason into the clean-shutdown marker
9225
9409
  * (same file the SIGTERM handler writes to and the next session greeting
@@ -11242,19 +11426,24 @@ bot.command("auth", async ctx => {
11242
11426
  )
11243
11427
  return
11244
11428
  }
11429
+ // PR3 supergroup-mode: key auth-add flows by (chat, thread) so
11430
+ // separate flows in two topics of one supergroup can't collide.
11431
+ // In DM chats message_thread_id is undefined → key collapses to
11432
+ // `chatId:_`, identical to today's behavior.
11433
+ const authAddKey = chatKey(chatId, ctx.message?.message_thread_id ?? null) as string
11245
11434
  if (parsed.kind === 'cancel') {
11246
- const existing = pendingAuthAddFlows.get(chatId)
11435
+ const existing = pendingAuthAddFlows.get(authAddKey)
11247
11436
  if (!existing) {
11248
11437
  await switchroomReply(ctx, "<i>No pending <code>/auth add</code> flow in this chat.</i>", { html: true })
11249
11438
  return
11250
11439
  }
11251
11440
  cancelAccountAuthSession(existing)
11252
- pendingAuthAddFlows.delete(chatId)
11441
+ pendingAuthAddFlows.delete(authAddKey)
11253
11442
  await switchroomReply(ctx, "Cancelled.", { html: true })
11254
11443
  return
11255
11444
  }
11256
11445
  // parsed.kind === 'add'
11257
- if (pendingAuthAddFlows.has(chatId)) {
11446
+ if (pendingAuthAddFlows.has(authAddKey)) {
11258
11447
  await switchroomReply(
11259
11448
  ctx,
11260
11449
  "<i>An <code>/auth add</code> flow is already in progress for this chat. " +
@@ -11265,7 +11454,7 @@ bot.command("auth", async ctx => {
11265
11454
  }
11266
11455
  try {
11267
11456
  const { loginUrl, scratchDir, child } = await startAccountAuthSession(parsed.label)
11268
- pendingAuthAddFlows.set(chatId, {
11457
+ pendingAuthAddFlows.set(authAddKey, {
11269
11458
  label: parsed.label,
11270
11459
  scratchDir,
11271
11460
  child,
@@ -11822,6 +12011,7 @@ async function performVaultAccessApproval(
11822
12011
  operatorId: senderId,
11823
12012
  })
11824
12013
  const delivered = ipcServer.sendToAgent(pending.agent, synthetic)
12014
+ if (delivered) markClaudeBusyForInbound(synthetic)
11825
12015
  process.stderr.write(
11826
12016
  `telegram gateway: vault_grant_approved injection agent=${pending.agent} ` +
11827
12017
  `key=${pending.key} stage=${stageId} delivered=${delivered}\n`,
@@ -11901,6 +12091,7 @@ async function handleVaultRequestAccessCallback(ctx: Context, data: string): Pro
11901
12091
  operatorId: senderId,
11902
12092
  })
11903
12093
  const denyDelivered = ipcServer.sendToAgent(pending.agent, denyInbound)
12094
+ if (denyDelivered) markClaudeBusyForInbound(denyInbound)
11904
12095
  process.stderr.write(
11905
12096
  `telegram gateway: vault_grant_denied injection agent=${pending.agent} ` +
11906
12097
  `key=${pending.key} stage=${stageId} delivered=${denyDelivered}\n`,
@@ -12051,6 +12242,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
12051
12242
  operatorId: senderId,
12052
12243
  })
12053
12244
  const dDelivered = ipcServer.sendToAgent(pending.agent, discardInbound)
12245
+ if (dDelivered) markClaudeBusyForInbound(discardInbound)
12054
12246
  process.stderr.write(
12055
12247
  `telegram gateway: vault_save_discarded injection agent=${pending.agent} ` +
12056
12248
  `key=${pending.key} stage=${stageId} delivered=${dDelivered}\n`,
@@ -12174,6 +12366,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
12174
12366
  reason: failReason,
12175
12367
  })
12176
12368
  const fDelivered = ipcServer.sendToAgent(pending.agent, failInbound)
12369
+ if (fDelivered) markClaudeBusyForInbound(failInbound)
12177
12370
  process.stderr.write(
12178
12371
  `telegram gateway: vault_save_failed injection agent=${pending.agent} ` +
12179
12372
  `key=${pending.key} stage=${stageId} delivered=${fDelivered}\n`,
@@ -12203,6 +12396,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
12203
12396
  operatorId: senderId,
12204
12397
  })
12205
12398
  const okDelivered = ipcServer.sendToAgent(pending.agent, okInbound)
12399
+ if (okDelivered) markClaudeBusyForInbound(okInbound)
12206
12400
  process.stderr.write(
12207
12401
  `telegram gateway: vault_save_completed injection agent=${pending.agent} ` +
12208
12402
  `key=${pending.key} stage=${stageId} delivered=${okDelivered}\n`,
@@ -13029,7 +13223,14 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
13029
13223
  parseMode: 'HTML',
13030
13224
  synthInbound: async () => {
13031
13225
  await runSwitchroomAuthCommand(ctx, ['auth', 'reauth', agent], `auth reauth ${agent}`)
13032
- pendingReauthFlows.set(String(ctx.chat!.id), { agent, startedAt: Date.now() })
13226
+ // PR3 supergroup-mode: key by (chat, thread) so an OAuth code
13227
+ // pasted into a different topic isn't mistakenly intercepted
13228
+ // as this flow's reauth code.
13229
+ const reauthThreadId = ctx.callbackQuery?.message?.message_thread_id
13230
+ pendingReauthFlows.set(
13231
+ chatKey(String(ctx.chat!.id), reauthThreadId ?? null) as string,
13232
+ { agent, startedAt: Date.now() },
13233
+ )
13033
13234
  },
13034
13235
  })
13035
13236
  return
@@ -14194,6 +14395,7 @@ bot.on('callback_query:data', async ctx => {
14194
14395
  // by onClientRegistered) makes the "queued" promise real.
14195
14396
  const selfAgentBtn = process.env.SWITCHROOM_AGENT_NAME ?? ''
14196
14397
  const btnDelivered = ipcServer.sendToAgent(selfAgentBtn, inboundMsg)
14398
+ if (btnDelivered) markClaudeBusyForInbound(inboundMsg)
14197
14399
  if (!btnDelivered) {
14198
14400
  pendingInboundBuffer.push(selfAgentBtn, inboundMsg)
14199
14401
  // No registered bridge — the agent's mid-restart. Tell the user
@@ -15078,6 +15280,7 @@ function flushReactionBatch(batch: ReactionBatch): void {
15078
15280
  meta,
15079
15281
  }
15080
15282
  const delivered = ipcServer.sendToAgent(agentName, inbound)
15283
+ if (delivered) markClaudeBusyForInbound(inbound)
15081
15284
  process.stderr.write(
15082
15285
  `telegram gateway: reactions.dispatch agent=${agentName} chat=${batch.chatId} ` +
15083
15286
  `count=${batch.reactions.length} batched=${batch.batched} delivered=${delivered}\n`,