npm - switchroom - Versions diffs - 0.13.52 → 0.13.53 - Mend

switchroom 0.13.52 → 0.13.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/agent-scheduler/index.js +317 -132
package/dist/auth-broker/index.js +494 -156
package/dist/cli/drive-write-pretool.mjs +18 -3
package/dist/cli/switchroom.js +2353 -994
package/dist/host-control/main.js +246 -127
package/dist/vault/approvals/kernel-server.js +8269 -8146
package/dist/vault/broker/server.js +2811 -2688
package/package.json +1 -1
package/skills/switchroom-status/SKILL.md +8 -6
package/telegram-plugin/chat-lock.ts +87 -19
package/telegram-plugin/dist/gateway/gateway.js +752 -120
package/telegram-plugin/gateway/disconnect-flush.ts +32 -0
package/telegram-plugin/gateway/gateway.ts +258 -55
package/telegram-plugin/gateway/inbound-coalesce.ts +19 -6
package/telegram-plugin/stream-reply-handler.ts +10 -8
package/telegram-plugin/tests/gateway-disconnect-flush.test.ts +116 -0
package/telegram-plugin/tests/inbound-coalesce.test.ts +20 -4
package/telegram-plugin/tests/outbound-ordering.test.ts +228 -0
package/telegram-plugin/tests/parallel-turns-deadlock-fix.test.ts +217 -0
package/telegram-plugin/tests/typing-wrap.test.ts +65 -8
package/telegram-plugin/typing-wrap.ts +43 -21
package/profiles/default/CLAUDE.md +0 -122

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -244,6 +244,7 @@ import { handleInjectCommand } from './inject-handler.js'
 import { type BannerState } from '../slot-banner.js'
 import { refreshBanner } from '../slot-banner-driver.js'
 import { loadConfig as loadSwitchroomConfig } from '../../src/config/loader.js'; import { resolveAgentConfig } from '../../src/config/merge.js'
+import { resolveOutboundTopic as resolveOutboundTopicHelper, type TopicRouterConfig as _OutboundRouterConfig } from '../../src/telegram/topic-router.js'
 import { readTurnUsages } from '../../src/agents/perf.js'
 import { decideProactiveCompact, initialCompactState, type CompactState } from './proactive-compact.js'
 import { nextCompactNotify, idleCompactNotifyState, type CompactNotifyState } from './compact-notify.js'
@@ -1108,6 +1109,41 @@ const outboundDedup = new OutboundDedupCache()
 const chatAvailableReactions = new Map<string, Set<string> | null>()
 const chatProbesInFlight = new Set<string>()
 const activeTurnStartedAt = new Map<string, number>()
+// PR3b parallel-turns: tracks turns claude has ACTUALLY been handed
+// (set after successful sendToAgent, cleared on turn_end), as opposed
+// to activeTurnStartedAt which is set eagerly on inbound receipt to
+// stamp the user-visible turn start time. Under fleet-shared and DM
+// topologies these are equivalent — every received inbound is delivered.
+// Under supergroup-owned (one agent owns the whole supergroup, multiple
+// topics share this gateway process), topic B's inbound that arrives
+// while topic A is processing gets buffered; without this split, keyB
+// stays in activeTurnStartedAt forever (no turn_end ever fires for a
+// turn claude never started), so the fleet-wide "claude is idle" gate
+// at purgeReactionTracking/releaseTurnBufferGate never re-opens — the
+// canonical supergroup-mode deadlock. Fleet gates read claudeBusyKeys;
+// per-key reads (status-query metric, wedge detection, etc.) keep
+// reading activeTurnStartedAt because they want the receipt timestamp.
+const claudeBusyKeys = new Set<string>()
+/**
+ * Helper: stamp a claudeBusyKeys entry for an inbound about to be
+ * handed to claude. Pulls the thread id from the top-level field if
+ * present, otherwise falls back to meta.message_thread_id (cron and
+ * vault-synthetic inbounds put it there). chatKey canonicalises
+ * null/undefined/0 to `_` so callers don't need to think about it.
+ *
+ * The meta fallback is only consulted when the top-level threadId is
+ * null/undefined. It is parsed with Number(); a value that does not
+ * parse to a finite number is ignored and the thread stays null.
+ * Adding to the Set is idempotent, so re-marking a key that is
+ * already busy is harmless. The matching deletes live in
+ * purgeReactionTracking and releaseTurnBufferGate.
+ *
+ * The call sites visible in this file invoke it only after
+ * ipcServer.sendToAgent returned a truthy result.
+ *
+ * @param m - The inbound being delivered. Only chatId, threadId and
+ *   meta.message_thread_id are read; nothing on `m` is mutated.
+ */
+function markClaudeBusyForInbound(m: {
+  chatId: string
+  threadId?: number
+  meta?: Record<string, string>
+}): void {
+  let tid: number | null = m.threadId ?? null
+  if (tid == null && m.meta?.message_thread_id != null) {
+    const n = Number(m.meta.message_thread_id)
+    if (Number.isFinite(n)) tid = n
+  }
+  claudeBusyKeys.add(chatKey(m.chatId, tid))
+}
 const pendingRestarts = new Map<string, number>()  // agentName -> timestamp when restart was requested
 // ─── Proactive context compaction (session.max_context_tokens) ──────────
@@ -1351,13 +1387,29 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
   activeStatusReactions.delete(key)
   activeReactionMsgIds.delete(key)
   activeTurnStartedAt.delete(key)
+  // PR3b: clear the parallel-turns fleet-gate entry. Symmetric with
+  // the markClaudeBusyForInbound on the delivery path. Safe no-op
+  // when the key was never marked (synthetic purge from a sweep).
+  claudeBusyKeys.delete(key)
   // Human-feel UX: stop the turn-long `typing…` indicator started in
   // the turn-start block. `purgeReactionTracking` is the canonical
   // turn-end, so this is the single owner of the stop. (If an abnormal
   // abort skips purge, the stray loop self-heals: the next turn on this
   // chat calls `startTurnTypingLoop`, which stops the old interval
   // first.)
-  stopTurnTypingLoop(chatIdOfChatKey(key as _ChatKey))
+  // PR3 supergroup-mode: stop the per-(chat,thread) typing loop, not
+  // the whole chat's. Prefer the ending-turn's session ids (the
+  // canonical turn ownership); fall back to parsing the chatKey
+  // for sibling-purge / restart-cleanup callers that don't have a
+  // Turn handle.
+  if (endingTurn != null) {
+    stopTurnTypingLoop(endingTurn.sessionChatId, endingTurn.sessionThreadId ?? null)
+  } else {
+    const chatId = chatIdOfChatKey(key as _ChatKey)
+    const threadPart = (key as string).slice(chatId.length + 1)
+    const threadId = threadPart === '_' || threadPart === '' ? null : Number(threadPart)
+    stopTurnTypingLoop(chatId, Number.isFinite(threadId) ? threadId : null)
+  }
   if (msgInfo) {
     const agentDir = resolveAgentDirFromEnv()
     if (agentDir != null) removeActiveReaction(agentDir, msgInfo.chatId, msgInfo.messageId)
@@ -1377,7 +1429,13 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
   // survives us getting killed by our own restart. Fire-and-forget;
   // response to the client was already sent when the restart was
   // scheduled, so nobody is waiting on this.
-  if (activeTurnStartedAt.size === 0) {
+  //
+  // PR3b: gated on `claudeBusyKeys.size`, not `activeTurnStartedAt.size`,
+  // so a buffered topic-B inbound (which had eagerly set its own
+  // activeTurnStartedAt entry in the fresh-turn branch) doesn't pin this
+  // gate forever while claude is genuinely idle. See the claudeBusyKeys
+  // declaration for the supergroup deadlock this fixes.
+  if (claudeBusyKeys.size === 0) {
     // #1556: the deterministic delivery point. claude has just gone
     // idle — flush any inbound held mid-turn so the channel
     // notification lands at the idle prompt and submits as a fresh
@@ -1390,7 +1448,11 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
       const fr = redeliverBufferedInbound(
         pendingInboundBuffer,
         selfAgentForFlush,
-        (m) => ipcServer.sendToAgent(selfAgentForFlush, m),
+        (m) => {
+          const d = ipcServer.sendToAgent(selfAgentForFlush, m)
+          if (d) markClaudeBusyForInbound(m)
+          return d
+        },
         inboundSpool,
       )
       if (fr.redelivered > 0) {
@@ -1458,6 +1520,9 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
 function releaseTurnBufferGate(key: string): void {
   if (!activeTurnStartedAt.has(key)) return
   activeTurnStartedAt.delete(key)
+  // PR3b: keep claudeBusyKeys in sync — same lifecycle as the
+  // activeTurnStartedAt entry it's mirroring here.
+  claudeBusyKeys.delete(key)
   // Shadow trace so the structural turn-end metric still records.
   // outboundEmitted=true is correct here — we only reach this from
   // executeReply AFTER an outbound landed.
@@ -1468,13 +1533,19 @@ function releaseTurnBufferGate(key: string): void {
   // hits zero-active-turns, drain any held inbound. This is the
   // load-bearing wedge fix: the gate that pinned msg 1874+ in
   // test-harness's 13:02 UAT now opens after the reply.
-  if (activeTurnStartedAt.size === 0) {
+  //
+  // PR3b: gated on claudeBusyKeys (see purgeReactionTracking comment).
+  if (claudeBusyKeys.size === 0) {
     const selfAgentForFlush = process.env.SWITCHROOM_AGENT_NAME ?? ''
     if (pendingInboundBuffer.depth(selfAgentForFlush) > 0) {
       const fr = redeliverBufferedInbound(
         pendingInboundBuffer,
         selfAgentForFlush,
-        (m) => ipcServer.sendToAgent(selfAgentForFlush, m),
+        (m) => {
+          const d = ipcServer.sendToAgent(selfAgentForFlush, m)
+          if (d) markClaudeBusyForInbound(m)
+          return d
+        },
         inboundSpool,
       )
       if (fr.redelivered > 0) {
@@ -1636,7 +1707,15 @@ async function postCompactCard(occ: number, cap: number): Promise<void> {
   try {
     const chatId = loadAccess().allowFrom[0];
     if (!chatId) return;
-    const threadId = chatThreadMap.get(chatId);
+    // PR4b-compact: supergroup-owned agents route the compaction card
+    // into the `alerts` alias topic (or default_topic_id fallback) so
+    // operators see lifecycle/system events in a predictable lane
+    // instead of conversation lanes. Fleet/DM agents fall through to
+    // the existing chatThreadMap last-seen-thread fallback (no
+    // observable change).
+    const threadId =
+      resolveAgentOutboundTopic({ kind: 'compact-watchdog' })
+      ?? chatThreadMap.get(chatId);
     const text =
       `🗜️ <b>Context compaction</b>\n` +
       `Working context hit ~${occ.toLocaleString()} tokens ` +
@@ -1875,6 +1954,13 @@ function escapeMarkdownV2(text: string): string {
 }
 // ─── Typing indicator ─────────────────────────────────────────────────────
+// All four state maps re-keyed from `chat_id` to `chatKey(chat, thread)`
+// in PR3 of the supergroup-mode rollout. In supergroup mode one agent
+// owns many topics in one chat; chatId-only keying meant topic A's
+// typing indicator died when topic B's tool call ended (last-stop-wins
+// on a shared key). Per-(chat,thread) keying preserves independent
+// typing loops across topics. Callers without thread context pass
+// `null` and behave exactly as before (chatKey collapses null→`_`).
 const typingIntervals = new Map<string, ReturnType<typeof setInterval>>()
 // Track pending backoff-retry timers so shutdown and stop can cancel them.
 const typingRetryTimers = new Map<string, ReturnType<typeof setTimeout>>()
@@ -1903,34 +1989,41 @@ const CHAT_ACTION_WHITELIST = new Set([
 ] as const)
 type ChatAction = typeof CHAT_ACTION_WHITELIST extends Set<infer T> ? T : never
-function startTypingLoop(chat_id: string, action: ChatAction = 'typing'): void {
-  stopTypingLoop(chat_id)
+function startTypingLoop(
+  chat_id: string,
+  thread_id: number | null = null,
+  action: ChatAction = 'typing',
+): void {
+  stopTypingLoop(chat_id, thread_id)
+  const key = chatKey(chat_id, thread_id) as string
+  const sendOpts = thread_id != null ? { message_thread_id: thread_id } : undefined
   const send = () => {
-    bot.api.sendChatAction(chat_id, action).then(
+    bot.api.sendChatAction(chat_id, action, sendOpts).then(
       () => { typingBackoffMs = 0 },
       (err) => {
         const msg = err instanceof Error ? err.message : String(err)
         if (msg.includes('401') || msg.includes('Unauthorized')) {
           typingBackoffMs = Math.min(Math.max(typingBackoffMs * 2 || 1000, 1000), TYPING_BACKOFF_MAX)
-          stopTypingLoop(chat_id)
+          stopTypingLoop(chat_id, thread_id)
           const retry = setTimeout(() => {
-            typingRetryTimers.delete(chat_id)
-            startTypingLoop(chat_id, action)
+            typingRetryTimers.delete(key)
+            startTypingLoop(chat_id, thread_id, action)
           }, typingBackoffMs)
-          typingRetryTimers.set(chat_id, retry)
+          typingRetryTimers.set(key, retry)
         }
       },
     )
   }
   send()
-  typingIntervals.set(chat_id, setInterval(send, 4000))
+  typingIntervals.set(key, setInterval(send, 4000))
 }
-function stopTypingLoop(chat_id: string): void {
-  const iv = typingIntervals.get(chat_id)
-  if (iv) { clearInterval(iv); typingIntervals.delete(chat_id) }
-  const retry = typingRetryTimers.get(chat_id)
-  if (retry) { clearTimeout(retry); typingRetryTimers.delete(chat_id) }
+function stopTypingLoop(chat_id: string, thread_id: number | null = null): void {
+  const key = chatKey(chat_id, thread_id) as string
+  const iv = typingIntervals.get(key)
+  if (iv) { clearInterval(iv); typingIntervals.delete(key) }
+  const retry = typingRetryTimers.get(key)
+  if (retry) { clearTimeout(retry); typingRetryTimers.delete(key) }
 }
 // Turn-level `typing…` indicator. Deliberately a SEPARATE interval map
@@ -1945,18 +2038,21 @@ function stopTypingLoop(chat_id: string): void {
 // sendChatAction is cheap.
 const turnTypingIntervals = new Map<string, ReturnType<typeof setInterval>>()
-function startTurnTypingLoop(chat_id: string): void {
-  stopTurnTypingLoop(chat_id)
+function startTurnTypingLoop(chat_id: string, thread_id: number | null = null): void {
+  stopTurnTypingLoop(chat_id, thread_id)
+  const key = chatKey(chat_id, thread_id) as string
+  const sendOpts = thread_id != null ? { message_thread_id: thread_id } : undefined
   const send = () => {
-    void bot.api.sendChatAction(chat_id, 'typing').catch(() => {})
+    void bot.api.sendChatAction(chat_id, 'typing', sendOpts).catch(() => {})
   }
   send()
-  turnTypingIntervals.set(chat_id, setInterval(send, 4000))
+  turnTypingIntervals.set(key, setInterval(send, 4000))
 }
-function stopTurnTypingLoop(chat_id: string): void {
-  const iv = turnTypingIntervals.get(chat_id)
-  if (iv) { clearInterval(iv); turnTypingIntervals.delete(chat_id) }
+function stopTurnTypingLoop(chat_id: string, thread_id: number | null = null): void {
+  const key = chatKey(chat_id, thread_id) as string
+  const iv = turnTypingIntervals.get(key)
+  if (iv) { clearInterval(iv); turnTypingIntervals.delete(key) }
 }
 const typingWrapper = createTypingWrapper({
@@ -3405,7 +3501,11 @@ silencePoke.startTimer({
     const fbRedeliver = redeliverBufferedInbound(
       pendingInboundBuffer,
       fbSelfAgent,
-      (m) => ipcServer.sendToAgent(fbSelfAgent, m),
+      (m) => {
+        const d = ipcServer.sendToAgent(fbSelfAgent, m)
+        if (d) markClaudeBusyForInbound(m)
+        return d
+      },
       inboundSpool,
     )
     process.stderr.write(
@@ -3733,6 +3833,7 @@ const ipcServer: IpcServer = createIpcServer({
       activeStatusReactions,
       activeReactionMsgIds,
       activeTurnStartedAt,
+      claudeBusyKeys,
       activeDraftStreams,
       activeDraftParseModes,
       clearActiveReactions: () => {
@@ -3928,8 +4029,11 @@ const ipcServer: IpcServer = createIpcServer({
   onScheduleRestart(client: IpcClient, msg: ScheduleRestartMessage) {
     const { agentName } = msg;
-    // Check if any turn is currently in flight
-    const turnInFlight = activeTurnStartedAt.size > 0;
+    // Check if any turn is currently in flight.
+    // PR3b: gated on claudeBusyKeys (actually-handed-to-claude turns)
+    // not activeTurnStartedAt (receipt-eager), so a buffered topic-B
+    // inbound doesn't pin this as turnInFlight=true forever.
+    const turnInFlight = claudeBusyKeys.size > 0;
     if (!turnInFlight) {
       // No active turn, restart immediately. Cycle both the agent and
@@ -4204,6 +4308,7 @@ const ipcServer: IpcServer = createIpcServer({
       ? msg.inbound.meta.source
       : 'unknown'
     const delivered = ipcServer.sendToAgent(msg.agentName, msg.inbound)
+    if (delivered) markClaudeBusyForInbound(msg.inbound)
     process.stderr.write(
       `telegram gateway: inject_inbound agent=${msg.agentName} source=${source} prompt_key=${promptKey} delivered=${delivered}\n`,
     )
@@ -4252,11 +4357,16 @@ if (!STATIC) {
       () => {
         // #1556: never drain mid-turn — that re-creates the composer
         // wedge this buffer exists to prevent.
-        if (activeTurnStartedAt.size > 0) return false
+        // PR3b: gated on claudeBusyKeys (see purgeReactionTracking).
+        if (claudeBusyKeys.size > 0) return false
         const c = ipcServer.getClient(selfAgent)
         return c != null && c.isAlive()
       },
-      (m) => ipcServer.sendToAgent(selfAgent, m),
+      (m) => {
+        const d = ipcServer.sendToAgent(selfAgent, m)
+        if (d) markClaudeBusyForInbound(m)
+        return d
+      },
       inboundSpool,
     )
     if (r != null && r.redelivered > 0) {
@@ -4672,7 +4782,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     previewMessageId = null
   }
-  startTypingLoop(chat_id)
+  startTypingLoop(chat_id, threadId ?? null)
   // #1677 silent-reply auto-edit. Consecutive silent replies within
   // a turn edit a single anchor message instead of stacking new
@@ -4804,7 +4914,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   if (silentAnchorEditDone) {
     // Skip the chunk loop entirely — the anchor edit IS the send.
     // Match the normal exit path: stop typing, then return.
-    stopTypingLoop(chat_id)
+    stopTypingLoop(chat_id, threadId ?? null)
     return {
       content: [
         {
@@ -4921,7 +5031,7 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
     const msg = err instanceof Error ? err.message : String(err)
     throw new Error(`reply failed after ${sentIds.length} of ${chunks.length} chunk(s) sent: ${msg}`)
   } finally {
-    stopTypingLoop(chat_id)
+    stopTypingLoop(chat_id, threadId ?? null)
   }
   // #710: remember per-button agent meta (ack_text / single_use) keyed
@@ -6281,8 +6391,12 @@ async function executeSendTyping(args: Record<string, unknown>): Promise<unknown
     }
     action = rawAction as ChatAction
   }
-  startTypingLoop(stChatId, action)
-  setTimeout(() => stopTypingLoop(stChatId), 30000)
+  // PR3 supergroup-mode: resolve thread from args or fall back to the
+  // last-seen thread for this chat so the indicator lands in the topic
+  // the agent is working in (rather than the chat root).
+  const stThreadId = resolveThreadId(stChatId, args.message_thread_id as string | number | undefined) ?? null
+  startTypingLoop(stChatId, stThreadId, action)
+  setTimeout(() => stopTypingLoop(stChatId, stThreadId), 30000)
   for (const [key, ctrl] of activeStatusReactions.entries()) {
     if (key.startsWith(`${stChatId}:`)) ctrl.setTool()
   }
@@ -6632,7 +6746,7 @@ function handleSessionEvent(ev: SessionEvent): void {
       if (isTelegramSurfaceTool(name)) return
       ctrl.setTool(name)
       if (ev.toolUseId) {
-        typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, name)
+        typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, name, turn.sessionThreadId ?? null)
       }
       return
     }
@@ -6875,7 +6989,7 @@ function handleSessionEvent(ev: SessionEvent): void {
       const turn = currentTurn
       if (turn == null) return
       if (!ev.toolUseId) return
-      typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, ev.toolName)
+      typingWrapper.onToolUse(ev.toolUseId, turn.sessionChatId, ev.toolName, turn.sessionThreadId ?? null)
       return
     }
     case 'sub_agent_tool_result': {
@@ -7870,7 +7984,11 @@ async function handleInboundCoalesced(
   //     defensive against future routers that might call this without one).
   maybeEarlyAckReaction(ctx, from)
-  const key = inboundCoalesceKey(String(ctx.chat!.id), String(from.id))
+  const key = inboundCoalesceKey(
+    String(ctx.chat!.id),
+    ctx.message?.message_thread_id,
+    String(from.id),
+  )
   const result = inboundCoalescer.enqueue(key, { text, ctx, downloadImage, attachment })
   if (result.bypass) return handleInbound(ctx, text, undefined, undefined)
 }
@@ -7988,7 +8106,16 @@ async function handleInbound(
   // an ack on the buffered path). The snapshot is the minimal precise
   // fix. Phase 2b's state-machine extraction will revisit this
   // structurally.
-  const turnInFlightAtReceipt = activeTurnStartedAt.size > 0
+  // PR3b: gated on claudeBusyKeys (turns claude has been handed) not
+  // activeTurnStartedAt (eager set on receipt). In supergroup mode,
+  // topic A active + topic B inbound arriving: pre-fix, B saw
+  // turnInFlightAtReceipt=true because A's key was in
+  // activeTurnStartedAt, AND B's fresh-turn branch then eagerly set
+  // its OWN key — wedging the gate forever (claude is idle on B but
+  // no turn_end ever fires). With claudeBusyKeys, B sees true (A is
+  // busy) → B is buffered correctly, AND the gate cleanly reopens
+  // when A's turn_end deletes keyA → flush triggers → B delivered.
+  const turnInFlightAtReceipt = claudeBusyKeys.size > 0
   const access = result.access
   const from = ctx.from!
@@ -8167,11 +8294,17 @@ async function handleInbound(
   // hygiene). The add-flow intercept comes first because /auth add
   // creates fresh credentials at the broker layer, vs /reauth which
   // mutates an existing agent's slot — different success paths.
-  const pendingAdd = pendingAuthAddFlows.get(chat_id)
+  //
+  // PR3 supergroup-mode: keyed by chatKey(chat, thread) so an OAuth
+  // code pasted into topic A isn't intercepted when topic B has a
+  // separate /auth add flow pending (security: prevents cross-topic
+  // credential mis-attribution).
+  const interceptKey = chatKey(chat_id, messageThreadId) as string
+  const pendingAdd = pendingAuthAddFlows.get(interceptKey)
   if (pendingAdd && looksLikeAuthCode(text)) {
     const elapsed = Date.now() - pendingAdd.startedAt
     if (elapsed < REAUTH_INTERCEPT_TTL_MS) {
-      pendingAuthAddFlows.delete(chat_id)
+      pendingAuthAddFlows.delete(interceptKey)
       try {
         const credentials = await submitAccountAuthCode(pendingAdd, text.trim())
         try {
@@ -8212,15 +8345,15 @@ async function handleInbound(
     // Stale — drop the pending entry but let the message fall through
     // to other intercepts (defensively wipe scratch).
     cancelAccountAuthSession(pendingAdd)
-    pendingAuthAddFlows.delete(chat_id)
+    pendingAuthAddFlows.delete(interceptKey)
   }
   // Auth-code intercept
-  const pendingReauth = pendingReauthFlows.get(chat_id)
+  const pendingReauth = pendingReauthFlows.get(interceptKey)
   if (pendingReauth && looksLikeAuthCode(text)) {
     const elapsed = Date.now() - pendingReauth.startedAt
     if (elapsed < REAUTH_INTERCEPT_TTL_MS) {
-      pendingReauthFlows.delete(chat_id)
+      pendingReauthFlows.delete(interceptKey)
       const { result, errorText } = execAuthCode(pendingReauth.agent, text.trim())
       if (errorText) {
         await switchroomReply(ctx, `<b>auth code failed:</b>\n${preBlock(formatSwitchroomOutput(errorText))}`, { html: true })
@@ -8242,7 +8375,7 @@ async function handleInbound(
       redactAuthCodeMessage(bot.api as never, chat_id, msgId ?? null, line => process.stderr.write(line))
       return
     }
-    pendingReauthFlows.delete(chat_id)
+    pendingReauthFlows.delete(interceptKey)
   }
   // Vault intercept
@@ -8740,7 +8873,10 @@ async function handleInbound(
         // turn-end (`purgeReactionTracking → stopTurnTypingLoop`).
         // Deterministic, framework-owned, no prose — the mechanical
         // ambient layer of the pacing contract.
-        startTurnTypingLoop(chat_id)
+        // PR3 supergroup-mode: pass thread so the indicator lands in
+        // this turn's topic (otherwise topic A's turn-end would kill
+        // topic B's typing indicator on shared chat_id keying).
+        startTurnTypingLoop(chat_id, messageThreadId ?? null)
         // #1122 KPI: emit turn_started so dashboards can compute funnel
         // start counts + correlate to turn_ended for duration / TTFO.
         emitRuntimeMetric({
@@ -8951,6 +9087,7 @@ async function handleInbound(
   }
   const delivered = ipcServer.sendToAgent(selfAgent, inboundMsg)
+  if (delivered) markClaudeBusyForInbound(inboundMsg)
   if (!delivered) {
     pendingInboundBuffer.push(selfAgent, inboundMsg)
     const threadOpts = messageThreadId != null ? { message_thread_id: messageThreadId } : {}
@@ -9194,7 +9331,10 @@ function resolveBootChatId(
   marker: { chat_id: string; thread_id: number | null; ack_message_id: number | null; ts: number } | null,
   ageMs?: number,
 ): { chatId: string; threadId: number | undefined; ackMsgId: number | undefined } | null {
-  // 1. Restart marker
+  // 1. Restart marker — operator-initiated; honor where they typed
+  //    /restart. The marker carries the exact chat+thread context; no
+  //    routing override because the user expects to see the boot card
+  //    in the same lane where they invoked the restart.
   if (marker != null && (ageMs == null || ageMs < 5 * 60_000)) {
     return {
       chatId: marker.chat_id,
@@ -9202,9 +9342,19 @@ function resolveBootChatId(
       ackMsgId: marker.ack_message_id ?? undefined,
     }
   }
+  // For non-marker paths (spontaneous boot, crash recovery, env var,
+  // history fallback): supergroup-mode agents route the boot card to
+  // the `alerts` alias topic (or default_topic_id fallback) so the
+  // operator sees lifecycle events in a predictable lane instead of
+  // chat-root. For fleet-mode / DM agents the helper returns undefined
+  // → behavior unchanged (lands at chat-root as today). PR4b of
+  // supergroup-mode rollout (docs/rfcs/supergroup-mode.md).
+  const supergroupBootTopic = resolveAgentOutboundTopic({ kind: 'boot' })
   // 2. Env var
   const envChat = process.env.SUBAGENT_OWNER_CHAT_ID
-  if (envChat) return { chatId: envChat, threadId: undefined, ackMsgId: undefined }
+  if (envChat) return { chatId: envChat, threadId: supergroupBootTopic, ackMsgId: undefined }
   // 3. Most-recent inbound from history
   if (HISTORY_ENABLED) {
     try {
@@ -9212,7 +9362,7 @@ function resolveBootChatId(
       const ownerChatId = access.allowFrom[0]
       if (ownerChatId) {
         const recent = queryHistory({ chat_id: ownerChatId, limit: 1 })
-        if (recent.length > 0) return { chatId: ownerChatId, threadId: undefined, ackMsgId: undefined }
+        if (recent.length > 0) return { chatId: ownerChatId, threadId: supergroupBootTopic, ackMsgId: undefined }
       }
     } catch {}
   }
@@ -9220,6 +9370,40 @@ function resolveBootChatId(
   return null
 }
+/**
+ * Resolve the supergroup-mode topic for an outbound event, or
+ * undefined when the agent isn't in supergroup-owned mode. Best-effort
+ * — any config-read failure returns undefined and the caller falls
+ * through to today's behavior. Generic over every OutboundEvent
+ * variant so the same helper backs boot card, compact card, vault,
+ * permission, hostd, and watchdog emitters.
+ *
+ * Called sparingly (boot/reconnect, compaction edges, approval-card
+ * dispatch) — not per turn — so the cost of a fresh config-read per
+ * call is well within budget.
+ *
+ * @param event - The outbound event to route, typed as the second
+ *   parameter of the topic-router helper. Call sites visible in this
+ *   file pass `{ kind: 'compact-watchdog' }` and `{ kind: 'boot' }`.
+ * @returns The value produced by the topic-router helper, or
+ *   undefined when any of the following holds: SWITCHROOM_AGENT_NAME
+ *   is unset or empty; the loaded config has no entry for that agent;
+ *   the resolved agent config has no `channels.telegram` section; or
+ *   loading/resolving the config throws (the error is swallowed on
+ *   purpose so callers keep their existing fallback).
+ *
+ * NOTE(review): the resolved telegram config is passed to the helper
+ * through a type assertion; this assumes it is structurally
+ * compatible with TopicRouterConfig — confirm against topic-router.
+ */
+function resolveAgentOutboundTopic(
+  event: Parameters<typeof resolveOutboundTopicHelper>[1],
+): number | undefined {
+  const agentName = process.env.SWITCHROOM_AGENT_NAME
+  if (!agentName) return undefined
+  try {
+    const cfg = loadSwitchroomConfig()
+    const rawAgent = cfg.agents?.[agentName]
+    if (!rawAgent) return undefined
+    const resolved = resolveAgentConfig(cfg.defaults, cfg.profiles, rawAgent)
+    const tg = resolved.channels?.telegram
+    if (!tg) return undefined
+    // The router treats the absence of default_topic_id as
+    // "fleet-mode" and returns undefined for ops-lane events (the
+    // caller's existing fallback). Only supergroup-owned agents
+    // (with default_topic_id set) get a routed value.
+    return resolveOutboundTopicHelper(tg as _OutboundRouterConfig, event)
+  } catch {
+    return undefined
+  }
+}
 /**
  * Stamp a user-facing restart reason into the clean-shutdown marker
  * (same file the SIGTERM handler writes to and the next session greeting
@@ -11242,19 +11426,24 @@ bot.command("auth", async ctx => {
       )
       return
     }
+    // PR3 supergroup-mode: key auth-add flows by (chat, thread) so
+    // separate flows in two topics of one supergroup can't collide.
+    // In DM chats message_thread_id is undefined → key collapses to
+    // `chatId:_`, identical to today's behavior.
+    const authAddKey = chatKey(chatId, ctx.message?.message_thread_id ?? null) as string
     if (parsed.kind === 'cancel') {
-      const existing = pendingAuthAddFlows.get(chatId)
+      const existing = pendingAuthAddFlows.get(authAddKey)
       if (!existing) {
         await switchroomReply(ctx, "<i>No pending <code>/auth add</code> flow in this chat.</i>", { html: true })
         return
       }
       cancelAccountAuthSession(existing)
-      pendingAuthAddFlows.delete(chatId)
+      pendingAuthAddFlows.delete(authAddKey)
       await switchroomReply(ctx, "Cancelled.", { html: true })
       return
     }
     // parsed.kind === 'add'
-    if (pendingAuthAddFlows.has(chatId)) {
+    if (pendingAuthAddFlows.has(authAddKey)) {
       await switchroomReply(
         ctx,
         "<i>An <code>/auth add</code> flow is already in progress for this chat. " +
@@ -11265,7 +11454,7 @@ bot.command("auth", async ctx => {
     }
     try {
       const { loginUrl, scratchDir, child } = await startAccountAuthSession(parsed.label)
-      pendingAuthAddFlows.set(chatId, {
+      pendingAuthAddFlows.set(authAddKey, {
         label: parsed.label,
         scratchDir,
         child,
@@ -11822,6 +12011,7 @@ async function performVaultAccessApproval(
     operatorId: senderId,
   })
   const delivered = ipcServer.sendToAgent(pending.agent, synthetic)
+  if (delivered) markClaudeBusyForInbound(synthetic)
   process.stderr.write(
     `telegram gateway: vault_grant_approved injection agent=${pending.agent} ` +
     `key=${pending.key} stage=${stageId} delivered=${delivered}\n`,
@@ -11901,6 +12091,7 @@ async function handleVaultRequestAccessCallback(ctx: Context, data: string): Pro
       operatorId: senderId,
     })
     const denyDelivered = ipcServer.sendToAgent(pending.agent, denyInbound)
+    if (denyDelivered) markClaudeBusyForInbound(denyInbound)
     process.stderr.write(
       `telegram gateway: vault_grant_denied injection agent=${pending.agent} ` +
       `key=${pending.key} stage=${stageId} delivered=${denyDelivered}\n`,
@@ -12051,6 +12242,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
       operatorId: senderId,
     })
     const dDelivered = ipcServer.sendToAgent(pending.agent, discardInbound)
+    if (dDelivered) markClaudeBusyForInbound(discardInbound)
     process.stderr.write(
       `telegram gateway: vault_save_discarded injection agent=${pending.agent} ` +
       `key=${pending.key} stage=${stageId} delivered=${dDelivered}\n`,
@@ -12174,6 +12366,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
         reason: failReason,
       })
       const fDelivered = ipcServer.sendToAgent(pending.agent, failInbound)
+      if (fDelivered) markClaudeBusyForInbound(failInbound)
       process.stderr.write(
         `telegram gateway: vault_save_failed injection agent=${pending.agent} ` +
         `key=${pending.key} stage=${stageId} delivered=${fDelivered}\n`,
@@ -12203,6 +12396,7 @@ async function handleVaultRequestSaveCallback(ctx: Context, data: string): Promi
       operatorId: senderId,
     })
     const okDelivered = ipcServer.sendToAgent(pending.agent, okInbound)
+    if (okDelivered) markClaudeBusyForInbound(okInbound)
     process.stderr.write(
       `telegram gateway: vault_save_completed injection agent=${pending.agent} ` +
       `key=${pending.key} stage=${stageId} delivered=${okDelivered}\n`,
@@ -13029,7 +13223,14 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
         parseMode: 'HTML',
         synthInbound: async () => {
           await runSwitchroomAuthCommand(ctx, ['auth', 'reauth', agent], `auth reauth ${agent}`)
-          pendingReauthFlows.set(String(ctx.chat!.id), { agent, startedAt: Date.now() })
+          // PR3 supergroup-mode: key by (chat, thread) so an OAuth code
+          // pasted into a different topic isn't mistakenly intercepted
+          // as this flow's reauth code.
+          const reauthThreadId = ctx.callbackQuery?.message?.message_thread_id
+          pendingReauthFlows.set(
+            chatKey(String(ctx.chat!.id), reauthThreadId ?? null) as string,
+            { agent, startedAt: Date.now() },
+          )
         },
       })
       return
@@ -14194,6 +14395,7 @@ bot.on('callback_query:data', async ctx => {
     // by onClientRegistered) makes the "queued" promise real.
     const selfAgentBtn = process.env.SWITCHROOM_AGENT_NAME ?? ''
     const btnDelivered = ipcServer.sendToAgent(selfAgentBtn, inboundMsg)
+    if (btnDelivered) markClaudeBusyForInbound(inboundMsg)
     if (!btnDelivered) {
       pendingInboundBuffer.push(selfAgentBtn, inboundMsg)
       // No registered bridge — the agent's mid-restart. Tell the user
@@ -15078,6 +15280,7 @@ function flushReactionBatch(batch: ReactionBatch): void {
     meta,
   }
   const delivered = ipcServer.sendToAgent(agentName, inbound)
+  if (delivered) markClaudeBusyForInbound(inbound)
   process.stderr.write(
     `telegram gateway: reactions.dispatch agent=${agentName} chat=${batch.chatId} ` +
     `count=${batch.reactions.length} batched=${batch.batched} delivered=${delivered}\n`,