npm - switchroom - Versions diffs - 0.14.64 → 0.14.66 - Mend

switchroom 0.14.64 → 0.14.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/cli/switchroom.js +3 -3
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +109 -20
package/telegram-plugin/gateway/answer-thread-resolve.test.ts +85 -0
package/telegram-plugin/gateway/answer-thread-resolve.ts +30 -4
package/telegram-plugin/gateway/gateway.ts +174 -19
package/telegram-plugin/gateway/source-message-id.test.ts +32 -0
package/telegram-plugin/gateway/source-message-id.ts +41 -0
package/telegram-plugin/gateway/status-surface-log.test.ts +98 -0
package/telegram-plugin/gateway/status-surface-log.ts +102 -0
package/telegram-plugin/silence-poke.ts +47 -0
package/telegram-plugin/tests/multitopic-routing-wiring.test.ts +4 -2
package/telegram-plugin/tests/silence-poke.test.ts +69 -1
package/telegram-plugin/tests/worker-activity-feed.test.ts +61 -0
package/telegram-plugin/worker-activity-feed.ts +15 -0

package/dist/cli/switchroom.js CHANGED Viewed

@@ -49452,8 +49452,8 @@ var {
 } = import__.default;
 // src/build-info.ts
-var VERSION = "0.14.64";
-var COMMIT_SHA = "fb6bbe00";
+var VERSION = "0.14.66";
+var COMMIT_SHA = "0f4f029d";
 // src/cli/agent.ts
 init_source();
@@ -52027,7 +52027,7 @@ function buildSettingsHooksBlock(p) {
 ` + 'Do NOT send a trailing confirmation after your answer \u2014 no "Done.", ' + '"Sent.", "Hope that helps." as a separate message once you have ' + "already replied. Your answer is the last thing the user should " + `see; a follow-up "Done." is dead-air clutter (and the user's ` + `device already pinged on the answer). Stop after the answer.
-` + 'CRITICAL: "answer" means a call to the reply tool ' + "(mcp__switchroom-telegram__reply, or stream_reply with done=true). " + "Your terminal/transcript text is NEVER delivered to Telegram \u2014 the " + "user sees only what you send through the reply tool. After a long " + "tool sequence (scheduling, multi-step research, sub-agent handback), " + "do not let your closing narration stand as the answer: end the turn " + "by passing that narration to the reply tool. No reply tool call = the " + "user got nothing, however much text you wrote.</turn-pacing>";
+` + 'CRITICAL: "answer" means a call to the reply tool ' + "(mcp__switchroom-telegram__reply, or stream_reply with done=true). " + "Your terminal/transcript text is NEVER delivered to Telegram \u2014 the " + "user sees only what you send through the reply tool. After a long " + "tool sequence (scheduling, multi-step research, sub-agent handback), " + "do not let your closing narration stand as the answer: end the turn " + "by passing that narration to the reply tool. No reply tool call = the " + "user got nothing, however much text you wrote. Call the reply tool as " + "your FIRST action when you have the answer \u2014 do not write it out as " + "transcript text first and call reply afterward: a framework backstop " + "flushes unsent text after a delay and then your real reply lands late " + "and out of order.</turn-pacing>";
   const switchroomUserPromptSubmit = [
     ...useHotReloadStable ? [
       {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "switchroom",
-  "version": "0.14.64",
+  "version": "0.14.66",
   "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
   "type": "module",
   "bin": {

package/telegram-plugin/dist/gateway/gateway.js CHANGED Viewed

@@ -32622,6 +32622,7 @@ function createWorkerActivityFeed(opts) {
         h.messageId = sent.message_id;
         h.lastBody = body;
         h.lastEditAt = nowFn();
+        log(`worker-feed: paint agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} bytes=${body.length}`);
       } catch (err) {
         noteRateLimited(h, err, "send");
         log(`worker-feed: send failed: ${err.message}`);
@@ -32636,6 +32637,7 @@ function createWorkerActivityFeed(opts) {
       await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h));
       h.lastBody = body;
       h.lastEditAt = nowFn();
+      log(`worker-feed: edit agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} bytes=${body.length}`);
     } catch (err) {
       noteRateLimited(h, err, "edit");
       log(`worker-feed: edit failed, will re-post: ${err.message}`);
@@ -32656,6 +32658,7 @@ function createWorkerActivityFeed(opts) {
       await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h));
       h.lastBody = body;
       h.lastEditAt = nowFn();
+      log(`worker-feed: finish agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} state=${view.state} bytes=${body.length}`);
     } catch (err) {
       noteRateLimited(h, err, "finish");
       log(`worker-feed: finish edit failed: ${err.message}`);
@@ -32674,6 +32677,7 @@ function createWorkerActivityFeed(opts) {
       let h = handles.get(agentId);
       if (h == null) {
         h = {
+          agentId,
           chatId,
           threadId,
           messageId: null,
@@ -32708,6 +32712,38 @@ function createWorkerActivityFeed(opts) {
   };
 }
+// gateway/status-surface-log.ts
+function formatTurnLifecycle(action, reason, t, now) {
+  const ageMs = action === "clear" ? Math.max(0, now - t.startedAt) : 0;
+  return `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` + `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? "-"} ` + `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? "none"} ` + `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` + `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`;
+}
+function detectStatusSurfaceDegraded(t) {
+  if (t.toolCallCount === 0)
+    return null;
+  if (t.activityEverOpened)
+    return null;
+  if (t.activityDrainFailures === 0)
+    return null;
+  return {
+    reason: "feed-never-opened",
+    detail: `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` + `activityMsgId=none \u2014 the live activity feed failed every send this turn ` + `(card was dark despite tool work)`
+  };
+}
+// gateway/source-message-id.ts
+var MAX_TELEGRAM_MESSAGE_ID = 2 ** 31;
+function parseSourceMessageId(raw) {
+  if (raw == null)
+    return null;
+  const s = String(raw);
+  if (!/^\d+$/.test(s))
+    return null;
+  const n = Number(s);
+  if (!Number.isSafeInteger(n) || n <= 0 || n >= MAX_TELEGRAM_MESSAGE_ID)
+    return null;
+  return n;
+}
 // tool-names.ts
 var TELEGRAM_TOOL_PREFIX_RE = /^mcp__[^_].*?telegram__/;
 function stripPrefix(toolName) {
@@ -39067,6 +39103,9 @@ function tick(now) {
     if (silence < 0)
       continue;
     if (!s.fallbackFired && silence >= thresholds.fallback) {
+      if (activeDeps.deferFallbackWhileToolInFlight === true && s.inFlightTools.size > 0 && silence < (thresholds.fallbackHardCeiling ?? Number.POSITIVE_INFINITY)) {
+        continue;
+      }
       s.fallbackFired = true;
       const { chatId, threadId } = parseKey(key);
       const recentThinking = s.lastThinkingAt != null && now - s.lastThinkingAt < 30000;
@@ -47897,6 +47936,10 @@ function resolveAnswerThreadId(input) {
     return input.explicitThreadId;
   if (input.originResolved)
     return input.originThreadId;
+  if (input.liveThreadId != null)
+    return input.liveThreadId;
+  if (input.lastEndedResolvedForChat)
+    return input.lastEndedThreadIdForChat;
   return input.liveThreadId;
 }
@@ -52720,11 +52763,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
 }
 // ../src/build-info.ts
-var VERSION = "0.14.64";
-var COMMIT_SHA = "fb6bbe00";
-var COMMIT_DATE = "2026-06-04T23:21:00Z";
-var LATEST_PR = 2161;
-var COMMITS_AHEAD_OF_TAG = 0;
+var VERSION = "0.14.66";
+var COMMIT_SHA = "0f4f029d";
+var COMMIT_DATE = "2026-06-05T07:05:45Z";
+var LATEST_PR = 2167;
+var COMMITS_AHEAD_OF_TAG = 2;
 // gateway/boot-version.ts
 function formatRelativeAgo(iso) {
@@ -54022,6 +54065,33 @@ function findTurnByOriginId(originTurnId) {
     return currentTurn;
   return recentTurnsById.get(originTurnId) ?? null;
 }
+var LATE_REPLY_TOPIC_RECOVERY_ENABLED = process.env.SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY !== "0";
+function findLatestEndedTurnForChat(chatId) {
+  let latest = null;
+  for (const t of recentTurnsById.values()) {
+    if (t.sessionChatId === chatId)
+      latest = t;
+  }
+  return latest;
+}
+function resolveAnswerThreadWithLog(chatId, explicitThreadId, originTurn, liveTurn, surface) {
+  const recovered = LATE_REPLY_TOPIC_RECOVERY_ENABLED && explicitThreadId == null && originTurn == null && liveTurn?.sessionThreadId == null ? findLatestEndedTurnForChat(chatId) : null;
+  const threadId = resolveAnswerThreadId({
+    explicitThreadId,
+    originResolved: originTurn != null,
+    originThreadId: originTurn?.sessionThreadId,
+    liveThreadId: liveTurn?.sessionThreadId,
+    lastEndedResolvedForChat: recovered != null,
+    lastEndedThreadIdForChat: recovered?.sessionThreadId
+  });
+  const via = explicitThreadId != null ? "explicit" : originTurn != null ? "origin" : liveTurn?.sessionThreadId != null ? "live" : recovered != null ? "recovered" : "none";
+  const ownerTurn = originTurn ?? recovered ?? liveTurn;
+  const isSupergroup = chatId.startsWith("-100");
+  const unrouted = isSupergroup && threadId == null;
+  process.stderr.write(`telegram gateway: reply-route surface=${surface} chat=${chatId} resolved_thread=${threadId ?? "-"} via=${via} late=${liveTurn == null} originTurn=${ownerTurn?.turnId ?? "-"} origin_thread=${ownerTurn?.sessionThreadId ?? "-"}` + (via === "recovered" ? " RECOVERED" : "") + (unrouted ? " UNROUTED(supergroup\u2192no-topic)" : "") + `
+`);
+  return threadId;
+}
 function closeObligationOnSubstantiveReply(args, liveTurn) {
   if (!OBLIGATION_LEDGER_ENABLED)
     return;
@@ -54267,6 +54337,13 @@ function endCurrentTurnAtomic(turn) {
   if (currentTurn !== turn)
     return;
   currentTurn = null;
+  process.stderr.write(`telegram gateway: ${formatTurnLifecycle("clear", "turn_end", turn, Date.now())}
+`);
+  const degraded = detectStatusSurfaceDegraded(turn);
+  if (degraded != null) {
+    process.stderr.write(`telegram gateway: status-surface DEGRADED reason=${degraded.reason} turnId=${turn.turnId} chat=${turn.sessionChatId} thread=${turn.sessionThreadId ?? "-"} ${degraded.detail}
+`);
+  }
   if (OBLIGATION_LEDGER_ENABLED) {
     if (turn.finalAnswerDelivered) {
       obligationLedger.close(turn.turnId);
@@ -54344,7 +54421,11 @@ async function postCompactCard(occ, cap) {
     const chatId = loadAccess().allowFrom[0];
     if (!chatId)
       return;
-    const threadId = resolveAgentOutboundTopic({ kind: "compact-watchdog" }) ?? chatThreadMap.get(chatId);
+    const threadId = topicForRecipient({
+      recipientChatId: chatId,
+      resolvedTopic: resolveAgentOutboundTopic({ kind: "compact-watchdog" }) ?? chatThreadMap.get(chatId),
+      supergroupChatId: resolveAgentSupergroupChatId()
+    });
     const text = `\uD83D\uDDDC\uFE0F <b>Context compaction</b>
 ` + `Working context hit ~${occ.toLocaleString()} tokens (cap ${cap.toLocaleString()}) \u2014 running <code>/compact</code>. ` + `Older detail moves to Hindsight; I'll confirm here once the context has shrunk (may take a turn or two).`;
     const sent = await swallowingApiCall(() => bot.api.sendMessage(chatId, text, {
@@ -55169,7 +55250,19 @@ function ensureIssuesCard(chatId, threadId) {
   }
 }
 var inFlightUpdate = null;
+function parsePositiveMsEnv(name, fallbackMs) {
+  const raw = process.env[name];
+  if (raw == null || raw === "")
+    return fallbackMs;
+  const n = Number(raw);
+  return Number.isFinite(n) && n > 0 ? Math.floor(n) : fallbackMs;
+}
+var SILENCE_FALLBACK_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_MS", 300000);
+var SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_HARD_MS", 900000);
+var SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === "1";
 startTimer({
+  thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
+  deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
   emitMetric: (event) => {
     emitRuntimeMetric(event);
   },
@@ -56229,12 +56322,7 @@ ${url}`;
   if (TURN_ORIGIN_ROUTING_ENABLED) {
     const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined;
     const originTurn = findTurnByOriginId(args.origin_turn_id);
-    threadId = resolveAnswerThreadId({
-      explicitThreadId: Number.isFinite(explicit) ? explicit : undefined,
-      originResolved: originTurn != null,
-      originThreadId: originTurn?.sessionThreadId,
-      liveThreadId: turn?.sessionThreadId
-    });
+    threadId = resolveAnswerThreadWithLog(chat_id, Number.isFinite(explicit) ? explicit : undefined, originTurn, turn, "reply");
   } else {
     threadId = resolveThreadId(chat_id, args.message_thread_id ?? (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined));
   }
@@ -56594,12 +56682,7 @@ async function executeStreamReply(args) {
     let injected;
     if (TURN_ORIGIN_ROUTING_ENABLED) {
       const originTurn = findTurnByOriginId(args.origin_turn_id);
-      injected = resolveAnswerThreadId({
-        explicitThreadId: undefined,
-        originResolved: originTurn != null,
-        originThreadId: originTurn?.sessionThreadId,
-        liveThreadId: turn?.sessionThreadId
-      });
+      injected = resolveAnswerThreadWithLog(String(args.chat_id), undefined, originTurn, turn, "stream_reply");
     } else {
       injected = turn?.sessionThreadId;
     }
@@ -57760,6 +57843,7 @@ async function drainActivitySummary(turn) {
             ...replyAnchor
           }), { chat_id: chat, ...thread != null ? { threadId: thread } : {}, verb: "activity-summary.send" });
           turn.activityMessageId = sent.message_id;
+          turn.activityEverOpened = true;
         } else {
           const id = turn.activityMessageId;
           await robustApiCall(() => bot.api.editMessageText(chat, id, html, { parse_mode: "HTML" }), { chat_id: chat, ...thread != null ? { threadId: thread } : {}, verb: "activity-summary.edit" });
@@ -57768,7 +57852,8 @@ async function drainActivitySummary(turn) {
       } catch (err) {
         const msg = err instanceof Error ? err.message : String(err);
         if (!msg.includes("message is not modified")) {
-          process.stderr.write(`telegram gateway: activity-summary drain failed: ${msg}
+          turn.activityDrainFailures += 1;
+          process.stderr.write(`telegram gateway: activity-summary drain failed: ${msg} (chat=${chat} thread=${thread ?? "-"} replyAnchor=${turn.sourceMessageId ?? "none"} everOpened=${turn.activityEverOpened} failures=${turn.activityDrainFailures})
 `);
         }
         turn.activityLastSentRender = target;
@@ -57855,7 +57940,7 @@ function handleSessionEvent(ev) {
         const next = {
           sessionChatId: ev.chatId,
           sessionThreadId: enqThreadIdNum,
-          sourceMessageId: ev.messageId != null && /^\d+$/.test(ev.messageId) ? Number(ev.messageId) : null,
+          sourceMessageId: parseSourceMessageId(ev.messageId),
           startedAt,
           gatewayReceiveAt: startedAt,
           replyCalled: false,
@@ -57876,12 +57961,16 @@ function handleSessionEvent(ev) {
           activityInFlight: null,
           activityPendingRender: null,
           activityLastSentRender: null,
+          activityEverOpened: false,
+          activityDrainFailures: 0,
           mirrorLines: [],
           foregroundSubAgents: new Map,
           answerStream: null,
           isDm: isDmChatId(ev.chatId)
         };
         currentTurn = next;
+        process.stderr.write(`telegram gateway: ${formatTurnLifecycle("set", "enqueue", next, startedAt)}
+`);
         rememberRecentTurn(next);
         promoteQueuedStatus(ev.chatId, enqThreadIdNum);
         if (DELIVERY_CONFIRM_ENABLED) {

package/telegram-plugin/gateway/answer-thread-resolve.test.ts ADDED Viewed

@@ -0,0 +1,85 @@
+import { describe, it, expect } from 'vitest'
+import { resolveAnswerThreadId } from './answer-thread-resolve.js'
+describe('resolveAnswerThreadId — precedence', () => {
+  it('(1) explicit model thread wins over everything', () => {
+    expect(
+      resolveAnswerThreadId({
+        explicitThreadId: 7,
+        originResolved: true,
+        originThreadId: 3,
+        liveThreadId: 4,
+        lastEndedResolvedForChat: true,
+        lastEndedThreadIdForChat: 9,
+      }),
+    ).toBe(7)
+  })
+  it('(2) origin turn thread wins over the live turn (the Brevo→Meta fix)', () => {
+    expect(
+      resolveAnswerThreadId({ originResolved: true, originThreadId: 3, liveThreadId: 4 }),
+    ).toBe(3)
+  })
+  it('(2) a DM origin (resolved, thread undefined) pins to undefined, not the live thread', () => {
+    expect(
+      resolveAnswerThreadId({ originResolved: true, originThreadId: undefined, liveThreadId: 4 }),
+    ).toBeUndefined()
+  })
+  it('(3) no origin → falls back to the live turn thread (legacy #1664)', () => {
+    expect(
+      resolveAnswerThreadId({ originResolved: false, liveThreadId: 4 }),
+    ).toBe(4)
+  })
+  // ── tier (4): late-reply topic recovery (2026-06-05) ──────────────────────
+  it('(4) no explicit, no origin, NO live turn → recovers the most-recent ended turn thread', () => {
+    // The marko bug: a reply that fired after the orphaned-reply backstop ended
+    // its turn. Pre-fix this returned undefined (General); now it recovers topic 3.
+    expect(
+      resolveAnswerThreadId({
+        originResolved: false,
+        liveThreadId: undefined,
+        lastEndedResolvedForChat: true,
+        lastEndedThreadIdForChat: 3,
+      }),
+    ).toBe(3)
+  })
+  it('(4) a recovered DM turn (ended, thread undefined) stays threadless', () => {
+    expect(
+      resolveAnswerThreadId({
+        originResolved: false,
+        liveThreadId: undefined,
+        lastEndedResolvedForChat: true,
+        lastEndedThreadIdForChat: undefined,
+      }),
+    ).toBeUndefined()
+  })
+  it('(4) recovery does NOT override a live turn — live thread still wins at tier 3', () => {
+    expect(
+      resolveAnswerThreadId({
+        originResolved: false,
+        liveThreadId: 4,
+        lastEndedResolvedForChat: true,
+        lastEndedThreadIdForChat: 3,
+      }),
+    ).toBe(4)
+  })
+  it('(4) no recovery candidate → legacy result (undefined), unchanged', () => {
+    expect(
+      resolveAnswerThreadId({
+        originResolved: false,
+        liveThreadId: undefined,
+        lastEndedResolvedForChat: false,
+      }),
+    ).toBeUndefined()
+  })
+  it('pure DM (every tier undefined) → undefined', () => {
+    expect(resolveAnswerThreadId({ originResolved: false })).toBeUndefined()
+  })
+})

package/telegram-plugin/gateway/answer-thread-resolve.ts CHANGED Viewed

@@ -26,10 +26,14 @@
  *   3. Else the LIVE turn's thread — but ONLY when the live turn IS the
  *      origin turn (no flip happened) OR no origin turn could be resolved
  *      at all (origin id absent/unknown; legacy / pre-stamp path).
- *   4. Else (origin resolved AND it differs from the live turn) we pin to
- *      the origin thread and explicitly DO NOT fall through to the chat's
- *      last-seen `chatThreadMap` thread. For answer surfaces the chat
- *      last-seen heuristic is exactly what produced the wrong-topic bug.
+ *   4. Else (no explicit, no origin echoed, no live turn) — a LATE reply that
+ *      fired after its turn already ended (the orphaned-reply backstop case) —
+ *      recover the origin topic from the most-recently-ended turn for this
+ *      chat. Without this, such a reply defaults to the main chat (General in a
+ *      supergroup) and its answer vanishes from the topic the user is reading
+ *      (the 2026-06-05 marko triage). Still NOT the `chatThreadMap` last-seen
+ *      heuristic — the recovered turn is the chat's own most-recent turn, not
+ *      whichever topic last received any message.
  *
  * The `chatThreadMap` last-seen fallback is preserved for NON-answer
  * surfaces (`send_typing`, `forward_message`, `progress_update`) by NOT
@@ -53,6 +57,20 @@ export interface AnswerThreadInput {
    *  (no live turn, or a DM live turn). The legacy (#1664) fallback when
    *  no origin turn is resolvable. */
   liveThreadId?: number | undefined
+  /**
+   * Late-reply topic recovery (2026-06-05). Thread of the most-recently-ended
+   * turn for THIS chat (from `recentTurnsById`), used as a deterministic
+   * fallback when the model echoed no `origin_turn_id` AND there is no live
+   * turn — the late-reply-after-turn-end case. Without it, a reply that fires
+   * after the orphaned-reply backstop closed its turn defaults to the main chat
+   * (General topic in a supergroup), so its answer vanishes from the topic the
+   * user is reading. Only consulted at tier (4); a DM origin yields undefined,
+   * which is correct.
+   */
+  lastEndedThreadIdForChat?: number | undefined
+  /** Whether a recently-ended turn exists for this chat — distinguishes
+   *  "ended turn exists, DM (thread undefined)" from "no ended turn at all". */
+  lastEndedResolvedForChat?: boolean
 }
 /**
@@ -75,5 +93,13 @@ export function resolveAnswerThreadId(input: AnswerThreadInput): number | undefi
   if (input.originResolved) return input.originThreadId
   // (3) no origin resolved (legacy / pre-stamp / evicted) → fall back to
   //     the live turn's thread, the existing turn-pinned behaviour (#1664).
+  if (input.liveThreadId != null) return input.liveThreadId
+  // (4) no explicit, no origin echoed, no live turn — a LATE reply that fired
+  //     after its turn already ended (the orphaned-reply backstop case).
+  //     Recover the origin topic from the most-recently-ended turn for this
+  //     chat so the answer lands in the topic it belongs to instead of
+  //     defaulting to the main chat (General). When no ended turn is known,
+  //     fall through to liveThreadId (undefined) — the legacy result.
+  if (input.lastEndedResolvedForChat) return input.lastEndedThreadIdForChat
   return input.liveThreadId
 }

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -65,6 +65,8 @@ import {
 import { StatusReactionController } from '../status-reactions.js'
 import { DeferredDoneReactions } from '../reaction-defer.js'
 import { createWorkerActivityFeed, isWorkerActivityFeedEnabled } from '../worker-activity-feed.js'
+import { formatTurnLifecycle, detectStatusSurfaceDegraded } from './status-surface-log.js'
+import { parseSourceMessageId } from './source-message-id.js'
 import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
 import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
 import { toolLabel } from '../tool-labels.js'
@@ -1798,6 +1800,14 @@ type CurrentTurn = {
   activityInFlight: Promise<void> | null
   activityPendingRender: string | null
   activityLastSentRender: string | null
+  // Status-surface observability. `activityEverOpened` is sticky-true once the
+  // feed posts its first message — unlike `activityMessageId`, it is NOT nulled
+  // by `clearActivitySummary`, so the turn-end DEGRADED check can tell "feed
+  // never opened" (the resume-400 signature) from "feed finalized + cleared".
+  // `activityDrainFailures` counts real activity-feed send/edit failures this
+  // turn (429s + "message is not modified" excluded). Both reset per turn.
+  activityEverOpened: boolean
+  activityDrainFailures: number
   // Wall-clock anchor for the newest in-progress feed step — set each time a
   // tool_label re-renders the feed. The heartbeat (`feedHeartbeatTick`) reads
   // it to show a climbing " · Ns" elapsed on the live line so a long single
@@ -1878,6 +1888,83 @@ function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTur
   return recentTurnsById.get(originTurnId) ?? null
 }
+// Late-reply topic recovery (2026-06-05 marko triage). Default ON; kill switch
+// SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY=0 restores the legacy behaviour (a late
+// reply with no echoed origin and no live turn defaults to General).
+const LATE_REPLY_TOPIC_RECOVERY_ENABLED =
+  process.env.SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY !== '0'
+/**
+ * The most-recently-started turn for a chat from the bounded recently-ended
+ * registry — the deterministic fallback for a LATE answer reply when the model
+ * echoed no `origin_turn_id` and `currentTurn` has already cleared. Iterates in
+ * insertion order so the last match is the most recent turn for that chat.
+ * Returns null when the chat has no remembered turn (so the caller keeps the
+ * legacy result). NB: this is the chat's own most-recent TURN, not the
+ * `chatThreadMap` last-seen-any-message heuristic that caused the wrong-topic
+ * bug — a late reply almost always belongs to the turn that just ended.
+ */
+function findLatestEndedTurnForChat(chatId: string): CurrentTurn | null {
+  let latest: CurrentTurn | null = null
+  for (const t of recentTurnsById.values()) {
+    if (t.sessionChatId === chatId) latest = t
+  }
+  return latest
+}
+/**
+ * Resolve the answer-reply thread AND emit `reply-route` telemetry. The
+ * 2026-06-05 triage showed reply routing was the blind spot: `reply: invoked`
+ * logged only chat + char count, so a late reply landing in the wrong topic was
+ * invisible without hand-correlating raw tg-post threads against turn-lifecycle
+ * timestamps. This wrapper logs, per reply: which precedence tier won (`via`),
+ * the resolved thread, the origin turn + its thread, and whether the reply was
+ * late (turn already ended). `via=recovered` marks a late reply this fix saved
+ * from General; `UNROUTED` flags a supergroup reply that still resolved to no
+ * topic (the residual gap to watch).
+ */
+function resolveAnswerThreadWithLog(
+  chatId: string,
+  explicitThreadId: number | undefined,
+  originTurn: CurrentTurn | null,
+  liveTurn: CurrentTurn | null,
+  surface: 'reply' | 'stream_reply',
+): number | undefined {
+  const recovered =
+    LATE_REPLY_TOPIC_RECOVERY_ENABLED &&
+    explicitThreadId == null &&
+    originTurn == null &&
+    liveTurn?.sessionThreadId == null
+      ? findLatestEndedTurnForChat(chatId)
+      : null
+  const threadId = resolveAnswerThreadId({
+    explicitThreadId,
+    originResolved: originTurn != null,
+    originThreadId: originTurn?.sessionThreadId,
+    liveThreadId: liveTurn?.sessionThreadId,
+    lastEndedResolvedForChat: recovered != null,
+    lastEndedThreadIdForChat: recovered?.sessionThreadId,
+  })
+  const via =
+    explicitThreadId != null ? 'explicit'
+    : originTurn != null ? 'origin'
+    : liveTurn?.sessionThreadId != null ? 'live'
+    : recovered != null ? 'recovered'
+    : 'none'
+  const ownerTurn = originTurn ?? recovered ?? liveTurn
+  const isSupergroup = chatId.startsWith('-100')
+  const unrouted = isSupergroup && threadId == null
+  process.stderr.write(
+    `telegram gateway: reply-route surface=${surface} chat=${chatId} ` +
+      `resolved_thread=${threadId ?? '-'} via=${via} late=${liveTurn == null} ` +
+      `originTurn=${ownerTurn?.turnId ?? '-'} origin_thread=${ownerTurn?.sessionThreadId ?? '-'}` +
+      (via === 'recovered' ? ' RECOVERED' : '') +
+      (unrouted ? ' UNROUTED(supergroup→no-topic)' : '') +
+      '\n',
+  )
+  return threadId
+}
 /**
  * PR2 obligation-ledger CLOSE. Called when a SUBSTANTIVE final answer lands
  * (not a bare interim ack — using finalAnswerSubstantive, the #2141 signal): the
@@ -2488,6 +2575,20 @@ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
 function endCurrentTurnAtomic(turn: CurrentTurn): void {
   if (currentTurn !== turn) return
   currentTurn = null
+  // Status-surface observability: one line at every turn CLEAR (with how far
+  // the turn got), plus a DEGRADED warning when the turn did tool work but the
+  // live feed never opened because its sends failed (the resume-400 signature).
+  process.stderr.write(
+    `telegram gateway: ${formatTurnLifecycle('clear', 'turn_end', turn, Date.now())}\n`,
+  )
+  const degraded = detectStatusSurfaceDegraded(turn)
+  if (degraded != null) {
+    process.stderr.write(
+      `telegram gateway: status-surface DEGRADED reason=${degraded.reason} ` +
+        `turnId=${turn.turnId} chat=${turn.sessionChatId} ` +
+        `thread=${turn.sessionThreadId ?? '-'} ${degraded.detail}\n`,
+    )
+  }
   // PR2 obligation-ledger CLOSE-at-turn-end. Close the ended turn's obligation
   // when it delivered a final answer. finalAnswerDelivered is the right signal
   // HERE (not isSubstantiveFinalReply at reply-time): a SHORT genuine answer
@@ -2658,9 +2759,18 @@ async function postCompactCard(occ: number, cap: number): Promise<void> {
     // instead of conversation lanes. Fleet/DM agents fall through to
     // the existing chatThreadMap last-seen-thread fallback (no
     // observable change).
-    const threadId =
-      resolveAgentOutboundTopic({ kind: 'compact-watchdog' })
-      ?? chatThreadMap.get(chatId);
+    // The compact-watchdog topic is valid ONLY in the agent's supergroup;
+    // attaching it to an operator DM recipient 400s "message thread not found"
+    // and the notice silently vanishes (the marko #2096 class — proactiveCompact
+    // was the one operator-send still missing this guard, 2026-06-05). DM
+    // recipients get a thread-less send; the supergroup owner keeps the lane.
+    const threadId = topicForRecipient({
+      recipientChatId: chatId,
+      resolvedTopic:
+        resolveAgentOutboundTopic({ kind: 'compact-watchdog' })
+        ?? chatThreadMap.get(chatId),
+      supergroupChatId: resolveAgentSupergroupChatId(),
+    });
     const text =
       `🗜️ <b>Context compaction</b>\n` +
       `Working context hit ~${occ.toLocaleString()} tokens ` +
@@ -4546,7 +4656,27 @@ function ensureIssuesCard(chatId: string, threadId: number | undefined): void {
 // incident fix. In-memory only; a gateway recreate naturally resets it.
 let inFlightUpdate: { requestId: string; startedAt: number } | null = null
+// Fix A — silence-fallback tuning (status-surface darkening, 2026-06-05). A long
+// quiet tool stretch (foreground sub-agent / big research) crossed the 300s
+// fallback and nulled currentTurn, darkening the live activity feed mid-work.
+//   SWITCHROOM_SILENCE_FALLBACK_MS         — base threshold (default 300000)
+//   SWITCHROOM_SILENCE_FALLBACK_HARD_MS    — hard ceiling for the in-flight-tool
+//                                            defer (default 900000 = 15min)
+//   SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer (default OFF;
+//                                            canary on marko against #2162 telemetry)
+function parsePositiveMsEnv(name: string, fallbackMs: number): number {
+  const raw = process.env[name]
+  if (raw == null || raw === '') return fallbackMs
+  const n = Number(raw)
+  return Number.isFinite(n) && n > 0 ? Math.floor(n) : fallbackMs
+}
+const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
+const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
+const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
 silencePoke.startTimer({
+  thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
+  deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
   emitMetric: (event) => {
     // Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
     emitRuntimeMetric(event)
@@ -6469,12 +6599,13 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
   if (TURN_ORIGIN_ROUTING_ENABLED) {
     const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
     const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
-    threadId = resolveAnswerThreadId({
-      explicitThreadId: Number.isFinite(explicit as number) ? (explicit as number) : undefined,
-      originResolved: originTurn != null,
-      originThreadId: originTurn?.sessionThreadId,
-      liveThreadId: turn?.sessionThreadId,
-    })
+    threadId = resolveAnswerThreadWithLog(
+      chat_id,
+      Number.isFinite(explicit as number) ? (explicit as number) : undefined,
+      originTurn,
+      turn,
+      'reply',
+    )
   } else {
     threadId = resolveThreadId(
       chat_id,
@@ -7125,12 +7256,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
     let injected: number | undefined
     if (TURN_ORIGIN_ROUTING_ENABLED) {
       const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
-      injected = resolveAnswerThreadId({
-        explicitThreadId: undefined,
-        originResolved: originTurn != null,
-        originThreadId: originTurn?.sessionThreadId,
-        liveThreadId: turn?.sessionThreadId,
-      })
+      injected = resolveAnswerThreadWithLog(
+        String(args.chat_id),
+        undefined,
+        originTurn,
+        turn,
+        'stream_reply',
+      )
     } else {
       injected = turn?.sessionThreadId
     }
@@ -8850,6 +8982,7 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
             { chat_id: chat, ...(thread != null ? { threadId: thread } : {}), verb: 'activity-summary.send' },
           )
           turn.activityMessageId = sent.message_id
+          turn.activityEverOpened = true
         } else {
           const id = turn.activityMessageId
           await robustApiCall(
@@ -8861,7 +8994,18 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
       } catch (err) {
         const msg = err instanceof Error ? err.message : String(err)
         if (!msg.includes('message is not modified')) {
-          process.stderr.write(`telegram gateway: activity-summary drain failed: ${msg}\n`)
+          turn.activityDrainFailures += 1
+          // Surface the failing anchor + topic: the resume-400 bug fed a
+          // fabricated 13-digit message_id as the reply anchor here, so every
+          // send 400'd and the feed never opened. Logging the anchor +
+          // everOpened makes a feed-blanking send self-explanatory (and the
+          // turn-end DEGRADED line aggregates it).
+          process.stderr.write(
+            `telegram gateway: activity-summary drain failed: ${msg} ` +
+              `(chat=${chat} thread=${thread ?? '-'} ` +
+              `replyAnchor=${turn.sourceMessageId ?? 'none'} ` +
+              `everOpened=${turn.activityEverOpened} failures=${turn.activityDrainFailures})\n`,
+          )
         }
         // Mark as sent so we don't infinite-loop on a stuck render.
         turn.activityLastSentRender = target
@@ -9019,9 +9163,13 @@ function handleSessionEvent(ev: SessionEvent): void {
         const next: CurrentTurn = {
           sessionChatId: ev.chatId,
           sessionThreadId: enqThreadIdNum,
-          sourceMessageId: ev.messageId != null && /^\d+$/.test(ev.messageId)
-            ? Number(ev.messageId)
-            : null,
+          // Accept the inbound id as a reply anchor only when it is a plausible
+          // Telegram message id. Synthetic boot-resume inbounds fabricate a
+          // 13-digit Date.now() message_id (for ack-tracking); if that reached
+          // the activity-feed reply anchor it 400'd every feed send and darkened
+          // the live feed for the whole resume turn (2026-06-05). The ack-queue
+          // still keys on ev.messageId independently — only the anchor is gated.
+          sourceMessageId: parseSourceMessageId(ev.messageId),
           startedAt,
           gatewayReceiveAt: startedAt,
           replyCalled: false,
@@ -9042,12 +9190,19 @@ function handleSessionEvent(ev: SessionEvent): void {
           activityInFlight: null,
           activityPendingRender: null,
           activityLastSentRender: null,
+          activityEverOpened: false,
+          activityDrainFailures: 0,
           mirrorLines: [],
           foregroundSubAgents: new Map(),
           answerStream: null,
           isDm: isDmChatId(ev.chatId),
         }
         currentTurn = next
+        // Status-surface observability: one line at every turn SET so a later
+        // dark card is traceable to which turn/topic key it belonged to.
+        process.stderr.write(
+          `telegram gateway: ${formatTurnLifecycle('set', 'enqueue', next, startedAt)}\n`,
+        )
         // Component 3 — retain in the bounded recently-ended registry so a
         // LATE reply (landing after currentTurn flips to a successor) can
         // still resolve THIS turn's origin thread by its turnId.

package/telegram-plugin/gateway/source-message-id.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+import { describe, it, expect } from 'vitest'
+import { parseSourceMessageId, MAX_TELEGRAM_MESSAGE_ID } from './source-message-id.js'
+describe('parseSourceMessageId', () => {
+  it('accepts a plausible Telegram message id (string or number)', () => {
+    expect(parseSourceMessageId('903')).toBe(903)
+    expect(parseSourceMessageId(905)).toBe(905)
+    expect(parseSourceMessageId('1')).toBe(1) // smallest accepted id
+  })
+  it('REJECTS a fabricated 13-digit Date.now() timestamp (the resume-dark-feed bug)', () => {
+    // 2026-06-04T23:34:21.578Z — the exact value that 400'd every feed send.
+    expect(parseSourceMessageId('1780616061578')).toBeNull()
+    expect(parseSourceMessageId(1_780_616_061_578)).toBeNull()
+  })
+  it('rejects anything at or above the Telegram message-id ceiling (2^31)', () => {
+    expect(parseSourceMessageId(MAX_TELEGRAM_MESSAGE_ID)).toBeNull() // the ceiling itself is excluded
+    expect(parseSourceMessageId(MAX_TELEGRAM_MESSAGE_ID - 1)).toBe(MAX_TELEGRAM_MESSAGE_ID - 1) // largest accepted id
+  })
+  it('rejects null / undefined / empty / non-numeric / non-positive', () => {
+    expect(parseSourceMessageId(null)).toBeNull()
+    expect(parseSourceMessageId(undefined)).toBeNull()
+    expect(parseSourceMessageId('')).toBeNull()
+    expect(parseSourceMessageId('12a')).toBeNull()
+    expect(parseSourceMessageId('-5')).toBeNull() // leading "-" fails the digit test
+    expect(parseSourceMessageId(0)).toBeNull() // "0" passes the digit test but fails the n > 0 check
+    expect(parseSourceMessageId(-5)).toBeNull()
+    expect(parseSourceMessageId('3.5')).toBeNull() // "." fails the digit test
+  })
+})

package/telegram-plugin/gateway/source-message-id.ts ADDED Viewed

@@ -0,0 +1,41 @@
+/**
+ * Guard for the per-turn reply anchor (`turn.sourceMessageId`).
+ *
+ * Telegram Bot API message ids are positive integers that fit within a signed
+ * 32-bit int; `reply_parameters.message_id` HARD-rejects anything larger with
+ * 400 "field 'message_id' must be a valid Number" (and `allow_sending_without_reply`
+ * does NOT bypass that range check).
+ *
+ * Synthetic boot-resume inbounds (`resume-inbound-builder.ts`) fabricate a
+ * `message_id` from `Date.now()` (~1.78e12) so the deliver-until-acked queue can
+ * ack the synthetic by its own enqueue id. That round-trip is fine on its own —
+ * but the enqueue handler also turns `ev.messageId` into `turn.sourceMessageId`,
+ * which `drainActivitySummary` sends as the activity-feed reply anchor. A
+ * fabricated 13-digit timestamp there 400s EVERY feed send for the whole turn,
+ * so the live status feed is dark for the entire first post-restart turn (the
+ * resume-dark-feed incident, 2026-06-05).
+ *
+ * This guard accepts a value as a real anchor ONLY when it is a plausible
+ * Telegram message id; anything non-numeric or out of range yields null, so the
+ * feed posts UNANCHORED (still correct — the anchor is a nicety, not required).
+ * The synthetic's ack-tracking is unaffected: it keys on the enqueue event's own
+ * id, never on this anchor.
+ */
+/** Telegram message ids fit within a signed 32-bit int for reply anchoring;
+ *  anything at/above this is not a real message id (e.g. a wall-clock ms ts).
+ *  This is an EXCLUSIVE upper bound: `parseSourceMessageId` rejects
+ *  `n >= 2 ** 31`, so the largest accepted id is 2 ** 31 - 1 (2147483647). */
+export const MAX_TELEGRAM_MESSAGE_ID = 2 ** 31
+/**
+ * Parse an inbound's `messageId` into a usable reply anchor, or null when it is
+ * not a plausible Telegram message id (non-numeric, non-positive, non-integer,
+ * or out of the reply-anchor range — e.g. a fabricated `Date.now()` timestamp).
+ *
+ * @param raw - Candidate id as a string or number; null and undefined yield null.
+ * @returns The id as a number when its string form is all decimal digits and
+ *   0 < n < MAX_TELEGRAM_MESSAGE_ID; otherwise null.
+ */
+export function parseSourceMessageId(raw: string | number | undefined | null): number | null {
+  if (raw == null) return null
+  const s = String(raw) // normalise so string and number inputs share one validation path
+  if (!/^\d+$/.test(s)) return null // digits only: rejects '', signs, decimals, and exponent forms
+  const n = Number(s)
+  if (!Number.isSafeInteger(n) || n <= 0 || n >= MAX_TELEGRAM_MESSAGE_ID) return null // zero and out-of-range values are not usable anchors
+  return n
+}

package/telegram-plugin/gateway/status-surface-log.test.ts ADDED Viewed

@@ -0,0 +1,98 @@
+import { describe, it, expect } from 'vitest'
+import {
+  formatTurnLifecycle,
+  detectStatusSurfaceDegraded,
+  type StatusSurfaceTurnView,
+} from './status-surface-log.js'
+function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurnView { // fixture: a turn with no tool work, no feed message, and no failures; `overrides` replace individual fields
+  return {
+    turnId: '-100123:_#1780000000000',
+    sessionChatId: '-100123',
+    sessionThreadId: undefined,
+    startedAt: 1_780_000_000_000,
+    toolCallCount: 0,
+    activityMessageId: null,
+    activityEverOpened: false,
+    activityDrainFailures: 0,
+    replyCalled: false,
+    finalAnswerDelivered: false,
+    ...overrides, // spread last so caller-supplied fields win over the defaults above
+  }
+}
+describe('formatTurnLifecycle', () => {
+  it('renders a set line with no age and a "-" thread for General', () => {
+    const line = formatTurnLifecycle('set', 'enqueue', turn(), 1_780_000_005_000) // now is 5s after startedAt, yet a 'set' line still reports age 0
+    expect(line).toContain('turn-lifecycle set reason=enqueue')
+    expect(line).toContain('turnId=-100123:_#1780000000000')
+    expect(line).toContain('chat=-100123')
+    expect(line).toContain('thread=-')
+    expect(line).toContain('age_ms=0') // set never reports age
+  })
+  it('renders a clear line with the turn age and live state', () => {
+    const line = formatTurnLifecycle(
+      'clear',
+      'turn_end',
+      turn({ sessionThreadId: 3, toolCallCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
+      1_780_000_300_000, // +300s
+    )
+    expect(line).toContain('turn-lifecycle clear reason=turn_end')
+    expect(line).toContain('thread=3')
+    expect(line).toContain('tools=5')
+    expect(line).toContain('activityMsgId=42')
+    expect(line).toContain('feedOpened=true')
+    expect(line).toContain('replyCalled=true')
+    expect(line).toContain('finalAnswer=true')
+    expect(line).toContain('age_ms=300000')
+  })
+  it('never emits a negative age even if startedAt is in the future (clock skew)', () => {
+    const line = formatTurnLifecycle('clear', 'turn_end', turn({ startedAt: 2_000_000_000_000 }), 1_780_000_000_000) // startedAt is later than now; the age is clamped at 0
+    expect(line).toContain('age_ms=0')
+  })
+  it('carries no prefix or trailing newline — the caller owns transport', () => {
+    const line = formatTurnLifecycle('set', 'enqueue', turn(), 0)
+    expect(line.startsWith('telegram gateway:')).toBe(false)
+    expect(line.endsWith('\n')).toBe(false)
+  })
+})
+describe('detectStatusSurfaceDegraded', () => {
+  it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
+    const d = detectStatusSurfaceDegraded(
+      turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 }), // tool work + feed never opened + failures: all three conditions hold
+    )
+    expect(d).not.toBeNull()
+    expect(d!.reason).toBe('feed-never-opened')
+    expect(d!.detail).toContain('drainFailures=10')
+  })
+  it('does NOT flag a healthy turn where the feed opened, even if later cleared (activityMessageId nulled)', () => {
+    // clearActivitySummary nulls activityMessageId async on the healthy path;
+    // the sticky activityEverOpened keeps this from false-positiving.
+    expect(
+      detectStatusSurfaceDegraded(
+        turn({ toolCallCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
+      ),
+    ).toBeNull()
+  })
+  it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
+    expect(
+      detectStatusSurfaceDegraded(
+        turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 }), // zero failures: no send is recorded as failed
+      ),
+    ).toBeNull()
+  })
+  it('does NOT flag a turn with no tool work (nothing to surface)', () => {
+    expect(
+      detectStatusSurfaceDegraded(
+        turn({ toolCallCount: 0, activityEverOpened: false, activityDrainFailures: 3 }), // failures without tool work are not flagged
+      ),
+    ).toBeNull()
+  })
+})

package/telegram-plugin/gateway/status-surface-log.ts ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * Status-surface observability — pure formatters for the gateway's live-status
+ * lane (progress card / activity feed / typing indicator).
+ *
+ * Why a dedicated module: when an agent's live status went dark (marko,
+ * 2026-06-05), the lane was nearly silent in the logs — `currentTurn` (the
+ * variable that drives the card/feed/typing) was nulled with no breadcrumb, and
+ * the activity feed failed every send with no turn-level signal. Two latent
+ * bugs were invisible for days: a 300s silence-poke teardown that nulled the
+ * card mid-work, and a resume-synthetic whose fabricated 13-digit message_id
+ * made every feed send 400. Neither left a greppable "the card went dark and
+ * here's why" line.
+ *
+ * These pure functions give the gateway exactly that: ONE structured line per
+ * currentTurn lifecycle transition, and a single DEGRADED warning when a turn
+ * did tool work but the feed never opened because its sends failed. Pure
+ * formatters + injected transport (the caller owns `process.stderr.write`),
+ * mirroring `silence-poke.ts` / `worker-activity-feed.ts`, so they're
+ * unit-testable without a live gateway.
+ */
+/**
+ * The `currentTurn` fields the status-surface logs read. The gateway's
+ * `CurrentTurn` atom structurally satisfies this (TS structural typing), so the
+ * gateway passes the turn directly — no import cycle back into `gateway.ts`.
+ *
+ * Every field is read-only from this module's point of view: the formatters
+ * below only interpolate or compare these values and never assign to them.
+ */
+export interface StatusSurfaceTurnView {
+  turnId: string
+  sessionChatId: string
+  sessionThreadId: number | undefined // undefined is rendered as "-" in the lifecycle line
+  startedAt: number // subtracted from `now` to compute age_ms on a `clear` line
+  toolCallCount: number // 0 means no tool work, so the degraded check returns null
+  /** Live activity-feed message id; null until the first send captures it. */
+  activityMessageId: number | null
+  /**
+   * Sticky: true once the activity feed ever opened a message this turn. Unlike
+   * `activityMessageId` (which `clearActivitySummary` nulls async on the
+   * healthy finalize path), this is never reset — so a turn that DID surface
+   * the feed can't false-positive as degraded at turn-end.
+   */
+  activityEverOpened: boolean
+  /** Count of real activity-feed send/edit failures this turn (429s and
+   *  "message is not modified" excluded). */
+  activityDrainFailures: number
+  replyCalled: boolean
+  finalAnswerDelivered: boolean
+}
+export type TurnLifecycleAction = 'set' | 'clear' // the two lifecycle transitions that formatTurnLifecycle renders
+/**
+ * One structured line per `currentTurn` set/clear. `currentTurn` drives the
+ * progress card / activity feed / typing; logging every transition — with the
+ * topic key, how far the turn got, and the reason it ended — makes a dark card
+ * explainable after the fact. Returned WITHOUT the `telegram gateway: ` prefix
+ * or trailing newline so the caller owns transport (and tests assert the body).
+ *
+ * `now` is only consulted for the `clear` age; for `set` it is ignored.
+ *
+ * @param action - Which transition is being logged ('set' or 'clear').
+ * @param reason - Free-form cause of the transition, emitted verbatim as
+ *   `reason=<value>`.
+ * @param t - The turn fields to render.
+ * @param now - Timestamp compared against `t.startedAt`, in the same unit.
+ * @returns A single line of space-separated `key=value` pairs that starts with
+ *   `turn-lifecycle <action>` and ends with `age_ms=<n>`.
+ */
+export function formatTurnLifecycle(
+  action: TurnLifecycleAction,
+  reason: string,
+  t: StatusSurfaceTurnView,
+  now: number,
+): string {
+  const ageMs = action === 'clear' ? Math.max(0, now - t.startedAt) : 0 // clamped at 0 so a startedAt later than now never yields a negative age
+  return (
+    `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` +
+    `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? '-'} ` +
+    `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
+    `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` +
+    `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`
+  )
+}
+/**
+ * Turn-end health check: did the turn do tool work but never get a live feed
+ * message onto the screen BECAUSE its sends failed? That is the exact signature
+ * of the resume-400 bug (every activity-summary send throws, so the feed never
+ * opens) — a single greppable line would have caught it in seconds.
+ *
+ * Returns null when the surface was healthy or legitimately silent:
+ *   - no tool work this turn (nothing to surface), OR
+ *   - the feed opened fine (`activityEverOpened`), OR
+ *   - the feed never even attempted a send (`activityDrainFailures === 0`, e.g.
+ *     an ack-first turn whose feed was intentionally suppressed) — absence of a
+ *     send is not a failure.
+ *
+ * @param t - The turn fields to inspect; only `toolCallCount`,
+ *   `activityEverOpened` and `activityDrainFailures` are read.
+ * @returns `{ reason: 'feed-never-opened', detail }` when all three conditions
+ *   hold (tool work, feed never opened, at least one drain failure); otherwise
+ *   null. `detail` is human-readable text embedding the tool and failure counts.
+ */
+export function detectStatusSurfaceDegraded(
+  t: StatusSurfaceTurnView,
+): { reason: string; detail: string } | null {
+  if (t.toolCallCount === 0) return null // nothing to surface
+  if (t.activityEverOpened) return null // the feed did reach the screen at some point
+  if (t.activityDrainFailures === 0) return null // no failed send recorded, so silence is not a failure
+  return {
+    reason: 'feed-never-opened',
+    detail:
+      `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` +
+      `activityMsgId=none — the live activity feed failed every send this turn ` + // "none" is a fixed literal here, not read from t.activityMessageId
+      `(card was dark despite tool work)`,
+  }
+}

package/telegram-plugin/silence-poke.ts CHANGED Viewed

@@ -20,6 +20,14 @@
  * edits, and tool churn DO NOT reset the silence clock — the model could
  * be ripping through 20 tool calls and still be "silent" to the user.
  *
+ * Fix A caveat (opt-in, `deferFallbackWhileToolInFlight`): tool churn still
+ * doesn't reset the *clock*, but when the threshold is crossed WITH a parent
+ * tool genuinely in flight, the terminal unwedge is DEFERRED (not skipped) up to
+ * `fallbackHardCeiling`. Since #2162 the live activity feed renders that tool
+ * work, so the "still silent to the user" premise no longer holds while a tool
+ * is visibly running; nulling `currentTurn` there would darken the very feed the
+ * user is watching. A turn with no in-flight tool is unaffected.
+ *
  * Terminal action, once per turn:
  *
  *   t=0       startTurn() — silence clock starts at turnStartedAt
@@ -81,6 +89,16 @@ export interface ThresholdsMs {
   /** Silence (since last outbound, or turn start) after which the
    *  framework sends the user-visible fallback AND unwedges the turn. */
   fallback: number
+  /**
+   * Fix A — hard ceiling for the in-flight-tool defer. When
+   * `deferFallbackWhileToolInFlight` is on, the fallback is held back while a
+   * parent tool is genuinely in flight (the agent is demonstrably working and
+   * the live activity feed is showing it). This bounds that defer: once silence
+   * crosses the ceiling the fallback fires REGARDLESS of an in-flight tool, so a
+   * hung-mid-tool turn can't pin the conversation forever. Ignored unless the
+   * defer is on; defaults to no ceiling (Infinity) when omitted.
+   */
+  fallbackHardCeiling?: number
 }
 export const DEFAULT_THRESHOLDS: ThresholdsMs = {
@@ -122,6 +140,21 @@ export interface SilencePokeDeps {
   thresholdsMs?: ThresholdsMs
   /** Poll interval (tests). */
   pollIntervalMs?: number
+  /**
+   * Fix A — when true, the 300s framework fallback is DEFERRED while a parent
+   * tool is genuinely in flight (`inFlightTools` non-empty): the agent is
+   * demonstrably working, and since #2162 the live activity feed shows that
+   * work, so nulling `currentTurn` (which the fallback does) would darken a feed
+   * the user is actively watching. The defer is bounded by
+   * `thresholdsMs.fallbackHardCeiling` so a hung-mid-tool turn still unwedges; a
+   * turn with NO in-flight tool fires at the base threshold exactly as before.
+   * Default false (legacy behaviour) — enable per-agent to canary.
+   *
+   * A crashed agent is recovered independently by the bridge-disconnect sweep
+   * (`onDanglingTurnsSwept`), so deferring here does not reintroduce the #1556
+   * dangling-turn wedge for the crash case.
+   */
+  deferFallbackWhileToolInFlight?: boolean
 }
 const state = new Map<string, SilencePokeState>()
@@ -366,6 +399,20 @@ function tick(now: number): void {
     if (silence < 0) continue
     if (!s.fallbackFired && silence >= thresholds.fallback) {
+      // Fix A: defer the unwedge while a parent tool is genuinely in flight —
+      // the agent is demonstrably working and the live activity feed is showing
+      // it, so firing here (which nulls currentTurn) would darken that feed
+      // mid-work. Bounded by the hard ceiling so a hung-mid-tool turn still
+      // unwedges. `continue` WITHOUT setting fallbackFired so the next tick
+      // re-checks — once the tool ends and the turn stays silent past the base
+      // threshold, or the ceiling is crossed, it fires normally.
+      if (
+        activeDeps.deferFallbackWhileToolInFlight === true &&
+        s.inFlightTools.size > 0 &&
+        silence < (thresholds.fallbackHardCeiling ?? Number.POSITIVE_INFINITY)
+      ) {
+        continue
+      }
       s.fallbackFired = true
       const { chatId, threadId } = parseKey(key)
       const recentThinking = s.lastThinkingAt != null

package/telegram-plugin/tests/multitopic-routing-wiring.test.ts CHANGED Viewed

@@ -45,13 +45,15 @@ describe('component 3 — turn-origin reply routing', () => {
     const fn = gatewaySrc.split('async function executeReply')[1]?.split('\nasync function ')[0] ?? ''
     expect(fn).toMatch(/TURN_ORIGIN_ROUTING_ENABLED/)
     expect(fn).toMatch(/findTurnByOriginId\(args\.origin_turn_id/)
-    expect(fn).toMatch(/resolveAnswerThreadId\(/)
+    // The resolution + reply-route telemetry go through resolveAnswerThreadWithLog,
+    // which calls the pure resolveAnswerThreadId internally (incl. tier-4 recovery).
+    expect(fn).toMatch(/resolveAnswerThread\w*\(/)
   })
   it('executeStreamReply resolves the answer thread via the origin turn too', () => {
     const fn = gatewaySrc.split('async function executeStreamReply')[1]?.split('\nasync function ')[0] ?? ''
     expect(fn).toMatch(/findTurnByOriginId\(args\.origin_turn_id/)
-    expect(fn).toMatch(/resolveAnswerThreadId\(/)
+    expect(fn).toMatch(/resolveAnswerThread\w*\(/)
   })
   it('the reply + stream_reply tool schemas expose origin_turn_id to the model', () => {

package/telegram-plugin/tests/silence-poke.test.ts CHANGED Viewed

@@ -26,7 +26,10 @@ interface TestFixtures {
   fallbacks: FrameworkFallbackContext[]
 }
-function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }): TestFixtures {
+function setupDeps(opts?: {
+  thresholds?: Partial<typeof DEFAULT_THRESHOLDS> & { fallbackHardCeiling?: number }
+  deferFallbackWhileToolInFlight?: boolean
+}): TestFixtures {
   const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
   __setDepsForTests({
     emitMetric: (e) => fixtures.emitted.push(e),
@@ -35,6 +38,9 @@ function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }):
       ...DEFAULT_THRESHOLDS,
       ...(opts?.thresholds ?? {}),
     },
+    ...(opts?.deferFallbackWhileToolInFlight != null
+      ? { deferFallbackWhileToolInFlight: opts.deferFallbackWhileToolInFlight }
+      : {}),
   })
   return fixtures
 }
@@ -528,3 +534,65 @@ describe('silence-poke — performance', () => {
     expect(elapsed).toBeLessThan(50)
   })
 })
+// ─── Fix A: defer the unwedge while a parent tool is genuinely in flight ──────
+// A long quiet tool stretch (foreground sub-agent / big research) crossed the
+// 300s fallback and nulled currentTurn, darkening the live activity feed
+// mid-work. The opt-in defer keeps the turn alive while a tool is in flight,
+// bounded by a hard ceiling so a hung-mid-tool turn still unwedges.
+describe('silence-poke — Fix A: in-flight-tool defer', () => {
+  it('legacy default (defer OFF): fires at 300s even with a tool in flight', () => {
+    const f = setupDeps() // deferFallbackWhileToolInFlight unset → off
+    startTurn('c:0', 0)
+    noteToolStart('c:0', 't1', 'Bash', 'long audit', 10_000)
+    __tickForTests(300_000)
+    expect(f.fallbacks).toHaveLength(1) // unchanged legacy behaviour
+  })
+  it('defer ON: does NOT fire at 300s while a tool is in flight', () => {
+    const f = setupDeps({ deferFallbackWhileToolInFlight: true, thresholds: { fallbackHardCeiling: 900_000 } })
+    startTurn('c:0', 0)
+    noteToolStart('c:0', 't1', 'Bash', 'long audit', 10_000)
+    __tickForTests(300_000)
+    __tickForTests(450_000) // still working, tool still in flight
+    expect(f.fallbacks).toHaveLength(0) // deferred — the live feed stays alive
+  })
+  it('defer ON: fires once the tool ends and the turn stays silent past threshold', () => {
+    const f = setupDeps({ deferFallbackWhileToolInFlight: true, thresholds: { fallbackHardCeiling: 900_000 } })
+    startTurn('c:0', 0)
+    noteToolStart('c:0', 't1', 'Bash', null, 10_000)
+    __tickForTests(300_000)
+    expect(f.fallbacks).toHaveLength(0) // deferred while in flight
+    noteToolEnd('c:0', 't1', 400_000) // tool completes, no reply follows
+    __tickForTests(400_001) // silence (from turn start) already well past 300s
+    expect(f.fallbacks).toHaveLength(1) // now unwedges promptly
+  })
+  it('defer ON: fires at the hard ceiling even with a tool still in flight (hung-mid-tool)', () => {
+    const f = setupDeps({ deferFallbackWhileToolInFlight: true, thresholds: { fallbackHardCeiling: 900_000 } })
+    startTurn('c:0', 0)
+    noteToolStart('c:0', 't1', 'Bash', 'wedged tool', 10_000)
+    __tickForTests(300_000)
+    expect(f.fallbacks).toHaveLength(0) // deferred
+    __tickForTests(900_000) // crosses the hard ceiling
+    expect(f.fallbacks).toHaveLength(1) // bounded — still unwedges
+  })
+  it('defer ON: a turn with NO in-flight tool fires at the base threshold (genuine silence)', () => {
+    const f = setupDeps({ deferFallbackWhileToolInFlight: true, thresholds: { fallbackHardCeiling: 900_000 } })
+    startTurn('c:0', 0)
+    // no tool ever started — genuinely silent/wedged
+    __tickForTests(300_000)
+    expect(f.fallbacks).toHaveLength(1) // unaffected by the defer
+  })
+  it('defer ON without a hard ceiling: defers indefinitely while the tool stays in flight', () => {
+    const f = setupDeps({ deferFallbackWhileToolInFlight: true }) // no fallbackHardCeiling → Infinity
+    startTurn('c:0', 0)
+    noteToolStart('c:0', 't1', 'Bash', null, 10_000)
+    __tickForTests(300_000)
+    __tickForTests(3_600_000) // an hour in
+    expect(f.fallbacks).toHaveLength(0)
+  })
+})

package/telegram-plugin/tests/worker-activity-feed.test.ts CHANGED Viewed

@@ -440,3 +440,64 @@ describe('createWorkerActivityFeed', () => {
     expect(bot.sent[0].opts?.message_thread_id).toBe(42)
   })
 })
+// ─── log sink: success-path observability ────────────────────────────────────
+// Before this, the feed only logged on FAILURE, so a feed that rendered fine
+// was invisible in the gateway log — the exact gap that made the marko
+// status-dark incident hard to triage. Assert paint/edit/finish each emit a
+// structured, greppable line naming the worker, chat, thread, and message id.
+describe('createWorkerActivityFeed — log sink', () => {
+  it('logs paint on first send, edit on each in-place update, and finish on terminal', async () => {
+    const bot = makeFakeBot()
+    const logs: string[] = []
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({
+      bot,
+      now: () => clock,
+      minEditIntervalMs: 0,
+      log: (m) => logs.push(m),
+    })
+    await feed.update('w-research', 'chat-9', view({ toolCount: 1, latestSummary: 'first' }), 7)
+    clock = 11_000
+    await feed.update('w-research', 'chat-9', view({ toolCount: 2, latestSummary: 'second' }), 7)
+    clock = 12_000
+    await feed.finish('w-research', view({ state: 'done', toolCount: 2 }))
+    const paint = logs.find((l) => l.startsWith('worker-feed: paint'))
+    const edit = logs.find((l) => l.startsWith('worker-feed: edit'))
+    const finish = logs.find((l) => l.startsWith('worker-feed: finish'))
+    expect(paint).toBeDefined()
+    expect(paint).toContain('agent=w-research')
+    expect(paint).toContain('chat=chat-9')
+    expect(paint).toContain('thread=7')
+    expect(paint).toMatch(/msgId=\d+/)
+    expect(paint).toMatch(/bytes=\d+/)
+    expect(edit).toBeDefined()
+    expect(edit).toContain('agent=w-research')
+    expect(finish).toBeDefined()
+    expect(finish).toContain('state=done')
+  })
+  it('renders thread=- in the log line when no forum topic is set', async () => {
+    const bot = makeFakeBot()
+    const logs: string[] = []
+    let clock = 10_000
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, log: (m) => logs.push(m) })
+    await feed.update('w1', 'chat', view()) // no threadId
+    expect(logs.find((l) => l.startsWith('worker-feed: paint'))).toContain('thread=-')
+  })
+  it('does not log a paint when the worker stays below firstPaintMin (still silent)', async () => {
+    const bot = makeFakeBot()
+    const logs: string[] = []
+    let clock = 0
+    const feed = createWorkerActivityFeed({ bot, now: () => clock, firstPaintMinMs: 8000, log: (m) => logs.push(m) })
+    clock = 3000
+    await feed.update('w1', 'chat', view({ elapsedMs: 3000 }))
+    expect(logs.some((l) => l.startsWith('worker-feed: paint'))).toBe(false)
+  })
+})

package/telegram-plugin/worker-activity-feed.ts CHANGED Viewed

@@ -208,6 +208,8 @@ export interface WorkerActivityFeedOpts {
 }
 interface WorkerHandle {
+  /** jsonl agent id — carried so success/failure log lines can name the worker. */
+  agentId: string
   chatId: string
   threadId?: number
   messageId: number | null
@@ -309,6 +311,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
         h.messageId = sent.message_id
         h.lastBody = body
         h.lastEditAt = nowFn()
+        log(
+          `worker-feed: paint agent=${h.agentId} chat=${h.chatId} ` +
+            `thread=${h.threadId ?? '-'} msgId=${h.messageId} bytes=${body.length}`,
+        )
       } catch (err) {
         noteRateLimited(h, err, 'send')
         log(`worker-feed: send failed: ${(err as Error).message}`)
@@ -324,6 +330,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
       await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h))
       h.lastBody = body
       h.lastEditAt = nowFn()
+      log(
+        `worker-feed: edit agent=${h.agentId} chat=${h.chatId} ` +
+          `thread=${h.threadId ?? '-'} msgId=${h.messageId} bytes=${body.length}`,
+      )
     } catch (err) {
       noteRateLimited(h, err, 'edit')
       // Stale message_id (manually deleted / edit window gone). Re-post
@@ -351,6 +361,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
       await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h))
       h.lastBody = body
       h.lastEditAt = nowFn()
+      log(
+        `worker-feed: finish agent=${h.agentId} chat=${h.chatId} ` +
+          `thread=${h.threadId ?? '-'} msgId=${h.messageId} state=${view.state} bytes=${body.length}`,
+      )
     } catch (err) {
       noteRateLimited(h, err, 'finish')
       log(`worker-feed: finish edit failed: ${(err as Error).message}`)
@@ -371,6 +385,7 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
       let h = handles.get(agentId)
       if (h == null) {
         h = {
+          agentId,
           chatId,
           threadId,
           messageId: null,