npm - switchroom - Versions diffs - 0.13.9 → 0.13.11 - Mend

switchroom 0.13.9 → 0.13.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/cli/switchroom.js +38 -14
package/dist/host-control/main.js +222 -7
package/examples/switchroom.yaml +25 -7
package/package.json +1 -1
package/profiles/_shared/telegram-style.md.hbs +1 -1
package/telegram-plugin/dist/bridge/bridge.js +23 -4
package/telegram-plugin/dist/gateway/gateway.js +540 -147
package/telegram-plugin/dist/server.js +23 -4
package/telegram-plugin/gateway/config-approval-handler.test.ts +246 -0
package/telegram-plugin/gateway/config-approval-handler.ts +284 -0
package/telegram-plugin/gateway/gateway.ts +218 -25
package/telegram-plugin/gateway/ipc-protocol.ts +72 -2
package/telegram-plugin/gateway/ipc-server.ts +101 -0
package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +185 -0
package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +69 -0
package/telegram-plugin/model-unavailable.ts +11 -1
package/telegram-plugin/operator-events.fixtures.json +14 -24
package/telegram-plugin/operator-events.ts +11 -2
package/telegram-plugin/session-tail.ts +71 -4
package/telegram-plugin/subagent-watcher.ts +39 -0
package/telegram-plugin/tests/model-unavailable.test.ts +15 -2
package/telegram-plugin/tests/operator-events-session-tail.test.ts +53 -2
package/telegram-plugin/tests/operator-events.test.ts +14 -7
package/telegram-plugin/tests/subagent-handback-decision.test.ts +112 -0
package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +105 -0
package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +61 -0
package/telegram-plugin/tests/subagent-watcher.test.ts +67 -1
package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts +95 -0
package/profiles/default/CLAUDE.md +0 -193

package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts ADDED Viewed

@@ -0,0 +1,185 @@
+/**
+ * Pure builder for the synthetic `subagent_handback` inbound the gateway
+ * injects when a *background* sub-agent (worker / researcher) finishes.
+ *
+ * Why this exists (conversational-pacing beat 4 — the handback):
+ * A foreground sub-agent hands its result straight back as the `Task`
+ * tool result, in the parent's own turn — the model sees it in-context.
+ * A background sub-agent does not: it finishes decoupled from any turn
+ * boundary, and when it completes the parent agent is typically idle
+ * with no turn in flight to receive the result. Claude Code surfaces a
+ * background result only on the parent's *next* turn — for a Telegram
+ * agent that means the user must send another message before they ever
+ * hear back. The agent never proactively says "the worker's done".
+ *
+ * This builder produces the InboundMessage that closes that gap. The
+ * gateway's subagent-watcher `onFinish` callback (which already knows
+ * the moment a background sub-agent terminates) feeds the worker's
+ * result text in here; the gateway delivers the envelope through the
+ * same idle-drain path cron and vault-grant wake-ups use. The model
+ * wakes, sees `<channel source="subagent_handback">`, and synthesises a
+ * user-facing handback in its own voice — beat 4 made deterministic.
+ *
+ * Shape contract (mirrors `vault-grant-inbound-builders.ts`): the
+ * `meta.source` string is load-bearing — the MCP channel notification
+ * wraps it as `<channel source="subagent_handback">`. A regression that
+ * changes the source string or drops a meta field silently breaks the
+ * wake-up. Pinned by `subagent-handback-inbound-builder.test.ts`.
+ */
+import type { InboundMessage } from './ipc-protocol.js'
+/** Cap on the worker result text carried in the inbound. The model
+ *  synthesises a fresh handback from it — the full transcript is never
+ *  needed, and an unbounded paste bloats the parent's context.
+ *  Applied by `truncate` after trimming, measured in string-length
+ *  (UTF-16 code unit) terms; a `…` is appended on top of the cap when
+ *  text was cut. */
+export const HANDBACK_RESULT_MAX = 3000
+/** Cap on the dispatch-time task description echoed back for context.
+ *  Applied the same way as `HANDBACK_RESULT_MAX` (trim, cut, `…`). */
+export const HANDBACK_DESC_MAX = 200
+/**
+ * Everything `buildSubagentHandbackInbound` needs to describe one
+ * finished background sub-agent to the parent agent.
+ */
+export interface SubagentHandbackContext {
+  /** Telegram chat the work was dispatched from — the synthesized
+   *  handback turn lands here so it stays with the conversation.
+   *  Copied as-is into the inbound's `chatId`. */
+  chatId: string
+  /** Dispatch-time task description (the sub-agent's `description`).
+   *  The builder trims it and caps it at `HANDBACK_DESC_MAX`; a value
+   *  that is empty after trimming is shown as `(no description)`. */
+  taskDescription: string
+  /** The worker's final result text — its last narrative emission
+   *  before terminating. May be empty if the watcher never observed a
+   *  text line (rare: a worker that only ran tools then exited).
+   *  The builder trims it and caps it at `HANDBACK_RESULT_MAX`. When
+   *  empty, the failed prompt omits the report section and the
+   *  completed prompt says the worker left no summary text. */
+  resultText: string
+  /** Terminal outcome as classified by the watcher. Selects the
+   *  FAILED vs finished prompt wording and is echoed in
+   *  `meta.outcome`. */
+  outcome: 'completed' | 'failed'
+}
+/**
+ * Trim `s` and cap it at `max` UTF-16 code units, appending `…` when
+ * anything was cut (so a shortened result is at most `max + 1` long).
+ *
+ * The cut never lands between the two halves of a surrogate pair: if
+ * the last kept unit is a high surrogate it is dropped as well, so the
+ * text handed to the model never ends in a lone surrogate (which is
+ * not well-formed Unicode) right before the ellipsis.
+ *
+ * @param s   Raw text; surrounding whitespace is stripped first.
+ * @param max Maximum number of code units kept before the ellipsis.
+ * @returns The trimmed text, shortened and suffixed with `…` if needed.
+ */
+function truncate(s: string, max: number): string {
+  const t = s.trim()
+  if (t.length <= max) return t
+  let end = max
+  // A high surrogate (U+D800–U+DBFF) as the last kept unit means the
+  // cut would split an astral character (emoji etc.) — back off by one.
+  // `charCodeAt` yields NaN for an out-of-range index, which fails both
+  // comparisons, so `max <= 0` needs no special case.
+  const last = t.charCodeAt(end - 1)
+  if (last >= 0xd800 && last <= 0xdbff) end -= 1
+  return t.slice(0, end) + '…'
+}
+/**
+ * Assemble the synthetic `subagent_handback` InboundMessage for a
+ * background sub-agent that has terminated.
+ *
+ * The task description and result text are trimmed and capped
+ * (`HANDBACK_DESC_MAX` / `HANDBACK_RESULT_MAX`); a description that is
+ * empty after trimming becomes `(no description)`. The prompt wording
+ * depends on `ctx.outcome`. `nowMs` (default `Date.now()`) supplies
+ * both `ts` and the synthetic `messageId`, so a fixed `nowMs` yields a
+ * fully deterministic envelope for tests.
+ */
+export function buildSubagentHandbackInbound(opts: {
+  ctx: SubagentHandbackContext
+  nowMs?: number
+}): InboundMessage {
+  const { ctx } = opts
+  const stamp = opts.nowMs ?? Date.now()
+  const shortDesc = truncate(ctx.taskDescription, HANDBACK_DESC_MAX)
+  const taskLine = `Task: ${shortDesc || '(no description)'}\n\n`
+  const report = truncate(ctx.resultText, HANDBACK_RESULT_MAX)
+  let body: string
+  if (ctx.outcome === 'failed') {
+    // Failed: the report section is simply omitted when nothing was captured.
+    const reported = report ? `What it reported before failing:\n${report}\n\n` : ''
+    body =
+      `🤝 A background worker you dispatched has FAILED.\n\n` +
+      taskLine +
+      reported +
+      `This is beat 4 — the handback. Tell the user plainly that the ` +
+      `delegated work did not complete, what is known, and your ` +
+      `recommended next step — one \`reply\` in your own voice. Do not ` +
+      `stay silent.`
+  } else {
+    // Completed: an empty report is called out explicitly instead.
+    const reported = report
+      ? `What the worker reported:\n${report}\n\n`
+      : `The worker left no summary text.\n\n`
+    body =
+      `🤝 A background worker you dispatched has finished.\n\n` +
+      taskLine +
+      reported +
+      `This is beat 4 — the handback. Synthesise this for the user ` +
+      `now: one \`reply\` in your own voice covering what the worker ` +
+      `found and your recommended next step. Do NOT paste the raw ` +
+      `report and do NOT stay silent — the user dispatched this and ` +
+      `is waiting to hear back.`
+  }
+  return {
+    type: 'inbound',
+    chatId: ctx.chatId,
+    // Synthetic id: there is no Telegram message behind this envelope,
+    // so the timestamp doubles as the message id.
+    messageId: stamp,
+    user: 'subagent-watcher',
+    userId: 0,
+    ts: stamp,
+    text: body,
+    meta: {
+      source: 'subagent_handback',
+      outcome: ctx.outcome,
+    },
+  }
+}
+// ───────────────────────────────────────────────────────────────────────────
+// Handback decision (pure — unit-testable gate for the gateway onFinish path)
+// ───────────────────────────────────────────────────────────────────────────
+/**
+ * Inputs to the handback decision. The gateway's `subagent-watcher`
+ * `onFinish` callback does the IO — resolves `isBackground` from the
+ * registry DB, `fleetChatId` from the progress-driver fleet, and
+ * `ownerChatId` from access.json — then hands the resolved values here.
+ * Keeping the *decision* pure makes the gate (which injects turns)
+ * testable without standing up a gateway.
+ */
+export interface SubagentHandbackDecisionInput {
+  /** `SWITCHROOM_SUBAGENT_HANDBACK` env var value. Only the exact
+   *  string `'0'` disables the handback; any other value, including
+   *  `undefined` (unset), leaves it enabled. */
+  handbackEnvValue: string | undefined
+  /** Terminal outcome the watcher reported. */
+  outcome: 'completed' | 'failed' | 'orphan'
+  /** Whether the sub-agent was a background dispatch (registry DB flag).
+   *  Foreground sub-agents hand back natively in the parent's turn. */
+  isBackground: boolean
+  /** Chat id from the progress-driver fleet entry; '' if not found.
+   *  Preferred over `ownerChatId` when non-empty. */
+  fleetChatId: string
+  /** Owner chat fallback (access.json allowFrom[0]); '' if none. */
+  ownerChatId: string
+  /** Dispatch-time task description; passed through unchanged to the
+   *  inbound builder's `ctx.taskDescription`. */
+  taskDescription: string
+  /** The worker's result text; passed through unchanged to the inbound
+   *  builder's `ctx.resultText`. */
+  resultText: string
+  /** Deterministic clock for tests. Forwarded to the builder only when
+   *  defined. */
+  nowMs?: number
+}
+/** Why a handback was NOT delivered — carried as `reason` on the
+ *  `deliver: false` arm of `SubagentHandbackDecision`. One value per
+ *  gate in `decideSubagentHandback`, listed in the order the gates are
+ *  checked. (A delivered handback has no reason string; it is the
+ *  `deliver: true` arm.) */
+export type SubagentHandbackSkipReason =
+  | 'env-disabled'
+  | 'outcome-not-terminal'
+  | 'foreground'
+  | 'no-chat'
+/** Result of `decideSubagentHandback`, discriminated on `deliver`:
+ *  `false` carries the skip reason; `true` carries the resolved chat
+ *  id and the already-built inbound message. */
+export type SubagentHandbackDecision =
+  | { deliver: false; reason: SubagentHandbackSkipReason }
+  | { deliver: true; chatId: string; inbound: InboundMessage }
+/**
+ * Pure gate for the handback: given already-resolved inputs, either
+ * name the reason no handback is sent or return the inbound to inject.
+ * Performs no IO itself.
+ *
+ * Checks run in this order and the first one that fails wins:
+ *   1. `env-disabled` — `SWITCHROOM_SUBAGENT_HANDBACK` is exactly `'0'`.
+ *   2. `outcome-not-terminal` — outcome is neither `completed` nor
+ *      `failed` (`orphan` is a stale historical-at-boot row, not a
+ *      fresh completion).
+ *   3. `foreground` — a foreground sub-agent already handed its result
+ *      back as the Task tool result in the parent's own turn.
+ *   4. `no-chat` — both `fleetChatId` and `ownerChatId` are empty, so
+ *      there is nowhere to deliver.
+ *
+ * When every gate passes, the chat id is `fleetChatId` if non-empty,
+ * otherwise `ownerChatId`.
+ */
+export function decideSubagentHandback(
+  input: SubagentHandbackDecisionInput,
+): SubagentHandbackDecision {
+  const { outcome, nowMs } = input
+  const skip = (reason: SubagentHandbackSkipReason): SubagentHandbackDecision => ({
+    deliver: false,
+    reason,
+  })
+  if (input.handbackEnvValue === '0') return skip('env-disabled')
+  if (!(outcome === 'completed' || outcome === 'failed')) {
+    return skip('outcome-not-terminal')
+  }
+  if (!input.isBackground) return skip('foreground')
+  const chatId = input.fleetChatId || input.ownerChatId
+  if (!chatId) return skip('no-chat')
+  const builderOpts: { ctx: SubagentHandbackContext; nowMs?: number } = {
+    ctx: {
+      chatId,
+      taskDescription: input.taskDescription,
+      resultText: input.resultText,
+      outcome,
+    },
+  }
+  // Only set the key when a clock was supplied, so the builder's own
+  // `Date.now()` default applies otherwise.
+  if (nowMs !== undefined) builderOpts.nowMs = nowMs
+  return { deliver: true, chatId, inbound: buildSubagentHandbackInbound(builderOpts) }
+}

package/telegram-plugin/hooks/subagent-tracker-posttool.mjs CHANGED Viewed

@@ -249,6 +249,58 @@ function updateRow(dbPath, { id, status, resultSummary, now }, done) {
   })
 }
+// ---------------------------------------------------------------------------
+// Foreground handback nudge (conversational-pacing beat 4)
+// ---------------------------------------------------------------------------
+/**
+ * Synchronously read the `background` flag for a subagent row. Returns
+ * 0 (foreground), 1 (background), or null (unknown — sync SQLite
+ * unavailable, or row not found). Used to gate the foreground handback
+ * nudge: a background sub-agent's PostToolUse fires on the ~10s launch
+ * ACK, not on completion, so it must NOT be nudged here (the gateway's
+ * subagent-watcher handles the background handback via inject_inbound).
+ */
+function readBackgroundFlagSync(dbPath, id) {
+  const DatabaseSync = resolveSyncSqlite()
+  if (DatabaseSync == null) return null
+  try {
+    const db = new DatabaseSync(dbPath)
+    const row = db.prepare('SELECT background FROM subagents WHERE id = ?').get(id)
+    db.close()
+    if (row == null) return null
+    return row.background === 1 ? 1 : 0
+  } catch {
+    return null
+  }
+}
+/**
+ * Emit a PostToolUse `additionalContext` nudge. For a foreground
+ * sub-agent this fires at real completion, mid-parent-turn, with the
+ * result already in the parent's context — the nudge steers the parent
+ * to synthesise a user-facing handback (beat 4) instead of dumping the
+ * raw report or moving on silently. Same channel `sandbox-hint-posttool`
+ * uses; capped well under Claude Code's 10k hook-output limit.
+ */
+function emitForegroundHandbackNudge() {
+  const out = {
+    hookSpecificOutput: {
+      hookEventName: 'PostToolUse',
+      additionalContext:
+        'A sub-agent you dispatched just returned. Beat 4 — the handback: '
+        + 'before you move on, send the user a reply in your own voice that '
+        + 'synthesises what the sub-agent found and your next step. Do not '
+        + 'paste its raw report and do not go silent.',
+    },
+  }
+  try {
+    process.stdout.write(JSON.stringify(out) + '\n')
+  } catch {
+    /* stdout write failures never block the tool flow */
+  }
+}
 // ---------------------------------------------------------------------------
 // main
 // ---------------------------------------------------------------------------
@@ -292,6 +344,23 @@ function main() {
   if (!existsSync(dbPath)) process.exit(0)
   const toolResponse = event.tool_response ?? null
+  // conversational-pacing beat 4 (foreground half). A foreground
+  // sub-agent's PostToolUse fires at real completion, mid-parent-turn,
+  // with its result in tool_response — nudge the parent to synthesise a
+  // user-facing handback. Background sub-agents are gated OUT: their
+  // PostToolUse fires on the launch ACK (BACKGROUND_SQL leaves status
+  // untouched for that reason), and their handback is driven by the
+  // gateway's subagent-watcher onFinish path instead. Fail-silent: an
+  // unknown background flag (null) skips the nudge.
+  if (
+    process.env.SWITCHROOM_SUBAGENT_HANDBACK !== '0'
+    && detectStatus(toolResponse) === 'completed'
+    && readBackgroundFlagSync(dbPath, id) === 0
+  ) {
+    emitForegroundHandbackNudge()
+  }
   updateRow(
     dbPath,
     {

package/telegram-plugin/model-unavailable.ts CHANGED Viewed

@@ -326,7 +326,17 @@ export function resolveModelUnavailableFromOperatorEvent(
     return detectModelUnavailable(detail) ?? { kind: 'quota_exhausted', raw: detail }
   }
   if (ev.kind === 'rate-limited') {
-    return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
+    // A rate-limited / transient overload is NOT "model unavailable" —
+    // it is retryable and Claude Code retries it internally. Escalate
+    // to the model-unavailable card ONLY if the detail carries a
+    // genuine quota signal (a 4xx that slipped past the classifier
+    // with usage-limit wording in its body). A bare overload /
+    // rate-limit returns null → the caller renders the calm
+    // `rate-limited` card, never the scary "⚠️ Model unavailable" one.
+    // Returning `{kind:'overload'}` here is what fired a false
+    // model-unavailable card on every transient 529.
+    const detected = detectModelUnavailable(detail)
+    return detected?.kind === 'quota_exhausted' ? detected : null
   }
   if (ev.kind === 'unknown-5xx') {
     return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }

package/telegram-plugin/operator-events.fixtures.json CHANGED Viewed

@@ -1,6 +1,5 @@
 {
   "_comment": "Captured error shapes per OperatorEventKind. Real API keys/IDs have been scrubbed.",
   "credentials-expired": [
     {
       "_source": "Anthropic API — 401 with authentication_error + expired hint",
@@ -16,7 +15,6 @@
       "message": "OAuth token expired, please re-authenticate to continue"
     }
   ],
   "credentials-invalid": [
     {
       "_source": "Anthropic API — 401 with invalid_api_key",
@@ -40,7 +38,6 @@
       "message": "Invalid API key"
     }
   ],
   "credit-exhausted": [
     {
       "_source": "Anthropic API — 402 credit_balance_too_low",
@@ -56,23 +53,7 @@
       "message": "credit balance insufficient"
     }
   ],
-  "quota-exhausted": [
-    {
-      "_source": "Anthropic API — 529 overloaded_error (Claude Code converts to quota-exhausted)",
-      "status": 529,
-      "error": {
-        "type": "overloaded_error",
-        "message": "Overloaded"
-      }
-    },
-    {
-      "_source": "Synthetic — set by session-tail after repeated 429 + slot exhaustion",
-      "type": "overloaded_error",
-      "message": "Service overloaded, usage limits reached"
-    }
-  ],
+  "quota-exhausted": [],
   "rate-limited": [
     {
       "_source": "Anthropic API — 429 rate_limit_error",
@@ -86,9 +67,21 @@
       "_source": "Top-level rate_limit_error",
       "type": "rate_limit_error",
       "message": "rate limit exceeded"
+    },
+    {
+      "_source": "Anthropic API — 529 overloaded_error (transient server capacity → rate-limited, NOT quota)",
+      "status": 529,
+      "error": {
+        "type": "overloaded_error",
+        "message": "Overloaded"
+      }
+    },
+    {
+      "_source": "Synthetic — overloaded_error from session-tail (transient → rate-limited, NOT quota)",
+      "type": "overloaded_error",
+      "message": "Service overloaded, usage limits reached"
     }
   ],
   "agent-crashed": [
     {
       "_source": "Synthetic — emitted by IPC bridge when Claude child exits nonzero",
@@ -101,7 +94,6 @@
       "message": "IPC socket disconnected unexpectedly"
     }
   ],
   "agent-restarted-unexpectedly": [
     {
       "_source": "Synthetic — emitted by gateway boot-banner diff when uptime drops unexpectedly",
@@ -114,7 +106,6 @@
       "message": "systemd unit restarted outside of operator request"
     }
   ],
   "unknown-4xx": [
     {
       "_source": "Novel 4xx not matching any known Anthropic error type",
@@ -142,7 +133,6 @@
       "_value": "something went wrong"
     }
   ],
   "unknown-5xx": [
     {
       "_source": "500 with no recognised type",

package/telegram-plugin/operator-events.ts CHANGED Viewed

@@ -139,8 +139,17 @@ function classifyInner(raw: unknown): OperatorEventKind {
     message.toLowerCase().includes('overloaded_error') ||
     message.toLowerCase().includes('overloaded')
   ) {
-    // Anthropic overloaded = quota exhausted / service rate-limiting
-    return 'quota-exhausted'
+    // Anthropic "overloaded" (HTTP 529) is transient SERVER-side
+    // capacity pressure — orthogonal to account quota. It is retryable
+    // (`x-should-retry: true`) and Claude Code retries it internally.
+    // Classifying it `quota-exhausted` fired a false "Model
+    // unavailable — quota exhausted" card AND a self-cancelling fleet
+    // auto-fallback on every 529 (the active account always probes
+    // healthy — nothing is actually exhausted — so the fallback no-ops
+    // with "probed healthy / Stale event?"). It is a rate-limit-family
+    // transient; failing over to another account does nothing because
+    // every account is equally affected.
+    return 'rate-limited'
   }
   // Synthetic kinds (non-Anthropic — set by session-tail or IPC bridge)

package/telegram-plugin/session-tail.ts CHANGED Viewed

@@ -409,9 +409,37 @@ export function projectSubagentLine(
  * Returns null when no actionable error is detected (routine lines).
  * Never throws — delegates to classifyClaudeError's own safety guarantee.
  */
+/**
+ * Pull Claude Code's retry bookkeeping off a parsed transcript line.
+ * Claude Code writes top-level `retryAttempt` / `maxRetries` on a
+ * retried API error (e.g. a 529 it is internally retrying); callers
+ * use the pair to tell an in-flight retry from an exhausted (terminal)
+ * one. Each field is returned as-is when it is a number and as `null`
+ * otherwise — non-retried errors and older Claude Code versions omit
+ * them.
+ *
+ * NOTE(review): this helper and its doc comment sit directly after a
+ * JSDoc block ("Returns null when no actionable error is detected …")
+ * that appears to describe `detectErrorInTranscriptLine` below, which
+ * leaves that block detached from its function — consider moving this
+ * helper above it.
+ */
+function extractRetryState(obj: Record<string, unknown>): {
+  retryAttempt: number | null
+  maxRetries: number | null
+} {
+  const numberOrNull = (v: unknown): number | null => (typeof v === 'number' ? v : null)
+  const { retryAttempt, maxRetries } = obj
+  return {
+    retryAttempt: numberOrNull(retryAttempt),
+    maxRetries: numberOrNull(maxRetries),
+  }
+}
 export function detectErrorInTranscriptLine(
   line: string,
-): { kind: OperatorEventKind; raw: unknown; detail: string } | null {
+): {
+  kind: OperatorEventKind
+  raw: unknown
+  detail: string
+  /** True for the rate-limit / transient-overload family. */
+  transient: boolean
+  /** True when the error is final — NOT an in-flight retry. A transient
+   *  error mid-retry is `transient:true, terminal:false`; the caller
+   *  suppresses it (no operator card until the failure is terminal). */
+  terminal: boolean
+} | null {
   if (!line || line.length > 2 * 1024 * 1024) return null
   let obj: Record<string, unknown>
   try {
@@ -447,7 +475,16 @@ export function detectErrorInTranscriptLine(
       status === 429
         ? 'quota-exhausted'
         : classifyClaudeError({ type: errStr, status, message: text })
-    return { kind, raw: obj, detail: text || errStr || 'api error' }
+    // An `isApiErrorMessage` line is Claude surfacing the failure to the
+    // user — terminal by construction (Claude writes this shape only
+    // after its own internal retries are exhausted).
+    return {
+      kind,
+      raw: obj,
+      detail: text || errStr || 'api error',
+      transient: kind === 'rate-limited',
+      terminal: true,
+    }
   }
   // Explicit error line types from Claude Code JSONL
@@ -472,7 +509,23 @@ export function detectErrorInTranscriptLine(
     extractDetailMessage(obj) ??
     String(type ?? '')
-  return { kind, raw, detail }
+  // Transient = the rate-limit / overload family. For a transient,
+  // decide `terminal` from Claude Code's retry annotations: below the
+  // cap → still retrying (in-flight); at/above → exhausted. With no
+  // retry state, an explicit `type:"api_error"`/`"error"` LINE means
+  // Claude surfaced the failure (terminal); an embedded-error object
+  // with no retry state is ambiguous → treat as in-flight and suppress
+  // (the silence-poke covers a genuinely stuck turn; a false card is
+  // the bug we are fixing, a missed ambiguous card costs nothing).
+  const transient = kind === 'rate-limited'
+  const retry = extractRetryState(obj)
+  const terminal = !transient
+    ? true
+    : retry.retryAttempt != null && retry.maxRetries != null
+      ? retry.retryAttempt >= retry.maxRetries
+      : isErrorLine
+  return { kind, raw, detail, transient, terminal }
 }
 function extractDetailMessage(obj: Record<string, unknown> | null): string | null {
@@ -514,6 +567,10 @@ export interface TailOperatorEvent {
   kind: OperatorEventKind
   detail: string
   raw: unknown
+  /** True for the rate-limit / transient-overload family. */
+  transient: boolean
+  /** True when the failure is final, not an in-flight retry. */
+  terminal: boolean
 }
 export interface SessionTailConfig {
@@ -665,7 +722,17 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
           try {
             const errEvent = detectErrorInTranscriptLine(line)
             if (errEvent) {
-              onOperatorEvent(errEvent)
+              // Honest escalation: a transient overload Claude is still
+              // retrying (transient && !terminal) posts NO operator
+              // card — it almost always resolves on the next retry.
+              // Escalate only terminal failures + non-transient errors.
+              if (errEvent.terminal || !errEvent.transient) {
+                onOperatorEvent(errEvent)
+              } else {
+                log?.(
+                  `session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`,
+                )
+              }
             }
           } catch (err) {
             log?.(`session-tail: onOperatorEvent threw: ${(err as Error).message}`)

package/telegram-plugin/subagent-watcher.ts CHANGED Viewed

@@ -105,6 +105,15 @@ export interface WorkerEntry {
   stallTerminalSynthesised: boolean
   /** Short summary from last completed tool / narrative, for completion message. */
   lastSummaryLine: string
+  /**
+   * Full text (capped at SUBAGENT_RESULT_TEXT_MAX) of the most recent
+   * `sub_agent_text` emission. For a worker the final such line before
+   * `turn_end` is its result summary. Carried to the gateway via
+   * `onFinish` so a background sub-agent's result can be handed back to
+   * the user (conversational-pacing beat 4). Empty until the first
+   * narrative line.
+   */
+  lastResultText: string
   /**
    * Most recent tool call observed on this sub-agent's JSONL tail —
    * tool name + sanitised arg for fleet-row display (P0 of #662). Null
@@ -270,6 +279,12 @@ export interface SubagentWatcherConfig {
     outcome: 'completed' | 'failed' | 'orphan'
     toolCount: number
     durationMs: number
+    /** Dispatch-time task description, for the handback envelope. */
+    description: string
+    /** The worker's final narrative emission (capped). May be empty if
+     *  no `sub_agent_text` line was ever observed. Feeds the
+     *  `subagent_handback` inbound. */
+    resultText: string
   }) => void
   /** `Date.now` override for tests. */
   now?: () => number
@@ -321,6 +336,15 @@ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
  */
 const DEFAULT_SILENT_STALL_TERMINAL_MS = 300_000
+/**
+ * Cap on the result text retained per sub-agent (`entry.lastResultText`)
+ * and carried to the gateway via `onFinish`. The gateway feeds this into
+ * the `subagent_handback` inbound; the model synthesises a fresh
+ * user-facing summary from it, so the full transcript is never needed
+ * and an unbounded retain would bloat the parent's context.
+ *
+ * NOTE(review): this equals `HANDBACK_RESULT_MAX` (3000) in
+ * `subagent-handback-inbound-builder.ts`. Text already cut to this
+ * length here is never longer than that cap, so the builder's `…`
+ * marker presumably never appears for result text and the model gets
+ * no signal that the report was shortened — confirm whether that is
+ * intended.
+ */
+const SUBAGENT_RESULT_TEXT_MAX = 3000
 /**
  * Resolve a threshold-knob env var (e.g.
  * `SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS`) to a positive integer ms
@@ -580,6 +604,16 @@ function readSubTail(
           // and must remain stable. Overwriting it with the sub-agent's first
           // narrative line caused a race-condition-dependent display (issue #352).
           entry.lastSummaryLine = ev.text.split('\n')[0].trim().slice(0, 120)
+          // Retain the full text of the most recent narrative emission —
+          // for a worker the final such line before turn_end IS its
+          // result summary (the worker prompt asks it to "return a
+          // concise summary"). Carried to the gateway via onFinish so a
+          // *background* sub-agent's result can be handed back to the
+          // user (conversational-pacing beat 4). Replace-on-write +
+          // capped: this is the worker's intended output, never tool
+          // args or file content — consistent with the watcher's
+          // "descriptions only" privacy posture.
+          entry.lastResultText = ev.text.trim().slice(0, SUBAGENT_RESULT_TEXT_MAX)
         } else if (ev.kind === 'sub_agent_turn_end') {
           if (entry.state === 'running') {
             entry.state = 'done'
@@ -750,6 +784,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
       completionNotified: false,
       stallTerminalSynthesised: false,
       lastSummaryLine: '',
+      lastResultText: '',
       lastTool: null,
       historical: isHistorical,
     }
@@ -850,6 +885,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
             outcome: entry.historical ? 'orphan' : 'completed',
             toolCount: entry.toolCount,
             durationMs: nowFn() - entry.dispatchedAt,
+            description: entry.description,
+            resultText: entry.lastResultText,
           })
         } catch (cbErr) {
           log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
@@ -869,6 +906,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
             outcome: 'failed',
             toolCount: entry.toolCount,
             durationMs: nowFn() - entry.dispatchedAt,
+            description: entry.description,
+            resultText: entry.lastResultText,
           })
         } catch (cbErr) {
           log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)

package/telegram-plugin/tests/model-unavailable.test.ts CHANGED Viewed

@@ -247,9 +247,22 @@ describe('resolveModelUnavailableFromOperatorEvent — kind-driven mapping', ()
     expect(d?.kind).toBe('quota_exhausted')
   })
-  it('always treats kind=rate-limited as overload', () => {
+  it('treats a bare kind=rate-limited as NOT model-unavailable (transient → calm card)', () => {
+    // A transient overload / rate-limit is retryable — Claude Code
+    // retries it internally. resolveModelUnavailableFromOperatorEvent
+    // returns null so the gateway renders the calm `rate-limited` card,
+    // never the scary "⚠️ Model unavailable" one. Returning
+    // `{kind:'overload'}` here is what fired a false card on every 529.
     const d = resolveModelUnavailableFromOperatorEvent({ kind: 'rate-limited', detail: '' })
-    expect(d?.kind).toBe('overload')
+    expect(d).toBeNull()
+  })
+  it('escalates a kind=rate-limited that carries a genuine quota signal', () => {
+    const d = resolveModelUnavailableFromOperatorEvent({
+      kind: 'rate-limited',
+      detail: "You've hit your limit · resets 8:50am",
+    })
+    expect(d?.kind).toBe('quota_exhausted')
   })
   it('always treats kind=unknown-5xx as overload', () => {