switchroom 0.13.9 → 0.13.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dist/cli/switchroom.js +38 -14
  2. package/dist/host-control/main.js +222 -7
  3. package/examples/switchroom.yaml +25 -7
  4. package/package.json +1 -1
  5. package/profiles/_shared/telegram-style.md.hbs +1 -1
  6. package/telegram-plugin/dist/bridge/bridge.js +23 -4
  7. package/telegram-plugin/dist/gateway/gateway.js +540 -147
  8. package/telegram-plugin/dist/server.js +23 -4
  9. package/telegram-plugin/gateway/config-approval-handler.test.ts +246 -0
  10. package/telegram-plugin/gateway/config-approval-handler.ts +284 -0
  11. package/telegram-plugin/gateway/gateway.ts +218 -25
  12. package/telegram-plugin/gateway/ipc-protocol.ts +72 -2
  13. package/telegram-plugin/gateway/ipc-server.ts +101 -0
  14. package/telegram-plugin/gateway/subagent-handback-inbound-builder.ts +185 -0
  15. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +69 -0
  16. package/telegram-plugin/model-unavailable.ts +11 -1
  17. package/telegram-plugin/operator-events.fixtures.json +14 -24
  18. package/telegram-plugin/operator-events.ts +11 -2
  19. package/telegram-plugin/session-tail.ts +71 -4
  20. package/telegram-plugin/subagent-watcher.ts +39 -0
  21. package/telegram-plugin/tests/model-unavailable.test.ts +15 -2
  22. package/telegram-plugin/tests/operator-events-session-tail.test.ts +53 -2
  23. package/telegram-plugin/tests/operator-events.test.ts +14 -7
  24. package/telegram-plugin/tests/subagent-handback-decision.test.ts +112 -0
  25. package/telegram-plugin/tests/subagent-handback-inbound-builder.test.ts +105 -0
  26. package/telegram-plugin/tests/subagent-tracker-hooks.test.ts +61 -0
  27. package/telegram-plugin/tests/subagent-watcher.test.ts +67 -1
  28. package/telegram-plugin/uat/scenarios/jtbd-subagent-handback-dm.test.ts +95 -0
  29. package/profiles/default/CLAUDE.md +0 -193
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Pure builder for the synthetic `subagent_handback` inbound the gateway
3
+ * injects when a *background* sub-agent (worker / researcher) finishes.
4
+ *
5
+ * Why this exists (conversational-pacing beat 4 — the handback):
6
+ * A foreground sub-agent hands its result straight back as the `Task`
7
+ * tool result, in the parent's own turn — the model sees it in-context.
8
+ * A background sub-agent does not: it finishes decoupled from any turn
9
+ * boundary, and when it completes the parent agent is typically idle
10
+ * with no turn in flight to receive the result. Claude Code surfaces a
11
+ * background result only on the parent's *next* turn — for a Telegram
12
+ * agent that means the user must send another message before they ever
13
+ * hear back. The agent never proactively says "the worker's done".
14
+ *
15
+ * This builder produces the InboundMessage that closes that gap. The
16
+ * gateway's subagent-watcher `onFinish` callback (which already knows
17
+ * the moment a background sub-agent terminates) feeds the worker's
18
+ * result text in here; the gateway delivers the envelope through the
19
+ * same idle-drain path cron and vault-grant wake-ups use. The model
20
+ * wakes, sees `<channel source="subagent_handback">`, and synthesises a
21
+ * user-facing handback in its own voice — beat 4 made deterministic.
22
+ *
23
+ * Shape contract (mirrors `vault-grant-inbound-builders.ts`): the
24
+ * `meta.source` string is load-bearing — the MCP channel notification
25
+ * wraps it as `<channel source="subagent_handback">`. A regression that
26
+ * changes the source string or drops a meta field silently breaks the
27
+ * wake-up. Pinned by `subagent-handback-inbound-builder.test.ts`.
28
+ */
29
+
30
+ import type { InboundMessage } from './ipc-protocol.js'
31
+
32
+ /** Cap on the worker result text carried in the inbound. The model
33
+ * synthesises a fresh handback from it — the full transcript is never
34
+ * needed, and an unbounded paste bloats the parent's context. */
35
+ export const HANDBACK_RESULT_MAX = 3000
36
+ /** Cap on the dispatch-time task description echoed back for context. */
37
+ export const HANDBACK_DESC_MAX = 200
38
+
39
+ export interface SubagentHandbackContext {
40
+ /** Telegram chat the work was dispatched from — the synthesized
41
+ * handback turn lands here so it stays with the conversation. */
42
+ chatId: string
43
+ /** Dispatch-time task description (the sub-agent's `description`). */
44
+ taskDescription: string
45
+ /** The worker's final result text — its last narrative emission
46
+ * before terminating. May be empty if the watcher never observed a
47
+ * text line (rare: a worker that only ran tools then exited). */
48
+ resultText: string
49
+ /** Terminal outcome as classified by the watcher. */
50
+ outcome: 'completed' | 'failed'
51
+ }
52
+
53
/**
 * Trim `s` and cap it at `max` UTF-16 code units, appending `…` when
 * anything was cut off (a truncated result is therefore at most
 * `max + 1` units long).
 *
 * The cut never lands inside a surrogate pair: when the last kept unit
 * is a high surrogate whose low half would be dropped, that unit is
 * dropped as well. Without this an emoji straddling the limit leaves a
 * lone surrogate behind, which is not well-formed Unicode.
 *
 * @param s   Text to shorten; surrounding whitespace is removed first.
 * @param max Maximum number of code units kept before the ellipsis.
 * @returns The trimmed text, shortened and suffixed with `…` if needed.
 */
function truncate(s: string, max: number): string {
  const t = s.trim()
  if (t.length <= max) return t

  let end = max
  const lastKept = t.charCodeAt(end - 1)
  const firstDropped = t.charCodeAt(end)
  const splitsPair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff
  // Back off one unit rather than keep half of an astral character.
  if (splitsPair) end -= 1

  return t.slice(0, end) + '…'
}
57
+
58
/**
 * Assemble the synthetic `subagent_handback` InboundMessage for a
 * background sub-agent that has just terminated.
 *
 * The task description and result text are trimmed and capped
 * (`HANDBACK_DESC_MAX` / `HANDBACK_RESULT_MAX`); an empty description
 * becomes `(no description)`. The wording of the envelope depends on
 * `ctx.outcome`. `nowMs` supplies both `ts` and the synthetic
 * `messageId`, so passing a fixed value makes the output deterministic.
 */
export function buildSubagentHandbackInbound(opts: {
  ctx: SubagentHandbackContext
  nowMs?: number
}): InboundMessage {
  const { ctx } = opts
  const stamp = opts.nowMs ?? Date.now()
  const task = truncate(ctx.taskDescription, HANDBACK_DESC_MAX) || '(no description)'
  const report = truncate(ctx.resultText, HANDBACK_RESULT_MAX)

  let headline: string
  let reportSection: string
  let instruction: string

  if (ctx.outcome === 'failed') {
    headline = `🤝 A background worker you dispatched has FAILED.\n\n`
    // A failed worker with nothing to report gets no report section at all.
    reportSection = report ? `What it reported before failing:\n${report}\n\n` : ''
    instruction =
      `This is beat 4 — the handback. Tell the user plainly that the ` +
      `delegated work did not complete, what is known, and your ` +
      `recommended next step — one \`reply\` in your own voice. Do not ` +
      `stay silent.`
  } else {
    headline = `🤝 A background worker you dispatched has finished.\n\n`
    // A successful worker always gets a section, even if only to say it was silent.
    reportSection = report
      ? `What the worker reported:\n${report}\n\n`
      : `The worker left no summary text.\n\n`
    instruction =
      `This is beat 4 — the handback. Synthesise this for the user ` +
      `now: one \`reply\` in your own voice covering what the worker ` +
      `found and your recommended next step. Do NOT paste the raw ` +
      `report and do NOT stay silent — the user dispatched this and ` +
      `is waiting to hear back.`
  }

  const body = headline + `Task: ${task}\n\n` + reportSection + instruction

  return {
    type: 'inbound',
    chatId: ctx.chatId,
    // No Telegram message exists for this turn; the timestamp stands in as its id.
    messageId: stamp,
    user: 'subagent-watcher',
    userId: 0,
    ts: stamp,
    text: body,
    meta: {
      source: 'subagent_handback',
      outcome: ctx.outcome,
    },
  }
}
104
+
105
+ // ───────────────────────────────────────────────────────────────────────────
106
+ // Handback decision (pure — unit-testable gate for the gateway onFinish path)
107
+ // ───────────────────────────────────────────────────────────────────────────
108
+
109
+ /**
110
+ * Inputs to the handback decision. The gateway's `subagent-watcher`
111
+ * `onFinish` callback does the IO — resolves `isBackground` from the
112
+ * registry DB, `fleetChatId` from the progress-driver fleet, and
113
+ * `ownerChatId` from access.json — then hands the resolved values here.
114
+ * Keeping the *decision* pure makes the gate (which injects turns)
115
+ * testable without standing up a gateway.
116
+ */
117
+ export interface SubagentHandbackDecisionInput {
118
+ /** `SWITCHROOM_SUBAGENT_HANDBACK` env var value (any non-'0' = enabled). */
119
+ handbackEnvValue: string | undefined
120
+ /** Terminal outcome the watcher reported. */
121
+ outcome: 'completed' | 'failed' | 'orphan'
122
+ /** Whether the sub-agent was a background dispatch (registry DB flag).
123
+ * Foreground sub-agents hand back natively in the parent's turn. */
124
+ isBackground: boolean
125
+ /** Chat id from the progress-driver fleet entry; '' if not found. */
126
+ fleetChatId: string
127
+ /** Owner chat fallback (access.json allowFrom[0]); '' if none. */
128
+ ownerChatId: string
129
+ taskDescription: string
130
+ resultText: string
131
+ /** Deterministic clock for tests. */
132
+ nowMs?: number
133
+ }
134
+
135
+ /** Why a handback was NOT delivered — one of these, or `delivered`. */
136
+ export type SubagentHandbackSkipReason =
137
+ | 'env-disabled'
138
+ | 'outcome-not-terminal'
139
+ | 'foreground'
140
+ | 'no-chat'
141
+
142
+ export type SubagentHandbackDecision =
143
+ | { deliver: false; reason: SubagentHandbackSkipReason }
144
+ | { deliver: true; chatId: string; inbound: InboundMessage }
145
+
146
/**
 * Pure gate for the handback turn: given already-resolved inputs, either
 * name the reason no handback is sent or return the inbound to deliver.
 *
 * Checks run in this order and the first one that trips wins:
 *   1. `env-disabled` — `handbackEnvValue` is exactly `'0'`.
 *   2. `outcome-not-terminal` — outcome is neither `completed` nor
 *      `failed` (i.e. `orphan`).
 *   3. `foreground` — `isBackground` is false.
 *   4. `no-chat` — both `fleetChatId` and `ownerChatId` are empty.
 *
 * When every check passes, the fleet chat id is preferred over the
 * owner chat id and the inbound is built for that chat.
 */
export function decideSubagentHandback(
  input: SubagentHandbackDecisionInput,
): SubagentHandbackDecision {
  const skip = (reason: SubagentHandbackSkipReason): SubagentHandbackDecision => ({
    deliver: false,
    reason,
  })

  if (input.handbackEnvValue === '0') return skip('env-disabled')

  const { outcome } = input
  if (!(outcome === 'completed' || outcome === 'failed')) {
    return skip('outcome-not-terminal')
  }

  if (!input.isBackground) return skip('foreground')

  const targetChat = input.fleetChatId || input.ownerChatId
  if (!targetChat) return skip('no-chat')

  const builderOpts: { ctx: SubagentHandbackContext; nowMs?: number } = {
    ctx: {
      chatId: targetChat,
      taskDescription: input.taskDescription,
      resultText: input.resultText,
      outcome,
    },
  }
  // Only forward the clock override when the caller actually supplied one.
  if (input.nowMs !== undefined) builderOpts.nowMs = input.nowMs

  return {
    deliver: true,
    chatId: targetChat,
    inbound: buildSubagentHandbackInbound(builderOpts),
  }
}
@@ -249,6 +249,58 @@ function updateRow(dbPath, { id, status, resultSummary, now }, done) {
249
249
  })
250
250
  }
251
251
 
252
+ // ---------------------------------------------------------------------------
253
+ // Foreground handback nudge (conversational-pacing beat 4)
254
+ // ---------------------------------------------------------------------------
255
+
256
/**
 * Synchronously read the `background` flag for a subagent row.
 *
 * Used to gate the foreground handback nudge: a background sub-agent's
 * PostToolUse fires on the launch ACK rather than on completion, so it
 * must NOT be nudged here (the gateway's subagent-watcher handles the
 * background handback instead).
 *
 * The database handle is closed in `finally`, so a failing
 * `prepare()` / `get()` no longer leaves the connection open.
 *
 * @param dbPath Path to the registry database file.
 * @param id     Value matched against `subagents.id`.
 * @returns `1` when the stored flag is exactly `1` (background), `0` for
 *   any other stored value (foreground), or `null` when the answer is
 *   unknown: `resolveSyncSqlite()` returned nothing, no row matched, or
 *   any step threw.
 */
function readBackgroundFlagSync(dbPath, id) {
  // NOTE(review): presumably a synchronous SQLite Database constructor — confirm against resolveSyncSqlite.
  const DatabaseSync = resolveSyncSqlite()
  if (DatabaseSync == null) return null

  let db = null
  try {
    db = new DatabaseSync(dbPath)
    const row = db.prepare('SELECT background FROM subagents WHERE id = ?').get(id)
    if (row == null) return null
    return row.background === 1 ? 1 : 0
  } catch {
    // Fail-silent: an unreadable registry means "unknown", never a thrown hook.
    return null
  } finally {
    try {
      db?.close()
    } catch {
      /* a close failure must not replace the value already being returned */
    }
  }
}
277
+
278
/**
 * Write a PostToolUse hook result to stdout whose `additionalContext`
 * asks the parent agent to send the user a synthesised handback
 * (beat 4) rather than pasting the sub-agent's raw report or moving on
 * silently. The payload is a single JSON line.
 *
 * Best-effort: a failing stdout write is swallowed so the hook never
 * blocks the tool flow.
 */
function emitForegroundHandbackNudge() {
  const additionalContext = [
    'A sub-agent you dispatched just returned. Beat 4 — the handback: ',
    'before you move on, send the user a reply in your own voice that ',
    'synthesises what the sub-agent found and your next step. Do not ',
    'paste its raw report and do not go silent.',
  ].join('')

  const payload = {
    hookSpecificOutput: {
      hookEventName: 'PostToolUse',
      additionalContext,
    },
  }

  try {
    process.stdout.write(`${JSON.stringify(payload)}\n`)
  } catch {
    /* stdout write failures never block the tool flow */
  }
}
303
+
252
304
  // ---------------------------------------------------------------------------
253
305
  // main
254
306
  // ---------------------------------------------------------------------------
@@ -292,6 +344,23 @@ function main() {
292
344
  if (!existsSync(dbPath)) process.exit(0)
293
345
 
294
346
  const toolResponse = event.tool_response ?? null
347
+
348
+ // conversational-pacing beat 4 (foreground half). A foreground
349
+ // sub-agent's PostToolUse fires at real completion, mid-parent-turn,
350
+ // with its result in tool_response — nudge the parent to synthesise a
351
+ // user-facing handback. Background sub-agents are gated OUT: their
352
+ // PostToolUse fires on the launch ACK (BACKGROUND_SQL leaves status
353
+ // untouched for that reason), and their handback is driven by the
354
+ // gateway's subagent-watcher onFinish path instead. Fail-silent: an
355
+ // unknown background flag (null) skips the nudge.
356
+ if (
357
+ process.env.SWITCHROOM_SUBAGENT_HANDBACK !== '0'
358
+ && detectStatus(toolResponse) === 'completed'
359
+ && readBackgroundFlagSync(dbPath, id) === 0
360
+ ) {
361
+ emitForegroundHandbackNudge()
362
+ }
363
+
295
364
  updateRow(
296
365
  dbPath,
297
366
  {
@@ -326,7 +326,17 @@ export function resolveModelUnavailableFromOperatorEvent(
326
326
  return detectModelUnavailable(detail) ?? { kind: 'quota_exhausted', raw: detail }
327
327
  }
328
328
  if (ev.kind === 'rate-limited') {
329
- return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
329
+ // A rate-limited / transient overload is NOT "model unavailable" —
330
+ // it is retryable and Claude Code retries it internally. Escalate
331
+ // to the model-unavailable card ONLY if the detail carries a
332
+ // genuine quota signal (a 4xx that slipped past the classifier
333
+ // with usage-limit wording in its body). A bare overload /
334
+ // rate-limit returns null → the caller renders the calm
335
+ // `rate-limited` card, never the scary "⚠️ Model unavailable" one.
336
+ // Returning `{kind:'overload'}` here is what fired a false
337
+ // model-unavailable card on every transient 529.
338
+ const detected = detectModelUnavailable(detail)
339
+ return detected?.kind === 'quota_exhausted' ? detected : null
330
340
  }
331
341
  if (ev.kind === 'unknown-5xx') {
332
342
  return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
@@ -1,6 +1,5 @@
1
1
  {
2
2
  "_comment": "Captured error shapes per OperatorEventKind. Real API keys/IDs have been scrubbed.",
3
-
4
3
  "credentials-expired": [
5
4
  {
6
5
  "_source": "Anthropic API — 401 with authentication_error + expired hint",
@@ -16,7 +15,6 @@
16
15
  "message": "OAuth token expired, please re-authenticate to continue"
17
16
  }
18
17
  ],
19
-
20
18
  "credentials-invalid": [
21
19
  {
22
20
  "_source": "Anthropic API — 401 with invalid_api_key",
@@ -40,7 +38,6 @@
40
38
  "message": "Invalid API key"
41
39
  }
42
40
  ],
43
-
44
41
  "credit-exhausted": [
45
42
  {
46
43
  "_source": "Anthropic API — 402 credit_balance_too_low",
@@ -56,23 +53,7 @@
56
53
  "message": "credit balance insufficient"
57
54
  }
58
55
  ],
59
-
60
- "quota-exhausted": [
61
- {
62
- "_source": "Anthropic API — 529 overloaded_error (Claude Code converts to quota-exhausted)",
63
- "status": 529,
64
- "error": {
65
- "type": "overloaded_error",
66
- "message": "Overloaded"
67
- }
68
- },
69
- {
70
- "_source": "Synthetic — set by session-tail after repeated 429 + slot exhaustion",
71
- "type": "overloaded_error",
72
- "message": "Service overloaded, usage limits reached"
73
- }
74
- ],
75
-
56
+ "quota-exhausted": [],
76
57
  "rate-limited": [
77
58
  {
78
59
  "_source": "Anthropic API — 429 rate_limit_error",
@@ -86,9 +67,21 @@
86
67
  "_source": "Top-level rate_limit_error",
87
68
  "type": "rate_limit_error",
88
69
  "message": "rate limit exceeded"
70
+ },
71
+ {
72
+ "_source": "Anthropic API — 529 overloaded_error (transient server capacity → rate-limited, NOT quota)",
73
+ "status": 529,
74
+ "error": {
75
+ "type": "overloaded_error",
76
+ "message": "Overloaded"
77
+ }
78
+ },
79
+ {
80
+ "_source": "Synthetic — overloaded_error from session-tail (transient → rate-limited, NOT quota)",
81
+ "type": "overloaded_error",
82
+ "message": "Service overloaded, usage limits reached"
89
83
  }
90
84
  ],
91
-
92
85
  "agent-crashed": [
93
86
  {
94
87
  "_source": "Synthetic — emitted by IPC bridge when Claude child exits nonzero",
@@ -101,7 +94,6 @@
101
94
  "message": "IPC socket disconnected unexpectedly"
102
95
  }
103
96
  ],
104
-
105
97
  "agent-restarted-unexpectedly": [
106
98
  {
107
99
  "_source": "Synthetic — emitted by gateway boot-banner diff when uptime drops unexpectedly",
@@ -114,7 +106,6 @@
114
106
  "message": "systemd unit restarted outside of operator request"
115
107
  }
116
108
  ],
117
-
118
109
  "unknown-4xx": [
119
110
  {
120
111
  "_source": "Novel 4xx not matching any known Anthropic error type",
@@ -142,7 +133,6 @@
142
133
  "_value": "something went wrong"
143
134
  }
144
135
  ],
145
-
146
136
  "unknown-5xx": [
147
137
  {
148
138
  "_source": "500 with no recognised type",
@@ -139,8 +139,17 @@ function classifyInner(raw: unknown): OperatorEventKind {
139
139
  message.toLowerCase().includes('overloaded_error') ||
140
140
  message.toLowerCase().includes('overloaded')
141
141
  ) {
142
- // Anthropic overloaded = quota exhausted / service rate-limiting
143
- return 'quota-exhausted'
142
+ // Anthropic "overloaded" (HTTP 529) is transient SERVER-side
143
+ // capacity pressure — orthogonal to account quota. It is retryable
144
+ // (`x-should-retry: true`) and Claude Code retries it internally.
145
+ // Classifying it `quota-exhausted` fired a false "Model
146
+ // unavailable — quota exhausted" card AND a self-cancelling fleet
147
+ // auto-fallback on every 529 (the active account always probes
148
+ // healthy — nothing is actually exhausted — so the fallback no-ops
149
+ // with "probed healthy / Stale event?"). It is a rate-limit-family
150
+ // transient; failing over to another account does nothing because
151
+ // every account is equally affected.
152
+ return 'rate-limited'
144
153
  }
145
154
 
146
155
  // Synthetic kinds (non-Anthropic — set by session-tail or IPC bridge)
@@ -409,9 +409,37 @@ export function projectSubagentLine(
409
409
  * Returns null when no actionable error is detected (routine lines).
410
410
  * Never throws — delegates to classifyClaudeError's own safety guarantee.
411
411
  */
412
/**
 * Pull the top-level `retryAttempt` / `maxRetries` fields off a parsed
 * transcript line. Each field is returned only when it is of type
 * `number`; anything else (missing, string, object, …) comes back as
 * `null`. The caller uses the pair to tell an in-flight retry from an
 * exhausted one, and must cope with either field being absent.
 */
function extractRetryState(obj: Record<string, unknown>): {
  retryAttempt: number | null
  maxRetries: number | null
} {
  const numberOrNull = (value: unknown): number | null =>
    typeof value === 'number' ? value : null

  const { retryAttempt, maxRetries } = obj
  return {
    retryAttempt: numberOrNull(retryAttempt),
    maxRetries: numberOrNull(maxRetries),
  }
}
429
+
412
430
  export function detectErrorInTranscriptLine(
413
431
  line: string,
414
- ): { kind: OperatorEventKind; raw: unknown; detail: string } | null {
432
+ ): {
433
+ kind: OperatorEventKind
434
+ raw: unknown
435
+ detail: string
436
+ /** True for the rate-limit / transient-overload family. */
437
+ transient: boolean
438
+ /** True when the error is final — NOT an in-flight retry. A transient
439
+ * error mid-retry is `transient:true, terminal:false`; the caller
440
+ * suppresses it (no operator card until the failure is terminal). */
441
+ terminal: boolean
442
+ } | null {
415
443
  if (!line || line.length > 2 * 1024 * 1024) return null
416
444
  let obj: Record<string, unknown>
417
445
  try {
@@ -447,7 +475,16 @@ export function detectErrorInTranscriptLine(
447
475
  status === 429
448
476
  ? 'quota-exhausted'
449
477
  : classifyClaudeError({ type: errStr, status, message: text })
450
- return { kind, raw: obj, detail: text || errStr || 'api error' }
478
+ // An `isApiErrorMessage` line is Claude surfacing the failure to the
479
+ // user — terminal by construction (Claude writes this shape only
480
+ // after its own internal retries are exhausted).
481
+ return {
482
+ kind,
483
+ raw: obj,
484
+ detail: text || errStr || 'api error',
485
+ transient: kind === 'rate-limited',
486
+ terminal: true,
487
+ }
451
488
  }
452
489
 
453
490
  // Explicit error line types from Claude Code JSONL
@@ -472,7 +509,23 @@ export function detectErrorInTranscriptLine(
472
509
  extractDetailMessage(obj) ??
473
510
  String(type ?? '')
474
511
 
475
- return { kind, raw, detail }
512
+ // Transient = the rate-limit / overload family. For a transient,
513
+ // decide `terminal` from Claude Code's retry annotations: below the
514
+ // cap → still retrying (in-flight); at/above → exhausted. With no
515
+ // retry state, an explicit `type:"api_error"`/`"error"` LINE means
516
+ // Claude surfaced the failure (terminal); an embedded-error object
517
+ // with no retry state is ambiguous → treat as in-flight and suppress
518
+ // (the silence-poke covers a genuinely stuck turn; a false card is
519
+ // the bug we are fixing, a missed ambiguous card costs nothing).
520
+ const transient = kind === 'rate-limited'
521
+ const retry = extractRetryState(obj)
522
+ const terminal = !transient
523
+ ? true
524
+ : retry.retryAttempt != null && retry.maxRetries != null
525
+ ? retry.retryAttempt >= retry.maxRetries
526
+ : isErrorLine
527
+
528
+ return { kind, raw, detail, transient, terminal }
476
529
  }
477
530
 
478
531
  function extractDetailMessage(obj: Record<string, unknown> | null): string | null {
@@ -514,6 +567,10 @@ export interface TailOperatorEvent {
514
567
  kind: OperatorEventKind
515
568
  detail: string
516
569
  raw: unknown
570
+ /** True for the rate-limit / transient-overload family. */
571
+ transient: boolean
572
+ /** True when the failure is final, not an in-flight retry. */
573
+ terminal: boolean
517
574
  }
518
575
 
519
576
  export interface SessionTailConfig {
@@ -665,7 +722,17 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
665
722
  try {
666
723
  const errEvent = detectErrorInTranscriptLine(line)
667
724
  if (errEvent) {
668
- onOperatorEvent(errEvent)
725
+ // Honest escalation: a transient overload Claude is still
726
+ // retrying (transient && !terminal) posts NO operator
727
+ // card — it almost always resolves on the next retry.
728
+ // Escalate only terminal failures + non-transient errors.
729
+ if (errEvent.terminal || !errEvent.transient) {
730
+ onOperatorEvent(errEvent)
731
+ } else {
732
+ log?.(
733
+ `session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`,
734
+ )
735
+ }
669
736
  }
670
737
  } catch (err) {
671
738
  log?.(`session-tail: onOperatorEvent threw: ${(err as Error).message}`)
@@ -105,6 +105,15 @@ export interface WorkerEntry {
105
105
  stallTerminalSynthesised: boolean
106
106
  /** Short summary from last completed tool / narrative, for completion message. */
107
107
  lastSummaryLine: string
108
+ /**
109
+ * Full text (capped at SUBAGENT_RESULT_TEXT_MAX) of the most recent
110
+ * `sub_agent_text` emission. For a worker the final such line before
111
+ * `turn_end` is its result summary. Carried to the gateway via
112
+ * `onFinish` so a background sub-agent's result can be handed back to
113
+ * the user (conversational-pacing beat 4). Empty until the first
114
+ * narrative line.
115
+ */
116
+ lastResultText: string
108
117
  /**
109
118
  * Most recent tool call observed on this sub-agent's JSONL tail —
110
119
  * tool name + sanitised arg for fleet-row display (P0 of #662). Null
@@ -270,6 +279,12 @@ export interface SubagentWatcherConfig {
270
279
  outcome: 'completed' | 'failed' | 'orphan'
271
280
  toolCount: number
272
281
  durationMs: number
282
+ /** Dispatch-time task description, for the handback envelope. */
283
+ description: string
284
+ /** The worker's final narrative emission (capped). May be empty if
285
+ * no `sub_agent_text` line was ever observed. Feeds the
286
+ * `subagent_handback` inbound. */
287
+ resultText: string
273
288
  }) => void
274
289
  /** `Date.now` override for tests. */
275
290
  now?: () => number
@@ -321,6 +336,15 @@ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
321
336
  */
322
337
  const DEFAULT_SILENT_STALL_TERMINAL_MS = 300_000
323
338
 
339
+ /**
340
+ * Cap on the result text retained per sub-agent (`entry.lastResultText`)
341
+ * and carried to the gateway via `onFinish`. The gateway feeds this into
342
+ * the `subagent_handback` inbound; the model synthesises a fresh
343
+ * user-facing summary from it, so the full transcript is never needed
344
+ * and an unbounded retain would bloat the parent's context.
345
+ */
346
+ const SUBAGENT_RESULT_TEXT_MAX = 3000
347
+
324
348
  /**
325
349
  * Resolve a threshold-knob env var (e.g.
326
350
  * `SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS`) to a positive integer ms
@@ -580,6 +604,16 @@ function readSubTail(
580
604
  // and must remain stable. Overwriting it with the sub-agent's first
581
605
  // narrative line caused a race-condition-dependent display (issue #352).
582
606
  entry.lastSummaryLine = ev.text.split('\n')[0].trim().slice(0, 120)
607
+ // Retain the full text of the most recent narrative emission —
608
+ // for a worker the final such line before turn_end IS its
609
+ // result summary (the worker prompt asks it to "return a
610
+ // concise summary"). Carried to the gateway via onFinish so a
611
+ // *background* sub-agent's result can be handed back to the
612
+ // user (conversational-pacing beat 4). Replace-on-write +
613
+ // capped: this is the worker's intended output, never tool
614
+ // args or file content — consistent with the watcher's
615
+ // "descriptions only" privacy posture.
616
+ entry.lastResultText = ev.text.trim().slice(0, SUBAGENT_RESULT_TEXT_MAX)
583
617
  } else if (ev.kind === 'sub_agent_turn_end') {
584
618
  if (entry.state === 'running') {
585
619
  entry.state = 'done'
@@ -750,6 +784,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
750
784
  completionNotified: false,
751
785
  stallTerminalSynthesised: false,
752
786
  lastSummaryLine: '',
787
+ lastResultText: '',
753
788
  lastTool: null,
754
789
  historical: isHistorical,
755
790
  }
@@ -850,6 +885,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
850
885
  outcome: entry.historical ? 'orphan' : 'completed',
851
886
  toolCount: entry.toolCount,
852
887
  durationMs: nowFn() - entry.dispatchedAt,
888
+ description: entry.description,
889
+ resultText: entry.lastResultText,
853
890
  })
854
891
  } catch (cbErr) {
855
892
  log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
@@ -869,6 +906,8 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
869
906
  outcome: 'failed',
870
907
  toolCount: entry.toolCount,
871
908
  durationMs: nowFn() - entry.dispatchedAt,
909
+ description: entry.description,
910
+ resultText: entry.lastResultText,
872
911
  })
873
912
  } catch (cbErr) {
874
913
  log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
@@ -247,9 +247,22 @@ describe('resolveModelUnavailableFromOperatorEvent — kind-driven mapping', ()
247
247
  expect(d?.kind).toBe('quota_exhausted')
248
248
  })
249
249
 
250
- it('always treats kind=rate-limited as overload', () => {
250
+ it('treats a bare kind=rate-limited as NOT model-unavailable (transient → calm card)', () => {
251
+ // A transient overload / rate-limit is retryable — Claude Code
252
+ // retries it internally. resolveModelUnavailableFromOperatorEvent
253
+ // returns null so the gateway renders the calm `rate-limited` card,
254
+ // never the scary "⚠️ Model unavailable" one. Returning
255
+ // `{kind:'overload'}` here is what fired a false card on every 529.
251
256
  const d = resolveModelUnavailableFromOperatorEvent({ kind: 'rate-limited', detail: '' })
252
- expect(d?.kind).toBe('overload')
257
+ expect(d).toBeNull()
258
+ })
259
+
260
+ it('escalates a kind=rate-limited that carries a genuine quota signal', () => {
261
+ const d = resolveModelUnavailableFromOperatorEvent({
262
+ kind: 'rate-limited',
263
+ detail: "You've hit your limit · resets 8:50am",
264
+ })
265
+ expect(d?.kind).toBe('quota_exhausted')
253
266
  })
254
267
 
255
268
  it('always treats kind=unknown-5xx as overload', () => {