switchroom 0.13.10 → 0.13.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47314,8 +47314,8 @@ var {
47314
47314
  } = import__.default;
47315
47315
 
47316
47316
  // src/build-info.ts
47317
- var VERSION = "0.13.10";
47318
- var COMMIT_SHA = "e0fd6617";
47317
+ var VERSION = "0.13.11";
47318
+ var COMMIT_SHA = "5984798c";
47319
47319
 
47320
47320
  // src/cli/agent.ts
47321
47321
  init_source();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.13.10",
3
+ "version": "0.13.11",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -23004,7 +23004,7 @@ function classifyInner(raw) {
23004
23004
  return "rate-limited";
23005
23005
  }
23006
23006
  if (errorType === "overloaded_error" || errorCode === "overloaded_error" || sdkCode === "overloaded_error" || message.toLowerCase().includes("overloaded_error") || message.toLowerCase().includes("overloaded")) {
23007
- return "quota-exhausted";
23007
+ return "rate-limited";
23008
23008
  }
23009
23009
  if (errorType === "agent-crashed" || errorCode === "agent-crashed") {
23010
23010
  return "agent-crashed";
@@ -23349,6 +23349,12 @@ function projectSubagentLine(line, agentId, state) {
23349
23349
  }
23350
23350
  return [];
23351
23351
  }
23352
+ function extractRetryState(obj) {
23353
+ return {
23354
+ retryAttempt: typeof obj.retryAttempt === "number" ? obj.retryAttempt : null,
23355
+ maxRetries: typeof obj.maxRetries === "number" ? obj.maxRetries : null
23356
+ };
23357
+ }
23352
23358
  function detectErrorInTranscriptLine(line) {
23353
23359
  if (!line || line.length > 2 * 1024 * 1024)
23354
23360
  return null;
@@ -23366,7 +23372,13 @@ function detectErrorInTranscriptLine(line) {
23366
23372
  const errStr = typeof obj.error === "string" ? obj.error : "";
23367
23373
  const text = extractAssistantText(obj);
23368
23374
  const kind2 = status === 429 ? "quota-exhausted" : classifyClaudeError({ type: errStr, status, message: text });
23369
- return { kind: kind2, raw: obj, detail: text || errStr || "api error" };
23375
+ return {
23376
+ kind: kind2,
23377
+ raw: obj,
23378
+ detail: text || errStr || "api error",
23379
+ transient: kind2 === "rate-limited",
23380
+ terminal: true
23381
+ };
23370
23382
  }
23371
23383
  const isErrorLine = type === "api_error" || type === "error";
23372
23384
  const embeddedError = typeof obj.error === "object" && obj.error != null ? obj.error : null;
@@ -23375,7 +23387,10 @@ function detectErrorInTranscriptLine(line) {
23375
23387
  const raw = embeddedError ?? obj;
23376
23388
  const kind = classifyClaudeError(embeddedError ?? obj);
23377
23389
  const detail = extractDetailMessage(embeddedError) ?? extractDetailMessage(obj) ?? String(type ?? "");
23378
- return { kind, raw, detail };
23390
+ const transient = kind === "rate-limited";
23391
+ const retry = extractRetryState(obj);
23392
+ const terminal = !transient ? true : retry.retryAttempt != null && retry.maxRetries != null ? retry.retryAttempt >= retry.maxRetries : isErrorLine;
23393
+ return { kind, raw, detail, transient, terminal };
23379
23394
  }
23380
23395
  function extractDetailMessage(obj) {
23381
23396
  if (!obj)
@@ -23497,7 +23512,11 @@ function startSessionTail(config2) {
23497
23512
  try {
23498
23513
  const errEvent = detectErrorInTranscriptLine(line);
23499
23514
  if (errEvent) {
23500
- onOperatorEvent(errEvent);
23515
+ if (errEvent.terminal || !errEvent.transient) {
23516
+ onOperatorEvent(errEvent);
23517
+ } else {
23518
+ log?.(`session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`);
23519
+ }
23501
23520
  }
23502
23521
  } catch (err) {
23503
23522
  log?.(`session-tail: onOperatorEvent threw: ${err.message}`);
@@ -39632,7 +39632,8 @@ function resolveModelUnavailableFromOperatorEvent(ev) {
39632
39632
  return detectModelUnavailable(detail) ?? { kind: "quota_exhausted", raw: detail };
39633
39633
  }
39634
39634
  if (ev.kind === "rate-limited") {
39635
- return detectModelUnavailable(detail) ?? { kind: "overload", raw: detail };
39635
+ const detected = detectModelUnavailable(detail);
39636
+ return detected?.kind === "quota_exhausted" ? detected : null;
39636
39637
  }
39637
39638
  if (ev.kind === "unknown-5xx") {
39638
39639
  return detectModelUnavailable(detail) ?? { kind: "overload", raw: detail };
@@ -44782,6 +44783,31 @@ ${result}
44782
44783
  }
44783
44784
  };
44784
44785
  }
44786
+ function decideSubagentHandback(input) {
44787
+ if (input.handbackEnvValue === "0") {
44788
+ return { deliver: false, reason: "env-disabled" };
44789
+ }
44790
+ if (input.outcome !== "completed" && input.outcome !== "failed") {
44791
+ return { deliver: false, reason: "outcome-not-terminal" };
44792
+ }
44793
+ if (!input.isBackground) {
44794
+ return { deliver: false, reason: "foreground" };
44795
+ }
44796
+ const chatId = input.fleetChatId || input.ownerChatId;
44797
+ if (!chatId) {
44798
+ return { deliver: false, reason: "no-chat" };
44799
+ }
44800
+ const inbound = buildSubagentHandbackInbound({
44801
+ ctx: {
44802
+ chatId,
44803
+ taskDescription: input.taskDescription,
44804
+ resultText: input.resultText,
44805
+ outcome: input.outcome
44806
+ },
44807
+ ...input.nowMs !== undefined ? { nowMs: input.nowMs } : {}
44808
+ });
44809
+ return { deliver: true, chatId, inbound };
44810
+ }
44785
44811
 
44786
44812
  // gateway/poll-health.ts
44787
44813
  var DEFAULT_LOG = (msg) => {
@@ -48001,11 +48027,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
48001
48027
  }
48002
48028
 
48003
48029
  // ../src/build-info.ts
48004
- var VERSION = "0.13.10";
48005
- var COMMIT_SHA = "e0fd6617";
48006
- var COMMIT_DATE = "2026-05-22T12:01:29+10:00";
48030
+ var VERSION = "0.13.11";
48031
+ var COMMIT_SHA = "5984798c";
48032
+ var COMMIT_DATE = "2026-05-22T15:59:07+10:00";
48007
48033
  var LATEST_PR = null;
48008
- var COMMITS_AHEAD_OF_TAG = 6;
48034
+ var COMMITS_AHEAD_OF_TAG = 3;
48009
48035
 
48010
48036
  // gateway/boot-version.ts
48011
48037
  function formatRelativeAgo(iso) {
@@ -49628,7 +49654,7 @@ function emitGatewayOperatorEvent(event) {
49628
49654
  let renderedText;
49629
49655
  let renderedKeyboard;
49630
49656
  if (modelUnavailable) {
49631
- const isAutoKind = modelUnavailable.kind === "quota_exhausted" || modelUnavailable.kind === "overload";
49657
+ const isAutoKind = modelUnavailable.kind === "quota_exhausted";
49632
49658
  const willActuallyFire = isAutoKind && wouldFireFleetAutoFallback();
49633
49659
  process.stderr.write(`telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind} autoKind=${isAutoKind} willFire=${willActuallyFire}
49634
49660
  `);
@@ -57321,17 +57347,13 @@ var didOneTimeSetup = false;
57321
57347
  }
57322
57348
  },
57323
57349
  onFinish: ({ agentId, outcome, description, resultText }) => {
57324
- if (process.env.SWITCHROOM_SUBAGENT_HANDBACK === "0")
57325
- return;
57326
- if (outcome !== "completed" && outcome !== "failed")
57327
- return;
57328
- let chatId = "";
57350
+ let fleetChatId = "";
57329
57351
  let isBackground = false;
57330
57352
  try {
57331
57353
  const fleets = progressDriver?.peekAllFleets() ?? [];
57332
57354
  for (const f of fleets) {
57333
57355
  if (f.fleet.has(agentId)) {
57334
- chatId = f.chatId ?? "";
57356
+ fleetChatId = f.chatId ?? "";
57335
57357
  break;
57336
57358
  }
57337
57359
  }
@@ -57343,24 +57365,24 @@ var didOneTimeSetup = false;
57343
57365
  isBackground = row.background === 1;
57344
57366
  } catch {}
57345
57367
  }
57346
- if (!isBackground)
57347
- return;
57348
- const handbackChatId = chatId || (loadAccess().allowFrom[0] ?? "");
57349
- if (!handbackChatId) {
57350
- process.stderr.write(`telegram gateway: subagent-handback ${agentId} \u2014 no chat to deliver to; skipped
57368
+ const decision = decideSubagentHandback({
57369
+ handbackEnvValue: process.env.SWITCHROOM_SUBAGENT_HANDBACK,
57370
+ outcome,
57371
+ isBackground,
57372
+ fleetChatId,
57373
+ ownerChatId: loadAccess().allowFrom[0] ?? "",
57374
+ taskDescription: description,
57375
+ resultText
57376
+ });
57377
+ if (!decision.deliver) {
57378
+ if (decision.reason === "no-chat") {
57379
+ process.stderr.write(`telegram gateway: subagent-handback ${agentId} \u2014 no chat to deliver to; skipped
57351
57380
  `);
57381
+ }
57352
57382
  return;
57353
57383
  }
57354
- const inbound = buildSubagentHandbackInbound({
57355
- ctx: {
57356
- chatId: String(handbackChatId),
57357
- taskDescription: description,
57358
- resultText,
57359
- outcome
57360
- }
57361
- });
57362
- pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? "", inbound);
57363
- process.stderr.write(`telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${handbackChatId} resultChars=${resultText.length}
57384
+ pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? "", decision.inbound);
57385
+ process.stderr.write(`telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${decision.chatId} resultChars=${resultText.length}
57364
57386
  `);
57365
57387
  }
57366
57388
  });
@@ -17029,7 +17029,7 @@ function classifyInner(raw) {
17029
17029
  return "rate-limited";
17030
17030
  }
17031
17031
  if (errorType === "overloaded_error" || errorCode === "overloaded_error" || sdkCode === "overloaded_error" || message.toLowerCase().includes("overloaded_error") || message.toLowerCase().includes("overloaded")) {
17032
- return "quota-exhausted";
17032
+ return "rate-limited";
17033
17033
  }
17034
17034
  if (errorType === "agent-crashed" || errorCode === "agent-crashed") {
17035
17035
  return "agent-crashed";
@@ -17387,6 +17387,12 @@ function projectSubagentLine(line, agentId, state) {
17387
17387
  }
17388
17388
  return [];
17389
17389
  }
17390
+ function extractRetryState(obj) {
17391
+ return {
17392
+ retryAttempt: typeof obj.retryAttempt === "number" ? obj.retryAttempt : null,
17393
+ maxRetries: typeof obj.maxRetries === "number" ? obj.maxRetries : null
17394
+ };
17395
+ }
17390
17396
  function detectErrorInTranscriptLine(line) {
17391
17397
  if (!line || line.length > 2 * 1024 * 1024)
17392
17398
  return null;
@@ -17404,7 +17410,13 @@ function detectErrorInTranscriptLine(line) {
17404
17410
  const errStr = typeof obj.error === "string" ? obj.error : "";
17405
17411
  const text = extractAssistantText(obj);
17406
17412
  const kind2 = status === 429 ? "quota-exhausted" : classifyClaudeError({ type: errStr, status, message: text });
17407
- return { kind: kind2, raw: obj, detail: text || errStr || "api error" };
17413
+ return {
17414
+ kind: kind2,
17415
+ raw: obj,
17416
+ detail: text || errStr || "api error",
17417
+ transient: kind2 === "rate-limited",
17418
+ terminal: true
17419
+ };
17408
17420
  }
17409
17421
  const isErrorLine = type === "api_error" || type === "error";
17410
17422
  const embeddedError = typeof obj.error === "object" && obj.error != null ? obj.error : null;
@@ -17413,7 +17425,10 @@ function detectErrorInTranscriptLine(line) {
17413
17425
  const raw = embeddedError ?? obj;
17414
17426
  const kind = classifyClaudeError(embeddedError ?? obj);
17415
17427
  const detail = extractDetailMessage(embeddedError) ?? extractDetailMessage(obj) ?? String(type ?? "");
17416
- return { kind, raw, detail };
17428
+ const transient = kind === "rate-limited";
17429
+ const retry = extractRetryState(obj);
17430
+ const terminal = !transient ? true : retry.retryAttempt != null && retry.maxRetries != null ? retry.retryAttempt >= retry.maxRetries : isErrorLine;
17431
+ return { kind, raw, detail, transient, terminal };
17417
17432
  }
17418
17433
  function extractDetailMessage(obj) {
17419
17434
  if (!obj)
@@ -17535,7 +17550,11 @@ function startSessionTail(config2) {
17535
17550
  try {
17536
17551
  const errEvent = detectErrorInTranscriptLine(line);
17537
17552
  if (errEvent) {
17538
- onOperatorEvent(errEvent);
17553
+ if (errEvent.terminal || !errEvent.transient) {
17554
+ onOperatorEvent(errEvent);
17555
+ } else {
17556
+ log?.(`session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`);
17557
+ }
17539
17558
  }
17540
17559
  } catch (err) {
17541
17560
  log?.(`session-tail: onOperatorEvent threw: ${err.message}`);
@@ -281,7 +281,7 @@ import {
281
281
  buildVaultSaveFailedInbound,
282
282
  buildVaultSaveDiscardedInbound,
283
283
  } from './vault-grant-inbound-builders.js'
284
- import { buildSubagentHandbackInbound } from './subagent-handback-inbound-builder.js'
284
+ import { decideSubagentHandback } from './subagent-handback-inbound-builder.js'
285
285
  import { createPollHealthCheck, type PollHealthCheckHandle } from './poll-health.js'
286
286
  import type {
287
287
  ToolCallMessage,
@@ -2712,8 +2712,14 @@ function emitGatewayOperatorEvent(event: OperatorEvent): void {
2712
2712
  // Card text branches on the AND. wouldFireFleetAutoFallback is a
2713
2713
  // pure read of the dedup state; calling fireFleetAutoFallback only
2714
2714
  // when both are true keeps the card honest.
2715
- const isAutoKind =
2716
- modelUnavailable.kind === 'quota_exhausted' || modelUnavailable.kind === 'overload'
2715
+ // Only a genuine quota / usage-limit hit is addressable by fleet
2716
+ // auto-fallback (swap to an account that still has runway). An
2717
+ // `overload` is transient Anthropic SERVER-side capacity pressure —
2718
+ // every account is equally affected, so failing over does nothing;
2719
+ // it just produces a self-cancelling "probed healthy / Stale event?"
2720
+ // loop on every 529. Overload is handled by Claude Code's own
2721
+ // internal retry, not by switching accounts.
2722
+ const isAutoKind = modelUnavailable.kind === 'quota_exhausted'
2717
2723
  const willActuallyFire = isAutoKind && wouldFireFleetAutoFallback()
2718
2724
  process.stderr.write(
2719
2725
  `telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind} autoKind=${isAutoKind} willFire=${willActuallyFire}\n`,
@@ -15063,22 +15069,24 @@ void (async () => {
15063
15069
  // need nothing here, and 'orphan' is a stale historical-at-
15064
15070
  // boot row, not a fresh completion the user is waiting on.
15065
15071
  onFinish: ({ agentId, outcome, description, resultText }) => {
15066
- if (process.env.SWITCHROOM_SUBAGENT_HANDBACK === '0') return
15067
- if (outcome !== 'completed' && outcome !== 'failed') return
15068
-
15069
- let chatId = ''
15072
+ // IO: resolve the fleet chat id and the background flag.
15073
+ // The DECISION (gating + inbound build) is delegated to
15074
+ // the pure `decideSubagentHandback` so it is unit-tested
15075
+ // independent of the gateway — see
15076
+ // `subagent-handback-decision.test.ts`.
15077
+ let fleetChatId = ''
15070
15078
  let isBackground = false
15071
15079
  try {
15072
15080
  const fleets = progressDriver?.peekAllFleets() ?? []
15073
15081
  for (const f of fleets) {
15074
15082
  if (f.fleet.has(agentId)) {
15075
- chatId = f.chatId ?? ''
15083
+ fleetChatId = f.chatId ?? ''
15076
15084
  break
15077
15085
  }
15078
15086
  }
15079
15087
  } catch {
15080
15088
  // peek failures are non-fatal — fall through to the
15081
- // owner-chat fallback below.
15089
+ // owner-chat fallback inside decideSubagentHandback.
15082
15090
  }
15083
15091
  if (turnsDb != null) {
15084
15092
  try {
@@ -15088,36 +15096,36 @@ void (async () => {
15088
15096
  if (row != null) isBackground = row.background === 1
15089
15097
  } catch { /* best-effort */ }
15090
15098
  }
15091
- if (!isBackground) return
15092
-
15093
- // chatId fallback: if the progress-driver fleet entry was
15094
- // already cleaned up by the time onFinish fires, route to
15095
- // the owner chat. Every switchroom fleet agent is
15096
- // DM-shaped, so allowFrom[0] is the conversation that
15097
- // dispatched the work.
15098
- const handbackChatId = chatId || (loadAccess().allowFrom[0] ?? '')
15099
- if (!handbackChatId) {
15100
- process.stderr.write(
15101
- `telegram gateway: subagent-handback ${agentId} — no chat to deliver to; skipped\n`,
15102
- )
15099
+
15100
+ const decision = decideSubagentHandback({
15101
+ handbackEnvValue: process.env.SWITCHROOM_SUBAGENT_HANDBACK,
15102
+ outcome,
15103
+ isBackground,
15104
+ fleetChatId,
15105
+ // Owner-chat fallback: if the progress-driver fleet
15106
+ // entry was already cleaned up, route to the owner
15107
+ // chat. Every switchroom fleet agent is DM-shaped, so
15108
+ // allowFrom[0] is the conversation that dispatched.
15109
+ ownerChatId: loadAccess().allowFrom[0] ?? '',
15110
+ taskDescription: description,
15111
+ resultText,
15112
+ })
15113
+ if (!decision.deliver) {
15114
+ if (decision.reason === 'no-chat') {
15115
+ process.stderr.write(
15116
+ `telegram gateway: subagent-handback ${agentId} — no chat to deliver to; skipped\n`,
15117
+ )
15118
+ }
15103
15119
  return
15104
15120
  }
15105
15121
 
15106
- const inbound = buildSubagentHandbackInbound({
15107
- ctx: {
15108
- chatId: String(handbackChatId),
15109
- taskDescription: description,
15110
- resultText,
15111
- outcome,
15112
- },
15113
- })
15114
15122
  // Deliver via pendingInboundBuffer + the idle-drain tick.
15115
15123
  // The drain only releases at an idle prompt (no active
15116
15124
  // turn), so the handback always lands as a clean fresh
15117
15125
  // turn and never races a turn-in-flight composer (#1556).
15118
- pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? '', inbound)
15126
+ pendingInboundBuffer.push(process.env.SWITCHROOM_AGENT_NAME ?? '', decision.inbound)
15119
15127
  process.stderr.write(
15120
- `telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${handbackChatId} resultChars=${resultText.length}\n`,
15128
+ `telegram gateway: subagent-handback queued agent=${agentId} outcome=${outcome} chat=${decision.chatId} resultChars=${resultText.length}\n`,
15121
15129
  )
15122
15130
  },
15123
15131
  })
@@ -101,3 +101,85 @@ export function buildSubagentHandbackInbound(opts: {
101
101
  },
102
102
  }
103
103
  }
104
+
105
+ // ───────────────────────────────────────────────────────────────────────────
106
+ // Handback decision (pure — unit-testable gate for the gateway onFinish path)
107
+ // ───────────────────────────────────────────────────────────────────────────
108
+
109
+ /**
110
+ * Inputs to the handback decision. The gateway's `subagent-watcher`
111
+ * `onFinish` callback does the IO — resolves `isBackground` from the
112
+ * registry DB, `fleetChatId` from the progress-driver fleet, and
113
+ * `ownerChatId` from access.json — then hands the resolved values here.
114
+ * Keeping the *decision* pure makes the gate (which injects turns)
115
+ * testable without standing up a gateway.
116
+ */
117
+ export interface SubagentHandbackDecisionInput {
118
+ /** `SWITCHROOM_SUBAGENT_HANDBACK` env var value (any non-'0' = enabled). */
119
+ handbackEnvValue: string | undefined
120
+ /** Terminal outcome the watcher reported. */
121
+ outcome: 'completed' | 'failed' | 'orphan'
122
+ /** Whether the sub-agent was a background dispatch (registry DB flag).
123
+ * Foreground sub-agents hand back natively in the parent's turn. */
124
+ isBackground: boolean
125
+ /** Chat id from the progress-driver fleet entry; '' if not found. */
126
+ fleetChatId: string
127
+ /** Owner chat fallback (access.json allowFrom[0]); '' if none. */
128
+ ownerChatId: string
129
+ taskDescription: string
130
+ resultText: string
131
+ /** Deterministic clock for tests. */
132
+ nowMs?: number
133
+ }
134
+
135
+ /** Why a handback was NOT delivered — one of these, or `delivered`. */
136
+ export type SubagentHandbackSkipReason =
137
+ | 'env-disabled'
138
+ | 'outcome-not-terminal'
139
+ | 'foreground'
140
+ | 'no-chat'
141
+
142
+ export type SubagentHandbackDecision =
143
+ | { deliver: false; reason: SubagentHandbackSkipReason }
144
+ | { deliver: true; chatId: string; inbound: InboundMessage }
145
+
146
+ /**
147
+ * Decide whether a finished sub-agent warrants a handback turn, and if
148
+ * so build the inbound. Pure: all IO is the caller's job.
149
+ *
150
+ * Gates, in order:
151
+ * 1. kill-switch — `SWITCHROOM_SUBAGENT_HANDBACK=0` disables entirely.
152
+ * 2. outcome — only `completed`/`failed` hand back; `orphan` is a
153
+ * stale historical-at-boot row, not a fresh completion.
154
+ * 3. foreground — a foreground sub-agent already handed its result
155
+ * back as the Task tool result in the parent's own turn.
156
+ * 4. no-chat — neither the fleet entry nor the owner chat resolved,
157
+ * so there is nowhere to deliver.
158
+ */
159
+ export function decideSubagentHandback(
160
+ input: SubagentHandbackDecisionInput,
161
+ ): SubagentHandbackDecision {
162
+ if (input.handbackEnvValue === '0') {
163
+ return { deliver: false, reason: 'env-disabled' }
164
+ }
165
+ if (input.outcome !== 'completed' && input.outcome !== 'failed') {
166
+ return { deliver: false, reason: 'outcome-not-terminal' }
167
+ }
168
+ if (!input.isBackground) {
169
+ return { deliver: false, reason: 'foreground' }
170
+ }
171
+ const chatId = input.fleetChatId || input.ownerChatId
172
+ if (!chatId) {
173
+ return { deliver: false, reason: 'no-chat' }
174
+ }
175
+ const inbound = buildSubagentHandbackInbound({
176
+ ctx: {
177
+ chatId,
178
+ taskDescription: input.taskDescription,
179
+ resultText: input.resultText,
180
+ outcome: input.outcome,
181
+ },
182
+ ...(input.nowMs !== undefined ? { nowMs: input.nowMs } : {}),
183
+ })
184
+ return { deliver: true, chatId, inbound }
185
+ }
@@ -326,7 +326,17 @@ export function resolveModelUnavailableFromOperatorEvent(
326
326
  return detectModelUnavailable(detail) ?? { kind: 'quota_exhausted', raw: detail }
327
327
  }
328
328
  if (ev.kind === 'rate-limited') {
329
- return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
329
+ // A rate-limited / transient overload is NOT "model unavailable" —
330
+ // it is retryable and Claude Code retries it internally. Escalate
331
+ // to the model-unavailable card ONLY if the detail carries a
332
+ // genuine quota signal (a 4xx that slipped past the classifier
333
+ // with usage-limit wording in its body). A bare overload /
334
+ // rate-limit returns null → the caller renders the calm
335
+ // `rate-limited` card, never the scary "⚠️ Model unavailable" one.
336
+ // Returning `{kind:'overload'}` here is what fired a false
337
+ // model-unavailable card on every transient 529.
338
+ const detected = detectModelUnavailable(detail)
339
+ return detected?.kind === 'quota_exhausted' ? detected : null
330
340
  }
331
341
  if (ev.kind === 'unknown-5xx') {
332
342
  return detectModelUnavailable(detail) ?? { kind: 'overload', raw: detail }
@@ -1,6 +1,5 @@
1
1
  {
2
2
  "_comment": "Captured error shapes per OperatorEventKind. Real API keys/IDs have been scrubbed.",
3
-
4
3
  "credentials-expired": [
5
4
  {
6
5
  "_source": "Anthropic API — 401 with authentication_error + expired hint",
@@ -16,7 +15,6 @@
16
15
  "message": "OAuth token expired, please re-authenticate to continue"
17
16
  }
18
17
  ],
19
-
20
18
  "credentials-invalid": [
21
19
  {
22
20
  "_source": "Anthropic API — 401 with invalid_api_key",
@@ -40,7 +38,6 @@
40
38
  "message": "Invalid API key"
41
39
  }
42
40
  ],
43
-
44
41
  "credit-exhausted": [
45
42
  {
46
43
  "_source": "Anthropic API — 402 credit_balance_too_low",
@@ -56,23 +53,7 @@
56
53
  "message": "credit balance insufficient"
57
54
  }
58
55
  ],
59
-
60
- "quota-exhausted": [
61
- {
62
- "_source": "Anthropic API — 529 overloaded_error (Claude Code converts to quota-exhausted)",
63
- "status": 529,
64
- "error": {
65
- "type": "overloaded_error",
66
- "message": "Overloaded"
67
- }
68
- },
69
- {
70
- "_source": "Synthetic — set by session-tail after repeated 429 + slot exhaustion",
71
- "type": "overloaded_error",
72
- "message": "Service overloaded, usage limits reached"
73
- }
74
- ],
75
-
56
+ "quota-exhausted": [],
76
57
  "rate-limited": [
77
58
  {
78
59
  "_source": "Anthropic API — 429 rate_limit_error",
@@ -86,9 +67,21 @@
86
67
  "_source": "Top-level rate_limit_error",
87
68
  "type": "rate_limit_error",
88
69
  "message": "rate limit exceeded"
70
+ },
71
+ {
72
+ "_source": "Anthropic API — 529 overloaded_error (transient server capacity → rate-limited, NOT quota)",
73
+ "status": 529,
74
+ "error": {
75
+ "type": "overloaded_error",
76
+ "message": "Overloaded"
77
+ }
78
+ },
79
+ {
80
+ "_source": "Synthetic — overloaded_error from session-tail (transient → rate-limited, NOT quota)",
81
+ "type": "overloaded_error",
82
+ "message": "Service overloaded, usage limits reached"
89
83
  }
90
84
  ],
91
-
92
85
  "agent-crashed": [
93
86
  {
94
87
  "_source": "Synthetic — emitted by IPC bridge when Claude child exits nonzero",
@@ -101,7 +94,6 @@
101
94
  "message": "IPC socket disconnected unexpectedly"
102
95
  }
103
96
  ],
104
-
105
97
  "agent-restarted-unexpectedly": [
106
98
  {
107
99
  "_source": "Synthetic — emitted by gateway boot-banner diff when uptime drops unexpectedly",
@@ -114,7 +106,6 @@
114
106
  "message": "systemd unit restarted outside of operator request"
115
107
  }
116
108
  ],
117
-
118
109
  "unknown-4xx": [
119
110
  {
120
111
  "_source": "Novel 4xx not matching any known Anthropic error type",
@@ -142,7 +133,6 @@
142
133
  "_value": "something went wrong"
143
134
  }
144
135
  ],
145
-
146
136
  "unknown-5xx": [
147
137
  {
148
138
  "_source": "500 with no recognised type",
@@ -139,8 +139,17 @@ function classifyInner(raw: unknown): OperatorEventKind {
139
139
  message.toLowerCase().includes('overloaded_error') ||
140
140
  message.toLowerCase().includes('overloaded')
141
141
  ) {
142
- // Anthropic overloaded = quota exhausted / service rate-limiting
143
- return 'quota-exhausted'
142
+ // Anthropic "overloaded" (HTTP 529) is transient SERVER-side
143
+ // capacity pressure — orthogonal to account quota. It is retryable
144
+ // (`x-should-retry: true`) and Claude Code retries it internally.
145
+ // Classifying it `quota-exhausted` fired a false "Model
146
+ // unavailable — quota exhausted" card AND a self-cancelling fleet
147
+ // auto-fallback on every 529 (the active account always probes
148
+ // healthy — nothing is actually exhausted — so the fallback no-ops
149
+ // with "probed healthy / Stale event?"). It is a rate-limit-family
150
+ // transient; failing over to another account does nothing because
151
+ // every account is equally affected.
152
+ return 'rate-limited'
144
153
  }
145
154
 
146
155
  // Synthetic kinds (non-Anthropic — set by session-tail or IPC bridge)
@@ -409,9 +409,37 @@ export function projectSubagentLine(
409
409
  * Returns null when no actionable error is detected (routine lines).
410
410
  * Never throws — delegates to classifyClaudeError's own safety guarantee.
411
411
  */
412
+ /**
413
+ * Extract Claude Code's retry-state annotations from a transcript line.
414
+ * Claude Code writes top-level `retryAttempt` / `maxRetries` on a
415
+ * retried API error (e.g. a 529 it is internally retrying). Used to
416
+ * tell an in-flight retry from an exhausted (terminal) one. Both
417
+ * optional — non-retried errors and older Claude Code versions omit
418
+ * them.
419
+ */
420
+ function extractRetryState(obj: Record<string, unknown>): {
421
+ retryAttempt: number | null
422
+ maxRetries: number | null
423
+ } {
424
+ return {
425
+ retryAttempt: typeof obj.retryAttempt === 'number' ? obj.retryAttempt : null,
426
+ maxRetries: typeof obj.maxRetries === 'number' ? obj.maxRetries : null,
427
+ }
428
+ }
429
+
412
430
  export function detectErrorInTranscriptLine(
413
431
  line: string,
414
- ): { kind: OperatorEventKind; raw: unknown; detail: string } | null {
432
+ ): {
433
+ kind: OperatorEventKind
434
+ raw: unknown
435
+ detail: string
436
+ /** True for the rate-limit / transient-overload family. */
437
+ transient: boolean
438
+ /** True when the error is final — NOT an in-flight retry. A transient
439
+ * error mid-retry is `transient:true, terminal:false`; the caller
440
+ * suppresses it (no operator card until the failure is terminal). */
441
+ terminal: boolean
442
+ } | null {
415
443
  if (!line || line.length > 2 * 1024 * 1024) return null
416
444
  let obj: Record<string, unknown>
417
445
  try {
@@ -447,7 +475,16 @@ export function detectErrorInTranscriptLine(
447
475
  status === 429
448
476
  ? 'quota-exhausted'
449
477
  : classifyClaudeError({ type: errStr, status, message: text })
450
- return { kind, raw: obj, detail: text || errStr || 'api error' }
478
+ // An `isApiErrorMessage` line is Claude surfacing the failure to the
479
+ // user — terminal by construction (Claude writes this shape only
480
+ // after its own internal retries are exhausted).
481
+ return {
482
+ kind,
483
+ raw: obj,
484
+ detail: text || errStr || 'api error',
485
+ transient: kind === 'rate-limited',
486
+ terminal: true,
487
+ }
451
488
  }
452
489
 
453
490
  // Explicit error line types from Claude Code JSONL
@@ -472,7 +509,23 @@ export function detectErrorInTranscriptLine(
472
509
  extractDetailMessage(obj) ??
473
510
  String(type ?? '')
474
511
 
475
- return { kind, raw, detail }
512
+ // Transient = the rate-limit / overload family. For a transient,
513
+ // decide `terminal` from Claude Code's retry annotations: below the
514
+ // cap → still retrying (in-flight); at/above → exhausted. With no
515
+ // retry state, an explicit `type:"api_error"`/`"error"` LINE means
516
+ // Claude surfaced the failure (terminal); an embedded-error object
517
+ // with no retry state is ambiguous → treat as in-flight and suppress
518
+ // (the silence-poke covers a genuinely stuck turn; a false card is
519
+ // the bug we are fixing, a missed ambiguous card costs nothing).
520
+ const transient = kind === 'rate-limited'
521
+ const retry = extractRetryState(obj)
522
+ const terminal = !transient
523
+ ? true
524
+ : retry.retryAttempt != null && retry.maxRetries != null
525
+ ? retry.retryAttempt >= retry.maxRetries
526
+ : isErrorLine
527
+
528
+ return { kind, raw, detail, transient, terminal }
476
529
  }
477
530
 
478
531
  function extractDetailMessage(obj: Record<string, unknown> | null): string | null {
@@ -514,6 +567,10 @@ export interface TailOperatorEvent {
514
567
  kind: OperatorEventKind
515
568
  detail: string
516
569
  raw: unknown
570
+ /** True for the rate-limit / transient-overload family. */
571
+ transient: boolean
572
+ /** True when the failure is final, not an in-flight retry. */
573
+ terminal: boolean
517
574
  }
518
575
 
519
576
  export interface SessionTailConfig {
@@ -665,7 +722,17 @@ export function startSessionTail(config: SessionTailConfig): SessionTailHandle {
665
722
  try {
666
723
  const errEvent = detectErrorInTranscriptLine(line)
667
724
  if (errEvent) {
668
- onOperatorEvent(errEvent)
725
+ // Honest escalation: a transient overload Claude is still
726
+ // retrying (transient && !terminal) posts NO operator
727
+ // card — it almost always resolves on the next retry.
728
+ // Escalate only terminal failures + non-transient errors.
729
+ if (errEvent.terminal || !errEvent.transient) {
730
+ onOperatorEvent(errEvent)
731
+ } else {
732
+ log?.(
733
+ `session-tail: transient overload suppressed (in-flight retry) kind=${errEvent.kind}`,
734
+ )
735
+ }
669
736
  }
670
737
  } catch (err) {
671
738
  log?.(`session-tail: onOperatorEvent threw: ${(err as Error).message}`)
@@ -247,9 +247,22 @@ describe('resolveModelUnavailableFromOperatorEvent — kind-driven mapping', ()
247
247
  expect(d?.kind).toBe('quota_exhausted')
248
248
  })
249
249
 
250
- it('always treats kind=rate-limited as overload', () => {
250
+ it('treats a bare kind=rate-limited as NOT model-unavailable (transient → calm card)', () => {
251
+ // A transient overload / rate-limit is retryable — Claude Code
252
+ // retries it internally. resolveModelUnavailableFromOperatorEvent
253
+ // returns null so the gateway renders the calm `rate-limited` card,
254
+ // never the scary "⚠️ Model unavailable" one. Returning
255
+ // `{kind:'overload'}` here is what fired a false card on every 529.
251
256
  const d = resolveModelUnavailableFromOperatorEvent({ kind: 'rate-limited', detail: '' })
252
- expect(d?.kind).toBe('overload')
257
+ expect(d).toBeNull()
258
+ })
259
+
260
+ it('escalates a kind=rate-limited that carries a genuine quota signal', () => {
261
+ const d = resolveModelUnavailableFromOperatorEvent({
262
+ kind: 'rate-limited',
263
+ detail: "You've hit your limit · resets 8:50am",
264
+ })
265
+ expect(d?.kind).toBe('quota_exhausted')
253
266
  })
254
267
 
255
268
  it('always treats kind=unknown-5xx as overload', () => {
@@ -56,13 +56,64 @@ describe('detectErrorInTranscriptLine — error detection', () => {
56
56
  expect(result!.kind).toBe('credit-exhausted')
57
57
  })
58
58
 
59
- it('classifies overloaded_error as quota-exhausted', () => {
59
+ it('classifies overloaded_error as rate-limited (transient), NOT quota-exhausted', () => {
60
+ // A 529 "overloaded" is transient Anthropic server-capacity
61
+ // pressure — orthogonal to account quota. Classifying it
62
+ // quota-exhausted fired a false "Model unavailable" card + a
63
+ // self-cancelling fleet auto-fallback on every 529.
60
64
  const line = JSON.stringify({
61
65
  type: 'api_error',
62
66
  error: { type: 'overloaded_error', message: 'Overloaded' },
63
67
  })
64
68
  const result = detectErrorInTranscriptLine(line)
65
- expect(result!.kind).toBe('quota-exhausted')
69
+ expect(result!.kind).toBe('rate-limited')
70
+ expect(result!.transient).toBe(true)
71
+ // An explicit `type:"api_error"` line (no retry state) = Claude
72
+ // surfaced the failure → terminal.
73
+ expect(result!.terminal).toBe(true)
74
+ })
75
+
76
+ it('marks an in-flight 529 retry transient + NOT terminal (suppressed)', () => {
77
+ // Real on-disk shape: a 529 Claude Code is internally retrying,
78
+ // annotated with retryAttempt < maxRetries.
79
+ const line = JSON.stringify({
80
+ type: 'system',
81
+ subtype: 'api_error',
82
+ error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
83
+ retryAttempt: 9,
84
+ maxRetries: 10,
85
+ retryInMs: 34479,
86
+ })
87
+ const result = detectErrorInTranscriptLine(line)
88
+ expect(result!.kind).toBe('rate-limited')
89
+ expect(result!.transient).toBe(true)
90
+ // 9 < 10 — still retrying → in-flight → the caller suppresses it.
91
+ expect(result!.terminal).toBe(false)
92
+ })
93
+
94
+ it('marks an exhausted 529 retry terminal (escalates)', () => {
95
+ const line = JSON.stringify({
96
+ type: 'system',
97
+ subtype: 'api_error',
98
+ error: { status: 529, type: 'overloaded_error', message: 'Overloaded' },
99
+ retryAttempt: 10,
100
+ maxRetries: 10,
101
+ })
102
+ const result = detectErrorInTranscriptLine(line)
103
+ expect(result!.kind).toBe('rate-limited')
104
+ expect(result!.transient).toBe(true)
105
+ // retries exhausted → terminal → escalates.
106
+ expect(result!.terminal).toBe(true)
107
+ })
108
+
109
+ it('marks non-transient errors terminal (always escalate)', () => {
110
+ const line = JSON.stringify({
111
+ type: 'api_error',
112
+ error: { type: 'authentication_error', message: 'expired' },
113
+ })
114
+ const result = detectErrorInTranscriptLine(line)
115
+ expect(result!.transient).toBe(false)
116
+ expect(result!.terminal).toBe(true)
66
117
  })
67
118
 
68
119
  it('returns null for lines without error field', () => {
@@ -57,13 +57,20 @@ describe('classifyClaudeError — credit-exhausted fixtures', () => {
57
57
  }
58
58
  })
59
59
 
60
- describe('classifyClaudeError — quota-exhausted fixtures', () => {
61
- for (const fixture of fixtures['quota-exhausted']) {
62
- it(`classifies: ${fixture._source}`, () => {
63
- const input = '_value' in fixture ? fixture._value : fixture
64
- expect(classifyClaudeError(input)).toBe('quota-exhausted')
65
- })
66
- }
60
+ describe('classifyClaudeError — quota-exhausted', () => {
61
+ // classifyClaudeError is type/code/status-based and intentionally
62
+ // does NOT self-classify quota-exhausted: a genuine subscription
63
+ // usage-limit hit has no reliable Anthropic error TYPE — it is
64
+ // detected from the response TEXT. session-tail's `isApiErrorMessage`
65
+ // 429 branch + the `detectModelUnavailable` text path own quota
66
+ // detection. (`overloaded_error` used to be mapped here — wrongly;
67
+ // a 529 overload is transient server capacity, now `rate-limited`.)
68
+ it('no error TYPE maps to quota-exhausted (the text path owns it)', () => {
69
+ expect(fixtures['quota-exhausted']).toHaveLength(0)
70
+ expect(
71
+ classifyClaudeError({ type: 'overloaded_error', message: 'Overloaded' }),
72
+ ).not.toBe('quota-exhausted')
73
+ })
67
74
  })
68
75
 
69
76
  describe('classifyClaudeError — rate-limited fixtures', () => {
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Regression coverage for `decideSubagentHandback` — the gate the
3
+ * gateway's subagent-watcher `onFinish` callback runs to decide whether
4
+ * a finished sub-agent gets a handback turn injected.
5
+ *
6
+ * This is the highest-risk surface of the handback feature (#1650): it
7
+ * injects a fresh turn. Before this suite the decision lived inline in
8
+ * the gateway's `onFinish` closure with no automated test — a refactor
9
+ * that broke the `isBackground` gate would have fired handbacks for
10
+ * foreground sub-agents (double messages) with nothing to catch it.
11
+ * The decision is now a pure function; these cases pin every gate.
12
+ */
13
+
14
+ import { describe, it, expect } from 'vitest'
15
+ import { decideSubagentHandback } from '../gateway/subagent-handback-inbound-builder.js'
16
+
17
+ const FIXED_NOW = 1_700_000_000_000
18
+
19
+ const base = {
20
+ handbackEnvValue: undefined as string | undefined,
21
+ outcome: 'completed' as 'completed' | 'failed' | 'orphan',
22
+ isBackground: true,
23
+ fleetChatId: '777',
24
+ ownerChatId: '999',
25
+ taskDescription: 'Do the thing',
26
+ resultText: 'Done.',
27
+ nowMs: FIXED_NOW,
28
+ }
29
+
30
+ describe('decideSubagentHandback', () => {
31
+ it('delivers for a background completed sub-agent', () => {
32
+ const d = decideSubagentHandback({ ...base })
33
+ expect(d.deliver).toBe(true)
34
+ if (d.deliver) {
35
+ expect(d.chatId).toBe('777')
36
+ expect(d.inbound.meta.source).toBe('subagent_handback')
37
+ expect(d.inbound.chatId).toBe('777')
38
+ }
39
+ })
40
+
41
+ it('delivers for a background FAILED sub-agent', () => {
42
+ const d = decideSubagentHandback({ ...base, outcome: 'failed' })
43
+ expect(d.deliver).toBe(true)
44
+ if (d.deliver) expect(d.inbound.meta.outcome).toBe('failed')
45
+ })
46
+
47
+ it('skips a foreground sub-agent (handed back natively in-turn)', () => {
48
+ const d = decideSubagentHandback({ ...base, isBackground: false })
49
+ expect(d).toEqual({ deliver: false, reason: 'foreground' })
50
+ })
51
+
52
+ it("skips an 'orphan' outcome (stale historical-at-boot row)", () => {
53
+ const d = decideSubagentHandback({ ...base, outcome: 'orphan' })
54
+ expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
55
+ })
56
+
57
+ it('skips when the kill-switch is set (SWITCHROOM_SUBAGENT_HANDBACK=0)', () => {
58
+ const d = decideSubagentHandback({ ...base, handbackEnvValue: '0' })
59
+ expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
60
+ })
61
+
62
+ it('treats any non-"0" env value (incl. undefined) as enabled', () => {
63
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: undefined }).deliver).toBe(true)
64
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: '1' }).deliver).toBe(true)
65
+ expect(decideSubagentHandback({ ...base, handbackEnvValue: '' }).deliver).toBe(true)
66
+ })
67
+
68
+ it('falls back to the owner chat when the fleet entry is gone', () => {
69
+ const d = decideSubagentHandback({ ...base, fleetChatId: '' })
70
+ expect(d.deliver).toBe(true)
71
+ if (d.deliver) {
72
+ expect(d.chatId).toBe('999')
73
+ expect(d.inbound.chatId).toBe('999')
74
+ }
75
+ })
76
+
77
+ it('prefers the fleet chat id over the owner chat when both are present', () => {
78
+ const d = decideSubagentHandback({ ...base, fleetChatId: '777', ownerChatId: '999' })
79
+ expect(d.deliver).toBe(true)
80
+ if (d.deliver) expect(d.chatId).toBe('777')
81
+ })
82
+
83
+ it('skips when no chat resolves at all', () => {
84
+ const d = decideSubagentHandback({ ...base, fleetChatId: '', ownerChatId: '' })
85
+ expect(d).toEqual({ deliver: false, reason: 'no-chat' })
86
+ })
87
+
88
+ it('gate order: kill-switch wins over every other condition', () => {
89
+ // env-disabled even though it is a deliverable background completion.
90
+ const d = decideSubagentHandback({ ...base, handbackEnvValue: '0', isBackground: true })
91
+ expect(d).toEqual({ deliver: false, reason: 'env-disabled' })
92
+ })
93
+
94
+ it('gate order: outcome filter applies before the foreground check', () => {
95
+ // orphan + foreground — outcome filter is checked first.
96
+ const d = decideSubagentHandback({ ...base, outcome: 'orphan', isBackground: false })
97
+ expect(d).toEqual({ deliver: false, reason: 'outcome-not-terminal' })
98
+ })
99
+
100
+ it('carries the task description and result text into the inbound', () => {
101
+ const d = decideSubagentHandback({
102
+ ...base,
103
+ taskDescription: 'Migrate the DB',
104
+ resultText: 'Applied 3 migrations, 0 rows dropped.',
105
+ })
106
+ expect(d.deliver).toBe(true)
107
+ if (d.deliver) {
108
+ expect(d.inbound.text).toContain('Migrate the DB')
109
+ expect(d.inbound.text).toContain('Applied 3 migrations')
110
+ }
111
+ })
112
+ })