switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3158 -1178
  10. package/dist/host-control/main.js +2833 -355
  11. package/dist/vault/approvals/kernel-server.js +7479 -7439
  12. package/dist/vault/broker/server.js +11312 -11272
  13. package/examples/minimal.yaml +1 -0
  14. package/examples/switchroom.yaml +1 -0
  15. package/package.json +3 -3
  16. package/profiles/_base/start.sh.hbs +88 -1
  17. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  18. package/profiles/default/CLAUDE.md.hbs +0 -19
  19. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  20. package/telegram-plugin/answer-stream-flag.ts +12 -49
  21. package/telegram-plugin/answer-stream.ts +5 -150
  22. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  23. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  24. package/telegram-plugin/context-exhaustion.ts +12 -0
  25. package/telegram-plugin/demo-mask.ts +154 -0
  26. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  27. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  28. package/telegram-plugin/dist/server.js +215 -172
  29. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  30. package/telegram-plugin/draft-stream.ts +47 -410
  31. package/telegram-plugin/final-answer-detect.ts +17 -12
  32. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  33. package/telegram-plugin/format.ts +56 -19
  34. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  35. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  36. package/telegram-plugin/gateway/auth-command.ts +70 -14
  37. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  38. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  39. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  40. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  41. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  42. package/telegram-plugin/gateway/effort-command.ts +8 -3
  43. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  44. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  45. package/telegram-plugin/gateway/gateway.ts +1837 -291
  46. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  103. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  104. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  105. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  106. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  107. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  108. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  109. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  110. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  111. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  112. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  113. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  114. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  115. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  116. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  117. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  118. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  119. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  120. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  121. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  122. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  123. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  124. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  125. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  126. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  127. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  128. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  129. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  130. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  131. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  132. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  133. package/telegram-plugin/tool-activity-summary.ts +375 -58
  134. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  135. package/telegram-plugin/uat/assertions.ts +115 -0
  136. package/telegram-plugin/uat/driver.ts +68 -0
  137. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  138. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  139. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  145. package/telegram-plugin/welcome-text.ts +13 -1
  146. package/telegram-plugin/worker-activity-feed.ts +157 -82
  147. package/telegram-plugin/draft-transport.ts +0 -122
  148. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  149. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -19,7 +19,9 @@
19
19
  */
20
20
 
21
21
  import type { QuotaResult, QuotaUtilization } from './quota-check.js';
22
+ import { isProbeThin, refillNormalizedUtils } from '../src/auth/quota.js';
22
23
  import type { AccountState, LastQuotaSnapshot, ListStateData } from '../src/auth/broker/client.js';
24
+ import { maskEmail } from './demo-mask.js';
23
25
 
24
26
  // ── shared types ─────────────────────────────────────────────────────
25
27
 
@@ -60,23 +62,76 @@ export interface AccountSnapshot {
60
62
  export const THROTTLING_THRESHOLD_PCT = 80;
61
63
 
62
64
  /**
63
- * Decide the health verdict for one account. The two "binding" facts:
64
- * - 5h or 7d utilization >= 100% (or `representativeClaim` non-null
65
- * plus utilization >= 99.5%) → blocked
66
- * - either window above 80%, or representativeClaim set with > 50%
67
- * throttling
65
+ * INFORMATIONAL ALLOWLIST of `overageDisabledReason` values that mean the
66
+ * account has no overage headroom. Replicated from the broker's
67
+ * `OVERAGE_EXHAUSTED_REASONS` (src/auth/broker/account-eligibility.ts) because
68
+ * the plugin can't import across the package boundary — keep the two in sync.
69
+ *
70
+ * These are NOT serve-blocking: the fleet runs on quota, not credits. An account
71
+ * with `out_of_credits` at low util serves fine. `org_level_disabled` → benign
72
+ * (the live active fleet account: overage off but serving fine off subscription).
73
+ * `null` / unknown → benign (deny-by-omission).
74
+ *
75
+ * MUST NEVER gate serving or failover eligibility — informational annotation
76
+ * only (e.g. "overage off (out_of_credits) — serving from quota").
77
+ * Do NOT key on `overageStatus` ("rejected" appears on the healthy account too).
78
+ * The drift test (overage-allowlist-drift.test.ts) ensures these two copies stay
79
+ * in sync — update BOTH when this list changes.
80
+ */
81
+ const OVERAGE_EXHAUSTED_REASONS = new Set<string>(['out_of_credits']);
82
+
83
+ /**
84
+ * Decide the health verdict for one account. Binding facts (in order):
85
+ * - probe failure → unknown
86
+ * - thin/headerless probe → unknown (no real utilization signal)
87
+ * - 5h or 7d utilization >= 99.5% → blocked (quota wall)
88
+ * - either window above 80% → throttling
68
89
  * - everything else → healthy
69
90
  * - probe failure → unknown
91
+ *
92
+ * NOTE: `out_of_credits` (overageDisabledReason) is NOT a serve-block here.
93
+ * The fleet runs on quota, not on overage credits. An account with `out_of_credits`
94
+ * at low util (e.g. carol@example.com at 5h=0%, 7d=2%) serves fine and is a
95
+ * valid failover target. Overage fields are informational only — surfaced as an
96
+ * annotation on healthy/throttling rows, never as a blocked verdict.
97
+ * Failover safety against a real 429 is preserved via the mark-exhausted path.
70
98
  */
71
- export function classifyHealth(snap: AccountSnapshot): AccountHealth {
99
+ export function classifyHealth(snap: AccountSnapshot, now: Date = new Date()): AccountHealth {
72
100
  if (!snap.quota) return 'unknown';
73
101
  const q = snap.quota;
74
- const max = Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
102
+ // #2494 Bug C — a thin/headerless probe (no real utilization signal on
103
+ // EITHER window) must not masquerade as a confident 0% / healthy. Treat it
104
+ // as unknown so the card surfaces a data-quality gap, not "healthy".
105
+ if (isProbeThin(q)) return 'unknown';
106
+ // #2494 Bug A — read utilization through the refill normalization: a window
107
+ // whose reset has already passed has rolled since the snapshot was captured,
108
+ // so its stale high utilization must be treated as 0%. A just-refilled
109
+ // account self-corrects to healthy without an extra probe.
110
+ const norm = refillNormalizedUtils(q, now);
111
+ const max = Math.max(norm.fiveHourUtilizationPct, norm.sevenDayUtilizationPct);
75
112
  if (max >= 99.5) return 'blocked';
76
113
  if (max >= THROTTLING_THRESHOLD_PCT) return 'throttling';
77
114
  return 'healthy';
78
115
  }
79
116
 
117
+ /**
118
+ * Why is a BLOCKED account blocked? Only one cause now: quota exhaustion.
119
+ * - 'quota-exhausted' — a util window is maxed but recovers when that window
120
+ * rolls. Show the reset countdown.
121
+ *
122
+ * NOTE: 'billing-dead' has been removed. `out_of_credits` accounts are now
123
+ * healthy (not blocked) — they appear in the HEALTHY group with an informational
124
+ * overage annotation. See classifyHealth for the rationale.
125
+ *
126
+ * Returns null for non-blocked accounts.
127
+ */
128
+ export type BlockedReason = 'quota-exhausted';
129
+
130
+ export function blockedReason(snap: AccountSnapshot, now: Date = new Date()): BlockedReason | null {
131
+ if (classifyHealth(snap, now) !== 'blocked') return null;
132
+ return 'quota-exhausted';
133
+ }
134
+
80
135
  /**
81
136
  * Which window is the user-visible "binding" one — the one that ran
82
137
  * out, or is closer to running out. Returned as a label for headers
@@ -150,6 +205,31 @@ export interface SnapshotRenderOpts {
150
205
  /** Refresh stamp shown in the footer; usually `Date.now()` of the
151
206
  * most recent live probe. Omit to suppress. */
152
207
  liveProbedAtMs?: number;
208
+ /**
209
+ * #2495 Change 2 — the probe-on-open attempted a live refresh but it
210
+ * FAILED, so the card is rendered off the durable cache. When set, the
211
+ * footer shows an explicit "⚠ cached Nm ago" warning (age measured from
212
+ * this `capturedAt`) instead of a false "Live · refreshed 0s ago" stamp.
213
+ * Takes precedence over `liveProbedAtMs`.
214
+ */
215
+ staleCachedAtMs?: number;
216
+ /**
217
+ * Demo mode (the `/usage demo` / `/auth demo` suffix). When true, every
218
+ * account label is run through `maskEmail` before rendering so a screen
219
+ * recording shows stable realistic fakes instead of the operator's real
220
+ * account emails. Off by default — normal output is unchanged. Scope is
221
+ * the email-label PII tier only; topology/percentages/resets are untouched.
222
+ */
223
+ demo?: boolean;
224
+ }
225
+
226
+ /**
227
+ * Apply demo-mode email masking to an account label when `opts.demo` is set,
228
+ * otherwise return the label unchanged. Single helper so the three label
229
+ * render sites stay in lockstep.
230
+ */
231
+ function displayLabel(label: string, opts: SnapshotRenderOpts): string {
232
+ return opts.demo ? maskEmail(label) : label;
153
233
  }
154
234
 
155
235
  /**
@@ -184,11 +264,13 @@ const HEALTH_TITLE: Record<AccountHealth, string> = {
184
264
  * One-line per-account summary inside its health group.
185
265
  *
186
266
  * you@example.com ● 8% / 20%
187
- * 5h refills 11:00 AM (in 6m) · 7d resets Sun 11:00 AM
267
+ * 5h refills 11:00 AM (in 6m)
268
+ * 7d resets Sun 11:00 AM
188
269
  *
189
- * Two lines actually: the label/percent line and a sub-line with the
190
- * reset details. The blocked variant replaces the sub-line with the
191
- * recovery countdown.
270
+ * Three lines for a healthy/throttling row: the label/percent line plus
271
+ * two reset sub-lines (each window on its own line so the 7d segment
272
+ * doesn't wrap mid-line on a narrow phone). The blocked variant replaces
273
+ * the sub-lines with a single recovery countdown.
192
274
  */
193
275
  function renderAccountRow(
194
276
  snap: AccountSnapshot,
@@ -198,10 +280,11 @@ function renderAccountRow(
198
280
  const tz = opts.tz ?? 'UTC';
199
281
  const lines: string[] = [];
200
282
  const marker = snap.isActive ? '● ' : '';
283
+ const label = displayLabel(snap.label, opts);
201
284
 
202
285
  if (!snap.quota) {
203
286
  lines.push(
204
- `${marker}<code>${escapeHtml(snap.label)}</code> <i>quota probe failed</i>`,
287
+ `${marker}<code>${escapeHtml(label)}</code> <i>quota probe failed</i>`,
205
288
  );
206
289
  if (snap.quotaError) {
207
290
  lines.push(` <i>${escapeHtml(snap.quotaError)}</i>`);
@@ -210,29 +293,41 @@ function renderAccountRow(
210
293
  }
211
294
 
212
295
  const q = snap.quota;
213
- const fiveStr = fmtPct(q.fiveHourUtilizationPct);
214
- const sevenStr = fmtPct(q.sevenDayUtilizationPct);
296
+ // #2494 Bug C — a thin/headerless probe carries no real utilization; render
297
+ // it as a data-quality gap, never a confident "0% / 0%".
298
+ if (isProbeThin(q)) {
299
+ lines.push(
300
+ `${marker}<code>${escapeHtml(label)}</code> <i>quota unknown (thin probe)</i>`,
301
+ );
302
+ return lines;
303
+ }
304
+ // #2494 Bug A — show refill-normalized utilization so a window that has
305
+ // already reset reads its true post-refill 0%, not the stale capture value.
306
+ const norm = refillNormalizedUtils(q, now);
307
+ const fiveStr = fmtPct(norm.fiveHourUtilizationPct);
308
+ const sevenStr = fmtPct(norm.sevenDayUtilizationPct);
215
309
  lines.push(
216
- `${marker}<code>${escapeHtml(snap.label)}</code> ${fiveStr} / ${sevenStr}`,
310
+ `${marker}<code>${escapeHtml(label)}</code> ${fiveStr} / ${sevenStr}`,
217
311
  );
218
312
 
219
- const health = classifyHealth(snap);
313
+ const health = classifyHealth(snap, now);
220
314
  if (health === 'blocked') {
221
- // Surface only the recovery countdown — the binding window's reset
222
- // is the only thing that matters until then.
315
+ // quota-exhausted (recoverable): surface only the recovery countdown — the
316
+ // binding window's reset is the only thing that matters until then.
223
317
  const win = bindingWindow(q);
224
318
  const reset = win === '5h' ? q.fiveHourResetAt : q.sevenDayResetAt;
225
319
  const winLabel = win === '5h' ? '5-hour' : '7-day';
226
320
  lines.push(
227
- ` <i>back ${formatAbsolute(reset, tz)} (in ${formatRelative(reset, now)}, ${winLabel} cap)</i>`,
321
+ ` <i>quota exhausted — back ${formatAbsolute(reset, tz)} (in ${formatRelative(reset, now)}, ${winLabel} cap)</i>`,
228
322
  );
229
323
  return lines;
230
324
  }
231
325
 
232
326
  // Healthy / throttling: show whichever window is closer to refresh
233
- // first, then the other on the same line. Reverses the screenshot's
327
+ // first, then the other on the next line. Reverses the screenshot's
234
328
  // "5h then 7d" ordering when 7d is the more pressing one — the user
235
- // wants the imminent number first.
329
+ // wants the imminent number first. Each window gets its own line so the
330
+ // second segment doesn't wrap mid-line on a narrow phone screen.
236
331
  const fiveResetIn = q.fiveHourResetAt ? q.fiveHourResetAt.getTime() - now.getTime() : Infinity;
237
332
  const sevenResetIn = q.sevenDayResetAt ? q.sevenDayResetAt.getTime() - now.getTime() : Infinity;
238
333
  const fiveFirst = fiveResetIn <= sevenResetIn;
@@ -242,7 +337,15 @@ function renderAccountRow(
242
337
  const sevenSeg = q.sevenDayResetAt
243
338
  ? `7d resets ${formatAbsolute(q.sevenDayResetAt, tz)} (in ${formatRelative(q.sevenDayResetAt, now)})`
244
339
  : '7d resets —';
245
- lines.push(` <i>${fiveFirst ? fiveSeg : sevenSeg} · ${fiveFirst ? sevenSeg : fiveSeg}</i>`);
340
+ lines.push(` <i>${fiveFirst ? fiveSeg : sevenSeg}</i>`);
341
+ lines.push(` <i>${fiveFirst ? sevenSeg : fiveSeg}</i>`);
342
+ // Informational overage annotation: if out_of_credits (no overage headroom),
343
+ // surface it as a sub-line on a healthy/throttling row — NOT a blocked badge.
344
+ if (q.overageDisabledReason != null && OVERAGE_EXHAUSTED_REASONS.has(q.overageDisabledReason)) {
345
+ lines.push(
346
+ ` <i>overage off (${escapeHtml(q.overageDisabledReason)}) — serving from quota</i>`,
347
+ );
348
+ }
246
349
  return lines;
247
350
  }
248
351
 
@@ -263,6 +366,14 @@ function renderAccountRow(
263
366
  * `buildSnapshotKeyboard` below) — keep the formatting and the
264
367
  * keyboard in lockstep so the buttons always reflect current state.
265
368
  */
369
+ /** Relative-age stamp shared by the live + degraded footers: "0s ago",
370
+ * "3m ago". Measured against `now` (defaults to wall-clock) so tests with
371
+ * an injected clock get deterministic output. */
372
+ function formatAgeStamp(atMs: number, now: Date = new Date()): string {
373
+ const ageSec = Math.max(0, Math.round((now.getTime() - atMs) / 1000));
374
+ return ageSec < 60 ? `${ageSec}s ago` : `${Math.round(ageSec / 60)}m ago`;
375
+ }
376
+
266
377
  export function renderAuthSnapshotFormat2(
267
378
  snapshots: AccountSnapshot[],
268
379
  opts: SnapshotRenderOpts = {},
@@ -278,7 +389,7 @@ export function renderAuthSnapshotFormat2(
278
389
  const order: AccountHealth[] = ['blocked', 'throttling', 'healthy', 'unknown'];
279
390
  const grouped = new Map<AccountHealth, AccountSnapshot[]>();
280
391
  for (const s of snapshots) {
281
- const h = classifyHealth(s);
392
+ const h = classifyHealth(s, now);
282
393
  if (!grouped.has(h)) grouped.set(h, []);
283
394
  grouped.get(h)!.push(s);
284
395
  }
@@ -299,11 +410,14 @@ export function renderAuthSnapshotFormat2(
299
410
 
300
411
  lines.push('');
301
412
  lines.push('────────────────────────────');
302
- lines.push(`<i>${recommendation(snapshots, now)}</i>`);
303
- if (opts.liveProbedAtMs != null) {
304
- const ageSec = Math.max(0, Math.round((Date.now() - opts.liveProbedAtMs) / 1000));
305
- const ageStr = ageSec < 60 ? `${ageSec}s ago` : `${Math.round(ageSec / 60)}m ago`;
306
- lines.push(`<i>Live · refreshed ${ageStr}</i>`);
413
+ lines.push(`<i>${recommendation(snapshots, now, opts.demo ?? false)}</i>`);
414
+ // #2495 Change 2 — a failed probe-on-open renders an explicit "cached Nm
415
+ // ago" warning, never a false live stamp. The degraded variant takes
416
+ // precedence over the live stamp.
417
+ if (opts.staleCachedAtMs != null) {
418
+ lines.push(`<i>⚠ cached ${formatAgeStamp(opts.staleCachedAtMs, now)}</i>`);
419
+ } else if (opts.liveProbedAtMs != null) {
420
+ lines.push(`<i>Live · refreshed ${formatAgeStamp(opts.liveProbedAtMs, now)}</i>`);
307
421
  } else {
308
422
  lines.push('<i>Live</i>');
309
423
  }
@@ -321,40 +435,109 @@ export function renderAuthSnapshotFormat2(
321
435
  * "Active <active> is BLOCKED. Switch to <healthy> now."
322
436
  * "All accounts blocked. Earliest recovery: <label> in <eta>."
323
437
  */
324
- export function recommendation(snapshots: AccountSnapshot[], now: Date = new Date()): string {
438
+ export function recommendation(
439
+ snapshots: AccountSnapshot[],
440
+ now: Date = new Date(),
441
+ demo = false,
442
+ ): string {
325
443
  const active = snapshots.find((s) => s.isActive);
326
444
  if (!active) return 'No active account set.';
327
- const activeHealth = classifyHealth(active);
445
+ const activeHealth = classifyHealth(active, now);
328
446
  const others = snapshots.filter((s) => !s.isActive);
329
- const healthyAlt = others.find((s) => classifyHealth(s) === 'healthy');
447
+ const healthyAlt = others.find((s) => classifyHealth(s, now) === 'healthy');
448
+ // Demo mode masks the email labels that appear in the recommendation
449
+ // sentence, in lockstep with the per-account rows above.
450
+ const lbl = (s: AccountSnapshot) => (demo ? maskEmail(s.label) : s.label);
451
+ const activeLabel = lbl(active);
330
452
 
331
453
  if (activeHealth === 'healthy') {
332
- return `Recommendation: stay on ${active.label}.`;
454
+ return `Recommendation: stay on ${activeLabel}.`;
333
455
  }
334
456
 
335
457
  if (activeHealth === 'throttling') {
336
458
  if (healthyAlt) {
337
- return `Recommendation: active ${active.label} is throttling. Switch to ${healthyAlt.label} for headroom.`;
459
+ return `Recommendation: active ${activeLabel} is throttling. Switch to ${lbl(healthyAlt)} for headroom.`;
338
460
  }
339
- return `Recommendation: active ${active.label} is throttling; no healthy alternative — wait for refill.`;
461
+ return `Recommendation: active ${activeLabel} is throttling; no healthy alternative — wait for refill.`;
340
462
  }
341
463
 
342
464
  if (activeHealth === 'blocked') {
343
465
  if (healthyAlt) {
344
- return `Recommendation: active ${active.label} is BLOCKED — switch to ${healthyAlt.label} now.`;
466
+ return `Recommendation: active ${activeLabel} is BLOCKED — switch to ${lbl(healthyAlt)} now.`;
467
+ }
468
+ // #2494 Bug B — no healthy alternative. Do NOT collapse to "All accounts
469
+ // blocked": that's only honest when EVERY account is truly walled with no
470
+ // usable or imminently-refilling slot. Distinguish the buckets first.
471
+ return summarizeNoHealthyAlt(snapshots, now, demo);
472
+ }
473
+
474
+ // unknown
475
+ return `Active ${activeLabel}: quota probe failed; broker last_seen unknown.`;
476
+ }
477
+
478
+ /**
479
+ * #2494 Bug B — honest fleet summary when the active account is blocked and no
480
+ * fully-healthy alternative exists. Buckets every account so the summary never
481
+ * claims "all blocked" while a throttling / imminently-refilling / usable slot
482
+ * exists. Surfaces the soonest refill ETA across the fleet.
483
+ */
484
+ function summarizeNoHealthyAlt(snapshots: AccountSnapshot[], now: Date, demo = false): string {
485
+ const mask = (label: string) => (demo ? maskEmail(label) : label);
486
+ let throttlingLabel: string | null = null;
487
+ let allTrulyBlocked = true;
488
+ for (const s of snapshots) {
489
+ const h = classifyHealth(s, now);
490
+ if (h === 'throttling') {
491
+ // A throttling account is still usable.
492
+ if (!throttlingLabel) throttlingLabel = s.label;
493
+ allTrulyBlocked = false;
494
+ } else if (h === 'healthy' || h === 'unknown') {
495
+ // Healthy is handled by the caller; unknown is not provably blocked.
496
+ allTrulyBlocked = false;
497
+ } else if (h === 'blocked' && blockedReason(s, now) === 'quota-exhausted') {
498
+ // Quota-exhausted recovers WHEN its window rolls — but only counts as
499
+ // "refilling" (not terminal) if it actually carries a future reset on the
500
+ // binding window. A maxed window with no reset timestamp has no imminent
501
+ // recovery and stays in the truly-blocked bucket (Bug B: "blocked = ≥99.5%
502
+ // AND no imminent reset").
503
+ if (s.quota) {
504
+ const win = bindingWindow(s.quota);
505
+ const at = win === '5h' ? s.quota.fiveHourResetAt : s.quota.sevenDayResetAt;
506
+ if (at && at.getTime() > now.getTime()) allTrulyBlocked = false;
507
+ }
345
508
  }
346
- // No healthy alternative; surface the earliest recovery time.
347
- const earliestRecovery = pickEarliestRecovery(snapshots, now);
509
+ }
510
+
511
+ const earliestRecovery = pickEarliestRecovery(snapshots, now);
512
+
513
+ if (throttlingLabel) {
514
+ // A usable (throttling) slot exists — recommend it, with the soonest refill.
515
+ const eta = earliestRecovery
516
+ ? ` Soonest full refill: ${mask(earliestRecovery.label)} in ${formatRelative(earliestRecovery.at, now)}.`
517
+ : '';
518
+ return `No fully-healthy account; ${mask(throttlingLabel)} is throttling but still usable.${eta}`;
519
+ }
520
+
521
+ if (!allTrulyBlocked) {
522
+ // No usable slot now, but at least one account is refilling — not all dead.
348
523
  if (earliestRecovery) {
349
- return `All accounts blocked. Earliest recovery: ${earliestRecovery.label} in ${formatRelative(earliestRecovery.at, now)}.`;
524
+ return `All accounts at capacity; soonest refill: ${mask(earliestRecovery.label)} in ${formatRelative(earliestRecovery.at, now)}.`;
350
525
  }
351
- return `All accounts blocked. Run /auth add to attach another subscription.`;
526
+ return `All accounts at capacity waiting on a window refill.`;
352
527
  }
353
528
 
354
- // unknown
355
- return `Active ${active.label}: quota probe failed; broker last_seen unknown.`;
529
+ // Genuinely all blocked (quota-exhausted with no upcoming reset, or no data).
530
+ if (earliestRecovery) {
531
+ return `All accounts blocked. Earliest recovery: ${mask(earliestRecovery.label)} in ${formatRelative(earliestRecovery.at, now)}.`;
532
+ }
533
+ return `All accounts blocked. Run /auth add to attach another subscription.`;
356
534
  }
357
535
 
536
+ /**
537
+ * Earliest refill ETA across the fleet. #2494 Bug A/B — only counts a future
538
+ * reset on the binding window; a window whose reset has already passed has
539
+ * refilled (handled by refill normalization) and is not "recovery pending".
540
+ */
358
541
  function pickEarliestRecovery(
359
542
  snapshots: AccountSnapshot[],
360
543
  now: Date,
@@ -362,6 +545,7 @@ function pickEarliestRecovery(
362
545
  let best: { label: string; at: Date } | null = null;
363
546
  for (const s of snapshots) {
364
547
  if (!s.quota) continue;
548
+ if (isProbeThin(s.quota)) continue;
365
549
  const win = bindingWindow(s.quota);
366
550
  const at = win === '5h' ? s.quota.fiveHourResetAt : s.quota.sevenDayResetAt;
367
551
  if (!at || at.getTime() <= now.getTime()) continue;
@@ -387,6 +571,17 @@ export interface FallbackAnnouncementInput {
387
571
  /** Agent that triggered the fallback (for context — fleet swap
388
572
  * affects all agents but the user wants to know which one tripped). */
389
573
  triggerAgent: string;
574
+ /**
575
+ * Bug 3 — the full per-account fleet snapshot, threaded in so the all-blocked
576
+ * card can enumerate EVERY account (5h%/7d% + recovery ETA), not just the one
577
+ * triggering account. Built by `buildSnapshotsFromState` one frame up in
578
+ * `runFleetAutoFallback`. Optional/back-compat: when absent (or empty), the
579
+ * all-blocked branch falls back to the old single-account shape.
580
+ *
581
+ * ONLY consumed on the all-blocked branch. The successful-swap branch already
582
+ * shows the target's headroom and is unchanged.
583
+ */
584
+ fleetSnapshots?: AccountSnapshot[];
390
585
  tz?: string;
391
586
  now?: Date;
392
587
  }
@@ -414,14 +609,42 @@ export function renderFallbackAnnouncement(input: FallbackAnnouncementInput): st
414
609
  const headerLimit = limitWord === 'quota' ? 'quota cap' : `${limitWord} limit`;
415
610
 
416
611
  if (!input.newLabel) {
417
- // All-blocked path — no swap occurred. Tell user what's broken
418
- // and when the earliest reset is.
612
+ // All-blocked path — no swap occurred. Tell user what's broken and, so they
613
+ // can VERIFY the fleet is truly exhausted, enumerate EVERY account's 5h%/7d%
614
+ // + recovery ETA (Bug 3) — not just the one triggering account. Reuses the
615
+ // same per-account row + earliest-recovery helpers the /auth table uses so
616
+ // the formatting stays consistent with the rest of the auth surface.
419
617
  lines.push(
420
618
  `🔴 <b>All accounts blocked · ${headerLimit} on ${escapeHtml(input.oldLabel)}</b>`,
421
619
  );
422
620
  lines.push('');
423
621
  lines.push(`Triggered by: agent <b>${escapeHtml(input.triggerAgent)}</b>`);
424
- if (input.oldQuota) {
622
+
623
+ const fleet = input.fleetSnapshots ?? [];
624
+ if (fleet.length > 0) {
625
+ lines.push('');
626
+ const rowOpts: SnapshotRenderOpts = { now, tz };
627
+ // Blocked-first ordering mirrors renderAuthSnapshotFormat2 — the user
628
+ // scans the walled accounts (and their recovery times) at the top, with
629
+ // the active account floating first within its group.
630
+ const healthOrder: AccountHealth[] = ['blocked', 'throttling', 'healthy', 'unknown'];
631
+ const rank = (s: AccountSnapshot): number => healthOrder.indexOf(classifyHealth(s, now));
632
+ const ordered = [...fleet].sort(
633
+ (a, b) => rank(a) - rank(b) || Number(b.isActive) - Number(a.isActive),
634
+ );
635
+ for (const snap of ordered) {
636
+ for (const ln of renderAccountRow(snap, rowOpts)) lines.push(ln);
637
+ }
638
+ const earliest = pickEarliestRecovery(fleet, now);
639
+ if (earliest) {
640
+ lines.push('');
641
+ lines.push(
642
+ `Earliest recovery: <code>${escapeHtml(earliest.label)}</code> ` +
643
+ `${formatAbsolute(earliest.at, tz)} (in ${formatRelative(earliest.at, now)})`,
644
+ );
645
+ }
646
+ } else if (input.oldQuota) {
647
+ // Back-compat: no fleet snapshot supplied → old single-account shape.
425
648
  const recovery = recoveryAtFor(input.oldQuota);
426
649
  if (recovery) {
427
650
  lines.push(
@@ -522,6 +745,10 @@ export interface SnapshotKeyboardOpts {
522
745
  /** Limit how many "Switch → X" buttons we render. Beyond this, the
523
746
  * user can drill in via /usage. Default 3. */
524
747
  maxSwitchButtons?: number;
748
+ /** #2495 folded nit A — clock for health classification, threaded so the
749
+ * keyboard agrees with the card body instead of defaulting to a second
750
+ * `new Date()`. Defaults to wall-clock. */
751
+ now?: Date;
525
752
  }
526
753
 
527
754
  /**
@@ -540,14 +767,15 @@ export function buildSnapshotKeyboard(
540
767
  opts: SnapshotKeyboardOpts = {},
541
768
  ): KeyboardRow[] {
542
769
  const max = opts.maxSwitchButtons ?? 3;
770
+ const now = opts.now ?? new Date();
543
771
  const rows: KeyboardRow[] = [];
544
772
 
545
773
  // Switch buttons — healthy non-active first, then throttling
546
774
  // non-active. Skip blocked and unknown entirely.
547
775
  const switchTargets = snapshots
548
776
  .filter((s) => !s.isActive)
549
- .sort((a, b) => switchPriority(a) - switchPriority(b))
550
- .filter((s) => classifyHealth(s) !== 'blocked' && classifyHealth(s) !== 'unknown')
777
+ .sort((a, b) => switchPriority(a, now) - switchPriority(b, now))
778
+ .filter((s) => classifyHealth(s, now) !== 'blocked' && classifyHealth(s, now) !== 'unknown')
551
779
  .slice(0, max);
552
780
 
553
781
  for (const t of switchTargets) {
@@ -569,8 +797,8 @@ export function buildSnapshotKeyboard(
569
797
  }
570
798
 
571
799
  /** Lower number = higher priority for "switch to me" button. */
572
- function switchPriority(s: AccountSnapshot): number {
573
- const h = classifyHealth(s);
800
+ function switchPriority(s: AccountSnapshot, now: Date = new Date()): number {
801
+ const h = classifyHealth(s, now);
574
802
  if (h === 'healthy') return 0;
575
803
  if (h === 'throttling') return 1;
576
804
  if (h === 'unknown') return 2;
@@ -634,6 +862,10 @@ export function reviveLastQuota(snap: LastQuotaSnapshot | null | undefined): Quo
634
862
  representativeClaim: snap.representativeClaim,
635
863
  overageStatus: snap.overageStatus,
636
864
  overageDisabledReason: snap.overageDisabledReason,
865
+ // #2494 Bug C — forward the header-presence markers so a cached thin probe
866
+ // still renders as `unknown`, not a confident 0%.
867
+ fiveHourUtilPresent: snap.fiveHourUtilPresent,
868
+ sevenDayUtilPresent: snap.sevenDayUtilPresent,
637
869
  };
638
870
  }
639
871
 
@@ -101,6 +101,41 @@ export function evaluateFallbackFailureNotice(
101
101
  return { send: false, next: prev };
102
102
  }
103
103
 
104
+ /**
105
+ * Cooldown for the "All accounts blocked" card (Bug 2). The all-blocked outcome
106
+ * is a NO-OP swap — `doFireFleetAutoFallback` returns false on it, so the
107
+ * fleetFallbackGate's dedup window (which arms ONLY on a successful swap) never
108
+ * arms. Meanwhile the card-less `quota_wall_detected` trigger re-signals every
109
+ * ~60s for the whole duration of a weekly wall, so the identical all-blocked
110
+ * card re-broadcasts every minute. This is the notice-level bound that the swap
111
+ * dedup window can't provide for the no-op path — same shape and rationale as
112
+ * the failure-notice cooldown above.
113
+ *
114
+ * Deliberately a plain per-gateway time cooldown (not keyed by trigger account /
115
+ * earliest-recovery): the all-blocked condition is fleet-wide, so a single
116
+ * window suppresses the repeat regardless of which agent's wall re-fired it.
117
+ * A genuinely NEW state transition is not silenced by this: a later SUCCESSFUL
118
+ * swap arms the separate gate window, and the next all-blocked after it (a real
119
+ * new exhaustion) is only rate-limited by this window, not dropped for good.
120
+ */
121
+ export const FALLBACK_ALL_BLOCKED_NOTICE_COOLDOWN_MS = 30 * 60_000;
122
+
123
+ export interface FallbackAllBlockedNoticeState {
124
+ /** Unix ms of the last all-blocked card this gateway sent. 0 = never. */
125
+ lastSentAtMs: number;
126
+ }
127
+
128
+ export function evaluateAllBlockedNotice(
129
+ prev: FallbackAllBlockedNoticeState,
130
+ now: number,
131
+ cooldownMs: number = FALLBACK_ALL_BLOCKED_NOTICE_COOLDOWN_MS,
132
+ ): { send: boolean; next: FallbackAllBlockedNoticeState } {
133
+ if (now - prev.lastSentAtMs >= cooldownMs) {
134
+ return { send: true, next: { lastSentAtMs: now } };
135
+ }
136
+ return { send: false, next: prev };
137
+ }
138
+
104
139
  export type FleetFallbackOutcome =
105
140
  | {
106
141
  kind: 'switched';
@@ -186,7 +221,11 @@ export async function runFleetAutoFallback(
186
221
  // Idempotency guard: don't swap a healthy active account, even if
187
222
  // the trigger event said quota_exhausted. The event may be stale
188
223
  // (event posted, window rolled over, gateway picked it up late).
189
- const oldHealth = classifyHealth(oldSnap);
224
+ // #2494 Bug A — classify against this run's `now` so the refill
225
+ // normalization uses the same clock as the rest of the decision (a default
226
+ // `new Date()` would diverge from `deps.now` and could mis-zero a window
227
+ // whose reset is still future relative to the event's clock).
228
+ const oldHealth = classifyHealth(oldSnap, now);
190
229
  if (oldHealth === 'healthy') {
191
230
  return {
192
231
  kind: 'no-eligible-target',
@@ -220,6 +259,10 @@ export async function runFleetAutoFallback(
220
259
  newLabel: null,
221
260
  newQuota: null,
222
261
  triggerAgent: deps.triggerAgent,
262
+ // Bug 3 — thread the full per-account fleet snapshot so the all-blocked
263
+ // card enumerates EVERY account (5h%/7d% + recovery ETA), letting the
264
+ // user verify the fleet is truly exhausted, not just the trigger account.
265
+ fleetSnapshots: snapshots,
223
266
  tz,
224
267
  now,
225
268
  }),
@@ -14,6 +14,18 @@
14
14
  export const CONTEXT_EXHAUSTION_MARKER = 'Prompt is too long'
15
15
  export const ORPHANED_REPLY_TIMEOUT_MS = 30_000
16
16
 
17
+ /**
18
+ * Maximum number of times the orphaned-reply backstop timer may re-arm
19
+ * itself when a tool call is in flight, before it fires a synthetic turn_end
20
+ * anyway (to surface a genuinely hung tool).
21
+ *
22
+ * Math: 20 re-arms × 30 s fuse = 10 min of genuine tool activity before the
23
+ * backstop surfaces. Chosen to cover multi-phase agent turns (write → compile
24
+ * → test → fix loop) while still catching a truly wedged single tool within a
25
+ * reasonable wall-clock bound.
26
+ */
27
+ export const ORPHANED_REPLY_MAX_REARMS = 20
28
+
17
29
  export function isContextExhaustionText(text: string): boolean {
18
30
  return text.includes(CONTEXT_EXHAUSTION_MARKER)
19
31
  }