switchroom 0.14.82 → 0.14.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -176,6 +176,71 @@ function sendKeys2(agentName, keys) {
176
176
 
177
177
  // src/agents/wedge-watchdog.ts
178
178
  var WEDGE_FOOTER_SIGNATURE = /(?=[\s\S]*[Ee]sc(?:ape)?[^\n]*cancel)(?=[\s\S]*(?:to select|to navigate|\u2191\/\u2193))/;
179
+ var RATE_LIMIT_MENU_SIGNATURE = /(?=[\s\S]*\/rate-limit-options)(?=[\s\S]*(?:Switch to usage credits|Upgrade your plan))/;
180
// Lower-case three-letter month abbreviation -> zero-based month index (the
// month numbering Date.UTC expects).
var MONTHS = {
  jan: 0,
  feb: 1,
  mar: 2,
  apr: 3,
  may: 4,
  jun: 5,
  jul: 6,
  aug: 7,
  sep: 8,
  oct: 9,
  nov: 10,
  dec: 11
};

/**
 * Extract the reset time from text containing
 * "resets <month> <day>[,] <hour>[:<minute>][am|pm] [(<time zone>)]"
 * (matched case-insensitively) and return it as epoch milliseconds.
 *
 * The text carries no year, so the year is inferred: the first candidate year
 * whose resulting instant is not more than 60 s before `nowMs` wins. The
 * previous UTC year is probed too, because close to New Year the wall-clock
 * date in the named zone can still belong to the year before the UTC year of
 * `nowMs`; without that probe such a reset would resolve ~1 year ahead.
 *
 * @param text   Text to search.
 * @param nowMs  Reference "now" in epoch ms (defaults to Date.now()).
 * @returns Epoch ms of the reset, or null when the text does not match, a
 *          field is out of range, the date does not exist in any probed year,
 *          or the time zone is rejected by wallClockToEpoch.
 */
function parseWeeklyReset(text, nowMs = Date.now()) {
  const m = text.match(/resets\s+([A-Za-z]{3,9})\s+(\d{1,2}),?\s+(\d{1,2})(?::(\d{2}))?\s*([ap]m)?\s*(?:\(([^)]+)\))?/i);
  if (!m)
    return null;
  const mon = MONTHS[m[1].slice(0, 3).toLowerCase()];
  if (mon === undefined)
    return null;
  const day = Number(m[2]);
  let hour = Number(m[3]);
  const minute = m[4] ? Number(m[4]) : 0;
  const ampm = m[5]?.toLowerCase();
  // 12-hour -> 24-hour conversion.
  if (ampm === "pm" && hour < 12)
    hour += 12;
  if (ampm === "am" && hour === 12)
    hour = 0;
  // Reject out-of-range fields instead of letting Date.UTC roll them over
  // into a different minute/hour/day.
  if (!Number.isFinite(day) || !Number.isFinite(hour) || day < 1 || day > 31 || hour > 23 || minute > 59) {
    return null;
  }
  const tz = m[6]?.trim();
  const probeYear = new Date(nowMs).getUTCFullYear();
  for (const year of [probeYear - 1, probeYear, probeYear + 1]) {
    // Skip dates that do not exist in this year (e.g. Feb 30, or Feb 29 in a
    // non-leap year): Date.UTC would silently move them into the next month.
    if (new Date(Date.UTC(year, mon, day)).getUTCMonth() !== mon)
      continue;
    const epoch = wallClockToEpoch(year, mon, day, hour, minute, tz);
    if (epoch != null && epoch > nowMs - 60000)
      return epoch;
  }
  return null;
}

/**
 * Convert a wall-clock time to epoch milliseconds.
 *
 * Without `tz` the fields are interpreted as UTC. With `tz` (passed to
 * Intl.DateTimeFormat as `timeZone`) the zone's UTC offset is looked up and
 * removed. The offset is sampled twice: first at the naive "fields as UTC"
 * instant, then at the resulting candidate. The second sample is what makes
 * the result correct when a DST transition lies between those two instants.
 *
 * @param month  Zero-based month index.
 * @returns Epoch ms, or null when Intl rejects the time zone or the fields do
 *          not form a valid date.
 */
function wallClockToEpoch(year, month, day, hour, minute, tz) {
  const asUtc = Date.UTC(year, month, day, hour, minute, 0);
  if (!tz)
    return asUtc;
  try {
    const fmt = new Intl.DateTimeFormat("en-US", {
      timeZone: tz,
      year: "numeric",
      month: "2-digit",
      day: "2-digit",
      hour: "2-digit",
      minute: "2-digit",
      second: "2-digit",
      hour12: false
    });
    // Offset (ms) between the zone's wall clock and UTC at a given instant.
    const offsetAt = (instantMs) => {
      const parts = Object.fromEntries(fmt.formatToParts(new Date(instantMs)).filter((p) => p.type !== "literal").map((p) => [p.type, p.value]));
      // `% 24`: some engines render midnight as hour "24" when hour12 is false.
      const shown = Date.UTC(Number(parts.year), Number(parts.month) - 1, Number(parts.day), Number(parts.hour) % 24, Number(parts.minute), Number(parts.second));
      return shown - instantMs;
    };
    const firstGuess = asUtc - offsetAt(asUtc);
    const refinedOffset = offsetAt(firstGuess);
    const refined = asUtc - refinedOffset;
    if (offsetAt(refined) === refinedOffset)
      return refined;
    // The wall-clock time does not exist (it falls in a spring-forward gap):
    // resolve it to the later candidate, i.e. push it past the gap.
    return Math.max(firstGuess, refined);
  } catch {
    return null;
  }
}
179
244
  var DEFAULT_POLL_MS2 = 5000;
180
245
  var DEFAULT_STABILITY_THRESHOLD = 3;
181
246
  var DEFAULT_COOLDOWN_MS = 60000;
@@ -193,6 +258,9 @@ async function runWedgeWatchdog(opts) {
193
258
  const cooldownMs = opts.cooldownMs ?? DEFAULT_COOLDOWN_MS;
194
259
  const deferToPrompts = opts.deferToPrompts ?? PROMPTS2;
195
260
  const signature = opts.wedgeSignature ?? WEDGE_FOOTER_SIGNATURE;
261
+ const rateLimitSignature = opts.rateLimitSignature === null ? null : opts.rateLimitSignature ?? RATE_LIMIT_MENU_SIGNATURE;
262
+ const onRateLimitMenu = opts.onRateLimitMenu;
263
+ const parseReset = opts.parseReset ?? parseWeeklyReset;
196
264
  const maxPolls = opts.maxPolls ?? Number.POSITIVE_INFINITY;
197
265
  const now = opts.now ?? Date.now;
198
266
  const sleep = opts.sleep ?? defaultSleep2;
@@ -202,6 +270,7 @@ async function runWedgeWatchdog(opts) {
202
270
  let lastKey = null;
203
271
  let cooldownUntil = 0;
204
272
  let fires = 0;
273
+ let rateLimitFires = 0;
205
274
  let polls = 0;
206
275
  while (polls < maxPolls) {
207
276
  polls++;
@@ -212,8 +281,38 @@ async function runWedgeWatchdog(opts) {
212
281
  console.error(`[wedge-watchdog] ${opts.agentName}: capture threw: ${err.message}`);
213
282
  text = "";
214
283
  }
215
- const isBlockingModal = !!text && signature.test(text) && !deferToPrompts.some((p) => p.match.test(text));
216
- if (isBlockingModal) {
284
+ const isRateLimitMenu = !!text && rateLimitSignature !== null && rateLimitSignature.test(text);
285
+ const isBlockingModal = !isRateLimitMenu && !!text && signature.test(text) && !deferToPrompts.some((p) => p.match.test(text));
286
+ if (isRateLimitMenu) {
287
+ const key = stabilityKey(text);
288
+ if (key === lastKey) {
289
+ stableCount++;
290
+ } else {
291
+ stableCount = 1;
292
+ lastKey = key;
293
+ }
294
+ if (stableCount >= stabilityThreshold && now() >= cooldownUntil) {
295
+ const resetAt = parseReset(text, now());
296
+ console.error(`[wedge-watchdog] ${opts.agentName}: rate-limit (weekly-quota) menu detected ` + `after ${stableCount} stable polls \u2014 signalling failover` + (resetAt != null ? ` (resets ${new Date(resetAt).toISOString()})` : " (reset unparsed)") + ` then parking with Esc`);
297
+ if (onRateLimitMenu) {
298
+ try {
299
+ onRateLimitMenu(opts.agentName, resetAt);
300
+ } catch (err) {
301
+ console.error(`[wedge-watchdog] ${opts.agentName}: onRateLimitMenu threw: ${err.message}`);
302
+ }
303
+ }
304
+ try {
305
+ send(opts.agentName, ["Escape"]);
306
+ } catch (err) {
307
+ console.error(`[wedge-watchdog] ${opts.agentName}: send threw: ${err.message}`);
308
+ }
309
+ fires++;
310
+ rateLimitFires++;
311
+ cooldownUntil = now() + cooldownMs;
312
+ stableCount = 0;
313
+ lastKey = null;
314
+ }
315
+ } else if (isBlockingModal) {
217
316
  const key = stabilityKey(text);
218
317
  if (key === lastKey) {
219
318
  stableCount++;
@@ -239,7 +338,74 @@ async function runWedgeWatchdog(opts) {
239
338
  }
240
339
  await sleep(pollIntervalMs);
241
340
  }
242
- return { fires, polls, reason: "max-polls" };
341
+ return { fires, rateLimitFires, polls, reason: "max-polls" };
342
+ }
343
+
344
+ // src/agents/rate-limit-signal.ts
345
+ import { createConnection } from "node:net";
346
+ import { join } from "node:path";
347
/**
 * Resolve the path of the gateway socket.
 *
 * Precedence: SWITCHROOM_GATEWAY_SOCKET, else "gateway.sock" inside
 * TELEGRAM_STATE_DIR, else "gateway.sock" inside "/state/agent/telegram".
 * An environment variable that is set but empty is treated as unset, so an
 * empty value can no longer yield an empty socket path or a path relative to
 * the current working directory.
 */
function resolveGatewaySocketPath() {
  const socketOverride = process.env.SWITCHROOM_GATEWAY_SOCKET;
  if (socketOverride)
    return socketOverride;
  const stateDir = process.env.TELEGRAM_STATE_DIR || "/state/agent/telegram";
  return join(stateDir, "gateway.sock");
}
351
/**
 * Tell the gateway that `agentName` has hit a quota wall.
 *
 * Connects to the gateway socket, writes one newline-terminated JSON message
 * (`{ type: "quota_wall_detected", agentName, resetAt? }`) and ends the
 * connection. Best-effort: the returned promise never rejects.
 *
 * @param agentName  Agent to report.
 * @param resetAt    Reset time; left out of the message when null/undefined.
 * @param options    Optional overrides: `socketPath` (default
 *                   resolveGatewaySocketPath()), `connectTimeoutMs` (default
 *                   5000; the timer runs until the write completes), and the
 *                   injection seams `_connect` (default createConnection) and
 *                   `_log` (default console.error).
 * @returns Promise of true once the message was written; false on a connect
 *          failure, write failure, socket error or timeout.
 */
function signalQuotaWall(agentName, resetAt, { socketPath, _connect, _log, connectTimeoutMs } = {}) {
  const targetPath = socketPath ?? resolveGatewaySocketPath();
  const connect = _connect ?? ((p) => createConnection(p));
  const log = _log ?? ((m) => console.error(m));
  const timeoutMs = connectTimeoutMs ?? 5000;
  const msg = { type: "quota_wall_detected", agentName, ...(resetAt != null ? { resetAt } : {}) };
  const line = JSON.stringify(msg) + "\n";
  return new Promise((resolve) => {
    let settled = false;
    let timer;
    let sock;
    // Settle exactly once; every terminal path also disarms the timeout.
    const done = (ok) => {
      clearTimeout(timer);
      if (settled)
        return;
      settled = true;
      resolve(ok);
    };
    const destroyQuietly = () => {
      try {
        sock.destroy();
      } catch {}
    };
    try {
      sock = connect(targetPath);
    } catch (err) {
      log(`[rate-limit-signal] ${agentName}: connect threw: ${err.message}`);
      done(false);
      return;
    }
    timer = setTimeout(() => {
      log(`[rate-limit-signal] ${agentName}: connect timed out`);
      destroyQuietly();
      done(false);
    }, timeoutMs);
    sock.on("connect", () => {
      try {
        sock.write(line, (err) => {
          // The write callback receives the failure, if any. Without this
          // check a failed write was logged as "sent" and resolved true.
          if (err) {
            log(`[rate-limit-signal] ${agentName}: write failed: ${err.message}`);
            destroyQuietly();
            done(false);
            return;
          }
          try {
            sock.end();
          } catch {}
          log(`[rate-limit-signal] ${agentName}: quota_wall_detected sent`);
          done(true);
        });
      } catch (err) {
        log(`[rate-limit-signal] ${agentName}: write threw: ${err.message}`);
        destroyQuietly();
        done(false);
      }
    });
    sock.on("error", (err) => {
      log(`[rate-limit-signal] ${agentName}: socket error: ${err.message}`);
      // Release the handle even when an injected `_connect` does not
      // auto-destroy on error; destroying twice is harmless.
      destroyQuietly();
      done(false);
    });
  });
}
244
410
 
245
411
  // src/cli/autoaccept-poll.ts
@@ -259,10 +425,17 @@ async function main() {
259
425
  console.error(`[autoaccept-poll] ${agentName}: wedge-watchdog disabled (SWITCHROOM_WEDGE_WATCHDOG=0) \u2014 exiting after boot phase`);
260
426
  process.exit(0);
261
427
  }
428
+ const rateLimitDetect = process.env.SWITCHROOM_RATE_LIMIT_DETECT !== "0";
262
429
  try {
263
- console.error(`[autoaccept-poll] ${agentName}: entering wedge-watchdog (continuous)`);
264
- const res = await runWedgeWatchdog({ agentName });
265
- console.error(`[autoaccept-poll] ${agentName}: wedge-watchdog returned reason=${res.reason} fires=${res.fires}`);
430
+ console.error(`[autoaccept-poll] ${agentName}: entering wedge-watchdog (continuous)` + (rateLimitDetect ? " +rate-limit-detect" : " (rate-limit-detect OFF)"));
431
+ const res = await runWedgeWatchdog({
432
+ agentName,
433
+ rateLimitSignature: rateLimitDetect ? undefined : null,
434
+ onRateLimitMenu: rateLimitDetect ? (name, resetAt) => {
435
+ signalQuotaWall(name, resetAt);
436
+ } : undefined
437
+ });
438
+ console.error(`[autoaccept-poll] ${agentName}: wedge-watchdog returned reason=${res.reason} fires=${res.fires} rateLimitFires=${res.rateLimitFires}`);
266
439
  } catch (err) {
267
440
  console.error(`[autoaccept-poll] ${agentName}: wedge-watchdog unexpected throw: ${err.message}`);
268
441
  }
@@ -14863,6 +14863,39 @@ async function parseSseOrJson(resp) {
14863
14863
  const payload = dataLine ? dataLine.slice("data: ".length) : text;
14864
14864
  return JSON.parse(payload);
14865
14865
  }
14866
/**
 * Ask a hindsight server for its tool list via a JSON-RPC `tools/list` POST.
 *
 * @param apiUrl  Endpoint to POST to.
 * @param opts    Optional `fetchImpl` (default: global fetch), `timeoutMs`
 *                (default 4000) and `bankId` (sent as X-Bank-Id, default
 *                "__doctor_probe__").
 * @returns `{ ok: true, tools }` where each tool is `{ name, required }`, or
 *          `{ ok: false, reason }`. Never throws.
 */
async function fetchHindsightToolsList(apiUrl, opts) {
  const fetchImpl = opts?.fetchImpl ?? fetch;
  const timeoutMs = opts?.timeoutMs ?? 4000;
  const bankId = opts?.bankId ?? "__doctor_probe__";
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const resp = await fetchImpl(`${apiUrl}`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Accept: "application/json, text/event-stream",
        "X-Bank-Id": bankId
      },
      body: JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/list" }),
      signal: controller.signal
    });
    if (!resp.ok)
      return { ok: false, reason: `HTTP ${resp.status}` };
    // The abort timer is deliberately still armed here: the body is read
    // after the headers arrive, and a stalled body must time out too.
    const parsed = await parseSseOrJson(resp);
    const raw = parsed?.result?.tools;
    if (!Array.isArray(raw))
      return { ok: false, reason: "no tools in tools/list response" };
    const tools = raw.filter((t) => typeof t?.name === "string").map((t) => {
      // Normalise `required` to a string array so consumers can call array
      // methods on it even when the server sends a malformed schema.
      const required = t.inputSchema?.required;
      return {
        name: t.name,
        required: Array.isArray(required) ? required.filter((arg) => typeof arg === "string") : []
      };
    });
    return { ok: true, tools };
  } catch (err) {
    // `err?.`: a rejection value may be null/undefined; the handler itself
    // must not throw.
    if (err?.name === "AbortError")
      return { ok: false, reason: "Timeout" };
    return { ok: false, reason: String(err?.message ?? err) };
  } finally {
    clearTimeout(timeout);
  }
}
14866
14899
  async function probeHindsight(apiUrl, opts) {
14867
14900
  const fetchImpl = opts?.fetchImpl ?? fetch;
14868
14901
  const timeoutMs = opts?.timeoutMs ?? 3000;
@@ -14991,8 +15024,7 @@ async function ensureUserProfileMentalModel(apiUrl, bankId, opts) {
14991
15024
  name: "create_mental_model",
14992
15025
  arguments: {
14993
15026
  name: "user-profile",
14994
- source_query: "What are the key facts, preferences, context, and communication style about the user I talk to? Summarize what matters for making the agent feel like it knows them.",
14995
- types: ["world", "experience"]
15027
+ source_query: "What are the key facts, preferences, context, and communication style about the user I talk to? Summarize what matters for making the agent feel like it knows them."
14996
15028
  }
14997
15029
  }
14998
15030
  }),
@@ -15077,6 +15109,12 @@ async function createBank(apiUrl, bankId, opts) {
15077
15109
  if (!toolResponse.ok) {
15078
15110
  return { ok: false, reason: `Tool call HTTP ${toolResponse.status}` };
15079
15111
  }
15112
+ try {
15113
+ const created = await parseSseOrJson(toolResponse);
15114
+ if (created.result?.isError === true) {
15115
+ return { ok: false, reason: created.result.content?.[0]?.text ?? "create_bank returned isError" };
15116
+ }
15117
+ } catch {}
15080
15118
  return { ok: true };
15081
15119
  } catch (err) {
15082
15120
  if (err.name === "AbortError") {
@@ -15137,8 +15175,8 @@ async function updateBankMissions(apiUrl, bankId, missions, opts) {
15137
15175
  name: "update_bank",
15138
15176
  arguments: {
15139
15177
  bank_id: bankId,
15140
- mission: missions.bank_mission,
15141
- retain_mission: missions.retain_mission
15178
+ ...missions.bank_mission != null ? { mission: missions.bank_mission } : {},
15179
+ ...missions.retain_mission != null ? { config_updates: { retain_mission: missions.retain_mission } } : {}
15142
15180
  }
15143
15181
  }
15144
15182
  }),
@@ -15148,6 +15186,12 @@ async function updateBankMissions(apiUrl, bankId, missions, opts) {
15148
15186
  if (!toolResponse.ok) {
15149
15187
  return { ok: false, reason: `Tool call HTTP ${toolResponse.status}` };
15150
15188
  }
15189
+ try {
15190
+ const updated = await parseSseOrJson(toolResponse);
15191
+ if (updated.result?.isError === true) {
15192
+ return { ok: false, reason: updated.result.content?.[0]?.text ?? "update_bank returned isError" };
15193
+ }
15194
+ } catch {}
15151
15195
  return { ok: true };
15152
15196
  } catch (err) {
15153
15197
  if (err.name === "AbortError") {
@@ -28959,6 +29003,30 @@ var init_manifest = __esm(() => {
28959
29003
  ]);
28960
29004
  });
28961
29005
 
29006
+ // src/memory/hindsight-tools.ts
29007
+ var EXPECTED_HINDSIGHT_TOOLS;
29008
+ var init_hindsight_tools = __esm(() => {
29009
+ EXPECTED_HINDSIGHT_TOOLS = {
29010
+ recall: { required: ["query"] },
29011
+ reflect: { required: ["query"] },
29012
+ retain: { required: ["content"] },
29013
+ sync_retain: { required: ["content"] },
29014
+ delete_document: { required: ["document_id"] },
29015
+ create_directive: { required: ["content", "name"] },
29016
+ list_directives: { required: [] },
29017
+ delete_directive: { required: ["directive_id"] },
29018
+ create_bank: { required: ["bank_id"] },
29019
+ update_bank: { required: [] },
29020
+ list_banks: { required: [] },
29021
+ create_mental_model: { required: ["name", "source_query"] },
29022
+ list_mental_models: { required: [] },
29023
+ update_mental_model: { required: ["mental_model_id"] },
29024
+ refresh_mental_model: { required: ["mental_model_id"] },
29025
+ list_memories: { required: [] },
29026
+ get_memory: { required: ["memory_id"] }
29027
+ };
29028
+ });
29029
+
28962
29030
  // src/cli/doctor-memory.ts
28963
29031
  import { execFileSync as execFileSync17 } from "node:child_process";
28964
29032
  function classifyShmSize(bytes) {
@@ -29030,8 +29098,51 @@ function checkHindsightContainerHealth(opts) {
29030
29098
  } catch {}
29031
29099
  return results;
29032
29100
  }
29101
/**
 * Check the tools a hindsight server advertises against
 * EXPECTED_HINDSIGHT_TOOLS and report the differences as check results.
 *
 * Per expected tool, at most one result is produced:
 *  - "fail" when the tool is not advertised at all;
 *  - "fail" when the server requires arguments the expectation does not list;
 *  - "warn" when the expectation lists required arguments the server does not
 *    (only reported if the previous case did not apply).
 * When nothing differs, a single "ok" result is returned instead.
 *
 * @param advertised  Entries of shape `{ name, required }`.
 * @returns Array of `{ name, status, detail, fix? }`.
 */
function classifyToolContract(advertised) {
  const advertisedByName = new Map();
  advertised.forEach((entry) => {
    advertisedByName.set(entry.name, entry);
  });
  const findings = [];
  for (const tool of Object.keys(EXPECTED_HINDSIGHT_TOOLS)) {
    const spec = EXPECTED_HINDSIGHT_TOOLS[tool];
    const name = `hindsight contract: ${tool}`;
    const real = advertisedByName.get(tool);
    if (real === undefined) {
      findings.push({
        name,
        status: "fail",
        detail: `switchroom calls \`${tool}\` but the server no longer advertises it ` + `(renamed/removed upstream) \u2014 every callsite silently no-ops`,
        fix: "Upstream hindsight changed its MCP tool contract. Update the callsite " + "+ EXPECTED_HINDSIGHT_TOOLS (src/memory/hindsight-tools.ts) to the new " + "name, refresh tests/fixtures/hindsight-tools-list.snapshot.json, or pin " + "the prior hindsight image."
      });
      continue;
    }
    // Expected-but-not-required-by-server, then required-by-server-but-unexpected.
    const missing = spec.required.filter((arg) => !real.required.includes(arg));
    const added = real.required.filter((arg) => !spec.required.includes(arg));
    if (added.length === 0 && missing.length === 0)
      continue;
    const serverTightened = added.length > 0;
    findings.push(serverTightened ? {
      name,
      status: "fail",
      detail: `server now requires [${added.join(", ")}] on \`${tool}\` which ` + `switchroom does not track \u2014 calls may silently no-op`,
      fix: "Reconcile EXPECTED_HINDSIGHT_TOOLS + the callsite args with the new " + "server schema, then refresh the snapshot fixture."
    } : {
      name,
      status: "warn",
      detail: `switchroom treats [${missing.join(", ")}] as required on \`${tool}\` ` + `but the server no longer does (loosened upstream) \u2014 harmless, but the ` + `fixture is stale`,
      fix: "Refresh EXPECTED_HINDSIGHT_TOOLS + the snapshot fixture."
    });
  }
  if (findings.length > 0)
    return findings;
  const used = Object.keys(EXPECTED_HINDSIGHT_TOOLS).length;
  return [
    {
      name: "hindsight contract",
      status: "ok",
      detail: `${used} used tools present, required args satisfied (${advertised.length} advertised)`
    }
  ];
}
29033
29143
  var MIN_HINDSIGHT_SHM_BYTES;
29034
29144
  var init_doctor_memory = __esm(() => {
29145
+ init_hindsight_tools();
29035
29146
  MIN_HINDSIGHT_SHM_BYTES = 1024 * 1024 * 1024;
29036
29147
  });
29037
29148
 
@@ -32043,6 +32154,10 @@ async function checkHindsight(config) {
32043
32154
  status: "ok",
32044
32155
  detail: `${probe2.serverName} ${probe2.serverVersion} at ${host}:${port}`
32045
32156
  });
32157
+ const toolsList = await fetchHindsightToolsList(url);
32158
+ if (toolsList.ok) {
32159
+ results.push(...classifyToolContract(toolsList.tools));
32160
+ }
32046
32161
  results.push(checkHindsightConsumer(config));
32047
32162
  results.push(...checkHindsightContainerHealth());
32048
32163
  for (const [agentName, agentConfig] of Object.entries(config.agents)) {
@@ -49700,8 +49815,8 @@ var {
49700
49815
  } = import__.default;
49701
49816
 
49702
49817
  // src/build-info.ts
49703
- var VERSION = "0.14.82";
49704
- var COMMIT_SHA = "91bc41d1";
49818
+ var VERSION = "0.14.84";
49819
+ var COMMIT_SHA = "af97bc41";
49705
49820
 
49706
49821
  // src/cli/agent.ts
49707
49822
  init_source();
@@ -559,16 +559,42 @@
559
559
  }
560
560
  }
561
561
 
562
+ // Guards a second click from starting a second device-code flow (each
563
+ // start makes Microsoft send a sign-in email → the "2 emails" bug).
564
+ let msConnecting = false;
565
+
566
// Fetch the Microsoft account emails the broker currently knows about, as a
// Set of lower-cased strings (entries flagged `brokerKnown` only).
// Purpose: a restart-resilient baseline. The connect status is held only in
// the web process's memory, so after a restart mid-connect it reads 'unknown'
// even though the token WAS stored; comparing this set before and after shows
// whether an account really got connected.
// Any failure (non-OK response, network error, unexpected payload) yields an
// empty Set rather than an exception.
async function fetchMicrosoftAccountEmails() {
  try {
    const response = await fetch(`${API}/api/microsoft-accounts`, { headers: authHeaders() });
    if (!response.ok) {
      return new Set();
    }
    const payload = await response.json();
    const known = new Set();
    for (const entry of payload || []) {
      if (entry.brokerKnown) {
        known.add(String(entry.account).toLowerCase());
      }
    }
    return known;
  } catch {
    return new Set();
  }
}
579
+
562
580
  // Start an in-browser Microsoft connect: show the device code + link,
563
581
  // then poll until the operator completes sign-in on Microsoft's site.
564
582
  async function connectMicrosoft() {
583
+ if (msConnecting) return; // double-submit guard
584
+ msConnecting = true;
585
+ const btn = document.getElementById('ms-connect-btn');
586
+ if (btn) btn.disabled = true;
587
+ const done = () => { msConnecting = false; const b = document.getElementById('ms-connect-btn'); if (b) b.disabled = false; };
565
588
  const card = document.getElementById('ms-connect-card');
566
589
  const show = (html) => { if (card) card.innerHTML = html; };
567
590
  show('<div class="loading" style="padding:.8rem">Starting…</div>');
591
+ // Snapshot already-connected accounts BEFORE starting, for the
592
+ // restart-resilient terminal check below.
593
+ const before = await fetchMicrosoftAccountEmails();
568
594
  try {
569
595
  const res = await fetch(`${API}/api/connections/microsoft/connect`, { method: 'POST', headers: authHeaders() });
570
596
  const data = await res.json();
571
- if (!res.ok || !data.ok) { show(''); showError(data.error || `HTTP ${res.status}`); return; }
597
+ if (!res.ok || !data.ok) { show(''); showError(data.error || `HTTP ${res.status}`); done(); return; }
572
598
  const url = data.verificationUri, code = data.userCode;
573
599
  show(`<div class="account-card" style="border-color:var(--accent)">
574
600
  <div class="account-card-header"><div class="account-label">Connect a Microsoft account</div></div>
@@ -580,27 +606,58 @@
580
606
  <div id="ms-connect-status" style="color:var(--text-dim);margin-top:.3rem">Waiting for sign-in… (this card expires in ~15 min)</div>
581
607
  </div>`);
582
608
  const statusEl = () => document.getElementById('ms-connect-status');
583
- const started = Date.now();
609
+ const deadline = Date.now() + ((data.expiresInSec || 900) * 1000 + 30000);
610
+ const showConnected = (label) => {
611
+ show(`<div class="loading" style="padding:.8rem;color:var(--green)">✓ Connected ${escapeHtml(label)}. Use the access toggles below to grant an agent.</div>`);
612
+ fetchConnections();
613
+ };
614
+ // On any non-'connected' terminal state ('failed' or 'unknown'),
615
+ // re-check the broker's actual account list before declaring failure:
616
+ // a new account appearing means the connect really succeeded (e.g. the
617
+ // status was lost to a web restart). Only error if nothing new landed.
618
+ // Limitations (acceptable — the in-memory 'connected' state is the
619
+ // primary signal; this is only the lost-status fallback): a concurrent
620
+ // connect of a DIFFERENT account in another tab/CLI could be mistaken
621
+ // for this one; and re-connecting an ALREADY-known account (token
622
+ // refresh) shows no new account so falls through to the error.
623
+ const settleNonConnected = async (reason) => {
624
+ const after = await fetchMicrosoftAccountEmails();
625
+ const fresh = [...after].find(a => !before.has(a));
626
+ if (fresh) { showConnected(fresh); } else { show(''); showError(reason || 'connect failed'); }
627
+ done();
628
+ };
584
629
  const poll = async () => {
585
- const sres = await fetch(`${API}/api/connections/microsoft/connect/${encodeURIComponent(data.requestId)}`, { headers: authHeaders() });
586
- const s = sres.ok ? await sres.json() : { state: 'failed', reason: `HTTP ${sres.status}` };
630
+ let s;
631
+ try {
632
+ const sres = await fetch(`${API}/api/connections/microsoft/connect/${encodeURIComponent(data.requestId)}`, { headers: authHeaders() });
633
+ s = sres.ok ? await sres.json() : { state: 'failed', reason: `HTTP ${sres.status}` };
634
+ } catch {
635
+ // Transient fetch failure — most likely the web process restarting
636
+ // mid-connect (the exact case this flow must survive). Don't die
637
+ // (that would strand msConnecting=true and lock the button): keep
638
+ // polling until the device-code deadline, then settle via the
639
+ // broker re-check (which recovers a token stored before the restart).
640
+ if (Date.now() > deadline) { await settleNonConnected('connection lost'); return; }
641
+ setTimeout(poll, 3000);
642
+ return;
643
+ }
587
644
  if (s.state === 'pending') {
588
- if (Date.now() - started > ((data.expiresInSec || 900) * 1000 + 30000)) { const e = statusEl(); if (e) e.textContent = 'Expired — click Connect to try again.'; return; }
645
+ if (Date.now() > deadline) { const e = statusEl(); if (e) e.textContent = 'Expired — click Connect to try again.'; done(); return; }
589
646
  setTimeout(poll, 3000);
590
647
  return;
591
648
  }
592
649
  if (s.state === 'connected') {
593
- show(`<div class="loading" style="padding:.8rem;color:var(--green)">✓ Connected ${escapeHtml(s.account)} (${escapeHtml(s.accountType)}). Use the access toggles below to grant an agent.</div>`);
594
- fetchConnections();
650
+ showConnected(`${s.account} (${s.accountType})`);
651
+ done();
595
652
  } else {
596
- show('');
597
- showError(s.reason || 'connect failed');
653
+ await settleNonConnected(s.reason);
598
654
  }
599
655
  };
600
656
  setTimeout(poll, 3000);
601
657
  } catch (err) {
602
658
  show('');
603
659
  showError(err.message);
660
+ done();
604
661
  }
605
662
  }
606
663
 
@@ -1165,7 +1222,7 @@
1165
1222
  <div style="margin-bottom:1.5rem">
1166
1223
  <h3 style="margin:0 0 .6rem;font-size:.95rem;color:var(--text-dim);text-transform:uppercase;letter-spacing:.04em">
1167
1224
  Microsoft 365
1168
- <button onclick="connectMicrosoft()" class="usage-pill primary" style="margin-left:.6rem;cursor:pointer;border:none;text-transform:none;font-weight:600">+ Connect a Microsoft account</button>
1225
+ <button id="ms-connect-btn" onclick="connectMicrosoft()" class="usage-pill primary" style="margin-left:.6rem;cursor:pointer;border:none;text-transform:none;font-weight:600">+ Connect a Microsoft account</button>
1169
1226
  </h3>
1170
1227
  <div id="ms-connect-card"></div>
1171
1228
  ${msCards
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.14.82",
3
+ "version": "0.14.84",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -46487,6 +46487,13 @@ function validateClientMessage(msg) {
46487
46487
  const inb = m.inbound;
46488
46488
  return inb.type === "inbound" && typeof inb.chatId === "string" && inb.chatId.length > 0 && typeof inb.text === "string" && typeof inb.messageId === "number" && typeof inb.user === "string" && typeof inb.userId === "number" && typeof inb.ts === "number" && typeof inb.meta === "object" && inb.meta !== null;
46489
46489
  }
46490
+ case "quota_wall_detected": {
46491
+ if (typeof m.agentName !== "string" || !AGENT_NAME_RE3.test(m.agentName))
46492
+ return false;
46493
+ if (m.resetAt !== undefined && (typeof m.resetAt !== "number" || !Number.isFinite(m.resetAt)))
46494
+ return false;
46495
+ return true;
46496
+ }
46490
46497
  case "request_config_approval": {
46491
46498
  if (typeof m.requestId !== "string" || m.requestId.length === 0 || m.requestId.length > 64)
46492
46499
  return false;
@@ -46548,6 +46555,7 @@ function createIpcServer(options) {
46548
46555
  onOperatorEvent,
46549
46556
  onPtyPartial,
46550
46557
  onInjectInbound,
46558
+ onQuotaWallDetected,
46551
46559
  onRequestDriveApproval,
46552
46560
  onRequestMs365Approval,
46553
46561
  onRequestConfigApproval,
@@ -46632,6 +46640,10 @@ function createIpcServer(options) {
46632
46640
  if (onInjectInbound)
46633
46641
  onInjectInbound(client3, msg);
46634
46642
  break;
46643
+ case "quota_wall_detected":
46644
+ if (onQuotaWallDetected)
46645
+ onQuotaWallDetected(client3, msg);
46646
+ break;
46635
46647
  case "request_drive_approval":
46636
46648
  if (onRequestDriveApproval) {
46637
46649
  onRequestDriveApproval(client3, msg).catch((err) => {
@@ -52661,6 +52673,53 @@ function evaluateQuotaWatchAccount(args) {
52661
52673
  }
52662
52674
  return { kind: "skip", accountLabel: label, reason: "no-matching-transition" };
52663
52675
  }
52676
+ var FLEET_ALL_EXHAUSTED_KEY = "__fleet_all_exhausted__";
52677
/**
 * Decide whether the "all accounts exhausted" fleet alert should change state.
 *
 * The alert counts as outstanding when `prev.lastNotifiedHealth` is
 * "throttling". A notification is produced only on an edge:
 *  - "entered":   every account is exhausted (and there is at least one
 *                 account) while no alert is outstanding;
 *  - "recovered": an alert is outstanding but not every account is exhausted.
 * Otherwise the result is a "skip" with the reason for it.
 *
 * @param args  `{ accounts, prev, now }`.
 * @returns `{ kind: "notify", message, newState, transition }` or
 *          `{ kind: "skip", reason }`.
 */
function evaluateFleetAllExhausted(args) {
  const { accounts, prev, now } = args;
  const everyAccountExhausted = accounts.length > 0 && accounts.every((account) => account.exhausted);
  const alertOutstanding = prev.lastNotifiedHealth === "throttling";
  // No edge: the current condition already matches what was last announced.
  if (everyAccountExhausted === alertOutstanding) {
    return {
      kind: "skip",
      reason: everyAccountExhausted ? "still-all-exhausted" : "not-all-exhausted"
    };
  }
  if (everyAccountExhausted) {
    return {
      kind: "notify",
      message: buildAllExhaustedMessage(accounts, now),
      newState: { lastNotifiedHealth: "throttling", lastNotifiedAt: now },
      transition: "entered"
    };
  }
  return {
    kind: "notify",
    message: buildFleetRecoveredMessage(accounts),
    newState: { lastNotifiedHealth: "healthy", lastNotifiedAt: now },
    transition: "recovered"
  };
}
52699
/**
 * Build the HTML text of the "all accounts exhausted" notification.
 *
 * The reset line reports the earliest `exhausted_until` that is a number and
 * lies after `now`; when no account has one, it says the reset time is unknown.
 *
 * @param accounts  Accounts; only `exhausted_until` and the array length are read.
 * @param now       Reference time in epoch ms.
 * @returns The message lines joined with newlines.
 */
function buildAllExhaustedMessage(accounts, now) {
  // Smallest future reset timestamp seen so far, or null when there is none.
  let earliest = null;
  accounts.forEach((account) => {
    const until = account.exhausted_until;
    if (typeof until !== "number" || !(until > now)) {
      return;
    }
    if (earliest === null || until < earliest) {
      earliest = until;
    }
  });
  let resetLine;
  if (earliest) {
    resetLine = `Earliest reset: ${formatRelative(new Date(earliest), new Date(now))}.`;
  } else {
    resetLine = `Reset time unknown (no window data).`;
  }
  const lines = [];
  lines.push(`\uD83D\uDD34 <b>All accounts exhausted</b>`);
  lines.push(``);
  lines.push(`Every Anthropic account (${accounts.length}) is quota-walled \u2014 there is no healthy account to fail over to.`);
  lines.push(resetLine);
  lines.push(``);
  lines.push(`<i>This is self-healing: agents resume and deferred scheduled jobs run automatically once a window resets. Nothing is lost. Add headroom with <code>/auth add</code> if this recurs.</i>`);
  return lines.join("\n");
}
52713
/**
 * Build the HTML text of the "fleet recovered" notification.
 *
 * When at least one account is not exhausted, the label of the first such
 * account is appended (HTML-escaped) to the headline.
 *
 * @param accounts  Accounts; `exhausted` and `label` are read.
 * @returns The message lines joined with newlines.
 */
function buildFleetRecoveredMessage(accounts) {
  const healthyLabels = [];
  accounts.forEach((account) => {
    if (!account.exhausted) {
      healthyLabels.push(account.label);
    }
  });
  let which = "";
  if (healthyLabels.length > 0) {
    which = ` (<code>${escapeHtml10(healthyLabels[0])}</code>)`;
  }
  const headline = `\uD83D\uDFE2 <b>Fleet recovered</b> \u2014 at least one account is healthy again${which}.`;
  const footnote = `<i>Agents are back; any deferred scheduled jobs will run on their next occurrence.</i>`;
  return [headline, ``, footnote].join("\n");
}
52664
52723
  function buildThrottlingMessage(agentName3, snap) {
52665
52724
  const q = snap.quota;
52666
52725
  const fiveStr = fmtPct(q.fiveHourUtilizationPct);
@@ -52810,9 +52869,9 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
52810
52869
  }
52811
52870
 
52812
52871
  // ../src/build-info.ts
52813
- var VERSION = "0.14.82";
52814
- var COMMIT_SHA = "91bc41d1";
52815
- var COMMIT_DATE = "2026-06-07T12:22:49+10:00";
52872
+ var VERSION = "0.14.84";
52873
+ var COMMIT_SHA = "af97bc41";
52874
+ var COMMIT_DATE = "2026-06-07T13:38:19+10:00";
52816
52875
  var LATEST_PR = null;
52817
52876
  var COMMITS_AHEAD_OF_TAG = 2;
52818
52877
 
@@ -56111,6 +56170,13 @@ var ipcServer = createIpcServer({
56111
56170
  pendingInboundBuffer.push(msg.agentName, msg.inbound);
56112
56171
  }
56113
56172
  },
56173
// IPC handler for "quota_wall_detected": log the event and trigger the fleet
// auto-fallback for the reporting agent.
// `msg.resetAt` is used as the exhaustion deadline only when it is a finite
// number, lies in the future, and is representable as a Date; otherwise the
// deadline defaults to now + 7 days.
onQuotaWallDetected(_client, msg) {
  const WEEKLY_MS = 604800000;
  // Largest epoch-ms value a Date can hold; beyond it toISOString() throws,
  // which would make this handler throw on a finite-but-absurd resetAt.
  const MAX_DATE_MS = 8640000000000000;
  const nowMs = Date.now();
  const resetUsable = typeof msg.resetAt === "number" && Number.isFinite(msg.resetAt) && msg.resetAt > nowMs && msg.resetAt <= MAX_DATE_MS;
  const untilMs = resetUsable ? msg.resetAt : nowMs + WEEKLY_MS;
  // Say in the log whenever the default was applied, including the case where
  // a resetAt was supplied but is stale or out of range.
  let note = "";
  if (msg.resetAt == null) {
    note = " (reset unparsed \u2192 +7d default)";
  } else if (!resetUsable) {
    note = " (reset stale or invalid \u2192 +7d default)";
  }
  process.stderr.write(`telegram gateway: quota_wall_detected agent=${msg.agentName} until=${new Date(untilMs).toISOString()}` + note + ` \u2014 triggering fleet auto-fallback\n`);
  // fireFleetAutoFallback is async; attach a handler so a rejection cannot
  // surface as an unhandled promise rejection.
  fireFleetAutoFallback(msg.agentName, untilMs).catch((err) => {
    process.stderr.write(`telegram gateway: quota_wall_detected fallback failed agent=${msg.agentName}: ${err?.message ?? err}\n`);
  });
},
56114
56180
  log: (msg) => process.stderr.write(`telegram gateway: ipc \u2014 ${msg}
56115
56181
  `)
56116
56182
  });
@@ -60914,13 +60980,13 @@ var fleetFallbackGate = createFleetFallbackGate({
60914
60980
  function wouldFireFleetAutoFallback() {
60915
60981
  return fleetFallbackGate.wouldFire();
60916
60982
  }
60917
- async function fireFleetAutoFallback(triggerAgent) {
60918
- return fleetFallbackGate.fire(() => doFireFleetAutoFallback(triggerAgent), (err) => {
60983
+ async function fireFleetAutoFallback(triggerAgent, untilMs) {
60984
+ return fleetFallbackGate.fire(() => doFireFleetAutoFallback(triggerAgent, untilMs), (err) => {
60919
60985
  process.stderr.write(`telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${err?.message ?? err}
60920
60986
  `);
60921
60987
  });
60922
60988
  }
60923
- async function doFireFleetAutoFallback(triggerAgent) {
60989
+ async function doFireFleetAutoFallback(triggerAgent, untilMs) {
60924
60990
  try {
60925
60991
  const client3 = await getAuthBrokerClient(triggerAgent);
60926
60992
  if (!client3) {
@@ -60939,7 +61005,7 @@ async function doFireFleetAutoFallback(triggerAgent) {
60939
61005
  state: state4,
60940
61006
  quotas,
60941
61007
  failover: async () => {
60942
- const r = await client3.markExhausted();
61008
+ const r = await client3.markExhausted(untilMs);
60943
61009
  return { rolledTo: r.rolledTo ?? null, rolled: r.rolled };
60944
61010
  },
60945
61011
  triggerAgent,
@@ -61017,6 +61083,31 @@ async function runQuotaWatch() {
61017
61083
  let watchState = loadQuotaWatchState(stateDir);
61018
61084
  const now = Date.now();
61019
61085
  const access = loadAccess();
61086
+ {
61087
+ const fleetPrev = watchState[FLEET_ALL_EXHAUSTED_KEY] ?? emptyAccountState();
61088
+ const fleetDecision = evaluateFleetAllExhausted({
61089
+ accounts: listStateData.accounts,
61090
+ prev: fleetPrev,
61091
+ now
61092
+ });
61093
+ if (fleetDecision.kind === "notify") {
61094
+ for (const chat_id of access.allowFrom) {
61095
+ await swallowingApiCall(() => bot.api.sendMessage(chat_id, fleetDecision.message, {
61096
+ parse_mode: "HTML",
61097
+ link_preview_options: { is_disabled: true }
61098
+ }), { chat_id, verb: "quota-watch.fleet-all-exhausted" });
61099
+ }
61100
+ watchState = patchQuotaWatchState(watchState, FLEET_ALL_EXHAUSTED_KEY, fleetDecision.newState);
61101
+ try {
61102
+ saveQuotaWatchState(stateDir, watchState);
61103
+ } catch (err) {
61104
+ process.stderr.write(`telegram gateway: quota-watch: fleet-state save failed: ${err}
61105
+ `);
61106
+ }
61107
+ process.stderr.write(`telegram gateway: quota-watch: fleet all-exhausted ${fleetDecision.transition}
61108
+ `);
61109
+ }
61110
+ }
61020
61111
  const pendingTransitions = [];
61021
61112
  const labelToSnapIndex = new Map(snapshots.map((s, i) => [s.label, i]));
61022
61113
  for (const snap of snapshots) {
@@ -348,6 +348,7 @@ import type {
348
348
  PtyPartialForward,
349
349
  InboundMessage,
350
350
  InjectInboundMessage,
351
+ QuotaWallDetectedMessage,
351
352
  PermissionEvent,
352
353
  } from './ipc-protocol.js'
353
354
  import { DebounceBuffer, HourCap, buildReactionInboundMeta, buildReactionInboundText, evaluateTriggerCandidate, isGroupChat, resolveReactionsConfig, truncatePreview, type PendingReaction, type ReactionBatch, type ReactionsResolvedConfig } from './reaction-trigger.js'
@@ -412,6 +413,8 @@ import {
412
413
  } from '../credits-watch.js'
413
414
  import {
414
415
  evaluateQuotaWatchAccount,
416
+ evaluateFleetAllExhausted,
417
+ FLEET_ALL_EXHAUSTED_KEY,
415
418
  loadQuotaWatchState,
416
419
  saveQuotaWatchState,
417
420
  patchQuotaWatchState,
@@ -6257,6 +6260,30 @@ const ipcServer: IpcServer = createIpcServer({
6257
6260
  }
6258
6261
  },
6259
6262
 
6263
+ // The wedge-watchdog detected claude's /rate-limit-options weekly-quota menu
6264
+ // (a TUI wall that never produced a 429, so the inference-path auto-fallback
6265
+ // never fired). Trigger the SAME fleet auto-fallback the 429 path uses,
6266
+ // threading the parsed weekly reset as markExhausted's `until` — with a
6267
+ // weekly-scale FALLBACK when the sidecar couldn't parse it (resetAt absent):
6268
+ // passing undefined would let markExhausted use its ~5h default, which would
6269
+ // un-exhaust a weekly-walled account and re-wedge it within hours. The
6270
+ // existing chain handles the rest (roll to a fallback subscription account,
6271
+ // or the all-exhausted operator alert when none has quota). Fire-and-forget.
6272
onQuotaWallDetected(_client: IpcClient, msg: QuotaWallDetectedMessage) {
  // Picks markExhausted's `until`: the reported reset when it is a finite
  // number in the future, otherwise now + 7 days. Logs the decision, then
  // fires the fleet auto-fallback without awaiting it.
  const WEEKLY_MS = 7 * 24 * 60 * 60 * 1000
  // Sample the clock once so the freshness check, the fallback deadline and
  // the log line all agree on "now".
  const nowMs = Date.now()
  const parsedReset = msg.resetAt
  const untilMs =
    typeof parsedReset === 'number' && Number.isFinite(parsedReset) && parsedReset > nowMs
      ? parsedReset
      : nowMs + WEEKLY_MS
  // Annotate EVERY use of the weekly default, not only the "absent" case, so
  // the log never implies a parsed reset when the default was applied.
  let fallbackNote = ''
  if (untilMs !== parsedReset) {
    fallbackNote =
      parsedReset == null
        ? ' (reset unparsed → +7d default)'
        : ' (reset not in the future → +7d default)'
  }
  process.stderr.write(
    `telegram gateway: quota_wall_detected agent=${msg.agentName} ` +
      `until=${new Date(untilMs).toISOString()}` +
      fallbackNote +
      ' — triggering fleet auto-fallback\n',
  )
  void fireFleetAutoFallback(msg.agentName, untilMs)
},
6286
+
6260
6287
  log: (msg) => process.stderr.write(`telegram gateway: ipc — ${msg}\n`),
6261
6288
  })
6262
6289
 
@@ -14603,9 +14630,9 @@ function wouldFireFleetAutoFallback(): boolean {
14603
14630
  * so the user sees the outcome inline with the original "Model
14604
14631
  * unavailable" card.
14605
14632
  */
14606
- async function fireFleetAutoFallback(triggerAgent: string): Promise<void> {
14633
+ async function fireFleetAutoFallback(triggerAgent: string, untilMs?: number): Promise<void> {
14607
14634
  return fleetFallbackGate.fire(
14608
- () => doFireFleetAutoFallback(triggerAgent),
14635
+ () => doFireFleetAutoFallback(triggerAgent, untilMs),
14609
14636
  (err) => {
14610
14637
  process.stderr.write(
14611
14638
  `telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${(err as Error)?.message ?? err}\n`,
@@ -14618,7 +14645,7 @@ async function fireFleetAutoFallback(triggerAgent: string): Promise<void> {
14618
14645
  * user-visible announcement was broadcast). False on no-op /
14619
14646
  * error / idempotent-skip — caller uses this to decide whether to
14620
14647
  * arm the post-fire suppression window. */
14621
- async function doFireFleetAutoFallback(triggerAgent: string): Promise<boolean> {
14648
+ async function doFireFleetAutoFallback(triggerAgent: string, untilMs?: number): Promise<boolean> {
14622
14649
  try {
14623
14650
  const client = await getAuthBrokerClient(triggerAgent)
14624
14651
  if (!client) {
@@ -14653,7 +14680,11 @@ async function doFireFleetAutoFallback(triggerAgent: string): Promise<boolean> {
14653
14680
  // operator is explicitly choosing, and is admin); only this automatic
14654
14681
  // path moves to the non-admin verb.
14655
14682
  failover: async () => {
14656
- const r = await client.markExhausted()
14683
+ // The 429 inference path passes no `until` (broker ~5h default). The
14684
+ // rate-limit-MENU path (quota_wall_detected) passes the parsed WEEKLY
14685
+ // reset, so the walled account isn't re-probed (and re-wedged) within
14686
+ // the 5h default while it's weekly-capped.
14687
+ const r = await client.markExhausted(untilMs)
14657
14688
  return { rolledTo: r.rolledTo ?? null, rolled: r.rolled }
14658
14689
  },
14659
14690
  triggerAgent,
@@ -14805,6 +14836,44 @@ async function runQuotaWatch(): Promise<void> {
14805
14836
  const now = Date.now()
14806
14837
  const access = loadAccess()
14807
14838
 
14839
+ // Fleet-wide all-exhausted check FIRST — must run before the per-account
14840
+ // early-return below. When every account is exhausted, the per-account loop
14841
+ // produces only 'blocked' skips → pendingTransitions empty → early return;
14842
+ // so this fleet-level alert (the one the trigger-based all-blocked card
14843
+ // misses during quiet periods / for the consumer+cron paths) would never
14844
+ // fire if placed after. Authoritative source: broker `exhausted` flags.
14845
+ {
14846
+ const fleetPrev = watchState[FLEET_ALL_EXHAUSTED_KEY] ?? emptyAccountState()
14847
+ const fleetDecision = evaluateFleetAllExhausted({
14848
+ accounts: listStateData.accounts,
14849
+ prev: fleetPrev,
14850
+ now,
14851
+ })
14852
+ if (fleetDecision.kind === 'notify') {
14853
+ for (const chat_id of access.allowFrom) {
14854
+ await swallowingApiCall(
14855
+ () =>
14856
+ bot.api.sendMessage(chat_id, fleetDecision.message, {
14857
+ parse_mode: 'HTML',
14858
+ link_preview_options: { is_disabled: true },
14859
+ }),
14860
+ { chat_id, verb: 'quota-watch.fleet-all-exhausted' },
14861
+ )
14862
+ }
14863
+ // Persist immediately — the per-account early-return path below would
14864
+ // otherwise drop this flag change (edge-trigger would re-fire next poll).
14865
+ watchState = patchQuotaWatchState(watchState, FLEET_ALL_EXHAUSTED_KEY, fleetDecision.newState)
14866
+ try {
14867
+ saveQuotaWatchState(stateDir, watchState)
14868
+ } catch (err) {
14869
+ process.stderr.write(`telegram gateway: quota-watch: fleet-state save failed: ${err}\n`)
14870
+ }
14871
+ process.stderr.write(
14872
+ `telegram gateway: quota-watch: fleet all-exhausted ${fleetDecision.transition}\n`,
14873
+ )
14874
+ }
14875
+ }
14876
+
14808
14877
  // First pass: evaluate all accounts against cached state. Collect
14809
14878
  // labels that need a live probe (i.e. accounts with a detected transition
14810
14879
  // that we're about to notify about). We probe those to get fresh
@@ -393,6 +393,26 @@ export interface RequestConfigFinalizeMessage {
393
393
  detail?: string;
394
394
  }
395
395
 
396
/**
 * Client → gateway signal: the autoaccept-poll wedge-watchdog saw claude's
 * `/rate-limit-options` weekly-quota menu — a TUI wall that never surfaced as
 * a 429 the gateway could observe.
 *
 * The gateway reacts by running its EXISTING account-failover chain
 * (markExhausted → roll to a fallback subscription account, or raise the
 * all-exhausted operator alert). The message is fire-and-forget; nothing is
 * sent back.
 *
 * Trust model (same as inject_inbound): the socket is per-agent inside the
 * container, yet `agentName` is still validated server-side and is never
 * trusted to authorize anything beyond triggering that agent's own failover.
 */
export interface QuotaWallDetectedMessage {
  type: "quota_wall_detected";
  agentName: string;
  /**
   * Weekly-reset instant as epoch milliseconds, when the sidecar managed to
   * parse it. When omitted, the gateway substitutes a weekly-scale default for
   * markExhausted's `until` (NOT the ~5h default, which would un-exhaust a
   * weekly wall and re-wedge the agent).
   */
  resetAt?: number;
}
415
+
396
416
  export type ClientToGateway =
397
417
  | RegisterMessage
398
418
  | ToolCallMessage
@@ -407,4 +427,5 @@ export type ClientToGateway =
407
427
  | RequestDriveApprovalMessage
408
428
  | RequestMs365ApprovalMessage
409
429
  | RequestConfigApprovalMessage
410
- | RequestConfigFinalizeMessage;
430
+ | RequestConfigFinalizeMessage
431
+ | QuotaWallDetectedMessage;
@@ -4,6 +4,7 @@ import type {
4
4
  GatewayToClient,
5
5
  HeartbeatMessage,
6
6
  InjectInboundMessage,
7
+ QuotaWallDetectedMessage,
7
8
  OperatorEventForward,
8
9
  PermissionRequestForward,
9
10
  PtyPartialForward,
@@ -44,6 +45,14 @@ export interface IpcServerOptions {
44
45
  * inline scheduler simply ignore inject_inbound messages.
45
46
  */
46
47
  onInjectInbound?: (client: IpcClient, msg: InjectInboundMessage) => void;
48
+ /**
49
+ * The autoaccept-poll wedge-watchdog detected claude's `/rate-limit-options`
50
+ * weekly-quota menu (no 429 ever reached the gateway). Handler is expected to
51
+ * trigger the existing fleet auto-fallback for `msg.agentName`, threading
52
+ * `msg.resetAt` as the markExhausted `until`. Fire-and-forget; gateways that
53
+ * don't run failover simply ignore it.
54
+ */
55
+ onQuotaWallDetected?: (client: IpcClient, msg: QuotaWallDetectedMessage) => void;
47
56
  /**
48
57
  * RFC E §4.2 Cut 2 — Drive-write PreToolUse hook asks the gateway
49
58
  * to register a kernel approval request + post a diff-preview
@@ -237,6 +246,15 @@ export function validateClientMessage(msg: unknown): msg is ClientToGateway {
237
246
  && typeof inb.meta === "object"
238
247
  && inb.meta !== null;
239
248
  }
249
+ case "quota_wall_detected": {
250
+ // wedge-watchdog detected the /rate-limit-options weekly-quota menu.
251
+ if (typeof m.agentName !== "string"
252
+ || !AGENT_NAME_RE.test(m.agentName as string)) return false;
253
+ // resetAt optional; when present it must be a finite epoch-ms.
254
+ if (m.resetAt !== undefined
255
+ && (typeof m.resetAt !== "number" || !Number.isFinite(m.resetAt as number))) return false;
256
+ return true;
257
+ }
240
258
  case "request_config_approval": {
241
259
  // #1623 — hostd-initiated config-edit approval card. Wire shape
242
260
  // only; the handler module validates the diff content.
@@ -317,6 +335,7 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
317
335
  onOperatorEvent,
318
336
  onPtyPartial,
319
337
  onInjectInbound,
338
+ onQuotaWallDetected,
320
339
  onRequestDriveApproval,
321
340
  onRequestMs365Approval,
322
341
  onRequestConfigApproval,
@@ -425,6 +444,9 @@ export function createIpcServer(options: IpcServerOptions): IpcServer {
425
444
  case "inject_inbound":
426
445
  if (onInjectInbound) onInjectInbound(client, msg as InjectInboundMessage);
427
446
  break;
447
+ case "quota_wall_detected":
448
+ if (onQuotaWallDetected) onQuotaWallDetected(client, msg as QuotaWallDetectedMessage);
449
+ break;
428
450
  case "request_drive_approval":
429
451
  if (onRequestDriveApproval) {
430
452
  // Handler is async — fire-and-forget here; the handler
@@ -160,6 +160,99 @@ export function evaluateQuotaWatchAccount(args: {
160
160
  return { kind: "skip", accountLabel: label, reason: "no-matching-transition" };
161
161
  }
162
162
 
163
+ // ─── Fleet-level: all accounts exhausted ───────────────────────────────────────
164
+
165
/**
 * Reserved map key in quota-watch.json that holds the fleet-wide
 * "all accounts exhausted" alert state alongside the per-account entries.
 *
 * It is not a valid account label (emails can't contain this), so it cannot
 * collide with a real account; and because the per-account loop walks account
 * snapshots rather than state-map keys, that loop never encounters it.
 *
 * The value is stored as a QuotaWatchAccountState so the existing load
 * validator accepts it: `lastNotifiedHealth === "throttling"` means the
 * all-exhausted alert is currently active; "healthy" or null means it is not.
 * Backward-compatible — older state files simply do not have the key.
 */
export const FLEET_ALL_EXHAUSTED_KEY = "__fleet_all_exhausted__";
175
+
176
/**
 * Result of one fleet-wide all-exhausted evaluation: either a notification to
 * send (with the state to persist and which edge was crossed) or a skip with
 * a diagnostic reason.
 */
export type FleetAllExhaustedDecision =
  | {
      kind: "notify";
      message: string;
      newState: QuotaWatchAccountState;
      transition: "entered" | "recovered";
    }
  | {
      kind: "skip";
      reason: string;
    };
179
+
180
/**
 * Fleet-wide all-exhausted alert (edge-triggered).
 *
 * Fires ONCE when every account enters the broker's exhausted state (no healthy
 * account to fail over to — agents go quiet, crons defer, consumers/hindsight
 * silently serve an exhausted account), and ONCE on recovery. This catches the
 * cases the trigger-based interactive all-blocked card misses: a quiet period
 * (no agent happens to 429 into the wall) and the consumer/cron paths.
 *
 * Authoritative source: the broker's per-account `exhausted` flag (set by
 * mark-exhausted via failover + the consumer sensor), NOT probe-derived health
 * — so there is no probe-failure false-alarm.
 *
 * An empty fleet never notifies in EITHER direction: with no accounts there is
 * nothing to declare exhausted, and equally no healthy account to justify a
 * "recovered" message. The previous alert state is left untouched in that case
 * so the next non-empty evaluation still sees the correct edge.
 *
 * @param args.accounts Broker account list with per-account `exhausted` flags.
 * @param args.prev     Previously persisted fleet alert state;
 *                      `lastNotifiedHealth === "throttling"` means the
 *                      all-exhausted alert is currently active.
 * @param args.now      Current time in epoch milliseconds.
 * @returns A `notify` decision on an entered/recovered edge, otherwise `skip`.
 */
export function evaluateFleetAllExhausted(args: {
  accounts: Array<{ label: string; exhausted: boolean; exhausted_until?: number }>;
  prev: QuotaWatchAccountState;
  now: number;
}): FleetAllExhaustedDecision {
  const { accounts, prev, now } = args;

  // No accounts: neither "all exhausted" nor "recovered" can be claimed.
  if (accounts.length === 0) {
    return { kind: "skip", reason: "empty-fleet" };
  }

  const allExhausted = accounts.every((a) => a.exhausted);
  // "throttling" doubles as the "currently alerting all-exhausted" marker.
  const wasAlerting = prev.lastNotifiedHealth === "throttling";

  if (allExhausted && !wasAlerting) {
    return {
      kind: "notify",
      message: buildAllExhaustedMessage(accounts, now),
      newState: { lastNotifiedHealth: "throttling", lastNotifiedAt: now },
      transition: "entered",
    };
  }
  if (!allExhausted && wasAlerting) {
    return {
      kind: "notify",
      message: buildFleetRecoveredMessage(accounts),
      newState: { lastNotifiedHealth: "healthy", lastNotifiedAt: now },
      transition: "recovered",
    };
  }
  return { kind: "skip", reason: allExhausted ? "still-all-exhausted" : "not-all-exhausted" };
}
222
+
223
/**
 * Builds the HTML "all accounts exhausted" alert.
 *
 * The reset line reports the earliest `exhausted_until` that is a number
 * strictly later than `now`; when no account has such a value the message
 * says the reset time is unknown.
 *
 * @param accounts Accounts to summarize (only `exhausted_until` and the count are used).
 * @param now      Current time in epoch milliseconds.
 * @returns The message text, lines joined with "\n".
 */
function buildAllExhaustedMessage(
  accounts: Array<{ label: string; exhausted_until?: number }>,
  now: number,
): string {
  // Single pass for the minimum future reset; avoids spreading the whole list
  // into Math.min's argument list.
  let earliest: number | null = null;
  for (const { exhausted_until: until } of accounts) {
    if (typeof until !== "number" || !(until > now)) continue;
    if (earliest === null || until < earliest) earliest = until;
  }
  // Explicit null check: a numeric value of 0 is a real timestamp, not "unknown".
  const resetLine =
    earliest !== null
      ? `Earliest reset: ${formatRelative(new Date(earliest), new Date(now))}.`
      : `Reset time unknown (no window data).`;
  return [
    `🔴 <b>All accounts exhausted</b>`,
    ``,
    `Every Anthropic account (${accounts.length}) is quota-walled — there is no healthy account to fail over to.`,
    resetLine,
    ``,
    `<i>This is self-healing: agents resume and deferred scheduled jobs run automatically once a window resets. Nothing is lost. Add headroom with <code>/auth add</code> if this recurs.</i>`,
  ].join("\n");
}
243
+
244
/**
 * Builds the HTML "fleet recovered" notice.
 *
 * When some account in `accounts` is not exhausted, the label of the first
 * such account is appended (HTML-escaped, inside `<code>`); otherwise the
 * sentence is emitted without naming an account.
 */
function buildFleetRecoveredMessage(
  accounts: Array<{ label: string; exhausted: boolean }>,
): string {
  const firstHealthy = accounts.find((account) => !account.exhausted);
  let which = "";
  if (firstHealthy !== undefined) {
    which = ` (<code>${escapeHtml(firstHealthy.label)}</code>)`;
  }
  const headline = `🟢 <b>Fleet recovered</b> — at least one account is healthy again${which}.`;
  const footnote = `<i>Agents are back; any deferred scheduled jobs will run on their next occurrence.</i>`;
  return `${headline}\n\n${footnote}`;
}
255
+
163
256
  // ─── Message builders ─────────────────────────────────────────────────────────
164
257
 
165
258
  function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): string {
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Validation contract for the `quota_wall_detected` IPC verb — the signal the
3
+ * autoaccept-poll wedge-watchdog sends when it sees claude's /rate-limit-options
4
+ * weekly-quota menu, asking the gateway to trigger account failover.
5
+ *
6
+ * A rogue process on the same UDS must not be able to inject a malformed
7
+ * payload: agentName is required + name-shaped, resetAt (optional) must be a
8
+ * finite number.
9
+ */
10
+ import { describe, it, expect } from "vitest";
11
+ import { validateClientMessage } from "../gateway/ipc-server.js";
12
+
13
describe("validateClientMessage — quota_wall_detected", () => {
  // Builds a quota_wall_detected payload with the given extra fields.
  const signal = (fields: Record<string, unknown> = {}) => ({
    type: "quota_wall_detected",
    ...fields,
  });

  it("accepts a well-formed signal (with resetAt)", () => {
    const payload = signal({ agentName: "finn", resetAt: 1_780_000_000_000 });
    expect(validateClientMessage(payload)).toBe(true);
  });

  it("accepts a well-formed signal WITHOUT resetAt (optional)", () => {
    const payload = signal({ agentName: "finn" });
    expect(validateClientMessage(payload)).toBe(true);
  });

  it("rejects a missing / non-string / malformed agentName", () => {
    // Missing entirely.
    expect(validateClientMessage(signal())).toBe(false);
    // Wrong type, empty, path-like, and not name-shaped.
    for (const agentName of [123, "", "../etc", "Finn UPPER"]) {
      expect(validateClientMessage(signal({ agentName }))).toBe(false);
    }
  });

  it("rejects a non-finite / non-number resetAt", () => {
    for (const resetAt of ["soon", NaN, Infinity]) {
      expect(validateClientMessage(signal({ agentName: "finn", resetAt }))).toBe(false);
    }
  });
});
@@ -12,6 +12,7 @@ import { tmpdir } from "os";
12
12
  import { join } from "path";
13
13
  import {
14
14
  evaluateQuotaWatchAccount,
15
+ evaluateFleetAllExhausted,
15
16
  loadQuotaWatchState,
16
17
  saveQuotaWatchState,
17
18
  patchQuotaWatchState,
@@ -364,3 +365,74 @@ describe("patchQuotaWatchState", () => {
364
365
  expect(current["bob@example.com"]).toBeUndefined();
365
366
  });
366
367
  });
368
+
369
describe("evaluateFleetAllExhausted", () => {
  // Previous-state fixtures: "throttling" marks an active all-exhausted alert.
  const notAlerting = { lastNotifiedHealth: null, lastNotifiedAt: 0 };
  const alerting = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 };

  it("notifies (entered) when every account is exhausted and we weren't alerting", () => {
    const d = evaluateFleetAllExhausted({
      accounts: [
        { label: "a", exhausted: true, exhausted_until: 5_000 },
        { label: "b", exhausted: true, exhausted_until: 9_000 },
      ],
      prev: notAlerting,
      now: 1_000,
    });
    expect(d.kind).toBe("notify");
    if (d.kind === "notify") {
      expect(d.transition).toBe("entered");
      expect(d.newState.lastNotifiedHealth).toBe("throttling");
      expect(d.message).toContain("All accounts exhausted");
      // A future exhausted_until exists, so the reset line is present.
      expect(d.message).toContain("Earliest reset");
    }
  });

  it("skips (still) when all exhausted and already alerting — no re-spam", () => {
    const d = evaluateFleetAllExhausted({
      accounts: [{ label: "a", exhausted: true }, { label: "b", exhausted: true }],
      prev: alerting,
      now: 2_000,
    });
    expect(d.kind).toBe("skip");
  });

  it("notifies (recovered) when one account frees after we were alerting", () => {
    const d = evaluateFleetAllExhausted({
      accounts: [{ label: "a", exhausted: false }, { label: "b", exhausted: true }],
      prev: alerting,
      now: 3_000,
    });
    expect(d.kind).toBe("notify");
    if (d.kind === "notify") {
      expect(d.transition).toBe("recovered");
      expect(d.newState.lastNotifiedHealth).toBe("healthy");
      expect(d.message).toContain("Fleet recovered");
      // Names the healthy account. Match the rendered label, not a bare "a",
      // which the fixed message text would satisfy on its own.
      expect(d.message).toContain("<code>a</code>");
      // The still-exhausted account must not be presented as the healthy one.
      expect(d.message).not.toContain("<code>b</code>");
    }
  });

  it("skips (not-all) when some account is healthy and we weren't alerting", () => {
    const d = evaluateFleetAllExhausted({
      accounts: [{ label: "a", exhausted: false }, { label: "b", exhausted: true }],
      prev: notAlerting,
      now: 4_000,
    });
    expect(d.kind).toBe("skip");
  });

  it("never alerts on an empty fleet", () => {
    expect(evaluateFleetAllExhausted({ accounts: [], prev: notAlerting, now: 1 }).kind).toBe("skip");
  });

  it("shows reset-unknown when no exhausted_until is present", () => {
    const d = evaluateFleetAllExhausted({
      accounts: [{ label: "a", exhausted: true }],
      prev: notAlerting,
      now: 1_000,
    });
    expect(d.kind).toBe("notify");
    if (d.kind === "notify") expect(d.message).toContain("Reset time unknown");
  });
});
+ });