switchroom 0.14.79 → 0.14.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +79 -3
- package/dist/cli/drive-write-pretool.mjs +2 -1
- package/dist/cli/switchroom.js +4 -3
- package/package.json +1 -1
- package/telegram-plugin/auto-fallback-fleet.ts +42 -54
- package/telegram-plugin/dist/gateway/gateway.js +17 -25
- package/telegram-plugin/gateway/auth-broker-client.ts +1 -0
- package/telegram-plugin/gateway/auth-command.ts +8 -0
- package/telegram-plugin/gateway/gateway.ts +12 -1
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +41 -77
|
@@ -12286,6 +12286,35 @@ async function fetchQuota(opts) {
|
|
|
12286
12286
|
return parsed;
|
|
12287
12287
|
}
|
|
12288
12288
|
|
|
12289
|
+
// src/auth/broker/consumer-quota-sensor.ts
|
|
12290
|
+
var EXHAUSTION_PCT = 99.5;
|
|
12291
|
+
var DEFAULT_CONSUMER_PROBE_INTERVAL_MS = 10 * 60 * 1000;
|
|
12292
|
+
function quotaIndicatesExhaustion(result) {
|
|
12293
|
+
if (!result.ok)
|
|
12294
|
+
return { exhausted: false, until: null };
|
|
12295
|
+
const d = result.data;
|
|
12296
|
+
const fiveBlocked = d.fiveHourUtilizationPct >= EXHAUSTION_PCT;
|
|
12297
|
+
const sevenBlocked = d.sevenDayUtilizationPct >= EXHAUSTION_PCT;
|
|
12298
|
+
if (!fiveBlocked && !sevenBlocked)
|
|
12299
|
+
return { exhausted: false, until: null };
|
|
12300
|
+
const fiveReset = fiveBlocked ? d.fiveHourResetAt?.getTime() ?? null : null;
|
|
12301
|
+
const sevenReset = sevenBlocked ? d.sevenDayResetAt?.getTime() ?? null : null;
|
|
12302
|
+
const candidates = [fiveReset, sevenReset].filter((x) => x != null);
|
|
12303
|
+
const until = candidates.length > 0 ? Math.max(...candidates) : null;
|
|
12304
|
+
return { exhausted: true, until };
|
|
12305
|
+
}
|
|
12306
|
+
function resolveConsumerProbeIntervalMs(env) {
|
|
12307
|
+
if (env.SWITCHROOM_DISABLE_CONSUMER_QUOTA_PROBE === "1")
|
|
12308
|
+
return 0;
|
|
12309
|
+
const raw = env.SWITCHROOM_CONSUMER_QUOTA_PROBE_MS;
|
|
12310
|
+
if (raw !== undefined) {
|
|
12311
|
+
const n = Number(raw);
|
|
12312
|
+
if (Number.isFinite(n) && n >= 0)
|
|
12313
|
+
return n;
|
|
12314
|
+
}
|
|
12315
|
+
return DEFAULT_CONSUMER_PROBE_INTERVAL_MS;
|
|
12316
|
+
}
|
|
12317
|
+
|
|
12289
12318
|
// src/util/atomic.ts
|
|
12290
12319
|
import { randomBytes } from "node:crypto";
|
|
12291
12320
|
import { closeSync, constants, fsyncSync, openSync, renameSync, rmSync, writeSync } from "node:fs";
|
|
@@ -13340,7 +13369,8 @@ var SetActiveDataSchema = exports_external.object({
|
|
|
13340
13369
|
});
|
|
13341
13370
|
var MarkExhaustedDataSchema = exports_external.object({
|
|
13342
13371
|
account: exports_external.string(),
|
|
13343
|
-
rolled: exports_external.array(exports_external.string())
|
|
13372
|
+
rolled: exports_external.array(exports_external.string()),
|
|
13373
|
+
rolledTo: exports_external.string().nullable().optional()
|
|
13344
13374
|
});
|
|
13345
13375
|
var RefreshAccountDataSchema = exports_external.object({
|
|
13346
13376
|
account: exports_external.string(),
|
|
@@ -13482,6 +13512,8 @@ class AuthBroker {
|
|
|
13482
13512
|
config;
|
|
13483
13513
|
listeners = new Map;
|
|
13484
13514
|
refreshTimer = null;
|
|
13515
|
+
consumerProbeTimer = null;
|
|
13516
|
+
fetchQuotaImpl;
|
|
13485
13517
|
stateDir;
|
|
13486
13518
|
socketRoot;
|
|
13487
13519
|
home;
|
|
@@ -13505,6 +13537,7 @@ class AuthBroker {
|
|
|
13505
13537
|
this.now = opts.now ?? nowMs;
|
|
13506
13538
|
this.operatorUid = opts.operatorUid;
|
|
13507
13539
|
this.fetcher = opts.fetcher;
|
|
13540
|
+
this.fetchQuotaImpl = opts._testFetchQuota ?? fetchQuota;
|
|
13508
13541
|
this.stateDir = opts.stateDir ?? resolve7(this.homeRoot(), ".switchroom", "state", "auth-broker");
|
|
13509
13542
|
this.socketRoot = opts.socketRoot ?? AUTH_BROKER_ROOT;
|
|
13510
13543
|
this.providers = new ProviderRegistry;
|
|
@@ -13551,6 +13584,16 @@ class AuthBroker {
|
|
|
13551
13584
|
});
|
|
13552
13585
|
}, REFRESH_TICK_INTERVAL_MS);
|
|
13553
13586
|
this.refreshTimer.unref();
|
|
13587
|
+
const probeMs = resolveConsumerProbeIntervalMs(process.env);
|
|
13588
|
+
const hasConsumers = (this.config.auth?.consumers ?? []).length > 0;
|
|
13589
|
+
if (probeMs > 0 && hasConsumers) {
|
|
13590
|
+
this.consumerProbeTimer = setInterval(() => {
|
|
13591
|
+
this.consumerQuotaProbeTick().catch((err) => {
|
|
13592
|
+
this.logErr(`consumer-quota-probe threw: ${err.message}`);
|
|
13593
|
+
});
|
|
13594
|
+
}, probeMs);
|
|
13595
|
+
this.consumerProbeTimer.unref();
|
|
13596
|
+
}
|
|
13554
13597
|
}
|
|
13555
13598
|
const fanned = this.fanoutAll();
|
|
13556
13599
|
if (fanned.length > 0) {
|
|
@@ -13577,6 +13620,10 @@ class AuthBroker {
|
|
|
13577
13620
|
clearInterval(this.refreshTimer);
|
|
13578
13621
|
this.refreshTimer = null;
|
|
13579
13622
|
}
|
|
13623
|
+
if (this.consumerProbeTimer) {
|
|
13624
|
+
clearInterval(this.consumerProbeTimer);
|
|
13625
|
+
this.consumerProbeTimer = null;
|
|
13626
|
+
}
|
|
13580
13627
|
for (const [sock, lis] of this.listeners) {
|
|
13581
13628
|
try {
|
|
13582
13629
|
lis.server.close();
|
|
@@ -13994,7 +14041,7 @@ class AuthBroker {
|
|
|
13994
14041
|
this.audit({ op: "probe-quota", identity: identity2, account: label, ok: false, error: "missing-credentials" });
|
|
13995
14042
|
return { label, result: result2 };
|
|
13996
14043
|
}
|
|
13997
|
-
const result = await
|
|
14044
|
+
const result = await this.fetchQuotaImpl({ accessToken: token, timeoutMs });
|
|
13998
14045
|
this.audit({
|
|
13999
14046
|
op: "probe-quota",
|
|
14000
14047
|
identity: identity2,
|
|
@@ -14018,6 +14065,34 @@ class AuthBroker {
|
|
|
14018
14065
|
}));
|
|
14019
14066
|
socket.write(encodeSuccess(id, { results }));
|
|
14020
14067
|
}
|
|
14068
|
+
async consumerQuotaProbeTick() {
|
|
14069
|
+
const accounts = Array.from(new Set((this.config.auth?.consumers ?? []).map((c) => c.account)));
|
|
14070
|
+
for (const label of accounts) {
|
|
14071
|
+
const creds = readAccountCredentials(label, this.home);
|
|
14072
|
+
const token = creds?.claudeAiOauth?.accessToken;
|
|
14073
|
+
if (!token)
|
|
14074
|
+
continue;
|
|
14075
|
+
let result;
|
|
14076
|
+
try {
|
|
14077
|
+
result = await this.fetchQuotaImpl({ accessToken: token });
|
|
14078
|
+
} catch (err) {
|
|
14079
|
+
this.logErr(`consumer-quota-probe ${label}: ${err.message}`);
|
|
14080
|
+
continue;
|
|
14081
|
+
}
|
|
14082
|
+
const decision = quotaIndicatesExhaustion(result);
|
|
14083
|
+
if (!decision.exhausted)
|
|
14084
|
+
continue;
|
|
14085
|
+
const exhaustedUntil = decision.until ?? this.now() + MARK_EXHAUSTED_DEFAULT_MS;
|
|
14086
|
+
const existing = this.quota[label]?.exhausted_until;
|
|
14087
|
+
if (existing !== undefined && existing >= exhaustedUntil)
|
|
14088
|
+
continue;
|
|
14089
|
+
this.quota[label] = { exhausted_until: exhaustedUntil };
|
|
14090
|
+
this.persistQuota();
|
|
14091
|
+
this.audit({ op: "mark-exhausted", identity: { kind: "operator" }, account: label, ok: true });
|
|
14092
|
+
process.stdout.write(`auth-broker: consumer-quota-sensor marked ${label} exhausted until ${new Date(exhaustedUntil).toISOString()} — consumer(s) fail over
|
|
14093
|
+
`);
|
|
14094
|
+
}
|
|
14095
|
+
}
|
|
14021
14096
|
async opSetActive(socket, id, identity2, account) {
|
|
14022
14097
|
if (!this.isAdmin(identity2)) {
|
|
14023
14098
|
this.audit({ op: "set-active", identity: identity2, account, ok: false, error: "FORBIDDEN" });
|
|
@@ -14049,8 +14124,9 @@ class AuthBroker {
|
|
|
14049
14124
|
this.quota[account] = { exhausted_until: exhaustedUntil };
|
|
14050
14125
|
this.persistQuota();
|
|
14051
14126
|
const rolled = this.fanoutFailoverFor(account);
|
|
14127
|
+
const rolledTo = this.nextHealthyAccount(account, this.config.auth?.fallback_order ?? []);
|
|
14052
14128
|
this.audit({ op: "mark-exhausted", identity: identity2, account, ok: true });
|
|
14053
|
-
socket.write(encodeSuccess(id, { account, rolled }));
|
|
14129
|
+
socket.write(encodeSuccess(id, { account, rolled, rolledTo }));
|
|
14054
14130
|
}
|
|
14055
14131
|
async opRefreshAccount(socket, id, identity2, account) {
|
|
14056
14132
|
if (!this.isAdmin(identity2)) {
|
|
@@ -4165,7 +4165,8 @@ var init_protocol = __esm(() => {
|
|
|
4165
4165
|
});
|
|
4166
4166
|
MarkExhaustedDataSchema = exports_external.object({
|
|
4167
4167
|
account: exports_external.string(),
|
|
4168
|
-
rolled: exports_external.array(exports_external.string())
|
|
4168
|
+
rolled: exports_external.array(exports_external.string()),
|
|
4169
|
+
rolledTo: exports_external.string().nullable().optional()
|
|
4169
4170
|
});
|
|
4170
4171
|
RefreshAccountDataSchema = exports_external.object({
|
|
4171
4172
|
account: exports_external.string(),
|
package/dist/cli/switchroom.js
CHANGED
|
@@ -25560,7 +25560,8 @@ var init_protocol2 = __esm(() => {
|
|
|
25560
25560
|
});
|
|
25561
25561
|
MarkExhaustedDataSchema = exports_external.object({
|
|
25562
25562
|
account: exports_external.string(),
|
|
25563
|
-
rolled: exports_external.array(exports_external.string())
|
|
25563
|
+
rolled: exports_external.array(exports_external.string()),
|
|
25564
|
+
rolledTo: exports_external.string().nullable().optional()
|
|
25564
25565
|
});
|
|
25565
25566
|
RefreshAccountDataSchema = exports_external.object({
|
|
25566
25567
|
account: exports_external.string(),
|
|
@@ -49699,8 +49700,8 @@ var {
|
|
|
49699
49700
|
} = import__.default;
|
|
49700
49701
|
|
|
49701
49702
|
// src/build-info.ts
|
|
49702
|
-
var VERSION = "0.14.
|
|
49703
|
-
var COMMIT_SHA = "
|
|
49703
|
+
var VERSION = "0.14.81";
|
|
49704
|
+
var COMMIT_SHA = "4ac9cc7d";
|
|
49704
49705
|
|
|
49705
49706
|
// src/cli/agent.ts
|
|
49706
49707
|
init_source();
|
package/package.json
CHANGED
|
@@ -40,7 +40,6 @@ import {
|
|
|
40
40
|
renderFallbackAnnouncement,
|
|
41
41
|
classifyHealth,
|
|
42
42
|
buildSnapshotsFromState,
|
|
43
|
-
type AccountSnapshot,
|
|
44
43
|
} from './auth-snapshot-format.js';
|
|
45
44
|
|
|
46
45
|
export type FleetFallbackOutcome =
|
|
@@ -52,10 +51,12 @@ export type FleetFallbackOutcome =
|
|
|
52
51
|
/** Quota for the OLD account at the moment of failure — caller
|
|
53
52
|
* may persist this as the broker's `quota.json` so the next
|
|
54
53
|
* /auth render reflects the freshly-known exhaustion without
|
|
55
|
-
* another probe.
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
* another probe. Null when the live probe failed but the broker
|
|
55
|
+
* rolled anyway (it owns the authoritative exhaustion state). */
|
|
56
|
+
oldQuota: QuotaUtilization | null;
|
|
57
|
+
/** Quota for the new active account, useful for caller logging.
|
|
58
|
+
* Null when the rolled-to account had no successful probe. */
|
|
59
|
+
newQuota: QuotaUtilization | null;
|
|
59
60
|
}
|
|
60
61
|
| {
|
|
61
62
|
kind: 'all-blocked';
|
|
@@ -82,8 +83,15 @@ export interface FleetFallbackDeps {
|
|
|
82
83
|
* Get via `client.probeQuota(state.accounts.map(a => a.label))`
|
|
83
84
|
* and map the response back to per-account results (#1336). */
|
|
84
85
|
quotas: QuotaResult[];
|
|
85
|
-
/**
|
|
86
|
-
|
|
86
|
+
/** Non-admin failover invoker — the broker's `mark-exhausted` verb. Marks
|
|
87
|
+
* the triggering agent's (exhausted) account and rolls every agent on it to
|
|
88
|
+
* the next non-exhausted `fallback_order` account, returning that target as
|
|
89
|
+
* `rolledTo` (null when every fallback is also exhausted). This is what lets
|
|
90
|
+
* auto-fallback work from ANY agent — `set-active` (the admin verb the manual
|
|
91
|
+
* /auth button uses) is gated to admin agents, so a non-admin agent that
|
|
92
|
+
* 429'd could never self-heal. mark-exhausted derives the account from the
|
|
93
|
+
* caller's own identity, so it needs no admin. */
|
|
94
|
+
failover: () => Promise<{ rolledTo: string | null; rolled: string[] }>;
|
|
87
95
|
/** Agent that triggered this fallback (for the announcement byline). */
|
|
88
96
|
triggerAgent: string;
|
|
89
97
|
/** Operator timezone for absolute reset times in the announcement. */
|
|
@@ -131,10 +139,18 @@ export async function runFleetAutoFallback(
|
|
|
131
139
|
};
|
|
132
140
|
}
|
|
133
141
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
142
|
+
// Execute the non-admin swap. The broker marks the triggering agent's
|
|
143
|
+
// (exhausted) account and rolls the fleet to the next non-exhausted
|
|
144
|
+
// fallback_order account, returning it as `rolledTo`. We trust the broker's
|
|
145
|
+
// choice (same `nextHealthyAccount` selection /auth rotate uses) rather than
|
|
146
|
+
// picking here, so the announcement matches what actually happened. Caller
|
|
147
|
+
// catches and surfaces failures — we don't double-wrap.
|
|
148
|
+
const { rolledTo } = await deps.failover();
|
|
149
|
+
|
|
150
|
+
if (!rolledTo) {
|
|
151
|
+
// All-blocked path: the broker found no non-exhausted fallback. The active
|
|
152
|
+
// account IS now marked exhausted (good for consumers/telemetry), but there
|
|
153
|
+
// was nowhere to roll. Notify with earliest-reset info.
|
|
138
154
|
return {
|
|
139
155
|
kind: 'all-blocked',
|
|
140
156
|
oldLabel: oldSnap.label,
|
|
@@ -151,22 +167,22 @@ export async function runFleetAutoFallback(
|
|
|
151
167
|
};
|
|
152
168
|
}
|
|
153
169
|
|
|
154
|
-
//
|
|
155
|
-
//
|
|
156
|
-
|
|
170
|
+
// Quota for the rolled-to account, looked up from the same probe snapshots
|
|
171
|
+
// (the broker chose by fallback_order, which may differ from the
|
|
172
|
+
// lowest-utilization heuristic — the announcement reflects the real target).
|
|
173
|
+
const newQuota = snapshots.find((s) => s.label === rolledTo)?.quota ?? null;
|
|
157
174
|
|
|
158
175
|
return {
|
|
159
176
|
kind: 'switched',
|
|
160
177
|
oldLabel: oldSnap.label,
|
|
161
|
-
newLabel:
|
|
162
|
-
oldQuota: oldSnap.quota
|
|
163
|
-
|
|
164
|
-
newQuota: target.quota!,
|
|
178
|
+
newLabel: rolledTo,
|
|
179
|
+
oldQuota: oldSnap.quota,
|
|
180
|
+
newQuota,
|
|
165
181
|
announcement: renderFallbackAnnouncement({
|
|
166
182
|
oldLabel: oldSnap.label,
|
|
167
183
|
oldQuota: oldSnap.quota,
|
|
168
|
-
newLabel:
|
|
169
|
-
newQuota
|
|
184
|
+
newLabel: rolledTo,
|
|
185
|
+
newQuota,
|
|
170
186
|
triggerAgent: deps.triggerAgent,
|
|
171
187
|
tz,
|
|
172
188
|
now,
|
|
@@ -174,40 +190,12 @@ export async function runFleetAutoFallback(
|
|
|
174
190
|
};
|
|
175
191
|
}
|
|
176
192
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
* 3. Skip blocked + unknown entirely — never recommend a switch
|
|
184
|
-
* into a wall, never bet on creds we couldn't probe.
|
|
185
|
-
*
|
|
186
|
-
* Returns null when no eligible target exists.
|
|
187
|
-
*/
|
|
188
|
-
export function pickFallbackTarget(
|
|
189
|
-
snapshots: AccountSnapshot[],
|
|
190
|
-
): AccountSnapshot | null {
|
|
191
|
-
const candidates = snapshots
|
|
192
|
-
.filter((s) => !s.isActive && s.quota != null)
|
|
193
|
-
.map((s) => ({ snap: s, health: classifyHealth(s) }));
|
|
194
|
-
|
|
195
|
-
const healthy = candidates
|
|
196
|
-
.filter((c) => c.health === 'healthy')
|
|
197
|
-
.sort((a, b) => a.snap.quota!.fiveHourUtilizationPct - b.snap.quota!.fiveHourUtilizationPct);
|
|
198
|
-
if (healthy.length > 0) return healthy[0]!.snap;
|
|
199
|
-
|
|
200
|
-
const throttling = candidates
|
|
201
|
-
.filter((c) => c.health === 'throttling')
|
|
202
|
-
.sort((a, b) => maxWindow(a.snap.quota!) - maxWindow(b.snap.quota!));
|
|
203
|
-
if (throttling.length > 0) return throttling[0]!.snap;
|
|
204
|
-
|
|
205
|
-
return null;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
function maxWindow(q: QuotaUtilization): number {
|
|
209
|
-
return Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
|
|
210
|
-
}
|
|
193
|
+
// NOTE: target SELECTION now lives in the broker (`nextHealthyAccount`,
|
|
194
|
+
// fallback_order order — the same selection /auth rotate uses). This module
|
|
195
|
+
// no longer picks a target; it calls the non-admin `failover()` (mark-exhausted)
|
|
196
|
+
// and announces whatever the broker rolled to. A second, divergent selector
|
|
197
|
+
// here (the old lowest-utilization `pickFallbackTarget`) was removed so there's
|
|
198
|
+
// one authoritative chooser.
|
|
211
199
|
|
|
212
200
|
function pctSummary(q: QuotaUtilization | null): string {
|
|
213
201
|
if (!q) return 'no probe';
|
|
@@ -16325,7 +16325,8 @@ var init_protocol = __esm(() => {
|
|
|
16325
16325
|
});
|
|
16326
16326
|
MarkExhaustedDataSchema = exports_external.object({
|
|
16327
16327
|
account: exports_external.string(),
|
|
16328
|
-
rolled: exports_external.array(exports_external.string())
|
|
16328
|
+
rolled: exports_external.array(exports_external.string()),
|
|
16329
|
+
rolledTo: exports_external.string().nullable().optional()
|
|
16329
16330
|
});
|
|
16330
16331
|
RefreshAccountDataSchema = exports_external.object({
|
|
16331
16332
|
account: exports_external.string(),
|
|
@@ -40749,6 +40750,7 @@ function createAuthBrokerClient() {
|
|
|
40749
40750
|
const client3 = {
|
|
40750
40751
|
listState: () => broker.listState(),
|
|
40751
40752
|
setActive: (label) => broker.setActive(label),
|
|
40753
|
+
markExhausted: (until) => broker.markExhausted(until),
|
|
40752
40754
|
rmAccount: (label) => broker.rmAccount(label),
|
|
40753
40755
|
refreshAccount: (label) => broker.refreshAccount(label),
|
|
40754
40756
|
setOverride: (agent, account) => broker.setOverride(agent, account),
|
|
@@ -42047,8 +42049,8 @@ async function runFleetAutoFallback(deps) {
|
|
|
42047
42049
|
announcement: `<i>Auto-fallback skipped: ${oldSnap.label} probed healthy ` + `(${pctSummary(oldSnap.quota)}). Stale event?</i>`
|
|
42048
42050
|
};
|
|
42049
42051
|
}
|
|
42050
|
-
const
|
|
42051
|
-
if (!
|
|
42052
|
+
const { rolledTo } = await deps.failover();
|
|
42053
|
+
if (!rolledTo) {
|
|
42052
42054
|
return {
|
|
42053
42055
|
kind: "all-blocked",
|
|
42054
42056
|
oldLabel: oldSnap.label,
|
|
@@ -42064,37 +42066,24 @@ async function runFleetAutoFallback(deps) {
|
|
|
42064
42066
|
})
|
|
42065
42067
|
};
|
|
42066
42068
|
}
|
|
42067
|
-
|
|
42069
|
+
const newQuota = snapshots.find((s) => s.label === rolledTo)?.quota ?? null;
|
|
42068
42070
|
return {
|
|
42069
42071
|
kind: "switched",
|
|
42070
42072
|
oldLabel: oldSnap.label,
|
|
42071
|
-
newLabel:
|
|
42073
|
+
newLabel: rolledTo,
|
|
42072
42074
|
oldQuota: oldSnap.quota,
|
|
42073
|
-
newQuota
|
|
42075
|
+
newQuota,
|
|
42074
42076
|
announcement: renderFallbackAnnouncement({
|
|
42075
42077
|
oldLabel: oldSnap.label,
|
|
42076
42078
|
oldQuota: oldSnap.quota,
|
|
42077
|
-
newLabel:
|
|
42078
|
-
newQuota
|
|
42079
|
+
newLabel: rolledTo,
|
|
42080
|
+
newQuota,
|
|
42079
42081
|
triggerAgent: deps.triggerAgent,
|
|
42080
42082
|
tz,
|
|
42081
42083
|
now
|
|
42082
42084
|
})
|
|
42083
42085
|
};
|
|
42084
42086
|
}
|
|
42085
|
-
function pickFallbackTarget(snapshots) {
|
|
42086
|
-
const candidates = snapshots.filter((s) => !s.isActive && s.quota != null).map((s) => ({ snap: s, health: classifyHealth(s) }));
|
|
42087
|
-
const healthy = candidates.filter((c) => c.health === "healthy").sort((a, b) => a.snap.quota.fiveHourUtilizationPct - b.snap.quota.fiveHourUtilizationPct);
|
|
42088
|
-
if (healthy.length > 0)
|
|
42089
|
-
return healthy[0].snap;
|
|
42090
|
-
const throttling = candidates.filter((c) => c.health === "throttling").sort((a, b) => maxWindow(a.snap.quota) - maxWindow(b.snap.quota));
|
|
42091
|
-
if (throttling.length > 0)
|
|
42092
|
-
return throttling[0].snap;
|
|
42093
|
-
return null;
|
|
42094
|
-
}
|
|
42095
|
-
function maxWindow(q) {
|
|
42096
|
-
return Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
|
|
42097
|
-
}
|
|
42098
42087
|
function pctSummary(q) {
|
|
42099
42088
|
if (!q)
|
|
42100
42089
|
return "no probe";
|
|
@@ -52821,9 +52810,9 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52821
52810
|
}
|
|
52822
52811
|
|
|
52823
52812
|
// ../src/build-info.ts
|
|
52824
|
-
var VERSION = "0.14.
|
|
52825
|
-
var COMMIT_SHA = "
|
|
52826
|
-
var COMMIT_DATE = "2026-06-
|
|
52813
|
+
var VERSION = "0.14.81";
|
|
52814
|
+
var COMMIT_SHA = "4ac9cc7d";
|
|
52815
|
+
var COMMIT_DATE = "2026-06-07T10:43:55+10:00";
|
|
52827
52816
|
var LATEST_PR = null;
|
|
52828
52817
|
var COMMITS_AHEAD_OF_TAG = 2;
|
|
52829
52818
|
|
|
@@ -60949,7 +60938,10 @@ async function doFireFleetAutoFallback(triggerAgent) {
|
|
|
60949
60938
|
const outcome = await runFleetAutoFallback({
|
|
60950
60939
|
state: state4,
|
|
60951
60940
|
quotas,
|
|
60952
|
-
|
|
60941
|
+
failover: async () => {
|
|
60942
|
+
const r = await client3.markExhausted();
|
|
60943
|
+
return { rolledTo: r.rolledTo ?? null, rolled: r.rolled };
|
|
60944
|
+
},
|
|
60953
60945
|
triggerAgent,
|
|
60954
60946
|
tz
|
|
60955
60947
|
});
|
|
@@ -27,6 +27,7 @@ export function createAuthBrokerClient(): {
|
|
|
27
27
|
const client: AuthBrokerClient = {
|
|
28
28
|
listState: () => broker.listState(),
|
|
29
29
|
setActive: (label: string) => broker.setActive(label),
|
|
30
|
+
markExhausted: (until?: number) => broker.markExhausted(until),
|
|
30
31
|
rmAccount: (label: string) => broker.rmAccount(label),
|
|
31
32
|
refreshAccount: (label: string) => broker.refreshAccount(label),
|
|
32
33
|
setOverride: (agent: string, account: string | null) =>
|
|
@@ -214,6 +214,14 @@ export function parseAuthCommand(text: string): ParsedAuthCommand | null {
|
|
|
214
214
|
export interface AuthBrokerClient {
|
|
215
215
|
listState(): Promise<ListStateData>
|
|
216
216
|
setActive(label: string): Promise<{ active: string; fanned: string[] }>
|
|
217
|
+
/**
|
|
218
|
+
* Non-admin failover (broker `mark-exhausted`). Marks the CALLER's own
|
|
219
|
+
* account exhausted and rolls every agent on it to the next non-exhausted
|
|
220
|
+
* `fallback_order` account, returned as `rolledTo` (null when none). Unlike
|
|
221
|
+
* `setActive` this needs no admin — the account is derived from the caller's
|
|
222
|
+
* identity — so auto-fallback works from any agent.
|
|
223
|
+
*/
|
|
224
|
+
markExhausted(until?: number): Promise<{ account: string; rolled: string[]; rolledTo?: string | null }>
|
|
217
225
|
rmAccount(label: string): Promise<{ label: string }>
|
|
218
226
|
refreshAccount(label: string): Promise<{ account: string; expiresAt?: number }>
|
|
219
227
|
setOverride(
|
|
@@ -14644,7 +14644,18 @@ async function doFireFleetAutoFallback(triggerAgent: string): Promise<boolean> {
|
|
|
14644
14644
|
const outcome = await runFleetAutoFallback({
|
|
14645
14645
|
state,
|
|
14646
14646
|
quotas,
|
|
14647
|
-
|
|
14647
|
+
// Non-admin swap: mark-exhausted derives the account from THIS agent's
|
|
14648
|
+
// own identity and rolls the fleet to the next fallback. Replaces the
|
|
14649
|
+
// admin-gated client.setActive(), which 403'd ("set-active requires
|
|
14650
|
+
// admin") for every non-admin agent — i.e. the whole production fleet —
|
|
14651
|
+
// so auto-fallback only ever worked when an admin agent happened to be
|
|
14652
|
+
// the one that 429'd. The manual /auth button stays on set-active (the
|
|
14653
|
+
// operator is explicitly choosing, and is admin); only this automatic
|
|
14654
|
+
// path moves to the non-admin verb.
|
|
14655
|
+
failover: async () => {
|
|
14656
|
+
const r = await client.markExhausted()
|
|
14657
|
+
return { rolledTo: r.rolledTo ?? null, rolled: r.rolled }
|
|
14658
|
+
},
|
|
14648
14659
|
triggerAgent,
|
|
14649
14660
|
tz,
|
|
14650
14661
|
})
|
|
@@ -1,10 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tests for the fleet-wide auto-fallback planner. Pure-data —
|
|
3
|
-
* no broker UDS, no Telegram bot.
|
|
4
|
-
*
|
|
3
|
+
* no broker UDS, no Telegram bot.
|
|
4
|
+
*
|
|
5
|
+
* Contract change (fix/auto-fallback-non-admin): the swap now goes through
|
|
6
|
+
* the broker's NON-ADMIN `mark-exhausted` verb via the injected `failover()`
|
|
7
|
+
* dep, which returns the account the broker rolled TO (`rolledTo`). Target
|
|
8
|
+
* SELECTION moved to the broker (`nextHealthyAccount`, fallback_order order —
|
|
9
|
+
* what /auth rotate uses); this module no longer picks, it announces whatever
|
|
10
|
+
* the broker rolled to. The old admin-gated `setActive` dep is gone — that
|
|
11
|
+
* gate is exactly why a non-admin agent that 429'd could never self-heal.
|
|
5
12
|
*/
|
|
6
13
|
import { describe, it, expect, vi } from 'vitest';
|
|
7
|
-
import { runFleetAutoFallback
|
|
14
|
+
import { runFleetAutoFallback } from '../auto-fallback-fleet.js';
|
|
8
15
|
import type { QuotaResult, QuotaUtilization } from '../quota-check.js';
|
|
9
16
|
import type { ListStateData } from '../../src/auth/broker/client.js';
|
|
10
17
|
|
|
@@ -38,38 +45,35 @@ function state(active: string, accounts: string[]): ListStateData {
|
|
|
38
45
|
}
|
|
39
46
|
|
|
40
47
|
describe('runFleetAutoFallback', () => {
|
|
41
|
-
it('
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
fanned: ['alice', 'bob'],
|
|
45
|
-
}));
|
|
48
|
+
it('swaps via the non-admin failover() and announces the broker’s rolledTo', async () => {
|
|
49
|
+
// The broker (mark-exhausted → nextHealthyAccount) chose you@x.
|
|
50
|
+
const failover = vi.fn(async () => ({ rolledTo: 'you@x', rolled: ['alice', 'bob'] }));
|
|
46
51
|
const out = await runFleetAutoFallback({
|
|
47
52
|
state: state('ken@x', ['ken@x', 'me@x', 'you@x']),
|
|
48
53
|
quotas: [
|
|
49
|
-
// ken: just blew 5h
|
|
54
|
+
// ken: just blew 5h (the trigger)
|
|
50
55
|
qOk({
|
|
51
56
|
fiveHourUtilizationPct: 100,
|
|
52
57
|
fiveHourResetAt: new Date('2026-05-15T05:50:00Z'),
|
|
53
58
|
representativeClaim: 'five_hour',
|
|
54
59
|
}),
|
|
55
|
-
// me: dead on 7d
|
|
60
|
+
// me: dead on 7d
|
|
56
61
|
qOk({
|
|
57
62
|
sevenDayUtilizationPct: 100,
|
|
58
63
|
sevenDayResetAt: new Date('2026-05-17T10:00:00Z'),
|
|
59
64
|
representativeClaim: 'seven_day',
|
|
60
65
|
}),
|
|
61
|
-
// you: healthy
|
|
66
|
+
// you: healthy — the rolled-to account, used for the headroom line
|
|
62
67
|
qOk({ fiveHourUtilizationPct: 8, sevenDayUtilizationPct: 20 }),
|
|
63
68
|
],
|
|
64
|
-
|
|
69
|
+
failover,
|
|
65
70
|
triggerAgent: 'carrie',
|
|
66
71
|
now: NOW,
|
|
67
72
|
tz: 'UTC',
|
|
68
73
|
});
|
|
69
74
|
|
|
70
75
|
expect(out.kind).toBe('switched');
|
|
71
|
-
expect(
|
|
72
|
-
expect(setActive).toHaveBeenCalledWith('you@x');
|
|
76
|
+
expect(failover).toHaveBeenCalledTimes(1);
|
|
73
77
|
if (out.kind === 'switched') {
|
|
74
78
|
expect(out.oldLabel).toBe('ken@x');
|
|
75
79
|
expect(out.newLabel).toBe('you@x');
|
|
@@ -79,8 +83,8 @@ describe('runFleetAutoFallback', () => {
|
|
|
79
83
|
}
|
|
80
84
|
});
|
|
81
85
|
|
|
82
|
-
it('returns all-blocked
|
|
83
|
-
const
|
|
86
|
+
it('returns all-blocked when the broker reports rolledTo=null (nowhere to roll)', async () => {
|
|
87
|
+
const failover = vi.fn(async () => ({ rolledTo: null, rolled: [] }));
|
|
84
88
|
const out = await runFleetAutoFallback({
|
|
85
89
|
state: state('ken@x', ['ken@x', 'me@x']),
|
|
86
90
|
quotas: [
|
|
@@ -95,117 +99,77 @@ describe('runFleetAutoFallback', () => {
|
|
|
95
99
|
representativeClaim: 'seven_day',
|
|
96
100
|
}),
|
|
97
101
|
],
|
|
98
|
-
|
|
102
|
+
failover,
|
|
99
103
|
triggerAgent: 'carrie',
|
|
100
104
|
now: NOW,
|
|
101
105
|
tz: 'UTC',
|
|
102
106
|
});
|
|
103
107
|
|
|
104
108
|
expect(out.kind).toBe('all-blocked');
|
|
105
|
-
|
|
109
|
+
// failover IS called even on all-blocked — marking the active exhausted is
|
|
110
|
+
// correct (consumers/telemetry); there was just nowhere to roll.
|
|
111
|
+
expect(failover).toHaveBeenCalledTimes(1);
|
|
106
112
|
if (out.kind === 'all-blocked') {
|
|
107
113
|
expect(out.announcement).toContain('All accounts blocked');
|
|
108
114
|
expect(out.announcement).toContain('/auth add');
|
|
109
115
|
}
|
|
110
116
|
});
|
|
111
117
|
|
|
112
|
-
it('idempotency: skips swap when active probes healthy
|
|
113
|
-
const
|
|
118
|
+
it('idempotency: skips the swap WITHOUT calling failover when active probes healthy', async () => {
|
|
119
|
+
const failover = vi.fn();
|
|
114
120
|
const out = await runFleetAutoFallback({
|
|
115
121
|
state: state('ken@x', ['ken@x', 'you@x']),
|
|
116
122
|
quotas: [
|
|
117
123
|
qOk({ fiveHourUtilizationPct: 5, sevenDayUtilizationPct: 10 }),
|
|
118
124
|
qOk({ fiveHourUtilizationPct: 5, sevenDayUtilizationPct: 10 }),
|
|
119
125
|
],
|
|
120
|
-
|
|
126
|
+
failover,
|
|
121
127
|
triggerAgent: 'carrie',
|
|
122
128
|
now: NOW,
|
|
123
129
|
tz: 'UTC',
|
|
124
130
|
});
|
|
125
131
|
|
|
126
132
|
expect(out.kind).toBe('no-eligible-target');
|
|
127
|
-
expect(
|
|
133
|
+
expect(failover).not.toHaveBeenCalled();
|
|
128
134
|
expect(out.announcement).toContain('skipped');
|
|
129
135
|
expect(out.announcement).toContain('Stale event?');
|
|
130
136
|
});
|
|
131
137
|
|
|
132
|
-
it('returns no-old-active when broker has no active account
|
|
133
|
-
const
|
|
138
|
+
it('returns no-old-active (no failover) when broker has no active account', async () => {
|
|
139
|
+
const failover = vi.fn();
|
|
134
140
|
const out = await runFleetAutoFallback({
|
|
135
141
|
state: { active: '', fallback_order: [], accounts: [], agents: [], consumers: [] },
|
|
136
142
|
quotas: [],
|
|
137
|
-
|
|
143
|
+
failover,
|
|
138
144
|
triggerAgent: 'carrie',
|
|
139
145
|
now: NOW,
|
|
140
146
|
tz: 'UTC',
|
|
141
147
|
});
|
|
142
148
|
|
|
143
149
|
expect(out.kind).toBe('no-old-active');
|
|
144
|
-
expect(
|
|
150
|
+
expect(failover).not.toHaveBeenCalled();
|
|
145
151
|
});
|
|
146
152
|
|
|
147
|
-
it('
|
|
148
|
-
|
|
153
|
+
it('announces even when the live probe of the active account failed (broker still rolled)', async () => {
|
|
154
|
+
// Probe failure for the active account → oldQuota null, but the broker
|
|
155
|
+
// (authoritative exhaustion state) still rolled. We must still announce.
|
|
156
|
+
const failover = vi.fn(async () => ({ rolledTo: 'you@x', rolled: ['alice'] }));
|
|
149
157
|
const out = await runFleetAutoFallback({
|
|
150
158
|
state: state('ken@x', ['ken@x', 'you@x']),
|
|
151
159
|
quotas: [
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
fiveHourResetAt: new Date('2026-05-15T05:50:00Z'),
|
|
155
|
-
representativeClaim: 'five_hour',
|
|
156
|
-
}),
|
|
157
|
-
// you throttling at 85% but not blocked
|
|
158
|
-
qOk({ fiveHourUtilizationPct: 85, sevenDayUtilizationPct: 20 }),
|
|
160
|
+
{ ok: false, reason: 'HTTP 401' }, // active probe failed → unknown health (not 'healthy', so we proceed)
|
|
161
|
+
qOk({ fiveHourUtilizationPct: 5 }),
|
|
159
162
|
],
|
|
160
|
-
|
|
163
|
+
failover,
|
|
161
164
|
triggerAgent: 'carrie',
|
|
162
165
|
now: NOW,
|
|
163
166
|
tz: 'UTC',
|
|
164
167
|
});
|
|
165
168
|
|
|
166
169
|
expect(out.kind).toBe('switched');
|
|
167
|
-
expect(
|
|
170
|
+
expect(failover).toHaveBeenCalledTimes(1);
|
|
168
171
|
if (out.kind === 'switched') {
|
|
169
|
-
expect(out.
|
|
172
|
+
expect(out.newLabel).toBe('you@x');
|
|
170
173
|
}
|
|
171
174
|
});
|
|
172
|
-
|
|
173
|
-
it('skips unknown-health (probe failed) when picking a target', async () => {
|
|
174
|
-
const setActive = vi.fn(async (label: string) => ({ active: label, fanned: [] }));
|
|
175
|
-
const out = await runFleetAutoFallback({
|
|
176
|
-
state: state('ken@x', ['ken@x', 'broken@x', 'you@x']),
|
|
177
|
-
quotas: [
|
|
178
|
-
qOk({ fiveHourUtilizationPct: 100, fiveHourResetAt: new Date('2026-05-15T05:50:00Z') }),
|
|
179
|
-
{ ok: false, reason: 'HTTP 401' },
|
|
180
|
-
qOk({ fiveHourUtilizationPct: 5 }),
|
|
181
|
-
],
|
|
182
|
-
setActive,
|
|
183
|
-
triggerAgent: 'carrie',
|
|
184
|
-
now: NOW,
|
|
185
|
-
tz: 'UTC',
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
expect(out.kind).toBe('switched');
|
|
189
|
-
expect(setActive).toHaveBeenCalledWith('you@x');
|
|
190
|
-
});
|
|
191
|
-
});
|
|
192
|
-
|
|
193
|
-
describe('pickFallbackTarget', () => {
|
|
194
|
-
it('prefers lower-5h-utilization healthy account', () => {
|
|
195
|
-
const snaps = [
|
|
196
|
-
{ label: 'a@x', isActive: true, quota: quota({ fiveHourUtilizationPct: 100 }) },
|
|
197
|
-
{ label: 'low@x', isActive: false, quota: quota({ fiveHourUtilizationPct: 5 }) },
|
|
198
|
-
{ label: 'med@x', isActive: false, quota: quota({ fiveHourUtilizationPct: 30 }) },
|
|
199
|
-
];
|
|
200
|
-
const target = pickFallbackTarget(snaps);
|
|
201
|
-
expect(target?.label).toBe('low@x');
|
|
202
|
-
});
|
|
203
|
-
|
|
204
|
-
it('returns null when only blocked alternatives exist', () => {
|
|
205
|
-
const snaps = [
|
|
206
|
-
{ label: 'a@x', isActive: true, quota: quota({ fiveHourUtilizationPct: 100 }) },
|
|
207
|
-
{ label: 'b@x', isActive: false, quota: quota({ sevenDayUtilizationPct: 100 }) },
|
|
208
|
-
];
|
|
209
|
-
expect(pickFallbackTarget(snaps)).toBeNull();
|
|
210
|
-
});
|
|
211
175
|
});
|