switchroom 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +5 -4
  2. package/dist/agent-scheduler/index.js +2 -2
  3. package/dist/auth-broker/index.js +125 -3
  4. package/dist/cli/drive-write-pretool.mjs +5436 -0
  5. package/dist/cli/switchroom.js +231 -29
  6. package/dist/host-control/main.js +2 -2
  7. package/dist/vault/approvals/kernel-server.js +2 -2
  8. package/dist/vault/broker/server.js +2 -2
  9. package/package.json +1 -1
  10. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  11. package/telegram-plugin/admin-commands/index.ts +2 -0
  12. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  13. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  14. package/telegram-plugin/auto-fallback.ts +28 -301
  15. package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
  16. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  17. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  18. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  19. package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
  20. package/telegram-plugin/gateway/auth-command.ts +131 -10
  21. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  22. package/telegram-plugin/gateway/boot-card.ts +1 -1
  23. package/telegram-plugin/gateway/boot-probes.ts +6 -9
  24. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  25. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  26. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  27. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  28. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  29. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  30. package/telegram-plugin/gateway/gateway.ts +903 -173
  31. package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
  32. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  33. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  34. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  35. package/telegram-plugin/model-unavailable.ts +28 -12
  36. package/telegram-plugin/silence-poke.ts +153 -1
  37. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  38. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  39. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  40. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  41. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  42. package/telegram-plugin/tests/boot-probes.test.ts +16 -18
  43. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  44. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  45. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  46. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  47. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  48. package/telegram-plugin/turn-flush-safety.ts +55 -1
  49. package/telegram-plugin/uat/SETUP.md +16 -12
  50. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  51. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  52. package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Fleet-wide auto-fallback (RFC H — successor to the per-agent
3
+ * `performAutoFallback` in `auto-fallback.ts`).
4
+ *
5
+ * Why this exists alongside the legacy per-agent path:
6
+ *
7
+ * The pre-#1329 auto-fallback called `fallbackToNextSlot(agentDir)`,
8
+ * which writes the new active slot to ONE agent's local
9
+ * `.claude/credentials.json`. That left the rest of the fleet still
10
+ * pointing at the just-exhausted account — which would then hit the
11
+ * wall on its own next call, surfacing N separate "Model unavailable"
12
+ * cards for the same root cause.
13
+ *
14
+ * Manual `/auth use <label>` already takes the fleet-wide path
15
+ * (broker.setActive → fan-out to all per-agent credential mirrors).
16
+ * Auto-fallback now uses the same path so scope is consistent and
17
+ * one quota event resolves the whole fleet in one swap.
18
+ *
19
+ * What this module does:
20
+ *
21
+ * 1. Probe live quota for every account in parallel via the
22
+ * broker (`client.probeQuota(...)`, #1336) so we pick the best
23
+ * target with current data, not stale broker disk-cache.
24
+ * 2. Skip blocked accounts entirely; pick the lowest-utilization
25
+ * healthy candidate (or, if none, the lowest throttling one).
26
+ * 3. Call `client.setActive(target)` — same broker verb /auth use
27
+ * uses. Broker re-mirrors creds to all agents.
28
+ * 4. Render the causal-shape announcement
29
+ * (`renderFallbackAnnouncement`) with the OLD account's binding
30
+ * window in the headline (5-hour vs 7-day) and the new
31
+ * account's headroom in the body.
32
+ *
33
+ * Pure-data return shape — caller does the actual Telegram send +
34
+ * lockout-record bookkeeping, mirroring the legacy module's contract.
35
+ */
36
+
37
+ import type { QuotaResult, QuotaUtilization } from './quota-check.js';
38
+ import type { ListStateData } from '../src/auth/broker/client.js';
39
+ import {
40
+ renderFallbackAnnouncement,
41
+ classifyHealth,
42
+ buildSnapshotsFromState,
43
+ type AccountSnapshot,
44
+ } from './auth-snapshot-format.js';
45
+
46
+ export type FleetFallbackOutcome =
47
+ | {
48
+ kind: 'switched';
49
+ oldLabel: string;
50
+ newLabel: string;
51
+ announcement: string;
52
+ /** Quota for the OLD account at the moment of failure — caller
53
+ * may persist this as the broker's `quota.json` so the next
54
+ * /auth render reflects the freshly-known exhaustion without
55
+ * another probe. */
56
+ oldQuota: QuotaUtilization;
57
+ /** Quota for the new active account, useful for caller logging. */
58
+ newQuota: QuotaUtilization;
59
+ }
60
+ | {
61
+ kind: 'all-blocked';
62
+ oldLabel: string;
63
+ announcement: string;
64
+ oldQuota: QuotaUtilization | null;
65
+ }
66
+ | {
67
+ kind: 'no-old-active';
68
+ announcement: string;
69
+ }
70
+ | {
71
+ kind: 'no-eligible-target';
72
+ oldLabel: string;
73
+ announcement: string;
74
+ oldQuota: QuotaUtilization | null;
75
+ };
76
+
77
+ export interface FleetFallbackDeps {
78
+ /** Live broker state. Caller passes pre-fetched data so this module
79
+ * is testable without spinning up a UDS. */
80
+ state: ListStateData;
81
+ /** Parallel array of live quota probes, same order as `state.accounts`.
82
+ * Get via `client.probeQuota(state.accounts.map(a => a.label))`
83
+ * and map the response back to per-account results (#1336). */
84
+ quotas: QuotaResult[];
85
+ /** Broker `setActive` invoker. Returns the result for logging. */
86
+ setActive: (label: string) => Promise<{ active: string; fanned: string[] }>;
87
+ /** Agent that triggered this fallback (for the announcement byline). */
88
+ triggerAgent: string;
89
+ /** Operator timezone for absolute reset times in the announcement. */
90
+ tz?: string;
91
+ now?: Date;
92
+ }
93
+
94
+ /**
95
+ * Plan + execute the fleet-wide swap. Returns a structured outcome the
96
+ * caller can both log and notify on.
97
+ *
98
+ * Idempotency: when the active account is already healthy (a stale
99
+ * model-unavailable event arrives after the quota window already
100
+ * rolled over, for example), we DO NOT swap. Returns
101
+ * `'no-eligible-target'` so the caller silently no-ops the
102
+ * announcement.
103
+ */
104
+ export async function runFleetAutoFallback(
105
+ deps: FleetFallbackDeps,
106
+ ): Promise<FleetFallbackOutcome> {
107
+ const now = deps.now ?? new Date();
108
+ const tz = deps.tz ?? 'UTC';
109
+ const snapshots = buildSnapshotsFromState(deps.state, deps.quotas);
110
+
111
+ const oldSnap = snapshots.find((s) => s.isActive);
112
+ if (!oldSnap) {
113
+ return {
114
+ kind: 'no-old-active',
115
+ announcement: '<i>Auto-fallback skipped: no active account in broker state.</i>',
116
+ };
117
+ }
118
+
119
+ // Idempotency guard: don't swap a healthy active account, even if
120
+ // the trigger event said quota_exhausted. The event may be stale
121
+ // (event posted, window rolled over, gateway picked it up late).
122
+ const oldHealth = classifyHealth(oldSnap);
123
+ if (oldHealth === 'healthy') {
124
+ return {
125
+ kind: 'no-eligible-target',
126
+ oldLabel: oldSnap.label,
127
+ oldQuota: oldSnap.quota,
128
+ announcement:
129
+ `<i>Auto-fallback skipped: ${oldSnap.label} probed healthy ` +
130
+ `(${pctSummary(oldSnap.quota)}). Stale event?</i>`,
131
+ };
132
+ }
133
+
134
+ const target = pickFallbackTarget(snapshots);
135
+ if (!target) {
136
+ // All-blocked path: no eligible target. Still notify the user with
137
+ // earliest-reset info via the announcement formatter.
138
+ return {
139
+ kind: 'all-blocked',
140
+ oldLabel: oldSnap.label,
141
+ oldQuota: oldSnap.quota,
142
+ announcement: renderFallbackAnnouncement({
143
+ oldLabel: oldSnap.label,
144
+ oldQuota: oldSnap.quota,
145
+ newLabel: null,
146
+ newQuota: null,
147
+ triggerAgent: deps.triggerAgent,
148
+ tz,
149
+ now,
150
+ }),
151
+ };
152
+ }
153
+
154
+ // Execute the broker swap. Caller catches and surfaces the failure
155
+ // — we don't double-wrap.
156
+ await deps.setActive(target.label);
157
+
158
+ return {
159
+ kind: 'switched',
160
+ oldLabel: oldSnap.label,
161
+ newLabel: target.label,
162
+ oldQuota: oldSnap.quota!, // non-null: only `unknown` health gets here through
163
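+ // the no-target branch, never the switched one. NOTE(review): nothing visible here enforces that — an active account whose quota is null (failed probe) and is not classified 'healthy' still reaches this return when a target exists; confirm against classifyHealth or guard explicitly.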
164
+ newQuota: target.quota!,
165
+ announcement: renderFallbackAnnouncement({
166
+ oldLabel: oldSnap.label,
167
+ oldQuota: oldSnap.quota,
168
+ newLabel: target.label,
169
+ newQuota: target.quota,
170
+ triggerAgent: deps.triggerAgent,
171
+ tz,
172
+ now,
173
+ }),
174
+ };
175
+ }
176
+
177
+ /**
178
+ * Pick the best non-active fallback target. Selection order:
179
+ * 1. Healthy accounts, sorted by lowest 5h utilization (most
180
+ * runway).
181
+ * 2. If no healthy alternative, throttling accounts sorted by
182
+ * lowest binding-window utilization (least worst).
183
+ * 3. Skip blocked + unknown entirely — never recommend a switch
184
+ * into a wall, never bet on creds we couldn't probe.
185
+ *
186
+ * Returns null when no eligible target exists.
187
+ */
188
+ export function pickFallbackTarget(
189
+ snapshots: AccountSnapshot[],
190
+ ): AccountSnapshot | null {
191
+ const candidates = snapshots
192
+ .filter((s) => !s.isActive && s.quota != null)
193
+ .map((s) => ({ snap: s, health: classifyHealth(s) }));
194
+
195
+ const healthy = candidates
196
+ .filter((c) => c.health === 'healthy')
197
+ .sort((a, b) => a.snap.quota!.fiveHourUtilizationPct - b.snap.quota!.fiveHourUtilizationPct);
198
+ if (healthy.length > 0) return healthy[0]!.snap;
199
+
200
+ const throttling = candidates
201
+ .filter((c) => c.health === 'throttling')
202
+ .sort((a, b) => maxWindow(a.snap.quota!) - maxWindow(b.snap.quota!));
203
+ if (throttling.length > 0) return throttling[0]!.snap;
204
+
205
+ return null;
206
+ }
207
+
208
+ function maxWindow(q: QuotaUtilization): number {
209
+ return Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
210
+ }
211
+
212
+ function pctSummary(q: QuotaUtilization | null): string {
213
+ if (!q) return 'no probe';
214
+ return `${Math.round(q.fiveHourUtilizationPct)}% / ${Math.round(q.sevenDayUtilizationPct)}%`;
215
+ }
@@ -1,252 +1,51 @@
1
1
  /**
2
- * Auto-fallback on quota exhaustion pure decision logic + side-effect
3
- * plan builder, separate from gateway.ts so it can be unit-tested without
4
- * spinning up the bot or the filesystem.
2
+ * Read-only persistence for the legacy per-agent auto-fallback lockout
3
+ * file. The lockout writer + decision logic + plan executor were retired
4
+ * in PR #1329 (fleet-wide auto-fallback path supersedes the per-agent
5
+ * one); this module's only remaining job is to support
6
+ * `isAutoFallbackCooldownActive` in gateway.ts, which reads the existing
7
+ * on-disk lockout to defer pending-restart drains while a recent
8
+ * rotation is still settling.
5
9
  *
6
- * Runtime flow (assembled by the caller):
7
- * 1. Poll quota via `fetchQuota` from quota-check.ts
8
- * 2. Pass the result into `evaluateFallbackTrigger` to decide if we
9
- * should act, together with an in-memory lockout record that
10
- * prevents rapid re-fire.
11
- * 3. If the trigger says "fallback", call `performAutoFallback`
12
- * which returns a plan + side-effect descriptor the caller
13
- * executes (mark exhausted, swap slot, restart agent, notify).
10
+ * Existing on-disk lockouts (written by pre-#1329 gateways) age out via
11
+ * `DEFAULT_FALLBACK_COOLDOWN_MS`; new lockouts are never written. Once
12
+ * every operator has run `switchroom update` post-#1329, the file goes
13
+ * cold and `isAutoFallbackCooldownActive` always returns false. This
14
+ * module + the drain-cap consumer can then be retired together in a
15
+ * follow-up.
14
16
  */
15
17
 
16
- import type { QuotaResult, QuotaUtilization } from './quota-check.js';
17
- import { renderOperatorEvent } from './operator-events.js';
18
-
19
- /** Threshold over which we treat the active slot as functionally out
20
- * of quota. 99.5% leaves a tiny head-room for clock skew between the
21
- * Anthropic rate-limit window and wall clock, matching the dashboard's
22
- * own rounding behaviour. Tune with care. */
23
- export const DEFAULT_TRIGGER_UTILIZATION_PCT = 99.5;
24
-
25
18
  /** Minimum time between two consecutive fallback attempts for the same
26
- * slot name, in milliseconds. Guards against a poll-storm firing the
27
- * restart-notify pipeline repeatedly before the quota meta file has
28
- * a chance to flush to disk. */
19
+ * slot — guard against poll-storm fallback loops. Read-only since
20
+ * PR #1329; only consumed by `isAutoFallbackCooldownActive` to bound
21
+ * the drain-cap defer. */
29
22
  export const DEFAULT_FALLBACK_COOLDOWN_MS = 2 * 60_000;
30
23
 
31
24
  export type LockoutRecord = {
32
- /** Slot name most recently marked exhausted by this process. */
25
+ /** Slot name most recently marked exhausted by the legacy writer. */
33
26
  lastTransitionedFrom: string | null;
34
- /** Wall-clock ms timestamp of the last transition. */
27
+ /** Wall-clock ms timestamp of that transition. */
35
28
  lastTransitionAt: number;
36
29
  };
37
30
 
38
- export type FallbackDecision =
39
- | { action: 'noop'; reason: string }
40
- | {
41
- action: 'fallback';
42
- triggerReason: 'utilization-over-threshold' | '429-response' | 'explicit';
43
- resetAtMs: number | null;
44
- utilizationPct: number | null;
45
- };
46
-
47
- export type EvaluateArgs = {
48
- quota: QuotaResult;
49
- activeSlot: string | null;
50
- now: number;
51
- lockout: LockoutRecord;
52
- thresholdPct?: number;
53
- cooldownMs?: number;
54
- /** Set to true when the caller already saw a 429 response body;
55
- * this short-circuits past utilization-based decisions. */
56
- saw429?: boolean;
57
- };
58
-
59
- /** Pure decision function — takes a quota result + lockout state and
60
- * returns whether the caller should trigger auto-fallback.
61
- * No side effects. Throws only on programmer error. */
62
- export function evaluateFallbackTrigger(args: EvaluateArgs): FallbackDecision {
63
- const threshold = args.thresholdPct ?? DEFAULT_TRIGGER_UTILIZATION_PCT;
64
- const cooldown = args.cooldownMs ?? DEFAULT_FALLBACK_COOLDOWN_MS;
65
-
66
- if (!args.activeSlot) {
67
- return { action: 'noop', reason: 'no active slot (nothing to fall back from)' };
68
- }
69
-
70
- // Cooldown guard: if we already transitioned out of this slot
71
- // recently, don't flap. The caller can safely re-poll without
72
- // creating noise.
73
- if (
74
- args.lockout.lastTransitionedFrom === args.activeSlot &&
75
- args.now - args.lockout.lastTransitionAt < cooldown
76
- ) {
77
- return { action: 'noop', reason: 'recent transition, within cooldown' };
78
- }
79
-
80
- if (args.saw429) {
81
- return {
82
- action: 'fallback',
83
- triggerReason: '429-response',
84
- resetAtMs: extractNearestResetMs(args.quota),
85
- utilizationPct: extractHighestUtilization(args.quota),
86
- };
87
- }
88
-
89
- if (!args.quota.ok) {
90
- return { action: 'noop', reason: `quota check failed: ${args.quota.reason}` };
91
- }
92
-
93
- const highest = extractHighestUtilization(args.quota);
94
- if (highest == null) {
95
- return { action: 'noop', reason: 'no utilization headers' };
96
- }
97
-
98
- if (highest >= threshold) {
99
- return {
100
- action: 'fallback',
101
- triggerReason: 'utilization-over-threshold',
102
- resetAtMs: extractNearestResetMs(args.quota),
103
- utilizationPct: highest,
104
- };
105
- }
106
-
107
- return { action: 'noop', reason: `utilization ${highest.toFixed(1)}% below ${threshold}%` };
108
- }
109
-
110
- function extractHighestUtilization(q: QuotaResult): number | null {
111
- if (!q.ok) return null;
112
- const u: QuotaUtilization = q.data;
113
- const five = u.fiveHourUtilizationPct ?? null;
114
- const seven = u.sevenDayUtilizationPct ?? null;
115
- if (five == null && seven == null) return null;
116
- if (five == null) return seven;
117
- if (seven == null) return five;
118
- return Math.max(five, seven);
119
- }
120
-
121
- function extractNearestResetMs(q: QuotaResult): number | null {
122
- if (!q.ok) return null;
123
- const candidates: number[] = [];
124
- if (q.data.fiveHourResetAt) candidates.push(q.data.fiveHourResetAt.getTime());
125
- if (q.data.sevenDayResetAt) candidates.push(q.data.sevenDayResetAt.getTime());
126
- if (candidates.length === 0) return null;
127
- return Math.min(...candidates);
128
- }
129
-
130
- /** The full plan built by the orchestrator — mirrored by the
131
- * executor in gateway.ts. Pure data so tests can assert on it. */
132
- export type FallbackPlan =
133
- | {
134
- kind: 'executed';
135
- previousSlot: string;
136
- newSlot: string;
137
- resetAtMs: number | null;
138
- notificationHtml: string;
139
- agentName: string;
140
- /** Carried through from the FallbackDecision so the executor can
141
- * decide whether to do a hard or graceful restart. Reactive
142
- * (`429-response`) failover wants a hard restart — the request
143
- * the user just made already failed, so there's no in-flight
144
- * turn worth preserving. Preemptive (`utilization-over-threshold`
145
- * / `explicit`) failover wants a graceful one. See #420. */
146
- triggerReason: 'utilization-over-threshold' | '429-response' | 'explicit';
147
- }
148
- | {
149
- kind: 'exhausted-all';
150
- activeSlot: string;
151
- resetAtMs: number | null;
152
- notificationHtml: string;
153
- agentName: string;
154
- };
155
-
156
- export type PerformArgs = {
157
- agentDir: string;
158
- agentName: string;
159
- decision: Extract<FallbackDecision, { action: 'fallback' }>;
160
- deps: {
161
- /** Current active slot; null means caller has already detached. */
162
- currentActiveSlot: (agentDir: string) => string | null;
163
- markSlotQuotaExhausted: (agentDir: string, slot: string, resetAtMs?: number, reason?: string) => void;
164
- fallbackToNextSlot: (name: string, agentDir: string) => { newActive: string | null; previous: string | null };
165
- };
166
- };
167
-
168
- /** Run the side-effects for a fallback decision and return a plan
169
- * describing what happened. Caller is responsible for:
170
- * - Executing the agent restart CLI (via runSwitchroomCommand)
171
- * - Sending the notification via Telegram
172
- * - Updating the in-memory lockout record (see `nextLockout`)
173
- */
174
- export function performAutoFallback(args: PerformArgs): FallbackPlan {
175
- const active = args.deps.currentActiveSlot(args.agentDir);
176
- if (!active) {
177
- return {
178
- kind: 'exhausted-all',
179
- activeSlot: 'unknown',
180
- resetAtMs: args.decision.resetAtMs,
181
- notificationHtml: buildAllExhaustedMessage('unknown', args.agentName, args.decision.resetAtMs),
182
- agentName: args.agentName,
183
- };
184
- }
185
-
186
- args.deps.markSlotQuotaExhausted(
187
- args.agentDir,
188
- active,
189
- args.decision.resetAtMs ?? undefined,
190
- args.decision.triggerReason,
191
- );
192
-
193
- const { newActive, previous } = args.deps.fallbackToNextSlot(args.agentName, args.agentDir);
194
- const prev = previous ?? active;
195
-
196
- if (!newActive || newActive === prev) {
197
- return {
198
- kind: 'exhausted-all',
199
- activeSlot: prev,
200
- resetAtMs: args.decision.resetAtMs,
201
- notificationHtml: buildAllExhaustedMessage(prev, args.agentName, args.decision.resetAtMs),
202
- agentName: args.agentName,
203
- };
204
- }
205
-
206
- return {
207
- kind: 'executed',
208
- previousSlot: prev,
209
- newSlot: newActive,
210
- resetAtMs: args.decision.resetAtMs,
211
- notificationHtml: buildSwitchedMessage(prev, newActive, args.agentName, args.decision.resetAtMs),
212
- agentName: args.agentName,
213
- triggerReason: args.decision.triggerReason,
214
- };
215
- }
216
-
217
- /** Compute the next lockout record after a successful fallback. */
218
- export function nextLockout(previousSlot: string, now: number): LockoutRecord {
219
- return { lastTransitionedFrom: previousSlot, lastTransitionAt: now };
220
- }
221
-
222
- export function emptyLockout(): LockoutRecord {
223
- return { lastTransitionedFrom: null, lastTransitionAt: 0 };
224
- }
225
-
226
- /**
227
- * Disk-persistence helpers for the lockout record. The cooldown guard
228
- * lives entirely in process memory pre-fix, so a gateway restart inside
229
- * the cooldown window resets the timer to zero — and a quota-flap on
230
- * the now-recovering slot can re-trigger fallback the moment the
231
- * gateway comes back. See #417.
232
- *
233
- * Storage path: \`<agentDir>/.claude/auto-fallback-lockout.json\`. We
234
- * tolerate any read/parse error by returning emptyLockout (the same
235
- * outcome as a fresh process), since the cooldown is a noise filter,
236
- * not a security boundary.
237
- */
238
- const LOCKOUT_FILE = "auto-fallback-lockout.json";
239
-
240
31
  export interface LockoutPersistOps {
241
32
  readFileSync: (path: string, encoding: BufferEncoding) => string;
33
+ // writeFileSync + mkdirSync stay in the interface so the gateway's
34
+ // existing lockoutOps bundle still type-checks. They're never called
35
+ // by this module any more (the writer was retired).
242
36
  writeFileSync: (path: string, data: string, opts: { mode?: number }) => void;
243
37
  existsSync: (path: string) => boolean;
244
38
  mkdirSync: (path: string, opts: { recursive: true }) => void;
245
39
  joinPath: (...parts: string[]) => string;
246
- now?: () => number;
247
40
  }
248
41
 
249
- export function lockoutPath(agentDir: string, joinPath: LockoutPersistOps['joinPath']): string {
42
+ const LOCKOUT_FILE = "auto-fallback-lockout.json";
43
+
44
+ function emptyLockout(): LockoutRecord {
45
+ return { lastTransitionedFrom: null, lastTransitionAt: 0 };
46
+ }
47
+
48
+ function lockoutPath(agentDir: string, joinPath: LockoutPersistOps['joinPath']): string {
250
49
  return joinPath(agentDir, '.claude', LOCKOUT_FILE);
251
50
  }
252
51
 
@@ -274,75 +73,3 @@ export function loadLockout(agentDir: string, ops: LockoutPersistOps): LockoutRe
274
73
  }
275
74
  return emptyLockout();
276
75
  }
277
-
278
- export function saveLockout(
279
- agentDir: string,
280
- record: LockoutRecord,
281
- ops: LockoutPersistOps,
282
- ): void {
283
- const path = lockoutPath(agentDir, ops.joinPath);
284
- // Best-effort: ensure the .claude directory exists, then write. Any
285
- // failure is swallowed by the caller's try/catch — losing the lockout
286
- // file just degrades to in-memory-only behaviour, not a hard failure.
287
- ops.mkdirSync(ops.joinPath(agentDir, '.claude'), { recursive: true });
288
- ops.writeFileSync(
289
- path,
290
- JSON.stringify(record, null, 2) + '\n',
291
- { mode: 0o600 },
292
- );
293
- }
294
-
295
- /**
296
- * Build the notification HTML for a successful slot switch.
297
- * Delegates to renderOperatorEvent for quota-exhausted; appends
298
- * slot-transition detail as structured context.
299
- */
300
- function buildSwitchedMessage(
301
- prev: string,
302
- next: string,
303
- agent: string,
304
- resetAtMs: number | null,
305
- ): string {
306
- const reset = resetAtMs ? formatResetAt(resetAtMs) : 'unknown';
307
- const detail = [
308
- `Switched from slot ${prev} to ${next}. Restarting agent.`,
309
- `Reset at: ${reset}.`,
310
- ].join(' ');
311
- return renderOperatorEvent({
312
- kind: 'quota-exhausted',
313
- agent,
314
- detail,
315
- suggestedActions: [],
316
- firstSeenAt: new Date(),
317
- }).text;
318
- }
319
-
320
- /**
321
- * Build the notification HTML when all slots are exhausted.
322
- * Delegates to renderOperatorEvent for quota-exhausted; appends
323
- * all-exhausted detail.
324
- */
325
- function buildAllExhaustedMessage(
326
- active: string,
327
- agent: string,
328
- resetAtMs: number | null,
329
- ): string {
330
- const reset = resetAtMs ? formatResetAt(resetAtMs) : 'unknown';
331
- const detail = [
332
- `All account slots exhausted. Active slot: ${active}.`,
333
- `Earliest reset at: ${reset}.`,
334
- `Run /auth add ${agent} to attach another subscription.`,
335
- ].join(' ');
336
- return renderOperatorEvent({
337
- kind: 'quota-exhausted',
338
- agent,
339
- detail,
340
- suggestedActions: [],
341
- firstSeenAt: new Date(),
342
- }).text;
343
- }
344
-
345
- function formatResetAt(ms: number): string {
346
- // ISO with seconds trimmed — Telegram doesn't need millisecond precision.
347
- return new Date(ms).toISOString().replace(/\.\d{3}Z$/, 'Z');
348
- }