switchroom 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -373,6 +373,14 @@ import {
373
373
  loadCreditState,
374
374
  saveCreditState,
375
375
  } from '../credits-watch.js'
376
+ import {
377
+ evaluateQuotaWatchAccount,
378
+ loadQuotaWatchState,
379
+ saveQuotaWatchState,
380
+ patchQuotaWatchState,
381
+ emptyAccountState,
382
+ } from '../quota-watch.js'
383
+ import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
376
384
  import {
377
385
  writeTurnActiveMarker,
378
386
  touchTurnActiveMarker,
@@ -3898,11 +3906,12 @@ const ipcServer: IpcServer = createIpcServer({
3898
3906
  const updateOutcomeLine = (() => {
3899
3907
  try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
3900
3908
  })()
3909
+ const resolvedAgentDirForCard = agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp')
3901
3910
  startBootCard(chatId, threadId, botApiForCard, {
3902
3911
  agentName: agentDisplayName,
3903
3912
  agentSlug,
3904
3913
  version: formatBootVersion(),
3905
- agentDir: agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp'),
3914
+ agentDir: resolvedAgentDirForCard,
3906
3915
  gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
3907
3916
  restartReason: reason,
3908
3917
  restartAgeMs: markerAgeMs,
@@ -3911,6 +3920,7 @@ const ipcServer: IpcServer = createIpcServer({
3911
3920
  probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
3912
3921
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
3913
3922
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
3923
+ configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
3914
3924
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
3915
3925
  }, ackMsgId).then(handle => {
3916
3926
  activeBootCard = handle
@@ -11862,6 +11872,183 @@ async function runCreditWatch(): Promise<void> {
11862
11872
  }
11863
11873
  }
11864
11874
 
11875
/**
 * Quota threshold-tier push loop body (#E4).
 *
 * Reads the broker's cached per-account utilization via
 * `listState().accounts[].last_quota` (populated by earlier probeQuota calls
 * from /auth, auto-fallback, and boot cards), evaluates every account with
 * `evaluateQuotaWatchAccount`, and sends one Telegram message per
 * healthy ↔ throttling transition (edge-triggered, not level-triggered).
 * Transitions into or out of `blocked` are left to credits-watch.ts.
 *
 * Probe discipline:
 *   - Steady-state polls: ONE broker `listState` IPC call only (no network).
 *   - Accounts with no cached snapshot (null last_quota) are skipped silently;
 *     the cache fills from /auth, boot card, and auto-fallback probes.
 *   - When a transition is detected: ONE batched `probeQuota` call covering
 *     ONLY the crossing accounts, issued right before notifying, so the
 *     message body carries fresh numbers. Other accounts are not probed.
 *
 * This replaces the previous implementation that called probeQuota for ALL
 * accounts on every 15-minute poll (~768 live Anthropic network calls/day for
 * an 8-account fleet).
 *
 * State persists across restarts via `<stateDir>/quota-watch.json`.
 * Mirrors runCreditWatch's structure and notification routing.
 *
 * @returns Resolves once all notifications were attempted and the state file
 *   write was attempted. Broker, probe, and persistence failures are logged to
 *   stderr rather than thrown.
 */
async function runQuotaWatch(): Promise<void> {
  const agentName = getMyAgentName()
  const stateDir = STATE_DIR

  // listState is a local IPC call: no network, no Anthropic contact.
  const brokerClient = await getAuthBrokerClient(agentName)
  if (!brokerClient) {
    process.stderr.write('telegram gateway: quota-watch: broker client unavailable — skipping\n')
    return
  }

  let listStateData: Awaited<ReturnType<typeof brokerClient.listState>>
  try {
    listStateData = await brokerClient.listState()
  } catch (err) {
    process.stderr.write(`telegram gateway: quota-watch: listState failed: ${err}\n`)
    return
  }

  if (!listStateData.accounts || listStateData.accounts.length === 0) {
    return // No accounts — nothing to watch.
  }

  // Snapshots come from cached broker state only — no live probe. Accounts
  // without a cached quota evaluate to a skip, so they never raise an alarm.
  const snapshots = buildSnapshotsFromCachedState(listStateData)

  const watchState = loadQuotaWatchState(stateDir)
  const now = Date.now()
  const access = loadAccess()

  // First pass: evaluate every account against cached data and keep only the
  // ones that produced a notify decision. The snapshot itself is carried
  // along (rather than a label → index lookup) so that two snapshots sharing
  // a label can never be confused with each other.
  type NotifyDecision = Extract<ReturnType<typeof evaluateQuotaWatchAccount>, { kind: 'notify' }>
  const pendingTransitions: Array<{
    snap: (typeof snapshots)[number]
    decision: NotifyDecision
  }> = []

  for (const snap of snapshots) {
    const prev = watchState[snap.label] ?? emptyAccountState()
    const decision = evaluateQuotaWatchAccount({ agentName, snap, prev, now })
    if (decision.kind === 'notify') {
      pendingTransitions.push({ snap, decision })
    }
  }

  if (pendingTransitions.length === 0) {
    return // Steady-state: no notifications, no probes, no state write.
  }

  // Transition detected: probe ONLY the crossing accounts to get fresh
  // numbers for the notification bodies. One batched RPC for all of them
  // (typically 1, rarely 2+).
  const crossingLabels = pendingTransitions.map(t => t.snap.label)
  const freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
  try {
    const probeData = await brokerClient.probeQuota(crossingLabels, 8000)
    for (const entry of probeData.results) {
      freshProbeMap.set(entry.label, entry.result)
    }
  } catch (err) {
    // Probe failed — still notify using cached data. The user should learn
    // about the threshold crossing even if the numbers are slightly stale.
    process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
  }

  // Second pass: build the final notifications, preferring the decision
  // computed from fresh probe data where one is available.
  let mutatedState = watchState
  const notifications: Array<{ message: string; accountLabel: string }> = []

  for (const { snap, decision } of pendingTransitions) {
    const accountLabel = snap.label
    const freshResult = freshProbeMap.get(accountLabel)
    let finalDecision: NotifyDecision = decision

    if (freshResult && freshResult.ok) {
      const enrichedSnap = { ...snap, quota: freshResult.data }
      const prev = watchState[accountLabel] ?? emptyAccountState()
      const re = evaluateQuotaWatchAccount({ agentName, snap: enrichedSnap, prev, now })
      if (re.kind === 'skip') {
        // The fresh numbers no longer show a transition (e.g. the account
        // recovered between listState and the probe) — don't notify.
        continue
      }
      if (re.transition === decision.transition) {
        finalDecision = re
      }
      // A differing transition keeps the cached decision, as before.
    }

    notifications.push({ message: finalDecision.message, accountLabel })
    mutatedState = patchQuotaWatchState(mutatedState, accountLabel, finalDecision.newAccountState)
  }

  if (notifications.length === 0) {
    return // All transitions resolved by the time of the live probe.
  }

  // Send all notifications (one message per crossing account).
  for (const { message, accountLabel } of notifications) {
    for (const chat_id of access.allowFrom) {
      // Quota-watch notify — best-effort. Wrap via swallowingApiCall so
      // flood-wait / deleted-chat / not-found surface as a stderr log
      // rather than a thrown exception that aborts the loop and leaves
      // half the allowFrom chats unnotified. Matches the wrapping
      // contract enforced by scripts/check-bot-api-wrapping.sh (#1075).
      await swallowingApiCall(
        () =>
          bot.api.sendMessage(chat_id, message, {
            parse_mode: 'HTML',
            link_preview_options: { is_disabled: true },
          }),
        { chat_id, verb: 'quota-watch.notify' },
      )
    }
    process.stderr.write(`telegram gateway: quota-watch: notified transition for account=${accountLabel}\n`)
  }

  // Persist updated state regardless of whether sends succeeded.
  try {
    saveQuotaWatchState(stateDir, mutatedState)
  } catch (err) {
    process.stderr.write(`telegram gateway: quota-watch state persist failed: ${err}\n`)
  }
}
12051
+
11865
12052
  bot.command("auth", async ctx => {
11866
12053
  // sec WS7-F2b (#1394): `/auth` drives the auth-broker credential
11867
12054
  // lifecycle (`/auth add` mints/attaches an Anthropic account token,
@@ -12146,8 +12333,7 @@ function resolveAgentDirForName(agent: string): string | null {
12146
12333
  * restart — systemctl --user restart switchroom-<agent>
12147
12334
  * reauth — delegate to runSwitchroomAuthCommand (same flow as /auth reauth)
12148
12335
  * logs — post last 30 lines of journalctl for the agent
12149
- * swap-slot, add-slotPhase 4c will wire these; for now toast with the
12150
- * equivalent CLI command for the user to run manually.
12336
+ * slot management buttons removed (E5); use /auth use or /auth add instead.
12151
12337
  */
12152
12338
  /**
12153
12339
  * Issue #44: handle taps on the deferred-secret card's inline buttons.
@@ -13776,15 +13962,6 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
13776
13962
  }
13777
13963
  return
13778
13964
  }
13779
- case 'swap-slot':
13780
- case 'add-slot': {
13781
- await ctx.answerCallbackQuery({ text: 'Phase 4c will wire this' }).catch(() => {})
13782
- const cmd = action === 'swap-slot' ? `auth use ${agent} <slot-name>` : `auth add ${agent}`
13783
- await ctx.reply(`Phase 4c will wire ${action} buttons. Until then, run in terminal: <code>switchroom ${cmd}</code>`, {
13784
- parse_mode: 'HTML',
13785
- })
13786
- return
13787
- }
13788
13965
  default: {
13789
13966
  await ctx.answerCallbackQuery({ text: `Unknown action: ${action}` }).catch(() => {})
13790
13967
  return
@@ -14679,7 +14856,7 @@ bot.on('callback_query:data', async ctx => {
14679
14856
 
14680
14857
  // op:<action>:<encoded-agent> callbacks from operator-events.ts
14681
14858
  // renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
14682
- // Actions: dismiss, restart, reauth, swap-slot, add-slot, logs.
14859
+ // Actions: dismiss, restart, reauth, logs.
14683
14860
  if (data.startsWith('op:')) {
14684
14861
  await handleOperatorEventCallback(ctx, data)
14685
14862
  return
@@ -16641,11 +16818,12 @@ void (async () => {
16641
16818
  const updateOutcomeLine = (() => {
16642
16819
  try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
16643
16820
  })()
16821
+ const resolvedAgentDirForBootCard = agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug)
16644
16822
  const handle = await startBootCard(chatId, threadId, botApiForCard, {
16645
16823
  agentName: agentDisplayName,
16646
16824
  agentSlug,
16647
16825
  version: formatBootVersion(),
16648
- agentDir: agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug),
16826
+ agentDir: resolvedAgentDirForBootCard,
16649
16827
  gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
16650
16828
  restartReason: reason,
16651
16829
  restartAgeMs: markerAgeMs,
@@ -16654,6 +16832,7 @@ void (async () => {
16654
16832
  probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
16655
16833
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
16656
16834
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
16835
+ configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
16657
16836
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
16658
16837
  }, ackMsgId)
16659
16838
  activeBootCard = handle
@@ -16705,6 +16884,34 @@ void (async () => {
16705
16884
  }, CREDIT_WATCH_POLL_MS).unref()
16706
16885
  }
16707
16886
 
16887
+ // Proactive quota threshold-tier push (#E4). Reads broker cached
16888
+ // quota for all accounts in the pool, classifies health via
16889
+ // classifyHealth, and fires one Telegram message per
16890
+ // healthy ↔ throttling transition (edge-triggered). Does NOT
16891
+ // cover healthy → blocked or blocked → healthy — credits-watch
16892
+ // handles those fatal-billing transitions above.
16893
+ //
16894
+ // Cadence: 15 min by default (same as credit-watch). Each poll
16895
+ // calls broker listState (local IPC, cheap) + probeQuota only
16896
+ // when a state-change is detected (to get fresh numbers for
16897
+ // the notification body). SWITCHROOM_QUOTA_WATCH_POLL_MS=0 disables.
16898
+ const QUOTA_WATCH_POLL_MS = Number(process.env.SWITCHROOM_QUOTA_WATCH_POLL_MS ?? 15 * 60_000)
16899
+ if (QUOTA_WATCH_POLL_MS > 0) {
16900
+ // Delay the initial run by 30 s to let the broker connection
16901
+ // settle after boot (avoids a probe race with the boot-card
16902
+ // quota probe that fires in the first few seconds).
16903
+ setTimeout(() => {
16904
+ void runQuotaWatch().catch((err) => {
16905
+ process.stderr.write(`telegram gateway: quota-watch initial run failed: ${err}\n`)
16906
+ })
16907
+ }, 30_000)
16908
+ setInterval(() => {
16909
+ void runQuotaWatch().catch((err) => {
16910
+ process.stderr.write(`telegram gateway: quota-watch scheduled run failed: ${err}\n`)
16911
+ })
16912
+ }, QUOTA_WATCH_POLL_MS).unref()
16913
+ }
16914
+
16708
16915
  // Restart-watchdog: poll systemd's NRestarts for the agent unit.
16709
16916
  // When the count ticks up without a corresponding restart-pending
16710
16917
  // marker (= user-initiated /restart), emit an operator event.
@@ -257,16 +257,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
257
257
  text: [
258
258
  `💳 <b>Credit balance too low</b> for <b>${agent}</b>.`,
259
259
  detail ? `<i>${detail}</i>` : '',
260
- `Swap to another account slot or add a new one.`,
260
+ `Use <code>/auth use &lt;label&gt;</code> to switch account slot or <code>/auth add</code> to add one.`,
261
261
  ]
262
262
  .filter(Boolean)
263
263
  .join('\n'),
264
264
  keyboard: {
265
265
  inline_keyboard: [
266
- [
267
- { text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
268
- { text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
269
- ],
270
266
  [{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
271
267
  ],
272
268
  },
@@ -280,16 +276,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
280
276
  text: [
281
277
  `⚠️ <b>Quota exhausted</b> for <b>${agent}</b>.`,
282
278
  detail ? `<i>${detail}</i>` : '',
283
- `All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available.`,
279
+ `All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available. Use <code>/auth use &lt;label&gt;</code> to switch manually.`,
284
280
  ]
285
281
  .filter(Boolean)
286
282
  .join('\n'),
287
283
  keyboard: {
288
284
  inline_keyboard: [
289
- [
290
- { text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
291
- { text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
292
- ],
293
285
  [{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
294
286
  ],
295
287
  },
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Proactive quota threshold-tier push (#E4).
3
+ *
4
+ * Background: JTBD `track-plan-quota-live` anti-pattern: "Quota visible
5
+ * only in a separate dashboard or a command. If the user has to go
6
+ * looking, they won't, and they'll hit the wall." The existing stack
7
+ * covers the wall (auto-fallback at 99.5%, credits-watch on fatal billing
8
+ * transitions) but fires zero proactive signal at 80% — the point where
9
+ * the user can still act by switching accounts. This module closes that gap.
10
+ *
11
+ * It is a pure decision layer. It reads the broker's cached quota state
12
+ * for all accounts, classifies health via the same `classifyHealth`
13
+ * three-state machine used by the /auth dashboard, compares against a
14
+ * persisted last-notified state, and tells the gateway whether to emit
15
+ * a Telegram message + what to say. The gateway wires the actual
16
+ * `bot.api.sendMessage` call (via `swallowingApiCall`) — same as
17
+ * `credits-watch.ts`.
18
+ *
19
+ * Edge-trigger discipline: only fires on health *transitions*
20
+ * (healthy → throttling and throttling → healthy). Does NOT fire on
21
+ * healthy → blocked or blocked → healthy — `credits-watch.ts` already
22
+ * covers those via the fatal-billing path. Steady-state throttling
23
+ * never re-notifies.
24
+ *
25
+ * Scope: per-account across the whole pool, not just the active one.
26
+ * The user's natural recovery action is switching to a healthy account,
27
+ * so they need visibility into non-active accounts too.
28
+ *
29
+ * Source data: broker `listState` + `probeQuota`. `listState` is a local
30
+ * IPC call (cheap). `probeQuota` is only called on state-change (when
31
+ * we're going to send a message anyway) to get fresh numbers for the
32
+ * notification body. On no-change polls, only `listState` is called.
33
+ */
34
+
35
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
36
+ import { join } from "path";
37
+ import type { AccountSnapshot } from "./auth-snapshot-format.js";
38
+ import {
39
+ classifyHealth,
40
+ type AccountHealth,
41
+ THROTTLING_THRESHOLD_PCT,
42
+ bindingWindow,
43
+ formatRelative,
44
+ fmtPct,
45
+ } from "./auth-snapshot-format.js";
46
+ import type { QuotaUtilization } from "./quota-check.js";
47
+
48
+ const STATE_FILE = "quota-watch.json";
49
+
50
+ // ─── State types ──────────────────────────────────────────────────────────────
51
+
52
+ /**
53
+ * Per-account last-notified health. We only care about the
54
+ * healthy ↔ throttling boundary — blocked is `credits-watch`'s domain.
55
+ * `null` means "never notified" (treat as healthy for transition logic).
56
+ */
57
+ export type QuotaWatchHealth = "healthy" | "throttling" | null;
58
+
59
+ export interface QuotaWatchAccountState {
60
+ /** Last health we sent a notification for. null = never notified. */
61
+ lastNotifiedHealth: QuotaWatchHealth;
62
+ /** Wall-clock ms of the last notification. */
63
+ lastNotifiedAt: number;
64
+ }
65
+
66
+ export type QuotaWatchState = Record<string, QuotaWatchAccountState>;
67
+
68
/** Returns a fresh, empty per-account state map (no accounts tracked yet). */
export function emptyQuotaWatchState(): QuotaWatchState {
  const blank: QuotaWatchState = {};
  return blank;
}
71
+
72
/**
 * Returns the state used for an account that has never been notified about:
 * no last-notified health and a zero timestamp.
 */
export function emptyAccountState(): QuotaWatchAccountState {
  const neverNotified: QuotaWatchAccountState = {
    lastNotifiedHealth: null,
    lastNotifiedAt: 0,
  };
  return neverNotified;
}
75
+
76
+ // ─── Decision logic ───────────────────────────────────────────────────────────
77
+
78
+ export type QuotaWatchTransition =
79
+ | "entered-throttling"
80
+ | "recovered-to-healthy";
81
+
82
+ export type QuotaWatchDecision =
83
+ | {
84
+ kind: "notify";
85
+ accountLabel: string;
86
+ message: string;
87
+ newAccountState: QuotaWatchAccountState;
88
+ transition: QuotaWatchTransition;
89
+ }
90
+ | { kind: "skip"; accountLabel: string; reason: string };
91
+
92
/**
 * Decide whether one account's current quota health warrants a notification.
 *
 * The comparison is between the health we last *notified* about
 * (`prev.lastNotifiedHealth`, where `null` counts as healthy) and the health
 * `classifyHealth` reports for `snap` right now:
 *
 *   last notified   observed      result
 *   healthy         healthy       skip   (steady-state)
 *   healthy         throttling    notify (entered-throttling)
 *   throttling      healthy       notify (recovered-to-healthy)
 *   throttling      throttling    skip   (steady-state)
 *   any             blocked       skip   (credits-watch covers blocked)
 *   any             unknown       skip   (no usable quota data)
 *
 * @param args.agentName Forwarded to the message builders.
 * @param args.snap      Account snapshot to classify.
 * @param args.prev      Last-notified state for this account.
 * @param args.now       Timestamp stored as `lastNotifiedAt` on a notify.
 * @returns A `notify` decision carrying the message and the account state to
 *   persist, or a `skip` decision carrying a reason string.
 */
export function evaluateQuotaWatchAccount(args: {
  agentName: string;
  snap: AccountSnapshot;
  prev: QuotaWatchAccountState;
  now: number;
}): QuotaWatchDecision {
  const { agentName, snap, prev, now } = args;
  const accountLabel = snap.label;
  const skip = (reason: string): QuotaWatchDecision => ({
    kind: "skip",
    accountLabel,
    reason,
  });

  const observed = classifyHealth(snap);
  if (observed === "unknown" || observed === "blocked") {
    return skip(`${observed}-not-our-domain`);
  }

  // Never notified before is treated the same as "was healthy".
  const lastNotified: "healthy" | "throttling" = prev.lastNotifiedHealth ?? "healthy";
  if (observed === lastNotified) {
    return skip("steady-state");
  }

  let transition: QuotaWatchTransition | null = null;
  if (lastNotified === "healthy" && observed === "throttling") {
    transition = "entered-throttling";
  } else if (lastNotified === "throttling" && observed === "healthy") {
    transition = "recovered-to-healthy";
  }

  // Defensive fallthrough for any combination not handled above.
  if (transition === null) {
    return skip("no-matching-transition");
  }

  const enteredThrottling = transition === "entered-throttling";
  return {
    kind: "notify",
    accountLabel,
    message: enteredThrottling
      ? buildThrottlingMessage(agentName, snap)
      : buildRecoveryMessage(agentName, snap),
    newAccountState: {
      lastNotifiedHealth: enteredThrottling ? "throttling" : "healthy",
      lastNotifiedAt: now,
    },
    transition,
  };
}
162
+
163
+ // ─── Message builders ─────────────────────────────────────────────────────────
164
+
165
/**
 * Render the HTML notification sent when an account enters throttling.
 *
 * The "binding window" is whichever of the 5-hour / 7-day utilizations is
 * larger (ties go to the 5-hour window). When that window has a reset time, a
 * "refills in …" suffix is appended to the binding-window line and, for the
 * active account, to the advice line as well. Active and non-active accounts
 * get different advice text.
 *
 * @param agentName Not used in the body.
 * @param snap Snapshot of the throttling account; `snap.quota` is asserted
 *   non-null — the caller only gets here after classifyHealth reported
 *   throttling, which presumably implies quota data is present.
 * @returns The message text, with runs of 3+ newlines collapsed to one blank
 *   line and surrounding whitespace trimmed.
 */
function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): string {
  const quota = snap.quota!;
  const fivePct = quota.fiveHourUtilizationPct;
  const sevenPct = quota.sevenDayUtilizationPct;
  const fiveText = fmtPct(fivePct);
  const sevenText = fmtPct(sevenPct);

  const fiveHourBinds = Math.max(fivePct, sevenPct) === fivePct;
  const windowName = fiveHourBinds ? "5-hour" : "7-day";
  const windowResetAt = fiveHourBinds ? quota.fiveHourResetAt : quota.sevenDayResetAt;

  let refillSuffix = "";
  if (windowResetAt) {
    refillSuffix = ` · refills in ${formatRelative(windowResetAt, new Date())}`;
  }

  let advice: string;
  if (snap.isActive) {
    advice = `\nConsider <code>/auth use &lt;other-account&gt;</code> if you have a healthier account, or wait for the ${windowName} window to refill${refillSuffix}.`;
  } else {
    advice = `\nThis is a non-active account. Consider <code>/auth use ${escapeHtml(snap.label)}</code> to switch, or keep it as a fallback reserve.`;
  }

  const lines: string[] = [];
  lines.push(`🟡 <b>Quota approaching limit</b> — <code>${escapeHtml(snap.label)}</code>`);
  lines.push(``);
  lines.push(`${fiveText} of 5h · ${sevenText} of 7d`);
  lines.push(`Binding window: ${windowName}${refillSuffix}`);
  lines.push(advice);
  lines.push(``);
  lines.push(`<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`);
  lines.push(`<i>Run /auth for full fleet status or /usage for the active account.</i>`);

  const joined = lines.join("\n");
  return joined.replace(/\n\n\n+/g, "\n\n").trim();
}
199
+
200
/**
 * Render the HTML notification sent when an account drops back below the
 * throttling threshold.
 *
 * @param agentName Not used in the body.
 * @param snap Snapshot of the recovered account. When `snap.quota` is absent
 *   the usage line falls back to a fixed "unavailable" sentence.
 * @returns Headline, usage line and threshold footer separated by blank lines.
 */
function buildRecoveryMessage(agentName: string, snap: AccountSnapshot): string {
  const quota = snap.quota;

  let usageLine = "Current quota data unavailable.";
  if (quota) {
    const fiveText = fmtPct(quota.fiveHourUtilizationPct);
    const sevenText = fmtPct(quota.sevenDayUtilizationPct);
    usageLine = `Current: ${fiveText} of 5h · ${sevenText} of 7d`;
  }

  const headline = `🟢 <b>Quota back in healthy range</b> — <code>${escapeHtml(snap.label)}</code>`;
  const footer = `<i>Below ${THROTTLING_THRESHOLD_PCT}% on both windows.</i>`;

  return `${headline}\n\n${usageLine}\n\n${footer}`;
}
214
+
215
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * `s` can be embedded in an HTML-formatted message.
 *
 * Done in a single pass, so text that already looks like an entity is escaped
 * again exactly once (`&amp;` becomes `&amp;amp;`).
 */
function escapeHtml(s: string): string {
  return s.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case '"':
        return "&quot;";
      default:
        // Only the apostrophe remains in the character class.
        return "&#39;";
    }
  });
}
223
+
224
+ // ─── State persistence ────────────────────────────────────────────────────────
225
+
226
/**
 * Load the persisted quota-watch state from `<stateDir>/quota-watch.json`.
 *
 * Never throws: a missing or unreadable file, invalid JSON, or a top-level
 * value that is not a plain object all yield an empty state. Individual
 * entries are validated and malformed ones are dropped rather than failing
 * the whole file.
 *
 * Each accepted entry is rebuilt from just its two known fields, so stray
 * properties in the file are not carried into memory. The result is assembled
 * with `Object.fromEntries`, which defines every key as an own property —
 * including a label of `"__proto__"`, which a plain `result[key] = …`
 * assignment would instead treat as a prototype change.
 *
 * @param stateDir Directory containing the state file.
 * @returns Map from account label to its last-notified state.
 */
export function loadQuotaWatchState(stateDir: string): QuotaWatchState {
  let parsed: unknown;
  try {
    // readFileSync throws when the file is absent, so no separate existence
    // check is needed; the catch below covers that case too.
    parsed = JSON.parse(readFileSync(join(stateDir, STATE_FILE), "utf-8"));
  } catch {
    return emptyQuotaWatchState();
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return emptyQuotaWatchState();
  }

  const entries: Array<[string, QuotaWatchAccountState]> = [];
  for (const [label, raw] of Object.entries(parsed)) {
    if (raw === null || typeof raw !== "object" || Array.isArray(raw)) continue;

    const candidate = raw as Record<string, unknown>;
    const health = candidate.lastNotifiedHealth;
    const at = candidate.lastNotifiedAt;

    if (health !== null && health !== "healthy" && health !== "throttling") continue;
    if (typeof at !== "number" || !Number.isFinite(at)) continue;

    entries.push([label, { lastNotifiedHealth: health as QuotaWatchHealth, lastNotifiedAt: at }]);
  }
  return Object.fromEntries(entries);
}
258
+
259
/**
 * Write the quota-watch state to `<stateDir>/quota-watch.json` as
 * pretty-printed JSON with a trailing newline, creating `stateDir` first if
 * needed. The file is written with mode 0o600. Errors from the filesystem
 * calls propagate to the caller.
 */
export function saveQuotaWatchState(stateDir: string, state: QuotaWatchState): void {
  mkdirSync(stateDir, { recursive: true });
  const target = join(stateDir, STATE_FILE);
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(target, serialized, { mode: 0o600 });
}
264
+
265
/**
 * Produce a new state map in which `accountLabel` maps to `accountState`,
 * leaving every other entry of `current` as it was. `current` itself is not
 * modified. Intended for use after an `evaluateQuotaWatchAccount` call that
 * returned `kind: "notify"`, to build the map that will be persisted.
 */
export function patchQuotaWatchState(
  current: QuotaWatchState,
  accountLabel: string,
  accountState: QuotaWatchAccountState,
): QuotaWatchState {
  const override = { [accountLabel]: accountState };
  return { ...current, ...override };
}