switchroom 0.14.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,6 +58,7 @@ import {
58
58
  makeEmptyActivityState,
59
59
  registerAndRender,
60
60
  describeToolUse,
61
+ appendActivityLine,
61
62
  type ActivityState,
62
63
  } from '../tool-activity-summary.js'
63
64
  import { toolLabel } from '../tool-labels.js'
@@ -373,6 +374,14 @@ import {
373
374
  loadCreditState,
374
375
  saveCreditState,
375
376
  } from '../credits-watch.js'
377
+ import {
378
+ evaluateQuotaWatchAccount,
379
+ loadQuotaWatchState,
380
+ saveQuotaWatchState,
381
+ patchQuotaWatchState,
382
+ emptyAccountState,
383
+ } from '../quota-watch.js'
384
+ import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
376
385
  import {
377
386
  writeTurnActiveMarker,
378
387
  touchTurnActiveMarker,
@@ -1330,6 +1339,11 @@ type CurrentTurn = {
1330
1339
  activityInFlight: Promise<void> | null
1331
1340
  activityPendingRender: string | null
1332
1341
  activityLastSentRender: string | null
1342
+ // Draft-mirror Phase 2: accumulating friendly-action feed for this turn
1343
+ // (DRAFT_MIRROR only). Each non-surface tool_use appends a line via
1344
+ // `appendActivityLine`; the feed renders as a capped chronological list
1345
+ // in the ephemeral draft and clears on reply. Reset per turn.
1346
+ mirrorLines: string[]
1333
1347
  // Issue #195 — answer-lane streaming. Lazily created on the first text
1334
1348
  // event of a turn (once enough text has accumulated, the stream itself
1335
1349
  // gates on minInitialChars). Materialized and cleared at turn_end.
@@ -3898,11 +3912,12 @@ const ipcServer: IpcServer = createIpcServer({
3898
3912
  const updateOutcomeLine = (() => {
3899
3913
  try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
3900
3914
  })()
3915
+ const resolvedAgentDirForCard = agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp')
3901
3916
  startBootCard(chatId, threadId, botApiForCard, {
3902
3917
  agentName: agentDisplayName,
3903
3918
  agentSlug,
3904
3919
  version: formatBootVersion(),
3905
- agentDir: agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp'),
3920
+ agentDir: resolvedAgentDirForCard,
3906
3921
  gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
3907
3922
  restartReason: reason,
3908
3923
  restartAgeMs: markerAgeMs,
@@ -3911,6 +3926,7 @@ const ipcServer: IpcServer = createIpcServer({
3911
3926
  probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
3912
3927
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
3913
3928
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
3929
+ configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
3914
3930
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
3915
3931
  }, ackMsgId).then(handle => {
3916
3932
  activeBootCard = handle
@@ -6991,6 +7007,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6991
7007
  activityInFlight: null,
6992
7008
  activityPendingRender: null,
6993
7009
  activityLastSentRender: null,
7010
+ mirrorLines: [],
6994
7011
  answerStream: null,
6995
7012
  isDm: isDmChatId(ev.chatId),
6996
7013
  }
@@ -7136,11 +7153,12 @@ function handleSessionEvent(ev: SessionEvent): void {
7136
7153
  // exactly once at a time and re-running until pending matches
7137
7154
  // the last-sent. Captures `turn` so a late drain after turn-swap
7138
7155
  // can't corrupt the next turn's atom.
7139
- // DRAFT_MIRROR (RFC draft-mirror-preview): render each tool_use as a
7140
- // human-friendly line in the live preview, using the model-authored
7141
- // descriptive field (Bash.description, Read/Edit file basename,
7142
- // hindsight→"Searching memory", etc. — see describeToolUse). Latest
7143
- // action wins (the draft shows "doing X" live), clears on reply.
7156
+ // DRAFT_MIRROR (RFC draft-mirror-preview): accumulate each tool_use
7157
+ // into a human-friendly running feed in the live preview, using the
7158
+ // model-authored descriptive field (Bash.description, Read/Edit file
7159
+ // basename, hindsight→"Searching memory", etc. — see describeToolUse
7160
+ // / appendActivityLine). The draft shows the turn's actions as a
7161
+ // capped chronological list (Claude Code-style), clears on reply.
7144
7162
  // Never surfaces raw shell/query syntax — option A, uniform across
7145
7163
  // code + non-code agents.
7146
7164
  //
@@ -7149,7 +7167,7 @@ function handleSessionEvent(ev: SessionEvent): void {
7149
7167
  // pre-draft-mirror behavior.
7150
7168
  if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
7151
7169
  const rendered = DRAFT_MIRROR_ENABLED
7152
- ? describeToolUse(name, ev.input)
7170
+ ? appendActivityLine(turn.mirrorLines, name, ev.input)
7153
7171
  : registerAndRender(turn.toolActivity, name)
7154
7172
  if (rendered != null) {
7155
7173
  turn.activityPendingRender = rendered
@@ -11862,6 +11880,183 @@ async function runCreditWatch(): Promise<void> {
11862
11880
  }
11863
11881
  }
11864
11882
 
11883
/**
 * One pass of the quota threshold-tier push loop (#E4).
 *
 * Steps, in order:
 *   1. Fetch broker state with `listState` and build account snapshots from
 *      the quota data it carries (`buildSnapshotsFromCachedState`). No
 *      `probeQuota` call is made at this stage.
 *   2. Evaluate every snapshot against the persisted per-account state
 *      (`<stateDir>/quota-watch.json`). If no account yields a `notify`
 *      decision the function returns without probing or writing state.
 *   3. For the accounts that do need a notification, issue ONE batched
 *      `probeQuota` call covering only those labels, then re-evaluate each
 *      with the fresh quota so the message body carries current numbers.
 *      A failed probe is logged and the cached decision is used instead.
 *      If the fresh evaluation says `skip`, the notification is dropped.
 *   4. Send each message to every chat in `access.allowFrom` through
 *      `swallowingApiCall` (best-effort), then persist the updated state.
 *
 * Which transitions count as `notify` is decided entirely by
 * `evaluateQuotaWatchAccount`; this function only orchestrates I/O.
 *
 * @returns Resolves once the pass is complete. Broker/listState failures are
 *   logged to stderr and end the pass early rather than rejecting.
 */
async function runQuotaWatch(): Promise<void> {
  const agentName = getMyAgentName()
  const stateDir = STATE_DIR

  const brokerClient = await getAuthBrokerClient(agentName)
  if (!brokerClient) {
    process.stderr.write('telegram gateway: quota-watch: broker client unavailable — skipping\n')
    return
  }

  let listStateData: Awaited<ReturnType<typeof brokerClient.listState>>
  try {
    listStateData = await brokerClient.listState()
  } catch (err) {
    process.stderr.write(`telegram gateway: quota-watch: listState failed: ${err}\n`)
    return
  }

  if (!listStateData.accounts || listStateData.accounts.length === 0) {
    return // No accounts — nothing to watch.
  }

  // Snapshots are built from the listState payload only; accounts without
  // quota data are expected to be skipped by evaluateQuotaWatchAccount.
  const snapshots = buildSnapshotsFromCachedState(listStateData)
  const watchState = loadQuotaWatchState(stateDir)
  const now = Date.now()
  const access = loadAccess()

  // First pass: keep the snapshot itself next to its (already narrowed)
  // notify decision, so the second pass needs no label→index lookup and
  // always enriches the snapshot that actually produced the decision.
  type NotifyDecision = Extract<ReturnType<typeof evaluateQuotaWatchAccount>, { kind: 'notify' }>
  const pendingTransitions: Array<{ snap: (typeof snapshots)[number]; decision: NotifyDecision }> = []
  for (const snap of snapshots) {
    const prev = watchState[snap.label] ?? emptyAccountState()
    const decision = evaluateQuotaWatchAccount({ agentName, snap, prev, now })
    if (decision.kind === 'notify') {
      pendingTransitions.push({ snap, decision })
    }
  }

  if (pendingTransitions.length === 0) {
    return // Steady-state: no notifications, no probes, no state write.
  }

  // One batched probe for just the crossing accounts, used only to refresh
  // the numbers shown in the notification body.
  const crossingLabels = pendingTransitions.map(t => t.snap.label)
  const freshProbeMap = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
  try {
    const probeData = await brokerClient.probeQuota(crossingLabels, 8000)
    for (const entry of probeData.results) {
      freshProbeMap.set(entry.label, entry.result)
    }
  } catch (err) {
    // Probe failed — still notify using the cached decision; the user should
    // hear about the crossing even if the numbers are slightly stale.
    process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
  }

  let mutatedState = watchState
  const notifications: Array<{ message: string; accountLabel: string }> = []

  for (const { snap, decision } of pendingTransitions) {
    const accountLabel = snap.label
    let finalDecision = decision

    const freshResult = freshProbeMap.get(accountLabel)
    if (freshResult && freshResult.ok) {
      const re = evaluateQuotaWatchAccount({
        agentName,
        snap: { ...snap, quota: freshResult.data },
        prev: watchState[accountLabel] ?? emptyAccountState(),
        now,
      })
      // Fresh data no longer shows a notifiable transition — drop it.
      if (re.kind === 'skip') continue
      // Same transition confirmed: prefer the message built from fresh data.
      // (A different transition keeps the cached decision, as before.)
      if (re.transition === decision.transition) {
        finalDecision = re
      }
    }

    notifications.push({ message: finalDecision.message, accountLabel })
    mutatedState = patchQuotaWatchState(mutatedState, accountLabel, finalDecision.newAccountState)
  }

  if (notifications.length === 0) {
    return // All transitions resolved by the time of the live probe.
  }

  // One message per crossing account, fanned out to every allowed chat.
  for (const { message, accountLabel } of notifications) {
    for (const chat_id of access.allowFrom) {
      // Best-effort send: swallowingApiCall turns API failures into a stderr
      // log so one bad chat cannot abort the loop and leave the remaining
      // allowFrom chats unnotified (wrapping contract, #1075).
      await swallowingApiCall(
        () =>
          bot.api.sendMessage(chat_id, message, {
            parse_mode: 'HTML',
            link_preview_options: { is_disabled: true },
          }),
        { chat_id, verb: 'quota-watch.notify' },
      )
    }
    process.stderr.write(`telegram gateway: quota-watch: notified transition for account=${accountLabel}\n`)
  }

  // Persist updated state regardless of whether sends succeeded, so a
  // transition is announced at most once.
  try {
    saveQuotaWatchState(stateDir, mutatedState)
  } catch (err) {
    process.stderr.write(`telegram gateway: quota-watch state persist failed: ${err}\n`)
  }
}
12059
+
11865
12060
  bot.command("auth", async ctx => {
11866
12061
  // sec WS7-F2b (#1394): `/auth` drives the auth-broker credential
11867
12062
  // lifecycle (`/auth add` mints/attaches an Anthropic account token,
@@ -12146,8 +12341,7 @@ function resolveAgentDirForName(agent: string): string | null {
12146
12341
  * restart — systemctl --user restart switchroom-<agent>
12147
12342
  * reauth — delegate to runSwitchroomAuthCommand (same flow as /auth reauth)
12148
12343
  * logs — post last 30 lines of journalctl for the agent
12149
- * swap-slot, add-slotPhase 4c will wire these; for now toast with the
12150
- * equivalent CLI command for the user to run manually.
12344
+ * slot management buttons removed (E5); use /auth use or /auth add instead.
12151
12345
  */
12152
12346
  /**
12153
12347
  * Issue #44: handle taps on the deferred-secret card's inline buttons.
@@ -13776,15 +13970,6 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
13776
13970
  }
13777
13971
  return
13778
13972
  }
13779
- case 'swap-slot':
13780
- case 'add-slot': {
13781
- await ctx.answerCallbackQuery({ text: 'Phase 4c will wire this' }).catch(() => {})
13782
- const cmd = action === 'swap-slot' ? `auth use ${agent} <slot-name>` : `auth add ${agent}`
13783
- await ctx.reply(`Phase 4c will wire ${action} buttons. Until then, run in terminal: <code>switchroom ${cmd}</code>`, {
13784
- parse_mode: 'HTML',
13785
- })
13786
- return
13787
- }
13788
13973
  default: {
13789
13974
  await ctx.answerCallbackQuery({ text: `Unknown action: ${action}` }).catch(() => {})
13790
13975
  return
@@ -14679,7 +14864,7 @@ bot.on('callback_query:data', async ctx => {
14679
14864
 
14680
14865
  // op:<action>:<encoded-agent> callbacks from operator-events.ts
14681
14866
  // renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
14682
- // Actions: dismiss, restart, reauth, swap-slot, add-slot, logs.
14867
+ // Actions: dismiss, restart, reauth, logs.
14683
14868
  if (data.startsWith('op:')) {
14684
14869
  await handleOperatorEventCallback(ctx, data)
14685
14870
  return
@@ -16641,11 +16826,12 @@ void (async () => {
16641
16826
  const updateOutcomeLine = (() => {
16642
16827
  try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
16643
16828
  })()
16829
+ const resolvedAgentDirForBootCard = agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug)
16644
16830
  const handle = await startBootCard(chatId, threadId, botApiForCard, {
16645
16831
  agentName: agentDisplayName,
16646
16832
  agentSlug,
16647
16833
  version: formatBootVersion(),
16648
- agentDir: agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug),
16834
+ agentDir: resolvedAgentDirForBootCard,
16649
16835
  gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
16650
16836
  restartReason: reason,
16651
16837
  restartAgeMs: markerAgeMs,
@@ -16654,6 +16840,7 @@ void (async () => {
16654
16840
  probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
16655
16841
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
16656
16842
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
16843
+ configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
16657
16844
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
16658
16845
  }, ackMsgId)
16659
16846
  activeBootCard = handle
@@ -16705,6 +16892,34 @@ void (async () => {
16705
16892
  }, CREDIT_WATCH_POLL_MS).unref()
16706
16893
  }
16707
16894
 
16895
+ // Proactive quota threshold-tier push (#E4). Reads broker cached
16896
+ // quota for all accounts in the pool, classifies health via
16897
+ // classifyHealth, and fires one Telegram message per
16898
+ // healthy ↔ throttling transition (edge-triggered). Does NOT
16899
+ // cover healthy → blocked or blocked → healthy — credits-watch
16900
+ // handles those fatal-billing transitions above.
16901
+ //
16902
+ // Cadence: 15 min by default (same as credit-watch). Each poll
16903
+ // calls broker listState (local IPC, cheap) + probeQuota only
16904
+ // when a state-change is detected (to get fresh numbers for
16905
+ // the notification body). SWITCHROOM_QUOTA_WATCH_POLL_MS=0 disables.
16906
+ const QUOTA_WATCH_POLL_MS = Number(process.env.SWITCHROOM_QUOTA_WATCH_POLL_MS ?? 15 * 60_000)
16907
+ if (QUOTA_WATCH_POLL_MS > 0) {
16908
+ // Delay the initial run by 30 s to let the broker connection
16909
+ // settle after boot (avoids a probe race with the boot-card
16910
+ // quota probe that fires in the first few seconds).
16911
+ setTimeout(() => {
16912
+ void runQuotaWatch().catch((err) => {
16913
+ process.stderr.write(`telegram gateway: quota-watch initial run failed: ${err}\n`)
16914
+ })
16915
+ }, 30_000)
16916
+ setInterval(() => {
16917
+ void runQuotaWatch().catch((err) => {
16918
+ process.stderr.write(`telegram gateway: quota-watch scheduled run failed: ${err}\n`)
16919
+ })
16920
+ }, QUOTA_WATCH_POLL_MS).unref()
16921
+ }
16922
+
16708
16923
  // Restart-watchdog: poll systemd's NRestarts for the agent unit.
16709
16924
  // When the count ticks up without a corresponding restart-pending
16710
16925
  // marker (= user-initiated /restart), emit an operator event.
@@ -257,16 +257,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
257
257
  text: [
258
258
  `💳 <b>Credit balance too low</b> for <b>${agent}</b>.`,
259
259
  detail ? `<i>${detail}</i>` : '',
260
- `Swap to another account slot or add a new one.`,
260
+ `Use <code>/auth use &lt;label&gt;</code> to switch account slot or <code>/auth add</code> to add one.`,
261
261
  ]
262
262
  .filter(Boolean)
263
263
  .join('\n'),
264
264
  keyboard: {
265
265
  inline_keyboard: [
266
- [
267
- { text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
268
- { text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
269
- ],
270
266
  [{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
271
267
  ],
272
268
  },
@@ -280,16 +276,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
280
276
  text: [
281
277
  `⚠️ <b>Quota exhausted</b> for <b>${agent}</b>.`,
282
278
  detail ? `<i>${detail}</i>` : '',
283
- `All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available.`,
279
+ `All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available. Use <code>/auth use &lt;label&gt;</code> to switch manually.`,
284
280
  ]
285
281
  .filter(Boolean)
286
282
  .join('\n'),
287
283
  keyboard: {
288
284
  inline_keyboard: [
289
- [
290
- { text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
291
- { text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
292
- ],
293
285
  [{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
294
286
  ],
295
287
  },
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Proactive quota threshold-tier push (#E4).
3
+ *
4
+ * Background: JTBD `track-plan-quota-live` anti-pattern: "Quota visible
5
+ * only in a separate dashboard or a command. If the user has to go
6
+ * looking, they won't, and they'll hit the wall." The existing stack
7
+ * covers the wall (auto-fallback at 99.5%, credits-watch on fatal billing
8
+ * transitions) but fires zero proactive signal at 80% — the point where
9
+ * the user can still act by switching accounts. This module closes that gap.
10
+ *
11
+ * It is a pure decision layer. It reads the broker's cached quota state
12
+ * for all accounts, classifies health via the same `classifyHealth`
13
+ * three-state machine used by the /auth dashboard, compares against a
14
+ * persisted last-notified state, and tells the gateway whether to emit
15
+ * a Telegram message + what to say. The gateway wires the actual
16
+ * `bot.api.sendMessage` call (via `swallowingApiCall`) — same as
17
+ * `credits-watch.ts`.
18
+ *
19
+ * Edge-trigger discipline: only fires on health *transitions*
20
+ * (healthy → throttling and throttling → healthy). Does NOT fire on
21
+ * healthy → blocked or blocked → healthy — `credits-watch.ts` already
22
+ * covers those via the fatal-billing path. Steady-state throttling
23
+ * never re-notifies.
24
+ *
25
+ * Scope: per-account across the whole pool, not just the active one.
26
+ * The user's natural recovery action is switching to a healthy account,
27
+ * so they need visibility into non-active accounts too.
28
+ *
29
+ * Source data: broker `listState` + `probeQuota`. `listState` is a local
30
+ * IPC call (cheap). `probeQuota` is only called on state-change (when
31
+ * we're going to send a message anyway) to get fresh numbers for the
32
+ * notification body. On no-change polls, only `listState` is called.
33
+ */
34
+
35
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
36
+ import { join } from "path";
37
+ import type { AccountSnapshot } from "./auth-snapshot-format.js";
38
+ import {
39
+ classifyHealth,
40
+ type AccountHealth,
41
+ THROTTLING_THRESHOLD_PCT,
42
+ bindingWindow,
43
+ formatRelative,
44
+ fmtPct,
45
+ } from "./auth-snapshot-format.js";
46
+ import type { QuotaUtilization } from "./quota-check.js";
47
+
48
+ const STATE_FILE = "quota-watch.json";
49
+
50
+ // ─── State types ──────────────────────────────────────────────────────────────
51
+
52
+ /**
53
+ * Per-account last-notified health. We only care about the
54
+ * healthy ↔ throttling boundary — blocked is `credits-watch`'s domain.
55
+ * `null` means "never notified" (treat as healthy for transition logic).
56
+ */
57
+ export type QuotaWatchHealth = "healthy" | "throttling" | null;
58
+
59
+ export interface QuotaWatchAccountState {
60
+ /** Last health we sent a notification for. null = never notified. */
61
+ lastNotifiedHealth: QuotaWatchHealth;
62
+ /** Wall-clock ms of the last notification. */
63
+ lastNotifiedAt: number;
64
+ }
65
+
66
+ export type QuotaWatchState = Record<string, QuotaWatchAccountState>;
67
+
68
/** Returns a new, empty label → account-state map (a fresh object on every call). */
export function emptyQuotaWatchState(): QuotaWatchState {
  const blank: QuotaWatchState = {};
  return blank;
}
71
+
72
/**
 * Returns the state used for an account that has never been notified about:
 * no last-notified health and a zero timestamp.
 */
export function emptyAccountState(): QuotaWatchAccountState {
  const neverNotified: QuotaWatchAccountState = {
    lastNotifiedHealth: null,
    lastNotifiedAt: 0,
  };
  return neverNotified;
}
75
+
76
+ // ─── Decision logic ───────────────────────────────────────────────────────────
77
+
78
+ export type QuotaWatchTransition =
79
+ | "entered-throttling"
80
+ | "recovered-to-healthy";
81
+
82
+ export type QuotaWatchDecision =
83
+ | {
84
+ kind: "notify";
85
+ accountLabel: string;
86
+ message: string;
87
+ newAccountState: QuotaWatchAccountState;
88
+ transition: QuotaWatchTransition;
89
+ }
90
+ | { kind: "skip"; accountLabel: string; reason: string };
91
+
92
/**
 * Decide whether one account's current health warrants a notification.
 *
 * The snapshot is classified with `classifyHealth` and compared with the
 * health we last notified about (`null` is treated as "healthy").
 *
 *   observed unknown / blocked        → skip (`<health>-not-our-domain`)
 *   observed === last notified        → skip (`steady-state`)
 *   last healthy    → now throttling  → notify (`entered-throttling`)
 *   last throttling → now healthy     → notify (`recovered-to-healthy`)
 *   anything else                     → skip (`no-matching-transition`)
 *
 * @param args.agentName Forwarded to the message builders.
 * @param args.snap      Account snapshot to classify.
 * @param args.prev      Persisted last-notified state for this account.
 * @param args.now       Timestamp (ms) stamped into the new account state.
 * @returns A `notify` decision carrying the message and the state to persist,
 *   or a `skip` decision with a short reason string.
 */
export function evaluateQuotaWatchAccount(args: {
  agentName: string;
  snap: AccountSnapshot;
  prev: QuotaWatchAccountState;
  now: number;
}): QuotaWatchDecision {
  const accountLabel = args.snap.label;
  const observed = classifyHealth(args.snap);

  // Unknown and blocked accounts are never handled here.
  if (observed === "unknown" || observed === "blocked") {
    return { kind: "skip", accountLabel, reason: `${observed}-not-our-domain` };
  }

  // Never notified counts as healthy.
  const lastNotified: "healthy" | "throttling" = args.prev.lastNotifiedHealth ?? "healthy";
  if (observed === lastNotified) {
    return { kind: "skip", accountLabel, reason: "steady-state" };
  }

  // From here on the observed health differs from the last notified one, so
  // the observed value alone identifies the direction of the transition.
  switch (observed) {
    case "throttling":
      return {
        kind: "notify",
        accountLabel,
        message: buildThrottlingMessage(args.agentName, args.snap),
        newAccountState: { lastNotifiedHealth: "throttling", lastNotifiedAt: args.now },
        transition: "entered-throttling",
      };
    case "healthy":
      return {
        kind: "notify",
        accountLabel,
        message: buildRecoveryMessage(args.agentName, args.snap),
        newAccountState: { lastNotifiedHealth: "healthy", lastNotifiedAt: args.now },
        transition: "recovered-to-healthy",
      };
    default:
      // Defensive fallback for any health value not handled above.
      return { kind: "skip", accountLabel, reason: "no-matching-transition" };
  }
}
162
+
163
+ // ─── Message builders ─────────────────────────────────────────────────────────
164
+
165
/**
 * Build the HTML notification body for an account that entered throttling.
 *
 * The "binding window" is whichever of the 5-hour / 7-day utilizations is
 * larger (5-hour wins ties). Its reset time, when present, is rendered as a
 * relative "refills in …" hint. Active accounts get advice to switch away or
 * wait; non-active accounts get a note about switching to or reserving them.
 *
 * @param agentName Currently unused; kept for signature parity with callers.
 * @param snap      Snapshot whose `quota` must be non-null (the caller only
 *                  invokes this after classifying the account as throttling).
 * @returns The message text, with runs of 3+ newlines collapsed and trimmed.
 */
function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): string {
  const quota = snap.quota!; // non-null by caller contract (see above)

  const fiveHourBinds =
    Math.max(quota.fiveHourUtilizationPct, quota.sevenDayUtilizationPct) ===
    quota.fiveHourUtilizationPct;
  const winLabel = fiveHourBinds ? "5-hour" : "7-day";
  const resetAt = fiveHourBinds ? quota.fiveHourResetAt : quota.sevenDayResetAt;

  let resetStr = "";
  if (resetAt) {
    resetStr = ` · refills in ${formatRelative(resetAt, new Date())}`;
  }

  const escapedLabel = escapeHtml(snap.label);

  // Exactly one of the two advice variants applies; both start with a
  // newline so the advice is separated from the "Binding window" line.
  const advice = snap.isActive
    ? `\nConsider <code>/auth use &lt;other-account&gt;</code> if you have a healthier account, or wait for the ${winLabel} window to refill${resetStr}.`
    : `\nThis is a non-active account. Consider <code>/auth use ${escapedLabel}</code> to switch, or keep it as a fallback reserve.`;

  const lines = [
    `🟡 <b>Quota approaching limit</b> — <code>${escapedLabel}</code>`,
    "",
    `${fmtPct(quota.fiveHourUtilizationPct)} of 5h · ${fmtPct(quota.sevenDayUtilizationPct)} of 7d`,
    `Binding window: ${winLabel}${resetStr}`,
    advice,
    "",
    `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
    `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
  ];

  // Collapse any run of three or more newlines down to one blank line.
  const collapsed = lines.join("\n").replace(/\n\n\n+/g, "\n\n");
  return collapsed.trim();
}
199
+
200
/**
 * Build the HTML notification body for an account that dropped back into the
 * healthy range.
 *
 * @param agentName Currently unused; kept for signature parity with callers.
 * @param snap      Snapshot of the recovered account; `quota` may be absent,
 *                  in which case a placeholder line is shown instead of numbers.
 * @returns Header, utilization line and threshold footer separated by blank lines.
 */
function buildRecoveryMessage(agentName: string, snap: AccountSnapshot): string {
  const { quota, label } = snap;

  let utilLine = "Current quota data unavailable.";
  if (quota) {
    utilLine = `Current: ${fmtPct(quota.fiveHourUtilizationPct)} of 5h · ${fmtPct(quota.sevenDayUtilizationPct)} of 7d`;
  }

  const header = `🟢 <b>Quota back in healthy range</b> — <code>${escapeHtml(label)}</code>`;
  const footer = `<i>Below ${THROTTLING_THRESHOLD_PCT}% on both windows.</i>`;

  return `${header}\n\n${utilLine}\n\n${footer}`;
}
214
+
215
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * `s` can be embedded in an HTML-formatted message.
 *
 * @param s Raw text.
 * @returns `s` with each special character replaced by its entity.
 */
function escapeHtml(s: string): string {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Single pass: every matched character is replaced exactly once, so an
  // ampersand produced by an earlier replacement is never escaped again.
  return s.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
223
+
224
+ // ─── State persistence ────────────────────────────────────────────────────────
225
+
226
/**
 * Runtime check that `val` has the persisted per-account shape:
 * `lastNotifiedHealth` is null / "healthy" / "throttling" and
 * `lastNotifiedAt` is a finite number.
 */
function isQuotaWatchAccountState(val: unknown): val is QuotaWatchAccountState {
  if (val === null || typeof val !== "object" || Array.isArray(val)) return false;
  const rec = val as Record<string, unknown>;
  const health = rec.lastNotifiedHealth;
  const at = rec.lastNotifiedAt;
  return (
    (health === null || health === "healthy" || health === "throttling") &&
    typeof at === "number" &&
    Number.isFinite(at)
  );
}

/**
 * Load the persisted quota-watch state from `<stateDir>/quota-watch.json`.
 *
 * Never throws: a missing file, unreadable file, invalid JSON, or a top-level
 * value that is not a plain object all yield an empty state. Individual
 * entries that fail validation are dropped instead of failing the whole file.
 *
 * Each accepted entry is copied field-by-field, so unknown extra properties
 * in the file are not carried into memory (and therefore not written back on
 * the next save).
 *
 * @param stateDir Directory that holds the state file.
 * @returns Map of account label → last-notified state.
 */
export function loadQuotaWatchState(stateDir: string): QuotaWatchState {
  const path = join(stateDir, STATE_FILE);
  if (!existsSync(path)) return emptyQuotaWatchState();
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return emptyQuotaWatchState();
    }

    const result: QuotaWatchState = {};
    for (const [key, val] of Object.entries(parsed)) {
      // JSON.parse can produce an own "__proto__" key; assigning it onto a
      // plain object would replace the object's prototype instead of adding
      // an entry, so such a key is treated as malformed and dropped.
      if (key === "__proto__") continue;
      if (!isQuotaWatchAccountState(val)) continue;
      result[key] = {
        lastNotifiedHealth: val.lastNotifiedHealth,
        lastNotifiedAt: val.lastNotifiedAt,
      };
    }
    return result;
  } catch {
    // Read or parse failure — deliberately best-effort: start from empty.
    return emptyQuotaWatchState();
  }
}
258
+
259
/**
 * Write the quota-watch state to `<stateDir>/quota-watch.json` as
 * pretty-printed JSON with a trailing newline, creating `stateDir` first if
 * needed. The file is written with mode 0o600.
 *
 * @param stateDir Directory that holds the state file.
 * @param state    Full label → account-state map to persist.
 * @throws Whatever `mkdirSync` / `writeFileSync` throw; callers handle it.
 */
export function saveQuotaWatchState(stateDir: string, state: QuotaWatchState): void {
  mkdirSync(stateDir, { recursive: true });
  const target = join(stateDir, STATE_FILE);
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(target, serialized, { mode: 0o600 });
}
264
+
265
/**
 * Return a copy of `current` in which `accountLabel` maps to `accountState`.
 * The input map is not mutated; all other entries are carried over as-is.
 *
 * @param current      Existing label → account-state map.
 * @param accountLabel Label of the account to set or replace.
 * @param accountState New state for that account.
 * @returns A new map containing the update.
 */
export function patchQuotaWatchState(
  current: QuotaWatchState,
  accountLabel: string,
  accountState: QuotaWatchAccountState,
): QuotaWatchState {
  const override: QuotaWatchState = { [accountLabel]: accountState };
  return { ...current, ...override };
}