switchroom 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +16 -1
- package/dist/cli/switchroom.js +1082 -873
- package/dist/host-control/main.js +1 -1
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +1 -1
- package/telegram-plugin/auth-snapshot-format.ts +47 -1
- package/telegram-plugin/dist/gateway/gateway.js +967 -542
- package/telegram-plugin/gateway/boot-card.ts +100 -0
- package/telegram-plugin/gateway/config-snapshot.ts +274 -0
- package/telegram-plugin/gateway/gateway.ts +221 -14
- package/telegram-plugin/operator-events.ts +2 -10
- package/telegram-plugin/quota-watch.ts +276 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +133 -1
- package/telegram-plugin/tests/boot-card-render.test.ts +93 -0
- package/telegram-plugin/tests/config-snapshot.test.ts +409 -0
- package/telegram-plugin/tests/operator-events.test.ts +12 -6
- package/telegram-plugin/tests/quota-watch.test.ts +366 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +48 -0
- package/telegram-plugin/turn-flush-safety.ts +47 -0
- package/telegram-plugin/uat/assertions.ts +4 -4
|
@@ -373,6 +373,14 @@ import {
|
|
|
373
373
|
loadCreditState,
|
|
374
374
|
saveCreditState,
|
|
375
375
|
} from '../credits-watch.js'
|
|
376
|
+
import {
|
|
377
|
+
evaluateQuotaWatchAccount,
|
|
378
|
+
loadQuotaWatchState,
|
|
379
|
+
saveQuotaWatchState,
|
|
380
|
+
patchQuotaWatchState,
|
|
381
|
+
emptyAccountState,
|
|
382
|
+
} from '../quota-watch.js'
|
|
383
|
+
import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
|
|
376
384
|
import {
|
|
377
385
|
writeTurnActiveMarker,
|
|
378
386
|
touchTurnActiveMarker,
|
|
@@ -3898,11 +3906,12 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3898
3906
|
const updateOutcomeLine = (() => {
|
|
3899
3907
|
try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
|
|
3900
3908
|
})()
|
|
3909
|
+
const resolvedAgentDirForCard = agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp')
|
|
3901
3910
|
startBootCard(chatId, threadId, botApiForCard, {
|
|
3902
3911
|
agentName: agentDisplayName,
|
|
3903
3912
|
agentSlug,
|
|
3904
3913
|
version: formatBootVersion(),
|
|
3905
|
-
agentDir:
|
|
3914
|
+
agentDir: resolvedAgentDirForCard,
|
|
3906
3915
|
gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
|
|
3907
3916
|
restartReason: reason,
|
|
3908
3917
|
restartAgeMs: markerAgeMs,
|
|
@@ -3911,6 +3920,7 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3911
3920
|
probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
|
|
3912
3921
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
3913
3922
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
3923
|
+
configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
|
|
3914
3924
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
3915
3925
|
}, ackMsgId).then(handle => {
|
|
3916
3926
|
activeBootCard = handle
|
|
@@ -11862,6 +11872,183 @@ async function runCreditWatch(): Promise<void> {
|
|
|
11862
11872
|
}
|
|
11863
11873
|
}
|
|
11864
11874
|
|
|
11875
|
+
/**
 * One iteration of the quota threshold-tier watcher (#E4).
 *
 * Flow:
 *   1. Ask the auth broker for `listState` — a local IPC call whose
 *      per-account `last_quota` is the broker's cached utilization from
 *      earlier probeQuota calls. No network traffic on this step.
 *   2. Turn that cached state into snapshots and evaluate each account
 *      against its persisted last-notified health. Accounts without cached
 *      quota come back as `skip`, so they never raise a false alarm.
 *   3. If nothing crossed the healthy ↔ throttling boundary, return without
 *      probing, notifying, or writing state.
 *   4. Otherwise issue ONE batched `probeQuota` for just the crossing
 *      accounts so the message body shows fresh numbers, re-evaluate with the
 *      fresh data, and drop any crossing that no longer holds.
 *   5. Send one message per remaining crossing to every chat in
 *      `access.allowFrom`, then persist the updated state to the state dir.
 *
 * healthy ↔ blocked transitions are deliberately out of scope here;
 * credits-watch owns those.
 *
 * Compared with probing every account on every poll (~768 live calls/day for
 * an 8-account fleet at a 15-minute cadence), steady-state polls make zero
 * network calls and a crossing costs at most one batched probe.
 *
 * Mirrors runCreditWatch's structure and notification routing.
 */
async function runQuotaWatch(): Promise<void> {
  const agentName = getMyAgentName()
  const stateDir = STATE_DIR

  // Broker handle. Without it there is no quota data to look at.
  const brokerClient = await getAuthBrokerClient(agentName)
  if (!brokerClient) {
    process.stderr.write('telegram gateway: quota-watch: broker client unavailable — skipping\n')
    return
  }

  // Cached broker state (local IPC only — no Anthropic contact).
  let brokerState: Awaited<ReturnType<typeof brokerClient.listState>>
  try {
    brokerState = await brokerClient.listState()
  } catch (cause) {
    process.stderr.write(`telegram gateway: quota-watch: listState failed: ${cause}\n`)
    return
  }

  // Empty pool — nothing to watch.
  if (!brokerState.accounts || brokerState.accounts.length === 0) {
    return
  }

  // Snapshots come purely from the cache; a null last_quota yields a
  // quota=null snapshot which the evaluator skips.
  const snapshots = buildSnapshotsFromCachedState(brokerState)

  const savedState = loadQuotaWatchState(stateDir)
  const now = Date.now()
  const access = loadAccess()

  // label → snapshot index (if a label repeats, the last occurrence wins).
  const snapIndexByLabel = new Map<string, number>()
  snapshots.forEach((s, i) => {
    snapIndexByLabel.set(s.label, i)
  })

  // ── Pass 1: classify every account from cached data ────────────────────
  const crossings: Array<{
    accountLabel: string
    snapIndex: number
    decision: ReturnType<typeof evaluateQuotaWatchAccount>
  }> = []

  for (const snap of snapshots) {
    const decision = evaluateQuotaWatchAccount({
      agentName,
      snap,
      prev: savedState[snap.label] ?? emptyAccountState(),
      now,
    })
    if (decision.kind === 'skip') continue
    crossings.push({
      accountLabel: snap.label,
      snapIndex: snapIndexByLabel.get(snap.label) ?? -1,
      decision,
    })
  }

  if (crossings.length === 0) {
    return // Steady-state: no notifications, no probes, no state write.
  }

  // ── Live probe, limited to the crossing accounts ───────────────────────
  // One batched RPC (typically a single label). The result is used for the
  // message body, not for the initial classification.
  const freshByLabel = new Map<string, Awaited<ReturnType<typeof brokerClient.probeQuota>>['results'][number]['result']>()
  try {
    const { results } = await brokerClient.probeQuota(crossings.map(c => c.accountLabel), 8000)
    for (const { label, result } of results) {
      freshByLabel.set(label, result)
    }
  } catch (cause) {
    // Best-effort: a failed probe must not suppress the alert. The user still
    // gets told about the crossing, just with slightly stale numbers.
    process.stderr.write(`telegram gateway: quota-watch: probe for crossing accounts failed: ${cause}\n`)
  }

  // ── Pass 2: finalise each crossing, preferring fresh numbers ───────────
  let nextState = savedState
  const outbox: Array<{ message: string; accountLabel: string }> = []

  for (const { accountLabel, snapIndex, decision } of crossings) {
    // Only notify decisions are ever queued; this guard exists to narrow the
    // union so `.transition` / `.message` type-check below.
    if (decision.kind !== 'notify') continue

    let finalDecision = decision
    const fresh = freshByLabel.get(accountLabel)

    if (fresh && fresh.ok && snapIndex >= 0) {
      const recheck = evaluateQuotaWatchAccount({
        agentName,
        snap: { ...snapshots[snapIndex]!, quota: fresh.data },
        prev: savedState[accountLabel] ?? emptyAccountState(),
        now,
      })
      // Live data no longer shows a crossing (e.g. the account recovered
      // between listState and the probe) — drop the notification.
      if (recheck.kind === 'skip') continue
      // Same crossing confirmed by live data — use the fresher message.
      // Anything else keeps the cached decision.
      if (recheck.kind === 'notify' && recheck.transition === decision.transition) {
        finalDecision = recheck
      }
    }

    outbox.push({ message: finalDecision.message, accountLabel })
    nextState = patchQuotaWatchState(nextState, accountLabel, finalDecision.newAccountState)
  }

  if (outbox.length === 0) {
    return // All transitions resolved by the time of the live probe.
  }

  // ── Delivery: one message per crossing account, to every allowed chat ──
  for (const { message, accountLabel } of outbox) {
    for (const chat_id of access.allowFrom) {
      // Best-effort send. swallowingApiCall turns flood-wait / deleted-chat /
      // not-found into a stderr log instead of an exception that would abort
      // the loop and leave the remaining allowFrom chats unnotified. Matches
      // the wrapping contract enforced by scripts/check-bot-api-wrapping.sh
      // (#1075).
      await swallowingApiCall(
        () =>
          bot.api.sendMessage(chat_id, message, {
            parse_mode: 'HTML',
            link_preview_options: { is_disabled: true },
          }),
        { chat_id, verb: 'quota-watch.notify' },
      )
    }
    process.stderr.write(`telegram gateway: quota-watch: notified transition for account=${accountLabel}\n`)
  }

  // Persist the new last-notified state whether or not the sends succeeded.
  try {
    saveQuotaWatchState(stateDir, nextState)
  } catch (cause) {
    process.stderr.write(`telegram gateway: quota-watch state persist failed: ${cause}\n`)
  }
}
|
|
12051
|
+
|
|
11865
12052
|
bot.command("auth", async ctx => {
|
|
11866
12053
|
// sec WS7-F2b (#1394): `/auth` drives the auth-broker credential
|
|
11867
12054
|
// lifecycle (`/auth add` mints/attaches an Anthropic account token,
|
|
@@ -12146,8 +12333,7 @@ function resolveAgentDirForName(agent: string): string | null {
|
|
|
12146
12333
|
* restart — systemctl --user restart switchroom-<agent>
|
|
12147
12334
|
* reauth — delegate to runSwitchroomAuthCommand (same flow as /auth reauth)
|
|
12148
12335
|
* logs — post last 30 lines of journalctl for the agent
|
|
12149
|
-
*
|
|
12150
|
-
* equivalent CLI command for the user to run manually.
|
|
12336
|
+
* slot management buttons — removed (E5); use /auth use or /auth add instead.
|
|
12151
12337
|
*/
|
|
12152
12338
|
/**
|
|
12153
12339
|
* Issue #44: handle taps on the deferred-secret card's inline buttons.
|
|
@@ -13776,15 +13962,6 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
13776
13962
|
}
|
|
13777
13963
|
return
|
|
13778
13964
|
}
|
|
13779
|
-
case 'swap-slot':
|
|
13780
|
-
case 'add-slot': {
|
|
13781
|
-
await ctx.answerCallbackQuery({ text: 'Phase 4c will wire this' }).catch(() => {})
|
|
13782
|
-
const cmd = action === 'swap-slot' ? `auth use ${agent} <slot-name>` : `auth add ${agent}`
|
|
13783
|
-
await ctx.reply(`Phase 4c will wire ${action} buttons. Until then, run in terminal: <code>switchroom ${cmd}</code>`, {
|
|
13784
|
-
parse_mode: 'HTML',
|
|
13785
|
-
})
|
|
13786
|
-
return
|
|
13787
|
-
}
|
|
13788
13965
|
default: {
|
|
13789
13966
|
await ctx.answerCallbackQuery({ text: `Unknown action: ${action}` }).catch(() => {})
|
|
13790
13967
|
return
|
|
@@ -14679,7 +14856,7 @@ bot.on('callback_query:data', async ctx => {
|
|
|
14679
14856
|
|
|
14680
14857
|
// op:<action>:<encoded-agent> callbacks from operator-events.ts
|
|
14681
14858
|
// renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
|
|
14682
|
-
// Actions: dismiss, restart, reauth,
|
|
14859
|
+
// Actions: dismiss, restart, reauth, logs.
|
|
14683
14860
|
if (data.startsWith('op:')) {
|
|
14684
14861
|
await handleOperatorEventCallback(ctx, data)
|
|
14685
14862
|
return
|
|
@@ -16641,11 +16818,12 @@ void (async () => {
|
|
|
16641
16818
|
const updateOutcomeLine = (() => {
|
|
16642
16819
|
try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
|
|
16643
16820
|
})()
|
|
16821
|
+
const resolvedAgentDirForBootCard = agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug)
|
|
16644
16822
|
const handle = await startBootCard(chatId, threadId, botApiForCard, {
|
|
16645
16823
|
agentName: agentDisplayName,
|
|
16646
16824
|
agentSlug,
|
|
16647
16825
|
version: formatBootVersion(),
|
|
16648
|
-
agentDir:
|
|
16826
|
+
agentDir: resolvedAgentDirForBootCard,
|
|
16649
16827
|
gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
|
|
16650
16828
|
restartReason: reason,
|
|
16651
16829
|
restartAgeMs: markerAgeMs,
|
|
@@ -16654,6 +16832,7 @@ void (async () => {
|
|
|
16654
16832
|
probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
|
|
16655
16833
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
16656
16834
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
16835
|
+
configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
|
|
16657
16836
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
16658
16837
|
}, ackMsgId)
|
|
16659
16838
|
activeBootCard = handle
|
|
@@ -16705,6 +16884,34 @@ void (async () => {
|
|
|
16705
16884
|
}, CREDIT_WATCH_POLL_MS).unref()
|
|
16706
16885
|
}
|
|
16707
16886
|
|
|
16887
|
+
// Proactive quota threshold-tier push (#E4). Reads broker cached
|
|
16888
|
+
// quota for all accounts in the pool, classifies health via
|
|
16889
|
+
// classifyHealth, and fires one Telegram message per
|
|
16890
|
+
// healthy ↔ throttling transition (edge-triggered). Does NOT
|
|
16891
|
+
// cover healthy → blocked or blocked → healthy — credits-watch
|
|
16892
|
+
// handles those fatal-billing transitions above.
|
|
16893
|
+
//
|
|
16894
|
+
// Cadence: 15 min by default (same as credit-watch). Each poll
|
|
16895
|
+
// calls broker listState (local IPC, cheap) + probeQuota only
|
|
16896
|
+
// when a state-change is detected (to get fresh numbers for
|
|
16897
|
+
// the notification body). SWITCHROOM_QUOTA_WATCH_POLL_MS=0 disables.
|
|
16898
|
+
const QUOTA_WATCH_POLL_MS = Number(process.env.SWITCHROOM_QUOTA_WATCH_POLL_MS ?? 15 * 60_000)
|
|
16899
|
+
if (QUOTA_WATCH_POLL_MS > 0) {
|
|
16900
|
+
// Delay the initial run by 30 s to let the broker connection
|
|
16901
|
+
// settle after boot (avoids a probe race with the boot-card
|
|
16902
|
+
// quota probe that fires in the first few seconds).
|
|
16903
|
+
setTimeout(() => {
|
|
16904
|
+
void runQuotaWatch().catch((err) => {
|
|
16905
|
+
process.stderr.write(`telegram gateway: quota-watch initial run failed: ${err}\n`)
|
|
16906
|
+
})
|
|
16907
|
+
}, 30_000)
|
|
16908
|
+
setInterval(() => {
|
|
16909
|
+
void runQuotaWatch().catch((err) => {
|
|
16910
|
+
process.stderr.write(`telegram gateway: quota-watch scheduled run failed: ${err}\n`)
|
|
16911
|
+
})
|
|
16912
|
+
}, QUOTA_WATCH_POLL_MS).unref()
|
|
16913
|
+
}
|
|
16914
|
+
|
|
16708
16915
|
// Restart-watchdog: poll systemd's NRestarts for the agent unit.
|
|
16709
16916
|
// When the count ticks up without a corresponding restart-pending
|
|
16710
16917
|
// marker (= user-initiated /restart), emit an operator event.
|
|
@@ -257,16 +257,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
|
|
|
257
257
|
text: [
|
|
258
258
|
`💳 <b>Credit balance too low</b> for <b>${agent}</b>.`,
|
|
259
259
|
detail ? `<i>${detail}</i>` : '',
|
|
260
|
-
`
|
|
260
|
+
`Use <code>/auth use <label></code> to switch account slot or <code>/auth add</code> to add one.`,
|
|
261
261
|
]
|
|
262
262
|
.filter(Boolean)
|
|
263
263
|
.join('\n'),
|
|
264
264
|
keyboard: {
|
|
265
265
|
inline_keyboard: [
|
|
266
|
-
[
|
|
267
|
-
{ text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
|
|
268
|
-
{ text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
|
|
269
|
-
],
|
|
270
266
|
[{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
|
|
271
267
|
],
|
|
272
268
|
},
|
|
@@ -280,16 +276,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
|
|
|
280
276
|
text: [
|
|
281
277
|
`⚠️ <b>Quota exhausted</b> for <b>${agent}</b>.`,
|
|
282
278
|
detail ? `<i>${detail}</i>` : '',
|
|
283
|
-
`All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available.`,
|
|
279
|
+
`All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available. Use <code>/auth use <label></code> to switch manually.`,
|
|
284
280
|
]
|
|
285
281
|
.filter(Boolean)
|
|
286
282
|
.join('\n'),
|
|
287
283
|
keyboard: {
|
|
288
284
|
inline_keyboard: [
|
|
289
|
-
[
|
|
290
|
-
{ text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
|
|
291
|
-
{ text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
|
|
292
|
-
],
|
|
293
285
|
[{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
|
|
294
286
|
],
|
|
295
287
|
},
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proactive quota threshold-tier push (#E4).
|
|
3
|
+
*
|
|
4
|
+
* Background: JTBD `track-plan-quota-live` anti-pattern: "Quota visible
|
|
5
|
+
* only in a separate dashboard or a command. If the user has to go
|
|
6
|
+
* looking, they won't, and they'll hit the wall." The existing stack
|
|
7
|
+
* covers the wall (auto-fallback at 99.5%, credits-watch on fatal billing
|
|
8
|
+
* transitions) but fires zero proactive signal at 80% — the point where
|
|
9
|
+
* the user can still act by switching accounts. This module closes that gap.
|
|
10
|
+
*
|
|
11
|
+
* It is a pure decision layer. It reads the broker's cached quota state
|
|
12
|
+
* for all accounts, classifies health via the same `classifyHealth`
|
|
13
|
+
* three-state machine used by the /auth dashboard, compares against a
|
|
14
|
+
* persisted last-notified state, and tells the gateway whether to emit
|
|
15
|
+
* a Telegram message + what to say. The gateway wires the actual
|
|
16
|
+
* `bot.api.sendMessage` call (via `swallowingApiCall`) — same as
|
|
17
|
+
* `credits-watch.ts`.
|
|
18
|
+
*
|
|
19
|
+
* Edge-trigger discipline: only fires on health *transitions*
|
|
20
|
+
* (healthy → throttling and throttling → healthy). Does NOT fire on
|
|
21
|
+
* healthy → blocked or blocked → healthy — `credits-watch.ts` already
|
|
22
|
+
* covers those via the fatal-billing path. Steady-state throttling
|
|
23
|
+
* never re-notifies.
|
|
24
|
+
*
|
|
25
|
+
* Scope: per-account across the whole pool, not just the active one.
|
|
26
|
+
* The user's natural recovery action is switching to a healthy account,
|
|
27
|
+
* so they need visibility into non-active accounts too.
|
|
28
|
+
*
|
|
29
|
+
* Source data: broker `listState` + `probeQuota`. `listState` is a local
|
|
30
|
+
* IPC call (cheap). `probeQuota` is only called on state-change (when
|
|
31
|
+
* we're going to send a message anyway) to get fresh numbers for the
|
|
32
|
+
* notification body. On no-change polls, only `listState` is called.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
36
|
+
import { join } from "path";
|
|
37
|
+
import type { AccountSnapshot } from "./auth-snapshot-format.js";
|
|
38
|
+
import {
|
|
39
|
+
classifyHealth,
|
|
40
|
+
type AccountHealth,
|
|
41
|
+
THROTTLING_THRESHOLD_PCT,
|
|
42
|
+
bindingWindow,
|
|
43
|
+
formatRelative,
|
|
44
|
+
fmtPct,
|
|
45
|
+
} from "./auth-snapshot-format.js";
|
|
46
|
+
import type { QuotaUtilization } from "./quota-check.js";
|
|
47
|
+
|
|
48
|
+
const STATE_FILE = "quota-watch.json";
|
|
49
|
+
|
|
50
|
+
// ─── State types ──────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Per-account last-notified health. We only care about the
|
|
54
|
+
* healthy ↔ throttling boundary — blocked is `credits-watch`'s domain.
|
|
55
|
+
* `null` means "never notified" (treat as healthy for transition logic).
|
|
56
|
+
*/
|
|
57
|
+
export type QuotaWatchHealth = "healthy" | "throttling" | null;
|
|
58
|
+
|
|
59
|
+
export interface QuotaWatchAccountState {
|
|
60
|
+
/** Last health we sent a notification for. null = never notified. */
|
|
61
|
+
lastNotifiedHealth: QuotaWatchHealth;
|
|
62
|
+
/** Wall-clock ms of the last notification. */
|
|
63
|
+
lastNotifiedAt: number;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
export type QuotaWatchState = Record<string, QuotaWatchAccountState>;
|
|
67
|
+
|
|
68
|
+
/** Returns a new, empty per-account state map (no account has any recorded state). */
export function emptyQuotaWatchState(): QuotaWatchState {
  const blank: QuotaWatchState = {};
  return blank;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Returns the default state for an account that has never been notified:
 * `lastNotifiedHealth` is `null` and `lastNotifiedAt` is `0`.
 */
export function emptyAccountState(): QuotaWatchAccountState {
  const neverNotified: QuotaWatchAccountState = {
    lastNotifiedHealth: null,
    lastNotifiedAt: 0,
  };
  return neverNotified;
}
|
|
75
|
+
|
|
76
|
+
// ─── Decision logic ───────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
export type QuotaWatchTransition =
|
|
79
|
+
| "entered-throttling"
|
|
80
|
+
| "recovered-to-healthy";
|
|
81
|
+
|
|
82
|
+
export type QuotaWatchDecision =
|
|
83
|
+
| {
|
|
84
|
+
kind: "notify";
|
|
85
|
+
accountLabel: string;
|
|
86
|
+
message: string;
|
|
87
|
+
newAccountState: QuotaWatchAccountState;
|
|
88
|
+
transition: QuotaWatchTransition;
|
|
89
|
+
}
|
|
90
|
+
| { kind: "skip"; accountLabel: string; reason: string };
|
|
91
|
+
|
|
92
|
+
/**
 * Decide whether one account's current health warrants a notification,
 * given the health we last notified about.
 *
 * Current health comes from `classifyHealth(snap)`; previous health is
 * `prev.lastNotifiedHealth`, with `null` (never notified) treated as healthy.
 *
 *   previous    current      result
 *   ─────────   ──────────   ─────────────────────────────────
 *   (any)       unknown      skip  — no quota data, don't alarm
 *   (any)       blocked      skip  — credits-watch covers blocked
 *   healthy     healthy      skip  — steady-state
 *   throttling  throttling   skip  — already notified
 *   healthy     throttling   notify, transition "entered-throttling"
 *   throttling  healthy      notify, transition "recovered-to-healthy"
 *
 * @param args.agentName  Forwarded to the message builders.
 * @param args.snap       Snapshot of the account being evaluated.
 * @param args.prev       Persisted last-notified state for this account.
 * @param args.now        Timestamp stored as `lastNotifiedAt` on a notify.
 * @returns A `notify` decision carrying the message and the state to persist,
 *          or a `skip` decision carrying a reason string.
 */
export function evaluateQuotaWatchAccount(args: {
  agentName: string;
  snap: AccountSnapshot;
  prev: QuotaWatchAccountState;
  now: number;
}): QuotaWatchDecision {
  const accountLabel = args.snap.label;
  const skip = (reason: string): QuotaWatchDecision => ({
    kind: "skip",
    accountLabel,
    reason,
  });

  const current = classifyHealth(args.snap);

  // No quota data, or blocked: neither is this module's concern.
  if (current === "unknown" || current === "blocked") {
    return skip(`${current}-not-our-domain`);
  }

  // Never notified counts as "was healthy".
  const previous: "healthy" | "throttling" = args.prev.lastNotifiedHealth ?? "healthy";

  if (previous === current) {
    return skip("steady-state");
  }

  // Crossed upward: proactive threshold push.
  if (previous === "healthy" && current === "throttling") {
    return {
      kind: "notify",
      accountLabel,
      message: buildThrottlingMessage(args.agentName, args.snap),
      newAccountState: { lastNotifiedHealth: "throttling", lastNotifiedAt: args.now },
      transition: "entered-throttling",
    };
  }

  // Crossed back down: recovery notice.
  if (previous === "throttling" && current === "healthy") {
    return {
      kind: "notify",
      accountLabel,
      message: buildRecoveryMessage(args.agentName, args.snap),
      newAccountState: { lastNotifiedHealth: "healthy", lastNotifiedAt: args.now },
      transition: "recovered-to-healthy",
    };
  }

  // Defensive fallback for any combination not handled above.
  return skip("no-matching-transition");
}
|
|
162
|
+
|
|
163
|
+
// ─── Message builders ─────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
/**
 * Build the HTML body of the "quota approaching limit" notification for an
 * account that has just been classified as throttling.
 *
 * The message shows both window utilizations, names the binding window (the
 * one with the higher utilization; the 5-hour window wins ties) together
 * with its refill time when known, and adds a hint that differs for the
 * active account versus a non-active one.
 *
 * The text is sent with `parse_mode: 'HTML'`, so every literal `<`/`>` that
 * is not real markup must be entity-escaped — including the
 * `<other-account>` placeholder, which would otherwise be parsed as an
 * unsupported tag and make the send fail.
 *
 * @param agentName  Currently unused by the body; kept so the signature
 *                   matches the recovery builder and the caller.
 * @param snap       Snapshot of the throttling account. `snap.quota` must be
 *                   non-null (the caller only gets here after the account
 *                   was classified as throttling).
 * @returns The trimmed message with runs of 3+ newlines collapsed to 2.
 */
function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): string {
  const q = snap.quota!; // classifyHealth returned throttling, so quota is non-null
  const fiveStr = fmtPct(q.fiveHourUtilizationPct);
  const sevenStr = fmtPct(q.sevenDayUtilizationPct);

  // Binding window = whichever utilization is higher.
  const max = Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
  const win = max === q.fiveHourUtilizationPct ? "5h" : "7d";
  const winLabel = win === "5h" ? "5-hour" : "7-day";
  const resetAt = win === "5h" ? q.fiveHourResetAt : q.sevenDayResetAt;
  const resetStr = resetAt
    ? ` · refills in ${formatRelative(resetAt, new Date())}`
    : "";

  // Exactly one of the two notes is non-empty, depending on isActive.
  const activeNote = snap.isActive
    ? ""
    : `\nThis is a non-active account. Consider <code>/auth use ${escapeHtml(snap.label)}</code> to switch, or keep it as a fallback reserve.`;

  // The placeholder is escaped so Telegram's HTML parser treats it as text.
  const altNote = snap.isActive
    ? `\nConsider <code>/auth use &lt;other-account&gt;</code> if you have a healthier account, or wait for the ${winLabel} window to refill${resetStr}.`
    : "";

  return [
    `🟡 <b>Quota approaching limit</b> — <code>${escapeHtml(snap.label)}</code>`,
    ``,
    `${fiveStr} of 5h · ${sevenStr} of 7d`,
    `Binding window: ${winLabel}${resetStr}`,
    `${activeNote}${altNote}`,
    ``,
    `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
    `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
  ]
    .join("\n")
    .replace(/\n\n\n+/g, "\n\n")
    .trim();
}
|
|
199
|
+
|
|
200
|
+
/**
 * Build the HTML body of the "quota back in healthy range" notification.
 *
 * Shows the current 5h / 7d utilization when the snapshot carries quota
 * data, or a fixed "unavailable" line when it does not.
 *
 * @param agentName  Not referenced by the body.
 * @param snap       Snapshot of the recovered account.
 */
function buildRecoveryMessage(agentName: string, snap: AccountSnapshot): string {
  const quota = snap.quota;

  let currentLine = "Current quota data unavailable.";
  if (quota) {
    currentLine = `Current: ${fmtPct(quota.fiveHourUtilizationPct)} of 5h · ${fmtPct(quota.sevenDayUtilizationPct)} of 7d`;
  }

  const headline = `🟢 <b>Quota back in healthy range</b> — <code>${escapeHtml(snap.label)}</code>`;
  const footnote = `<i>Below ${THROTTLING_THRESHOLD_PCT}% on both windows.</i>`;

  // headline, blank line, utilization, blank line, footnote
  return `${headline}\n\n${currentLine}\n\n${footnote}`;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Escape a string for safe interpolation into an HTML-formatted message.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with character entities. `&` is
 * handled first so the ampersands introduced by the later replacements are
 * not escaped a second time.
 *
 * @param s  Raw text (e.g. an account label).
 * @returns  The text with the five special characters replaced by entities.
 */
function escapeHtml(s: string): string {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    // Numeric entity: Telegram's HTML mode only knows four named entities.
    .replace(/'/g, "&#39;");
}
|
|
223
|
+
|
|
224
|
+
// ─── State persistence ────────────────────────────────────────────────────────
|
|
225
|
+
|
|
226
|
+
/**
 * Load the persisted per-account state from `<stateDir>/<STATE_FILE>`.
 *
 * Never throws: a missing file, unreadable file, invalid JSON, or a
 * top-level value that is not a plain object all yield an empty state.
 * Individual entries that fail validation are dropped while the remaining
 * entries are kept.
 *
 * An entry is accepted when it is a non-array object whose
 * `lastNotifiedHealth` is `null`, `"healthy"` or `"throttling"` and whose
 * `lastNotifiedAt` is a finite number.
 *
 * @param stateDir  Directory containing the state file.
 */
export function loadQuotaWatchState(stateDir: string): QuotaWatchState {
  const file = join(stateDir, STATE_FILE);
  if (!existsSync(file)) return emptyQuotaWatchState();

  // Shape check for a single persisted entry.
  const isAccountState = (candidate: unknown): candidate is QuotaWatchAccountState => {
    if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
      return false;
    }
    const { lastNotifiedHealth, lastNotifiedAt } = candidate as Record<string, unknown>;
    const healthOk =
      lastNotifiedHealth === null ||
      lastNotifiedHealth === "healthy" ||
      lastNotifiedHealth === "throttling";
    return healthOk && typeof lastNotifiedAt === "number" && Number.isFinite(lastNotifiedAt);
  };

  try {
    const parsed: unknown = JSON.parse(readFileSync(file, "utf-8"));
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
      return emptyQuotaWatchState();
    }

    // Keep well-formed entries; silently drop the rest.
    const loaded: QuotaWatchState = {};
    for (const [label, entry] of Object.entries(parsed)) {
      if (isAccountState(entry)) {
        loaded[label] = entry;
      }
    }
    return loaded;
  } catch {
    // Read or parse failure — start from a clean slate.
    return emptyQuotaWatchState();
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Write the state map to `<stateDir>/<STATE_FILE>` as 2-space-indented JSON
 * followed by a newline, creating `stateDir` (recursively) if needed. The
 * file is written with mode 0o600. Errors from the file system propagate to
 * the caller.
 *
 * @param stateDir  Directory to write into.
 * @param state     Full per-account state map to persist.
 */
export function saveQuotaWatchState(stateDir: string, state: QuotaWatchState): void {
  mkdirSync(stateDir, { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(join(stateDir, STATE_FILE), serialized, { mode: 0o600 });
}
|
|
264
|
+
|
|
265
|
+
/**
 * Return a copy of `current` in which `accountLabel` maps to `accountState`.
 * The input map is not modified; an existing entry for the same label is
 * replaced in the copy.
 *
 * @param current       State map to copy.
 * @param accountLabel  Key of the account to set.
 * @param accountState  New state for that account.
 */
export function patchQuotaWatchState(
  current: QuotaWatchState,
  accountLabel: string,
  accountState: QuotaWatchAccountState,
): QuotaWatchState {
  const override = { [accountLabel]: accountState };
  return { ...current, ...override };
}
|