switchroom 0.14.0 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-broker/index.js +16 -1
- package/dist/cli/switchroom.js +1082 -873
- package/dist/host-control/main.js +1 -1
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +1 -1
- package/telegram-plugin/auth-snapshot-format.ts +47 -1
- package/telegram-plugin/dist/gateway/gateway.js +983 -537
- package/telegram-plugin/gateway/boot-card.ts +100 -0
- package/telegram-plugin/gateway/config-snapshot.ts +274 -0
- package/telegram-plugin/gateway/gateway.ts +235 -20
- package/telegram-plugin/operator-events.ts +2 -10
- package/telegram-plugin/quota-watch.ts +276 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +133 -1
- package/telegram-plugin/tests/boot-card-render.test.ts +93 -0
- package/telegram-plugin/tests/config-snapshot.test.ts +409 -0
- package/telegram-plugin/tests/operator-events.test.ts +12 -6
- package/telegram-plugin/tests/quota-watch.test.ts +366 -0
- package/telegram-plugin/tests/tool-activity-summary.test.ts +45 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +48 -0
- package/telegram-plugin/tool-activity-summary.ts +47 -0
- package/telegram-plugin/turn-flush-safety.ts +47 -0
- package/telegram-plugin/uat/assertions.ts +4 -4
|
@@ -58,6 +58,7 @@ import {
|
|
|
58
58
|
makeEmptyActivityState,
|
|
59
59
|
registerAndRender,
|
|
60
60
|
describeToolUse,
|
|
61
|
+
appendActivityLine,
|
|
61
62
|
type ActivityState,
|
|
62
63
|
} from '../tool-activity-summary.js'
|
|
63
64
|
import { toolLabel } from '../tool-labels.js'
|
|
@@ -373,6 +374,14 @@ import {
|
|
|
373
374
|
loadCreditState,
|
|
374
375
|
saveCreditState,
|
|
375
376
|
} from '../credits-watch.js'
|
|
377
|
+
import {
|
|
378
|
+
evaluateQuotaWatchAccount,
|
|
379
|
+
loadQuotaWatchState,
|
|
380
|
+
saveQuotaWatchState,
|
|
381
|
+
patchQuotaWatchState,
|
|
382
|
+
emptyAccountState,
|
|
383
|
+
} from '../quota-watch.js'
|
|
384
|
+
import { buildSnapshotsFromState, buildSnapshotsFromCachedState } from '../auth-snapshot-format.js'
|
|
376
385
|
import {
|
|
377
386
|
writeTurnActiveMarker,
|
|
378
387
|
touchTurnActiveMarker,
|
|
@@ -1330,6 +1339,11 @@ type CurrentTurn = {
|
|
|
1330
1339
|
activityInFlight: Promise<void> | null
|
|
1331
1340
|
activityPendingRender: string | null
|
|
1332
1341
|
activityLastSentRender: string | null
|
|
1342
|
+
// Draft-mirror Phase 2: accumulating friendly-action feed for this turn
|
|
1343
|
+
// (DRAFT_MIRROR only). Each non-surface tool_use appends a line via
|
|
1344
|
+
// `appendActivityLine`; the feed renders as a capped chronological list
|
|
1345
|
+
// in the ephemeral draft and clears on reply. Reset per turn.
|
|
1346
|
+
mirrorLines: string[]
|
|
1333
1347
|
// Issue #195 — answer-lane streaming. Lazily created on the first text
|
|
1334
1348
|
// event of a turn (once enough text has accumulated, the stream itself
|
|
1335
1349
|
// gates on minInitialChars). Materialized and cleared at turn_end.
|
|
@@ -3898,11 +3912,12 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3898
3912
|
const updateOutcomeLine = (() => {
|
|
3899
3913
|
try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
|
|
3900
3914
|
})()
|
|
3915
|
+
const resolvedAgentDirForCard = agentDir ?? (process.env.TELEGRAM_STATE_DIR ? require('path').dirname(process.env.TELEGRAM_STATE_DIR) : '/tmp')
|
|
3901
3916
|
startBootCard(chatId, threadId, botApiForCard, {
|
|
3902
3917
|
agentName: agentDisplayName,
|
|
3903
3918
|
agentSlug,
|
|
3904
3919
|
version: formatBootVersion(),
|
|
3905
|
-
agentDir:
|
|
3920
|
+
agentDir: resolvedAgentDirForCard,
|
|
3906
3921
|
gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
|
|
3907
3922
|
restartReason: reason,
|
|
3908
3923
|
restartAgeMs: markerAgeMs,
|
|
@@ -3911,6 +3926,7 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3911
3926
|
probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
|
|
3912
3927
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
3913
3928
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
3929
|
+
configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
|
|
3914
3930
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
3915
3931
|
}, ackMsgId).then(handle => {
|
|
3916
3932
|
activeBootCard = handle
|
|
@@ -6991,6 +7007,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6991
7007
|
activityInFlight: null,
|
|
6992
7008
|
activityPendingRender: null,
|
|
6993
7009
|
activityLastSentRender: null,
|
|
7010
|
+
mirrorLines: [],
|
|
6994
7011
|
answerStream: null,
|
|
6995
7012
|
isDm: isDmChatId(ev.chatId),
|
|
6996
7013
|
}
|
|
@@ -7136,11 +7153,12 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
7136
7153
|
// exactly once at a time and re-running until pending matches
|
|
7137
7154
|
// the last-sent. Captures `turn` so a late drain after turn-swap
|
|
7138
7155
|
// can't corrupt the next turn's atom.
|
|
7139
|
-
// DRAFT_MIRROR (RFC draft-mirror-preview):
|
|
7140
|
-
// human-friendly
|
|
7141
|
-
// descriptive field (Bash.description, Read/Edit file
|
|
7142
|
-
// hindsight→"Searching memory", etc. — see describeToolUse
|
|
7143
|
-
//
|
|
7156
|
+
// DRAFT_MIRROR (RFC draft-mirror-preview): accumulate each tool_use
|
|
7157
|
+
// into a human-friendly running feed in the live preview, using the
|
|
7158
|
+
// model-authored descriptive field (Bash.description, Read/Edit file
|
|
7159
|
+
// basename, hindsight→"Searching memory", etc. — see describeToolUse
|
|
7160
|
+
// / appendActivityLine). The draft shows the turn's actions as a
|
|
7161
|
+
// capped chronological list (Claude Code-style), clears on reply.
|
|
7144
7162
|
// Never surfaces raw shell/query syntax — option A, uniform across
|
|
7145
7163
|
// code + non-code agents.
|
|
7146
7164
|
//
|
|
@@ -7149,7 +7167,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
7149
7167
|
// pre-draft-mirror behavior.
|
|
7150
7168
|
if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
|
|
7151
7169
|
const rendered = DRAFT_MIRROR_ENABLED
|
|
7152
|
-
?
|
|
7170
|
+
? appendActivityLine(turn.mirrorLines, name, ev.input)
|
|
7153
7171
|
: registerAndRender(turn.toolActivity, name)
|
|
7154
7172
|
if (rendered != null) {
|
|
7155
7173
|
turn.activityPendingRender = rendered
|
|
@@ -11862,6 +11880,183 @@ async function runCreditWatch(): Promise<void> {
|
|
|
11862
11880
|
}
|
|
11863
11881
|
}
|
|
11864
11882
|
|
|
11883
|
+
/**
 * One pass of the quota threshold-tier watch (#E4).
 *
 * Flow of a single pass:
 *   1. Ask the auth broker for `listState()` and turn it into account
 *      snapshots with `buildSnapshotsFromCachedState` — no `probeQuota`
 *      call is made at this stage.
 *   2. Run `evaluateQuotaWatchAccount` for every snapshot against the
 *      persisted per-account state loaded from `STATE_DIR`. Anything that is
 *      not a `skip` decision is a pending transition.
 *   3. If nothing is pending, return without probing, notifying or writing
 *      state.
 *   4. Otherwise issue ONE `probeQuota` call covering only the crossing
 *      labels and re-evaluate each crossing with the fresh quota so the
 *      message body carries current numbers. A crossing whose re-evaluation
 *      comes back as `skip` is dropped; a failed probe falls back to the
 *      decision computed from cached data.
 *   5. Send each remaining message to every chat in `access.allowFrom`
 *      (best-effort through `swallowingApiCall`) and persist the patched
 *      state, whether or not the sends succeeded.
 *
 * Which transitions produce a notification is decided entirely by
 * `evaluateQuotaWatchAccount`; this function only orchestrates I/O.
 */
async function runQuotaWatch(): Promise<void> {
  const agentName = getMyAgentName()
  const stateDir = STATE_DIR
  const log = (line: string): void => {
    process.stderr.write(line)
  }

  const broker = await getAuthBrokerClient(agentName)
  if (!broker) {
    log('telegram gateway: quota-watch: broker client unavailable — skipping\n')
    return
  }

  // Single broker round-trip for the whole pool.
  let brokerState: Awaited<ReturnType<typeof broker.listState>>
  try {
    brokerState = await broker.listState()
  } catch (err) {
    log(`telegram gateway: quota-watch: listState failed: ${err}\n`)
    return
  }

  // Nothing to watch when the broker reports no accounts.
  if (!brokerState.accounts?.length) {
    return
  }

  // Snapshots come from cached broker state only; accounts the evaluator
  // cannot classify simply yield `skip` decisions below.
  const cachedSnaps = buildSnapshotsFromCachedState(brokerState)

  const persisted = loadQuotaWatchState(stateDir)
  const now = Date.now()
  const access = loadAccess()

  // label → position in `cachedSnaps` (last occurrence wins on duplicates).
  const indexByLabel = new Map<string, number>()
  cachedSnaps.forEach((s, i) => {
    indexByLabel.set(s.label, i)
  })

  // Pass 1: classify every account against cached data and collect the ones
  // whose decision is anything other than `skip`.
  const crossings: Array<{
    accountLabel: string
    snapIndex: number
    decision: ReturnType<typeof evaluateQuotaWatchAccount>
  }> = []
  for (const snap of cachedSnaps) {
    const decision = evaluateQuotaWatchAccount({
      agentName,
      snap,
      prev: persisted[snap.label] ?? emptyAccountState(),
      now,
    })
    if (decision.kind === 'skip') continue
    crossings.push({
      accountLabel: snap.label,
      snapIndex: indexByLabel.get(snap.label) ?? -1,
      decision,
    })
  }

  // Steady state: no notifications, no probes, no state write.
  if (crossings.length === 0) {
    return
  }

  // Pass 2: one batched probe for the crossing accounts only, used to refresh
  // the numbers shown in the message body.
  const liveByLabel = new Map<string, Awaited<ReturnType<typeof broker.probeQuota>>['results'][number]['result']>()
  try {
    const probe = await broker.probeQuota(crossings.map(c => c.accountLabel), 8000)
    for (const { label, result } of probe.results) {
      liveByLabel.set(label, result)
    }
  } catch (err) {
    // A failed probe must not suppress the alert: fall back to the cached
    // decision, accepting slightly stale numbers in the message.
    log(`telegram gateway: quota-watch: probe for crossing accounts failed: ${err}\n`)
  }

  let nextState = persisted
  const outbox: Array<{ message: string; accountLabel: string }> = []

  for (const { accountLabel, snapIndex, decision } of crossings) {
    // Only non-skip decisions were queued above; this guard exists to narrow
    // the union so `.transition` / `.message` type-check.
    if (decision.kind !== 'notify') continue

    let outcome = decision
    const live = liveByLabel.get(accountLabel)
    if (live && live.ok && snapIndex >= 0) {
      const refreshed = evaluateQuotaWatchAccount({
        agentName,
        snap: { ...cachedSnaps[snapIndex]!, quota: live.data },
        prev: persisted[accountLabel] ?? emptyAccountState(),
        now,
      })
      // Fresh data no longer shows a crossing — drop the notification.
      if (refreshed.kind === 'skip') continue
      // Same crossing confirmed by fresh data — use the refreshed message.
      if (refreshed.transition === decision.transition) {
        outcome = refreshed
      }
    }

    outbox.push({ message: outcome.message, accountLabel })
    nextState = patchQuotaWatchState(nextState, accountLabel, outcome.newAccountState)
  }

  // Every crossing was resolved by the live probe.
  if (outbox.length === 0) {
    return
  }

  // One message per crossing account, fanned out to every allowed chat.
  for (const { message, accountLabel } of outbox) {
    for (const chat_id of access.allowFrom) {
      // Best-effort send: swallowingApiCall keeps a failing chat (flood-wait,
      // deleted chat, not-found) from aborting the fan-out to the remaining
      // chats. Matches scripts/check-bot-api-wrapping.sh (#1075).
      await swallowingApiCall(
        () =>
          bot.api.sendMessage(chat_id, message, {
            parse_mode: 'HTML',
            link_preview_options: { is_disabled: true },
          }),
        { chat_id, verb: 'quota-watch.notify' },
      )
    }
    log(`telegram gateway: quota-watch: notified transition for account=${accountLabel}\n`)
  }

  // Persist regardless of send outcome so the same crossing is not re-sent.
  try {
    saveQuotaWatchState(stateDir, nextState)
  } catch (err) {
    log(`telegram gateway: quota-watch state persist failed: ${err}\n`)
  }
}
|
|
12059
|
+
|
|
11865
12060
|
bot.command("auth", async ctx => {
|
|
11866
12061
|
// sec WS7-F2b (#1394): `/auth` drives the auth-broker credential
|
|
11867
12062
|
// lifecycle (`/auth add` mints/attaches an Anthropic account token,
|
|
@@ -12146,8 +12341,7 @@ function resolveAgentDirForName(agent: string): string | null {
|
|
|
12146
12341
|
* restart — systemctl --user restart switchroom-<agent>
|
|
12147
12342
|
* reauth — delegate to runSwitchroomAuthCommand (same flow as /auth reauth)
|
|
12148
12343
|
* logs — post last 30 lines of journalctl for the agent
|
|
12149
|
-
*
|
|
12150
|
-
* equivalent CLI command for the user to run manually.
|
|
12344
|
+
* slot management buttons — removed (E5); use /auth use or /auth add instead.
|
|
12151
12345
|
*/
|
|
12152
12346
|
/**
|
|
12153
12347
|
* Issue #44: handle taps on the deferred-secret card's inline buttons.
|
|
@@ -13776,15 +13970,6 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
13776
13970
|
}
|
|
13777
13971
|
return
|
|
13778
13972
|
}
|
|
13779
|
-
case 'swap-slot':
|
|
13780
|
-
case 'add-slot': {
|
|
13781
|
-
await ctx.answerCallbackQuery({ text: 'Phase 4c will wire this' }).catch(() => {})
|
|
13782
|
-
const cmd = action === 'swap-slot' ? `auth use ${agent} <slot-name>` : `auth add ${agent}`
|
|
13783
|
-
await ctx.reply(`Phase 4c will wire ${action} buttons. Until then, run in terminal: <code>switchroom ${cmd}</code>`, {
|
|
13784
|
-
parse_mode: 'HTML',
|
|
13785
|
-
})
|
|
13786
|
-
return
|
|
13787
|
-
}
|
|
13788
13973
|
default: {
|
|
13789
13974
|
await ctx.answerCallbackQuery({ text: `Unknown action: ${action}` }).catch(() => {})
|
|
13790
13975
|
return
|
|
@@ -14679,7 +14864,7 @@ bot.on('callback_query:data', async ctx => {
|
|
|
14679
14864
|
|
|
14680
14865
|
// op:<action>:<encoded-agent> callbacks from operator-events.ts
|
|
14681
14866
|
// renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
|
|
14682
|
-
// Actions: dismiss, restart, reauth,
|
|
14867
|
+
// Actions: dismiss, restart, reauth, logs.
|
|
14683
14868
|
if (data.startsWith('op:')) {
|
|
14684
14869
|
await handleOperatorEventCallback(ctx, data)
|
|
14685
14870
|
return
|
|
@@ -16641,11 +16826,12 @@ void (async () => {
|
|
|
16641
16826
|
const updateOutcomeLine = (() => {
|
|
16642
16827
|
try { return maybeRenderUpdateAnnouncement() ?? undefined } catch { return undefined }
|
|
16643
16828
|
})()
|
|
16829
|
+
const resolvedAgentDirForBootCard = agentDir ?? join(homedir(), '.switchroom', 'agents', agentSlug)
|
|
16644
16830
|
const handle = await startBootCard(chatId, threadId, botApiForCard, {
|
|
16645
16831
|
agentName: agentDisplayName,
|
|
16646
16832
|
agentSlug,
|
|
16647
16833
|
version: formatBootVersion(),
|
|
16648
|
-
agentDir:
|
|
16834
|
+
agentDir: resolvedAgentDirForBootCard,
|
|
16649
16835
|
gatewayInfo: { pid: process.pid, startedAtMs: GATEWAY_STARTED_AT_MS },
|
|
16650
16836
|
restartReason: reason,
|
|
16651
16837
|
restartAgeMs: markerAgeMs,
|
|
@@ -16654,6 +16840,7 @@ void (async () => {
|
|
|
16654
16840
|
probeQuotaViaBroker: (t) => probeQuotaForBootCard(agentSlug, t),
|
|
16655
16841
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
16656
16842
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
16843
|
+
configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
|
|
16657
16844
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
16658
16845
|
}, ackMsgId)
|
|
16659
16846
|
activeBootCard = handle
|
|
@@ -16705,6 +16892,34 @@ void (async () => {
|
|
|
16705
16892
|
}, CREDIT_WATCH_POLL_MS).unref()
|
|
16706
16893
|
}
|
|
16707
16894
|
|
|
16895
|
+
// Proactive quota threshold-tier push (#E4). Reads broker cached
|
|
16896
|
+
// quota for all accounts in the pool, classifies health via
|
|
16897
|
+
// classifyHealth, and fires one Telegram message per
|
|
16898
|
+
// healthy ↔ throttling transition (edge-triggered). Does NOT
|
|
16899
|
+
// cover healthy → blocked or blocked → healthy — credits-watch
|
|
16900
|
+
// handles those fatal-billing transitions above.
|
|
16901
|
+
//
|
|
16902
|
+
// Cadence: 15 min by default (same as credit-watch). Each poll
|
|
16903
|
+
// calls broker listState (local IPC, cheap) + probeQuota only
|
|
16904
|
+
// when a state-change is detected (to get fresh numbers for
|
|
16905
|
+
// the notification body). SWITCHROOM_QUOTA_WATCH_POLL_MS=0 disables.
|
|
16906
|
+
const QUOTA_WATCH_POLL_MS = Number(process.env.SWITCHROOM_QUOTA_WATCH_POLL_MS ?? 15 * 60_000)
|
|
16907
|
+
if (QUOTA_WATCH_POLL_MS > 0) {
|
|
16908
|
+
// Delay the initial run by 30 s to let the broker connection
|
|
16909
|
+
// settle after boot (avoids a probe race with the boot-card
|
|
16910
|
+
// quota probe that fires in the first few seconds).
|
|
16911
|
+
setTimeout(() => {
|
|
16912
|
+
void runQuotaWatch().catch((err) => {
|
|
16913
|
+
process.stderr.write(`telegram gateway: quota-watch initial run failed: ${err}\n`)
|
|
16914
|
+
})
|
|
16915
|
+
}, 30_000)
|
|
16916
|
+
setInterval(() => {
|
|
16917
|
+
void runQuotaWatch().catch((err) => {
|
|
16918
|
+
process.stderr.write(`telegram gateway: quota-watch scheduled run failed: ${err}\n`)
|
|
16919
|
+
})
|
|
16920
|
+
}, QUOTA_WATCH_POLL_MS).unref()
|
|
16921
|
+
}
|
|
16922
|
+
|
|
16708
16923
|
// Restart-watchdog: poll systemd's NRestarts for the agent unit.
|
|
16709
16924
|
// When the count ticks up without a corresponding restart-pending
|
|
16710
16925
|
// marker (= user-initiated /restart), emit an operator event.
|
|
@@ -257,16 +257,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
|
|
|
257
257
|
text: [
|
|
258
258
|
`💳 <b>Credit balance too low</b> for <b>${agent}</b>.`,
|
|
259
259
|
detail ? `<i>${detail}</i>` : '',
|
|
260
|
-
`
|
|
260
|
+
`Use <code>/auth use <label></code> to switch account slot or <code>/auth add</code> to add one.`,
|
|
261
261
|
]
|
|
262
262
|
.filter(Boolean)
|
|
263
263
|
.join('\n'),
|
|
264
264
|
keyboard: {
|
|
265
265
|
inline_keyboard: [
|
|
266
|
-
[
|
|
267
|
-
{ text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
|
|
268
|
-
{ text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
|
|
269
|
-
],
|
|
270
266
|
[{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
|
|
271
267
|
],
|
|
272
268
|
},
|
|
@@ -280,16 +276,12 @@ export function renderOperatorEvent(ev: OperatorEvent): RenderResult {
|
|
|
280
276
|
text: [
|
|
281
277
|
`⚠️ <b>Quota exhausted</b> for <b>${agent}</b>.`,
|
|
282
278
|
detail ? `<i>${detail}</i>` : '',
|
|
283
|
-
`All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available.`,
|
|
279
|
+
`All account slots are at the usage limit. Switchroom will auto-fallback when another slot is available. Use <code>/auth use <label></code> to switch manually.`,
|
|
284
280
|
]
|
|
285
281
|
.filter(Boolean)
|
|
286
282
|
.join('\n'),
|
|
287
283
|
keyboard: {
|
|
288
284
|
inline_keyboard: [
|
|
289
|
-
[
|
|
290
|
-
{ text: '🔄 Swap slot', callback_data: `op:swap-slot:${encodeURIComponent(ev.agent)}` },
|
|
291
|
-
{ text: '➕ Add slot', callback_data: `op:add-slot:${encodeURIComponent(ev.agent)}` },
|
|
292
|
-
],
|
|
293
285
|
[{ text: '⏳ Wait', callback_data: `op:dismiss:${encodeURIComponent(ev.agent)}` }],
|
|
294
286
|
],
|
|
295
287
|
},
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proactive quota threshold-tier push (#E4).
|
|
3
|
+
*
|
|
4
|
+
* Background: JTBD `track-plan-quota-live` anti-pattern: "Quota visible
|
|
5
|
+
* only in a separate dashboard or a command. If the user has to go
|
|
6
|
+
* looking, they won't, and they'll hit the wall." The existing stack
|
|
7
|
+
* covers the wall (auto-fallback at 99.5%, credits-watch on fatal billing
|
|
8
|
+
* transitions) but fires zero proactive signal at 80% — the point where
|
|
9
|
+
* the user can still act by switching accounts. This module closes that gap.
|
|
10
|
+
*
|
|
11
|
+
* It is a pure decision layer. It reads the broker's cached quota state
|
|
12
|
+
* for all accounts, classifies health via the same `classifyHealth`
|
|
13
|
+
* three-state machine used by the /auth dashboard, compares against a
|
|
14
|
+
* persisted last-notified state, and tells the gateway whether to emit
|
|
15
|
+
* a Telegram message + what to say. The gateway wires the actual
|
|
16
|
+
* `bot.api.sendMessage` call (via `swallowingApiCall`) — same as
|
|
17
|
+
* `credits-watch.ts`.
|
|
18
|
+
*
|
|
19
|
+
* Edge-trigger discipline: only fires on health *transitions*
|
|
20
|
+
* (healthy → throttling and throttling → healthy). Does NOT fire on
|
|
21
|
+
* healthy → blocked or blocked → healthy — `credits-watch.ts` already
|
|
22
|
+
* covers those via the fatal-billing path. Steady-state throttling
|
|
23
|
+
* never re-notifies.
|
|
24
|
+
*
|
|
25
|
+
* Scope: per-account across the whole pool, not just the active one.
|
|
26
|
+
* The user's natural recovery action is switching to a healthy account,
|
|
27
|
+
* so they need visibility into non-active accounts too.
|
|
28
|
+
*
|
|
29
|
+
* Source data: broker `listState` + `probeQuota`. `listState` is a local
|
|
30
|
+
* IPC call (cheap). `probeQuota` is only called on state-change (when
|
|
31
|
+
* we're going to send a message anyway) to get fresh numbers for the
|
|
32
|
+
* notification body. On no-change polls, only `listState` is called.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
36
|
+
import { join } from "path";
|
|
37
|
+
import type { AccountSnapshot } from "./auth-snapshot-format.js";
|
|
38
|
+
import {
|
|
39
|
+
classifyHealth,
|
|
40
|
+
type AccountHealth,
|
|
41
|
+
THROTTLING_THRESHOLD_PCT,
|
|
42
|
+
bindingWindow,
|
|
43
|
+
formatRelative,
|
|
44
|
+
fmtPct,
|
|
45
|
+
} from "./auth-snapshot-format.js";
|
|
46
|
+
import type { QuotaUtilization } from "./quota-check.js";
|
|
47
|
+
|
|
48
|
+
const STATE_FILE = "quota-watch.json";
|
|
49
|
+
|
|
50
|
+
// ─── State types ──────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Health value remembered for an account as of its last notification.
 * Only the healthy ↔ throttling boundary is tracked here — blocked is
 * `credits-watch`'s domain. `null` stands for "never notified", which the
 * transition logic treats the same as healthy.
 */
export type QuotaWatchHealth = "healthy" | "throttling" | null;

/** Persisted record for a single account. */
export interface QuotaWatchAccountState {
  /** Health carried by the most recent notification; null if none was sent. */
  lastNotifiedHealth: QuotaWatchHealth;
  /** Wall-clock ms of the most recent notification. */
  lastNotifiedAt: number;
}

/** Whole persisted state: one `QuotaWatchAccountState` per string key. */
export type QuotaWatchState = Record<string, QuotaWatchAccountState>;

/** Returns a new, empty state map. */
export function emptyQuotaWatchState(): QuotaWatchState {
  const blank: QuotaWatchState = {};
  return blank;
}

/** Returns a new per-account record meaning "never notified" (timestamp 0). */
export function emptyAccountState(): QuotaWatchAccountState {
  const fresh: QuotaWatchAccountState = {
    lastNotifiedHealth: null,
    lastNotifiedAt: 0,
  };
  return fresh;
}
|
|
75
|
+
|
|
76
|
+
// ─── Decision logic ───────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
export type QuotaWatchTransition =
|
|
79
|
+
| "entered-throttling"
|
|
80
|
+
| "recovered-to-healthy";
|
|
81
|
+
|
|
82
|
+
export type QuotaWatchDecision =
|
|
83
|
+
| {
|
|
84
|
+
kind: "notify";
|
|
85
|
+
accountLabel: string;
|
|
86
|
+
message: string;
|
|
87
|
+
newAccountState: QuotaWatchAccountState;
|
|
88
|
+
transition: QuotaWatchTransition;
|
|
89
|
+
}
|
|
90
|
+
| { kind: "skip"; accountLabel: string; reason: string };
|
|
91
|
+
|
|
92
|
+
/**
 * Decide whether one account's current health warrants a notification.
 *
 * The current health comes from `classifyHealth(snap)`; the previous health
 * is `prev.lastNotifiedHealth`, with `null` read as "healthy".
 *
 *   previous     current      result
 *   ----------   ----------   ------------------------------------------
 *   (any)        unknown      skip  ("unknown-not-our-domain")
 *   (any)        blocked      skip  ("blocked-not-our-domain")
 *   healthy      healthy      skip  ("steady-state")
 *   throttling   throttling   skip  ("steady-state")
 *   healthy      throttling   notify, transition "entered-throttling"
 *   throttling   healthy      notify, transition "recovered-to-healthy"
 *   anything else             skip  ("no-matching-transition")
 *
 * @param args.agentName forwarded to the message builders
 * @param args.snap      account snapshot to classify
 * @param args.prev      last persisted state for this account
 * @param args.now       timestamp stored as `lastNotifiedAt` on notify
 * @returns a `notify` decision carrying the message and the state to
 *          persist, or a `skip` decision carrying a reason string
 */
export function evaluateQuotaWatchAccount(args: {
  agentName: string;
  snap: AccountSnapshot;
  prev: QuotaWatchAccountState;
  now: number;
}): QuotaWatchDecision {
  const { agentName, snap, prev, now } = args;
  const accountLabel = snap.label;
  const skip = (reason: string): QuotaWatchDecision => ({
    kind: "skip",
    accountLabel,
    reason,
  });

  const observed = classifyHealth(snap);

  // Unknown and blocked are never reported from here.
  if (observed === "unknown" || observed === "blocked") {
    return skip(`${observed}-not-our-domain`);
  }

  // "Never notified" counts as healthy.
  const lastKnown: "healthy" | "throttling" = prev.lastNotifiedHealth ?? "healthy";
  if (observed === lastKnown) {
    return skip("steady-state");
  }

  const enteringThrottle = observed === "throttling" && lastKnown === "healthy";
  const recovering = observed === "healthy" && lastKnown === "throttling";
  if (!enteringThrottle && !recovering) {
    return skip("no-matching-transition");
  }

  // Exactly one of the two edges applies from here on.
  return {
    kind: "notify",
    accountLabel,
    message: enteringThrottle
      ? buildThrottlingMessage(agentName, snap)
      : buildRecoveryMessage(agentName, snap),
    newAccountState: {
      lastNotifiedHealth: enteringThrottle ? "throttling" : "healthy",
      lastNotifiedAt: now,
    },
    transition: enteringThrottle ? "entered-throttling" : "recovered-to-healthy",
  };
}
|
|
162
|
+
|
|
163
|
+
// ─── Message builders ─────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
/**
 * Build the HTML message body for a healthy → throttling notification.
 *
 * Layout: a headline with the account label, both window utilizations, the
 * binding window (whichever of the 5h / 7d utilizations is larger; a tie
 * resolves to 5h) with its reset time when known, one advice line that
 * depends on whether the account is the active one, and a two-line footer.
 *
 * The returned string is HTML markup: every literal `<` / `>` that is not
 * part of a real tag must be written as an entity. The `<other-account>`
 * placeholder is therefore emitted as `&lt;other-account&gt;` so it shows as
 * text instead of being parsed as an unsupported tag.
 *
 * @param agentName not used in the body; kept so the signature matches
 *                  `buildRecoveryMessage`
 * @param snap      snapshot of the account that crossed the threshold
 * @returns the message text, trimmed, with runs of 3+ newlines collapsed
 */
function buildThrottlingMessage(agentName: string, snap: AccountSnapshot): string {
  // The caller only gets here after classifyHealth(snap) returned
  // "throttling", which presumably implies quota data is present.
  const q = snap.quota!;
  const fiveStr = fmtPct(q.fiveHourUtilizationPct);
  const sevenStr = fmtPct(q.sevenDayUtilizationPct);

  // Binding window = the one with the higher utilization (5h wins ties).
  const max = Math.max(q.fiveHourUtilizationPct, q.sevenDayUtilizationPct);
  const win = max === q.fiveHourUtilizationPct ? "5h" : "7d";
  const winLabel = win === "5h" ? "5-hour" : "7-day";
  const resetAt = win === "5h" ? q.fiveHourResetAt : q.sevenDayResetAt;
  const resetStr = resetAt
    ? ` · refills in ${formatRelative(resetAt, new Date())}`
    : "";

  // Exactly one of the two notes is non-empty, depending on snap.isActive.
  const activeNote = snap.isActive
    ? ""
    : `\nThis is a non-active account. Consider <code>/auth use ${escapeHtml(snap.label)}</code> to switch, or keep it as a fallback reserve.`;

  const altNote = snap.isActive
    ? `\nConsider <code>/auth use &lt;other-account&gt;</code> if you have a healthier account, or wait for the ${winLabel} window to refill${resetStr}.`
    : "";

  return [
    `🟡 <b>Quota approaching limit</b> — <code>${escapeHtml(snap.label)}</code>`,
    ``,
    `${fiveStr} of 5h · ${sevenStr} of 7d`,
    `Binding window: ${winLabel}${resetStr}`,
    `${activeNote}${altNote}`,
    ``,
    `<i>Threshold: ${THROTTLING_THRESHOLD_PCT}% on either window. Source: broker quota cache.</i>`,
    `<i>Run /auth for full fleet status or /usage for the active account.</i>`,
  ]
    .join("\n")
    // The note starts with "\n" and is followed by an empty entry; collapse
    // the resulting blank-line runs to a single blank line.
    .replace(/\n\n\n+/g, "\n\n")
    .trim();
}
|
|
199
|
+
|
|
200
|
+
/**
 * Build the HTML-formatted "quota back in healthy range" notification for
 * one account.
 *
 * @param agentName - Part of the builder signature; not referenced in the body.
 * @param snap - Account snapshot. When `snap.quota` is missing, the
 *   utilization line is replaced by a "data unavailable" notice.
 * @returns Header, utilization line and threshold footer, each separated by
 *   one blank line.
 */
function buildRecoveryMessage(agentName: string, snap: AccountSnapshot): string {
  const quota = snap.quota;

  let currentLine = "Current quota data unavailable.";
  if (quota) {
    const fiveHour = fmtPct(quota.fiveHourUtilizationPct);
    const sevenDay = fmtPct(quota.sevenDayUtilizationPct);
    currentLine = `Current: ${fiveHour} of 5h · ${sevenDay} of 7d`;
  }

  const header = `🟢 <b>Quota back in healthy range</b> — <code>${escapeHtml(snap.label)}</code>`;
  const footer = `<i>Below ${THROTTLING_THRESHOLD_PCT}% on both windows.</i>`;

  return `${header}\n\n${currentLine}\n\n${footer}`;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Escape a string for safe interpolation into HTML-formatted message text.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity forms so that
 * caller-supplied text (such as an account label) cannot be read as markup.
 *
 * @param s - Raw text to embed.
 * @returns The text with the five HTML-significant characters escaped.
 */
function escapeHtml(s: string): string {
  return s
    // Ampersand goes first; otherwise the "&" introduced by the later
    // replacements would itself be escaped a second time.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
223
|
+
|
|
224
|
+
// ─── State persistence ────────────────────────────────────────────────────────
|
|
225
|
+
|
|
226
|
+
/**
 * Read the persisted quota-watch state from `stateDir`.
 *
 * Loading is best-effort: a missing file, unreadable file, invalid JSON, or
 * a top-level value that is not a plain object all yield an empty state.
 * Individual entries that fail validation are dropped; the remaining valid
 * entries are still returned.
 *
 * @param stateDir - Directory that holds the state file.
 * @returns A map from the file's keys to their validated entries.
 */
export function loadQuotaWatchState(stateDir: string): QuotaWatchState {
  const statePath = join(stateDir, STATE_FILE);
  if (!existsSync(statePath)) return emptyQuotaWatchState();

  // An entry is kept only if it is a non-array object whose
  // lastNotifiedHealth is null / "healthy" / "throttling" and whose
  // lastNotifiedAt is a finite number.
  const isValidEntry = (candidate: unknown): boolean => {
    if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
      return false;
    }
    const entry = candidate as Record<string, unknown>;
    const health = entry.lastNotifiedHealth;
    if (health !== null && health !== "healthy" && health !== "throttling") {
      return false;
    }
    const notifiedAt = entry.lastNotifiedAt;
    return typeof notifiedAt === "number" && Number.isFinite(notifiedAt);
  };

  try {
    const decoded = JSON.parse(readFileSync(statePath, "utf-8"));
    if (!decoded || typeof decoded !== "object" || Array.isArray(decoded)) {
      return emptyQuotaWatchState();
    }

    const restored: QuotaWatchState = {};
    for (const [label, entry] of Object.entries(decoded)) {
      if (isValidEntry(entry)) {
        restored[label] = entry as QuotaWatchAccountState;
      }
    }
    return restored;
  } catch {
    // Read or parse failure: fall back to an empty state rather than throwing.
    return emptyQuotaWatchState();
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Persist the quota-watch state under `stateDir`, creating the directory
 * (and any missing parents) first.
 *
 * The state is written as 2-space-indented JSON with a trailing newline,
 * and the file is requested with mode `0o600`.
 *
 * @param stateDir - Directory that holds the state file.
 * @param state - Full state map to serialize.
 */
export function saveQuotaWatchState(stateDir: string, state: QuotaWatchState): void {
  mkdirSync(stateDir, { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(join(stateDir, STATE_FILE), serialized, { mode: 0o600 });
}
|
|
264
|
+
|
|
265
|
+
/**
 * Return a copy of `current` in which `accountLabel` maps to `accountState`.
 *
 * The input map is not mutated; entries for other labels are carried over
 * unchanged, and an existing entry for `accountLabel` is replaced. Intended
 * for use after an `evaluateQuotaWatchAccount` call that returned
 * `kind: "notify"`, to produce the map that should be persisted.
 *
 * @param current - The state map to start from.
 * @param accountLabel - Key of the account being updated.
 * @param accountState - New state to store under that key.
 * @returns A new state map containing the update.
 */
export function patchQuotaWatchState(
  current: QuotaWatchState,
  accountLabel: string,
  accountState: QuotaWatchAccountState,
): QuotaWatchState {
  const update = { [accountLabel]: accountState };
  return { ...current, ...update };
}
|