switchroom 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/cli/drive-write-pretool.mjs +5418 -0
- package/dist/cli/switchroom.js +201 -24
- package/package.json +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
- package/telegram-plugin/admin-commands/index.ts +2 -0
- package/telegram-plugin/auth-snapshot-format.ts +612 -0
- package/telegram-plugin/auto-fallback-fleet.ts +215 -0
- package/telegram-plugin/auto-fallback.ts +28 -301
- package/telegram-plugin/dist/gateway/gateway.js +4407 -2252
- package/telegram-plugin/fleet-fallback-gate.ts +105 -0
- package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
- package/telegram-plugin/gateway/approval-callback.ts +31 -3
- package/telegram-plugin/gateway/auth-command.ts +121 -10
- package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
- package/telegram-plugin/gateway/boot-card.ts +1 -1
- package/telegram-plugin/gateway/boot-probes.ts +6 -9
- package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
- package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
- package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
- package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
- package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
- package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
- package/telegram-plugin/gateway/gateway.ts +876 -173
- package/telegram-plugin/gateway/hostd-dispatch.ts +127 -0
- package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
- package/telegram-plugin/gateway/ipc-server.ts +69 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
- package/telegram-plugin/model-unavailable.ts +28 -12
- package/telegram-plugin/silence-poke.ts +153 -1
- package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
- package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
- package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
- package/telegram-plugin/tests/boot-probes.test.ts +16 -18
- package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
- package/telegram-plugin/tests/silence-poke.test.ts +237 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
- package/telegram-plugin/turn-flush-safety.ts +55 -1
- package/telegram-plugin/uat/SETUP.md +16 -12
- package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
- package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
- package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
|
@@ -52,6 +52,7 @@ import { OutboundDedupCache } from '../recent-outbound-dedup.js'
|
|
|
52
52
|
import { createInboundCoalescer, inboundCoalesceKey } from './inbound-coalesce.js'
|
|
53
53
|
import { StatusReactionController } from '../status-reactions.js'
|
|
54
54
|
import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
|
|
55
|
+
import { toolLabel } from '../tool-labels.js'
|
|
55
56
|
import { createTypingWrapper } from '../typing-wrap.js'
|
|
56
57
|
import { type DraftStreamHandle } from '../draft-stream.js'
|
|
57
58
|
import { handlePtyPartialPure, type PtyHandlerState } from '../pty-partial-handler.js'
|
|
@@ -94,6 +95,8 @@ import {
|
|
|
94
95
|
import type { AuthBrokerClient } from './auth-command.js'
|
|
95
96
|
import type { ListStateData } from './auth-line.js'
|
|
96
97
|
import { getAuthBrokerClient, addAccountViaBroker } from './auth-broker-client.js'
|
|
98
|
+
import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
|
|
99
|
+
import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
|
|
97
100
|
import {
|
|
98
101
|
pendingAuthAddFlows,
|
|
99
102
|
startAccountAuthSession,
|
|
@@ -124,6 +127,8 @@ import {
|
|
|
124
127
|
formatModelUnavailableCard,
|
|
125
128
|
resolveModelUnavailableFromOperatorEvent,
|
|
126
129
|
} from '../model-unavailable.js'
|
|
130
|
+
import { runFleetAutoFallback } from '../auto-fallback-fleet.js'
|
|
131
|
+
import { fetchAccountQuota } from '../quota-check.js'
|
|
127
132
|
import { startRestartWatchdog } from './restart-watchdog.js'
|
|
128
133
|
import { validateStringArray } from './access-validator.js'
|
|
129
134
|
|
|
@@ -160,6 +165,11 @@ import {
|
|
|
160
165
|
TELEGRAM_SWITCHROOM_COMMANDS,
|
|
161
166
|
type AgentMetadata, type AuthSummary, type StatusProbeRow,
|
|
162
167
|
} from '../welcome-text.js'
|
|
168
|
+
import {
|
|
169
|
+
type BrokerStateView,
|
|
170
|
+
type ClaudeJsonView,
|
|
171
|
+
buildAuthSummaryFromBroker,
|
|
172
|
+
} from './auth-status-adapter.js'
|
|
163
173
|
import {
|
|
164
174
|
isContextExhaustionText,
|
|
165
175
|
shouldArmOrphanedReplyTimeout,
|
|
@@ -186,39 +196,52 @@ import {
|
|
|
186
196
|
import { sweepActiveReactions } from '../active-reactions-sweep.js'
|
|
187
197
|
import { flushOnAgentDisconnect } from './disconnect-flush.js'
|
|
188
198
|
import { PreambleSuppressor } from './preamble-suppressor.js'
|
|
199
|
+
import {
|
|
200
|
+
fetchFolderPage,
|
|
201
|
+
FolderListCache,
|
|
202
|
+
} from '../../src/drive/folder-list.js'
|
|
203
|
+
import { loadFromAuthBroker } from '../../src/drive/wrapper-broker.js'
|
|
204
|
+
import {
|
|
205
|
+
handleFoldersCommand,
|
|
206
|
+
handleFolderPickerCallback,
|
|
207
|
+
type FolderPickerHandlerDeps,
|
|
208
|
+
} from './folder-picker-handler.js'
|
|
209
|
+
import {
|
|
210
|
+
approvalConsume as kernelApprovalConsume,
|
|
211
|
+
approvalRecord as kernelApprovalRecord,
|
|
212
|
+
approvalRequest as kernelApprovalRequest,
|
|
213
|
+
} from '../../src/vault/approvals/client.js'
|
|
189
214
|
import {
|
|
190
215
|
fetchQuota,
|
|
191
216
|
formatQuotaBlock,
|
|
192
217
|
} from '../quota-check.js'
|
|
193
218
|
import {
|
|
194
|
-
evaluateFallbackTrigger,
|
|
195
|
-
performAutoFallback,
|
|
196
|
-
emptyLockout,
|
|
197
219
|
loadLockout,
|
|
198
|
-
nextLockout,
|
|
199
|
-
saveLockout,
|
|
200
220
|
DEFAULT_FALLBACK_COOLDOWN_MS,
|
|
201
|
-
type LockoutRecord,
|
|
202
221
|
type LockoutPersistOps,
|
|
203
222
|
} from '../auto-fallback.js'
|
|
204
|
-
import {
|
|
205
|
-
import {
|
|
223
|
+
import { DEFAULT_SLOT } from '../../src/auth/accounts.js'
|
|
224
|
+
import { currentActiveSlot, type AuthCodeOutcome } from '../../src/auth/manager.js'
|
|
206
225
|
import { injectSlashCommand as injectSlashCommandImpl } from '../../src/agents/inject.js'
|
|
207
226
|
import { handleInjectCommand } from './inject-handler.js'
|
|
208
227
|
import { type BannerState } from '../slot-banner.js'
|
|
209
228
|
import { refreshBanner } from '../slot-banner-driver.js'
|
|
210
|
-
import { dispatchFallbackNotification } from '../auto-fallback-dispatcher.js'
|
|
211
229
|
import { loadConfig as loadSwitchroomConfig } from '../../src/config/loader.js'; import { resolveAgentConfig } from '../../src/config/merge.js'
|
|
212
230
|
import {
|
|
213
231
|
tryHostdDispatch,
|
|
214
232
|
hostdRequestId,
|
|
215
233
|
hostdWillBeUsed,
|
|
234
|
+
pollHostdStatus,
|
|
235
|
+
warnLegacySpawnIfHostdDisabled,
|
|
216
236
|
_resetHostdEnabledCache,
|
|
217
237
|
} from './hostd-dispatch.js'
|
|
238
|
+
import type { HostdRequest } from '../../src/host-control/protocol.js'
|
|
218
239
|
import type { AgentAudit } from '../welcome-text.js'
|
|
219
240
|
import { shouldSweepChatAtBoot } from './boot-sweep-filter.js'
|
|
220
241
|
|
|
221
242
|
import { createIpcServer, type IpcClient, type IpcServer } from './ipc-server.js'
|
|
243
|
+
import { handleRequestDriveApproval } from './drive-write-approval.js'
|
|
244
|
+
import { buildDiffPreviewCard } from './diff-preview-card.js'
|
|
222
245
|
import { createPendingInboundBuffer } from './pending-inbound-buffer.js'
|
|
223
246
|
import {
|
|
224
247
|
buildVaultGrantApprovedInbound,
|
|
@@ -1086,6 +1109,14 @@ type CurrentTurn = {
|
|
|
1086
1109
|
gatewayReceiveAt: number
|
|
1087
1110
|
replyCalled: boolean
|
|
1088
1111
|
capturedText: string[]
|
|
1112
|
+
// #1291: snapshot of capturedText.length at the moment of the most
|
|
1113
|
+
// recent reply / stream_reply tool call. Used by decideTurnFlush to
|
|
1114
|
+
// isolate the post-reply tail (e.g. a soft-commit reply followed by
|
|
1115
|
+
// the real substantive answer in terminal text only) and flush it as
|
|
1116
|
+
// a follow-up message. Pre-#1291 the existence of ANY reply call
|
|
1117
|
+
// suppressed flush entirely — that lost long terminal-only answers
|
|
1118
|
+
// after a "let me check" interim reply.
|
|
1119
|
+
capturedTextLenAtLastReply: number
|
|
1089
1120
|
orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
|
|
1090
1121
|
registryKey: string | null
|
|
1091
1122
|
// Last assistant outbound message id for the current turn — populated
|
|
@@ -1974,6 +2005,13 @@ const awaitingAuthCodeAt = new Map<string, number>()
|
|
|
1974
2005
|
const AUTH_CODE_CONTEXT_TTL_MS = 5 * 60_000 // 5 min — OAuth code lifetime
|
|
1975
2006
|
const DEFERRED_SECRET_TTL_MS = 24 * 60 * 60_000 // 24 h — ignored one-tap cards
|
|
1976
2007
|
|
|
2008
|
+
// Freshness throttle for `auth:refresh` taps. Keyed by `<chat_id>:<message_id>`
|
|
2009
|
+
// so two different snapshot messages throttle independently. Each refresh
|
|
2010
|
+
// fan-fires N live api.anthropic.com probes (one per account), so we cap
|
|
2011
|
+
// rapid re-taps to one per AUTH_REFRESH_THROTTLE_MS.
|
|
2012
|
+
const lastAuthRefreshAtMs = new Map<string, number>()
|
|
2013
|
+
const AUTH_REFRESH_THROTTLE_MS = 5_000
|
|
2014
|
+
|
|
1977
2015
|
// ─── TTL reaper ───────────────────────────────────────────────────────────
|
|
1978
2016
|
// Pending state maps above all grow whenever a flow starts and only shrink
|
|
1979
2017
|
// when the flow completes. Users abandoning a flow (closing Telegram, losing
|
|
@@ -2037,6 +2075,12 @@ const pendingStateReaper = setInterval(() => {
|
|
|
2037
2075
|
for (const [k, v] of awaitingAuthCodeAt) {
|
|
2038
2076
|
if (now - v > AUTH_CODE_CONTEXT_TTL_MS) awaitingAuthCodeAt.delete(k)
|
|
2039
2077
|
}
|
|
2078
|
+
// Auth-refresh throttle entries decay quickly (5s window); sweep
|
|
2079
|
+
// anything older than 60s so abandoned snapshot messages don't pin
|
|
2080
|
+
// their key forever.
|
|
2081
|
+
for (const [k, v] of lastAuthRefreshAtMs) {
|
|
2082
|
+
if (now - v > 60_000) lastAuthRefreshAtMs.delete(k)
|
|
2083
|
+
}
|
|
2040
2084
|
// /auth rm two-step confirm window — self-expires at `expiresAt`.
|
|
2041
2085
|
for (const [k, v] of pendingAuthRmFlows) {
|
|
2042
2086
|
if (now >= v.expiresAt) pendingAuthRmFlows.delete(k)
|
|
@@ -2241,11 +2285,33 @@ function emitGatewayOperatorEvent(event: OperatorEvent): void {
|
|
|
2241
2285
|
let renderedText: string
|
|
2242
2286
|
let renderedKeyboard: ReturnType<typeof renderOperatorEvent>['keyboard'] | undefined
|
|
2243
2287
|
if (modelUnavailable) {
|
|
2288
|
+
// Two questions, asked synchronously to avoid the "card promises
|
|
2289
|
+
// an announcement that never arrives" trap:
|
|
2290
|
+
// 1. Is this a kind that AUTO-fallback can address?
|
|
2291
|
+
// 2. Will the dispatcher actually fire (vs. dedup-drop)?
|
|
2292
|
+
// Card text branches on the AND. wouldFireFleetAutoFallback is a
|
|
2293
|
+
// pure read of the dedup state; calling fireFleetAutoFallback only
|
|
2294
|
+
// when both are true keeps the card honest.
|
|
2295
|
+
const isAutoKind =
|
|
2296
|
+
modelUnavailable.kind === 'quota_exhausted' || modelUnavailable.kind === 'overload'
|
|
2297
|
+
const willActuallyFire = isAutoKind && wouldFireFleetAutoFallback()
|
|
2244
2298
|
process.stderr.write(
|
|
2245
|
-
`telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind}\n`,
|
|
2299
|
+
`telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind} autoKind=${isAutoKind} willFire=${willActuallyFire}\n`,
|
|
2246
2300
|
)
|
|
2247
|
-
renderedText = formatModelUnavailableCard(modelUnavailable, agent
|
|
2301
|
+
renderedText = formatModelUnavailableCard(modelUnavailable, agent, {
|
|
2302
|
+
autoFallbackInFlight: willActuallyFire,
|
|
2303
|
+
})
|
|
2248
2304
|
renderedKeyboard = undefined
|
|
2305
|
+
// Trigger fleet-wide auto-fallback. Pre-fix this branch only
|
|
2306
|
+
// rendered the card; the fallback machinery was unreachable from
|
|
2307
|
+
// here. We fire-and-forget so card delivery is never blocked on
|
|
2308
|
+
// broker / API latency. The fallback's own announcement is sent
|
|
2309
|
+
// separately with the causal-shape headline ("5-hour limit on
|
|
2310
|
+
// ken" instead of generic "quota exhausted") — see
|
|
2311
|
+
// auth-snapshot-format.ts → renderFallbackAnnouncement.
|
|
2312
|
+
if (willActuallyFire) {
|
|
2313
|
+
void fireFleetAutoFallback(agent)
|
|
2314
|
+
}
|
|
2249
2315
|
} else {
|
|
2250
2316
|
try {
|
|
2251
2317
|
const r = renderOperatorEvent(event)
|
|
@@ -2513,6 +2579,7 @@ silencePoke.startTimer({
|
|
|
2513
2579
|
const text = silencePoke.formatFrameworkFallbackText(
|
|
2514
2580
|
ctx.fallbackKind,
|
|
2515
2581
|
ctx.silenceMs,
|
|
2582
|
+
ctx.inFlightTools,
|
|
2516
2583
|
)
|
|
2517
2584
|
try {
|
|
2518
2585
|
await robustApiCall(
|
|
@@ -2820,9 +2887,46 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2820
2887
|
const key = statusKey(currentTurn.sessionChatId, currentTurn.sessionThreadId)
|
|
2821
2888
|
if (ev.kind === 'thinking') {
|
|
2822
2889
|
silencePoke.noteThinking(key, Date.now())
|
|
2823
|
-
} else if (ev.kind === 'tool_use'
|
|
2824
|
-
|
|
2825
|
-
|
|
2890
|
+
} else if (ev.kind === 'tool_use') {
|
|
2891
|
+
if (ev.toolName === 'Task' || ev.toolName === 'Agent') {
|
|
2892
|
+
// Built-in claude sub-agent dispatch — extends soft threshold to 5min.
|
|
2893
|
+
silencePoke.noteSubagentDispatch(key)
|
|
2894
|
+
}
|
|
2895
|
+
// #1292: track in-flight tool calls so the 300s framework
|
|
2896
|
+
// fallback message can name the actual observable (e.g.
|
|
2897
|
+
// "running Grep \"foo\" for 4m") instead of the dishonest
|
|
2898
|
+
// generic "still working… no update in 5 min" when the agent
|
|
2899
|
+
// is clearly busy on tool calls. Telegram-surface tools are
|
|
2900
|
+
// excluded — their job IS the outbound message, the silence
|
|
2901
|
+
// clock resets via noteOutbound when they fire. Sub-agent
|
|
2902
|
+
// tool_use events (kind='sub_agent_tool_use') intentionally
|
|
2903
|
+
// NOT tracked: the parent's Task tool_use is already on the
|
|
2904
|
+
// map and represents the user-observable wait.
|
|
2905
|
+
if (
|
|
2906
|
+
ev.toolUseId != null
|
|
2907
|
+
&& ev.toolUseId.length > 0
|
|
2908
|
+
&& !isTelegramSurfaceTool(ev.toolName)
|
|
2909
|
+
) {
|
|
2910
|
+
const label = toolLabel(
|
|
2911
|
+
ev.toolName,
|
|
2912
|
+
ev.input,
|
|
2913
|
+
/*preamble*/ undefined,
|
|
2914
|
+
ev.precomputedLabel,
|
|
2915
|
+
)
|
|
2916
|
+
silencePoke.noteToolStart(
|
|
2917
|
+
key,
|
|
2918
|
+
ev.toolUseId,
|
|
2919
|
+
ev.toolName,
|
|
2920
|
+
label.length > 0 ? label : null,
|
|
2921
|
+
Date.now(),
|
|
2922
|
+
)
|
|
2923
|
+
}
|
|
2924
|
+
} else if (ev.kind === 'tool_result') {
|
|
2925
|
+
// #1292: drain the in-flight entry. Idempotent on unknown ids
|
|
2926
|
+
// (covers Telegram-surface tools we skipped at start time).
|
|
2927
|
+
if (ev.toolUseId != null && ev.toolUseId.length > 0) {
|
|
2928
|
+
silencePoke.noteToolEnd(key, ev.toolUseId, Date.now())
|
|
2929
|
+
}
|
|
2826
2930
|
}
|
|
2827
2931
|
}
|
|
2828
2932
|
},
|
|
@@ -2959,6 +3063,69 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2959
3063
|
* Logs every fire so an operator can correlate the agent's
|
|
2960
3064
|
* transcript turn against the scheduler's audit row by `prompt_key`.
|
|
2961
3065
|
*/
|
|
3066
|
+
// NOTE(review): the JSDoc block that closes directly above this method talks
// about correlating fires by `prompt_key`, which matches onInjectInbound (the
// handler that follows this one) rather than this method — it looks like this
// handler was inserted between that comment and its target. Confirm and move.
//
// RFC E §4.2 Cut 2 — the Drive-write PreToolUse hook asks the gateway to
// post a diff-preview card so the user can decide. This method only wires
// the gateway-side dependencies into handleRequestDriveApproval.
async onRequestDriveApproval(client: IpcClient, msg) {
  await handleRequestDriveApproval(client, msg, {
    agentName: getMyAgentName(),
    loadAllowFrom: () => loadAccess().allowFrom,

    // Target chat = first entry of the allow-list. For DM-paired setups
    // the target chat IS the operator's user id. For group setups the
    // gateway already has a topic routing surface (see how /folders
    // posts) — this picks the DM path, which is the common case; a
    // group-routing follow-up can extend this.
    loadTargetChat: () => {
      const operator = loadAccess().allowFrom[0]
      return operator === undefined ? null : { chatId: operator }
    },

    // Register the approval with the vault approvals client. Only the
    // listed fields are forwarded. A null response or a rate-limited
    // state is reported to the caller as null.
    registerApproval: async (args) => {
      const { agent_unit, scope, action, approver_set, why, ttl_ms } = args
      const registered = await kernelApprovalRequest({
        agent_unit,
        scope,
        action,
        approver_set,
        why,
        ttl_ms,
      })
      if (registered === null || registered.state === 'rate_limited') return null
      return {
        request_id: registered.request_id,
        expires_at_ms: registered.expires_at,
      }
    },

    // Send the card as an HTML message. Thread routing is attached only
    // when a thread id was supplied. Any send failure is logged to
    // stderr and surfaced to the caller as null instead of throwing.
    postCard: async (args) => {
      const { chatId, text, threadId, replyMarkup } = args
      const sendThreadOpts = threadId !== undefined ? { message_thread_id: threadId } : {}
      const callThreadMeta = threadId !== undefined ? { threadId } : {}
      try {
        const sent = await robustApiCall(
          () =>
            bot.api.sendMessage(chatId, text, {
              parse_mode: 'HTML',
              ...sendThreadOpts,
              reply_markup: replyMarkup as never,
            }),
          {
            chat_id: String(chatId),
            verb: 'drive-approval-card',
            ...callThreadMeta,
          },
        )
        return { messageId: (sent as { message_id: number }).message_id }
      } catch (err) {
        process.stderr.write(
          `telegram gateway: drive-approval postCard failed: ${(err as Error).message}\n`,
        )
        return null
      }
    },

    // Forward only the two fields the card builder takes.
    buildCard: (cardArgs) =>
      buildDiffPreviewCard({
        preview: cardArgs.preview,
        suggestRequestId: cardArgs.suggestRequestId,
      }),

    log: (m) => process.stderr.write(`telegram gateway: drive-approval — ${m}\n`),
  })
},
|
|
3128
|
+
|
|
2962
3129
|
onInjectInbound(_client: IpcClient, msg: InjectInboundMessage) {
|
|
2963
3130
|
const promptKey = typeof msg.inbound.meta?.prompt_key === 'string'
|
|
2964
3131
|
? msg.inbound.meta.prompt_key
|
|
@@ -4638,6 +4805,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4638
4805
|
gatewayReceiveAt: startedAt,
|
|
4639
4806
|
replyCalled: false,
|
|
4640
4807
|
capturedText: [],
|
|
4808
|
+
capturedTextLenAtLastReply: 0,
|
|
4641
4809
|
orphanedReplyTimeoutId: null,
|
|
4642
4810
|
registryKey: null,
|
|
4643
4811
|
lastAssistantMsgId: null,
|
|
@@ -4734,6 +4902,12 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4734
4902
|
// placeholder-heartbeat label, which has been retired.
|
|
4735
4903
|
if (isTelegramReplyTool(name)) {
|
|
4736
4904
|
turn.replyCalled = true
|
|
4905
|
+
// #1291: pin the captured-text index at the moment of this reply
|
|
4906
|
+
// tool call. Anything pushed into capturedText after this point
|
|
4907
|
+
// is the post-reply tail (e.g. the substantive answer composed
|
|
4908
|
+
// in terminal text after a soft-commit "on it, back in a few").
|
|
4909
|
+
// decideTurnFlush slices from this index to flush the tail.
|
|
4910
|
+
turn.capturedTextLenAtLastReply = turn.capturedText.length
|
|
4737
4911
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
4738
4912
|
clearTimeout(turn.orphanedReplyTimeoutId)
|
|
4739
4913
|
turn.orphanedReplyTimeoutId = null
|
|
@@ -4993,8 +5167,20 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4993
5167
|
chatId: turn.sessionChatId,
|
|
4994
5168
|
replyCalled: turn.replyCalled,
|
|
4995
5169
|
capturedText: turn.capturedText,
|
|
5170
|
+
capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
|
|
4996
5171
|
flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
|
|
4997
5172
|
})
|
|
5173
|
+
// #1291: when the model emitted a soft-commit reply followed by a
|
|
5174
|
+
// substantive terminal-only answer, decideTurnFlush returns
|
|
5175
|
+
// kind:'flush' with the post-reply tail. Log WARN so this case is
|
|
5176
|
+
// auditable — the model SHOULD have called reply for the tail, but
|
|
5177
|
+
// didn't, and the framework is covering for it.
|
|
5178
|
+
if (flushDecision.kind === 'flush' && turn.replyCalled) {
|
|
5179
|
+
process.stderr.write(
|
|
5180
|
+
`telegram gateway: WARN post-reply-tail flush (#1291) — model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool` +
|
|
5181
|
+
` chat=${chatId} turnStartedAt=${turn.startedAt}\n`,
|
|
5182
|
+
)
|
|
5183
|
+
}
|
|
4998
5184
|
if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
|
|
4999
5185
|
process.stderr.write(
|
|
5000
5186
|
`telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
|
|
@@ -5144,6 +5330,21 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5144
5330
|
// backup; reset the preamble buffer (its content is already in
|
|
5145
5331
|
// the captured `capturedText`, which turn-flush is about to send).
|
|
5146
5332
|
preambleSuppressor.dropNow()
|
|
5333
|
+
// #1289 fix — drain silence-poke + signal-tracker state for this
|
|
5334
|
+
// turn. The three sibling turn_end exit branches (context-exhaust
|
|
5335
|
+
// at ~5098, silent-marker at ~5097-5098, default reply-called tail
|
|
5336
|
+
// at ~5348-5349) all call signalTracker.clear + silencePoke.endTurn.
|
|
5337
|
+
// The flush-backstop branch was retrofitted in #1067 to null
|
|
5338
|
+
// currentTurn early but never had this cleanup added — leaving the
|
|
5339
|
+
// silence-poke state in the Map, so 300s after the original turn
|
|
5340
|
+
// start the framework fallback fires and the user sees
|
|
5341
|
+
// "still working… (no update from agent in 5 min)" on a turn the
|
|
5342
|
+
// gateway already considers over.
|
|
5343
|
+
{
|
|
5344
|
+
const tKey = statusKey(chatId, threadId)
|
|
5345
|
+
signalTracker.clear(tKey)
|
|
5346
|
+
silencePoke.endTurn(tKey)
|
|
5347
|
+
}
|
|
5147
5348
|
|
|
5148
5349
|
void (async () => {
|
|
5149
5350
|
await new Promise<void>(resolve => setTimeout(resolve, 500))
|
|
@@ -7388,6 +7589,75 @@ async function executeVaultOp(ctx: Context, chatId: string, op: 'list' | 'get' |
|
|
|
7388
7589
|
}
|
|
7389
7590
|
}
|
|
7390
7591
|
|
|
7592
|
+
/**
 * Dispatch a short-running verb (agent_start, agent_stop, cross-agent
 * agent_restart) through hostd when available, else fall back to the
 * legacy in-container CLI shell-out.
 *
 * Why: on docker-mode hosts the agent container has no docker binary,
 * so the legacy `runSwitchroomCommand` path silently exits 127 for any
 * verb that touches compose (RFC C §1, #926). Hostd runs on the host
 * with the docker socket mounted, so the verb actually works.
 *
 * Result handling:
 *   - `not-configured` → fall back to {@link runSwitchroomCommand}.
 *     (Operator opted out; let the legacy path's existing error
 *     surfacing handle the exit-127 case.)
 *   - `completed` → reply with the stdout tail (mirrors the legacy
 *     path's formatted-output reply).
 *   - `started` → reply with a brief "🔄 dispatched" ack. Verbs that
 *     return `started` (agent_restart) finish asynchronously on the
 *     daemon; the audit log is the canonical record.
 *   - `error` / `denied` → reply with the error tail inline. No
 *     fallback (RFC §7 hard-fail contract — operator opted in).
 *
 * @param ctx        Telegram context; every reply is sent through it.
 * @param req        hostd request to dispatch (`req.op` is also used for
 *                   the legacy-spawn warning).
 * @param label      Human-readable verb label shown in the replies.
 * @param legacyArgs CLI argv handed to `runSwitchroomCommand` when hostd
 *                   reports `not-configured`.
 * @returns Resolves once the reply (or the legacy command) has been awaited.
 */
async function dispatchShortVerbViaHostd(
  ctx: Context,
  req: HostdRequest,
  label: string,
  legacyArgs: string[],
): Promise<void> {
  const hostdResp = await tryHostdDispatch(getMyAgentName(), req)

  if (hostdResp === 'not-configured') {
    warnLegacySpawnIfHostdDisabled(req.op)
    await runSwitchroomCommand(ctx, legacyArgs, label)
    return
  }

  if (hostdResp.result === 'completed') {
    // A blank stdout tail is treated as absent so the user still gets a
    // one-line confirmation carrying the exit code.
    const body = hostdResp.stdout_tail?.trim() || `${label}: done (exit ${hostdResp.exit_code})`
    const formatted = formatSwitchroomOutput(stripAnsi(body))
    if (formatted) {
      await switchroomReply(ctx, preBlock(formatted), { html: true })
    } else {
      await switchroomReply(ctx, `${label}: done (no output)`)
    }
    return
  }

  if (hostdResp.result === 'started') {
    await switchroomReply(
      ctx,
      `🔄 <b>${escapeHtmlForTg(label)}</b> dispatched via hostd ` +
        `(request_id=<code>${escapeHtmlForTg(hostdResp.request_id)}</code>). ` +
        `Check audit log for completion.`,
      { html: true },
    )
    return
  }

  // error / denied — surface inline. RFC §7 hard-fail: no spawn fallback.
  // Pick the first tail that actually has content. A plain `??` chain only
  // skips null/undefined, so an empty-string `error` (or a whitespace-only
  // stderr tail) would mask a populated later tail and render an empty
  // <pre> block; this mirrors the blank-is-absent rule of the `completed`
  // branch above.
  const errBody =
    [hostdResp.error, hostdResp.stderr_tail, hostdResp.stdout_tail].find(
      (tail) => tail != null && tail.trim().length > 0,
    ) ?? '(no error tail returned)'
  await switchroomReply(
    ctx,
    `❌ <b>${escapeHtmlForTg(label)} failed via hostd</b> ` +
      `(result=${escapeHtmlForTg(hostdResp.result)}):\n` +
      preBlock(stripAnsi(errBody)),
    { html: true },
  )
}
|
|
7660
|
+
|
|
7391
7661
|
async function runSwitchroomCommand(ctx: Context, args: string[], label: string): Promise<void> {
|
|
7392
7662
|
try {
|
|
7393
7663
|
const output = stripAnsi(switchroomExec(args))
|
|
@@ -7620,13 +7890,13 @@ function buildAgentAudit(agentName: string): AgentAudit | undefined {
|
|
|
7620
7890
|
}
|
|
7621
7891
|
|
|
7622
7892
|
// Build an AgentMetadata snapshot for the current agent by shelling out
|
|
7623
|
-
// to `switchroom agent list --json` and `switchroom auth
|
|
7624
|
-
// TODO(rfc-h): the `auth status` verb was retired by RFC H. The shell
|
|
7625
|
-
// fails silently and `authSummary` lands as null — /status renders
|
|
7626
|
-
// without auth detail. Replace with an `auth show --json` adapter that
|
|
7627
|
-
// maps the new fleet-broker shape to the per-agent AuthSummary fields.
|
|
7893
|
+
// to `switchroom agent list --json` and `switchroom auth show --json`.
|
|
7628
7894
|
// Best-effort — any missing piece renders as a placeholder in the text
|
|
7629
|
-
// templates rather than blocking the reply.
|
|
7895
|
+
// templates rather than blocking the reply. RFC H retired the per-agent
|
|
7896
|
+
// `auth status --json` shape; auth state is now derived from the
|
|
7897
|
+
// broker's fleet-wide `ListStateData` payload via
|
|
7898
|
+
// `buildAuthSummaryFromBroker`, with billingType pulled from the
|
|
7899
|
+
// agent's `.claude.json` (the broker doesn't track plan tier).
|
|
7630
7900
|
async function buildAgentMetadata(agentName: string): Promise<AgentMetadata> {
|
|
7631
7901
|
type AgentListResp = {
|
|
7632
7902
|
agents: Array<{
|
|
@@ -7636,24 +7906,22 @@ async function buildAgentMetadata(agentName: string): Promise<AgentMetadata> {
|
|
|
7636
7906
|
model?: string | null;
|
|
7637
7907
|
}>
|
|
7638
7908
|
}
|
|
7639
|
-
type AuthStatusResp = {
|
|
7640
|
-
agents: Array<{
|
|
7641
|
-
name: string; authenticated: boolean; auth_source: string | null;
|
|
7642
|
-
subscription_type: string | null; expires_in: string | null;
|
|
7643
|
-
}>
|
|
7644
|
-
}
|
|
7645
7909
|
const list = switchroomExecJson<AgentListResp>(['agent', 'list'])
|
|
7646
|
-
const
|
|
7910
|
+
const brokerState = switchroomExecJson<BrokerStateView>(['auth', 'show'])
|
|
7647
7911
|
const a = list?.agents?.find(x => x.name === agentName) ?? null
|
|
7648
|
-
|
|
7649
|
-
|
|
7650
|
-
|
|
7651
|
-
|
|
7652
|
-
|
|
7653
|
-
|
|
7654
|
-
|
|
7655
|
-
|
|
7656
|
-
|
|
7912
|
+
let claudeJson: ClaudeJsonView | null = null
|
|
7913
|
+
try {
|
|
7914
|
+
const agentDir = resolveAgentDirFromEnv()
|
|
7915
|
+
if (agentDir) {
|
|
7916
|
+
const raw = readFileSync(join(agentDir, '.claude', '.claude.json'), 'utf8')
|
|
7917
|
+
claudeJson = JSON.parse(raw) as ClaudeJsonView
|
|
7918
|
+
}
|
|
7919
|
+
} catch { /* leave null — billingType becomes null in the summary */ }
|
|
7920
|
+
const authSummary: AuthSummary | null = buildAuthSummaryFromBroker(
|
|
7921
|
+
brokerState,
|
|
7922
|
+
agentName,
|
|
7923
|
+
claudeJson,
|
|
7924
|
+
)
|
|
7657
7925
|
return {
|
|
7658
7926
|
agentName,
|
|
7659
7927
|
model: a?.model ?? null,
|
|
@@ -7798,14 +8066,24 @@ bot.command('agentstart', async ctx => {
|
|
|
7798
8066
|
if (!isAuthorizedSender(ctx)) return
|
|
7799
8067
|
const name = ctx.match?.trim() || getMyAgentName()
|
|
7800
8068
|
try { assertSafeAgentName(name) } catch { await switchroomReply(ctx, 'Invalid agent name.'); return }
|
|
7801
|
-
await
|
|
8069
|
+
await dispatchShortVerbViaHostd(
|
|
8070
|
+
ctx,
|
|
8071
|
+
{ v: 1, op: 'agent_start', request_id: hostdRequestId('gw-start'), args: { name } },
|
|
8072
|
+
`start ${name}`,
|
|
8073
|
+
['agent', 'start', name],
|
|
8074
|
+
)
|
|
7802
8075
|
})
|
|
7803
8076
|
|
|
7804
8077
|
bot.command('stop', async ctx => {
|
|
7805
8078
|
if (!isAuthorizedSender(ctx)) return
|
|
7806
8079
|
const name = ctx.match?.trim() || getMyAgentName()
|
|
7807
8080
|
try { assertSafeAgentName(name) } catch { await switchroomReply(ctx, 'Invalid agent name.'); return }
|
|
7808
|
-
await
|
|
8081
|
+
await dispatchShortVerbViaHostd(
|
|
8082
|
+
ctx,
|
|
8083
|
+
{ v: 1, op: 'agent_stop', request_id: hostdRequestId('gw-stop'), args: { name } },
|
|
8084
|
+
`stop ${name}`,
|
|
8085
|
+
['agent', 'stop', name],
|
|
8086
|
+
)
|
|
7809
8087
|
})
|
|
7810
8088
|
|
|
7811
8089
|
bot.command('restart', async ctx => {
|
|
@@ -7852,6 +8130,7 @@ bot.command('restart', async ctx => {
|
|
|
7852
8130
|
args: { name, force: true, reason: 'user: /restart from chat' },
|
|
7853
8131
|
})
|
|
7854
8132
|
if (hostdResp === 'not-configured') {
|
|
8133
|
+
warnLegacySpawnIfHostdDisabled('agent_restart')
|
|
7855
8134
|
spawnSwitchroomDetached(
|
|
7856
8135
|
['agent', 'restart', name, '--force'],
|
|
7857
8136
|
notifyDetachedFailure(chatId, threadId ?? null, `restart ${name}`),
|
|
@@ -7874,7 +8153,22 @@ bot.command('restart', async ctx => {
|
|
|
7874
8153
|
)
|
|
7875
8154
|
return
|
|
7876
8155
|
}
|
|
7877
|
-
|
|
8156
|
+
// Cross-agent /restart <other>. Same hostd-first shape as self-target,
|
|
8157
|
+
// but no restart marker / no self-kill: another agent's container is
|
|
8158
|
+
// about to bounce, not ours. The daemon spawns the work and returns
|
|
8159
|
+
// "started" (per handleAgentRestart at server.ts:466), so the user
|
|
8160
|
+
// sees a brief dispatch ack and the audit log carries the outcome.
|
|
8161
|
+
await dispatchShortVerbViaHostd(
|
|
8162
|
+
ctx,
|
|
8163
|
+
{
|
|
8164
|
+
v: 1,
|
|
8165
|
+
op: 'agent_restart',
|
|
8166
|
+
request_id: hostdRequestId('gw-restart-cross'),
|
|
8167
|
+
args: { name, force: true, reason: `user: /restart ${name} from chat` },
|
|
8168
|
+
},
|
|
8169
|
+
`restart ${name}`,
|
|
8170
|
+
['agent', 'restart', name],
|
|
8171
|
+
)
|
|
7878
8172
|
})
|
|
7879
8173
|
|
|
7880
8174
|
// ─── /new and /reset ──────────────────────────────────────────────────────
|
|
@@ -7993,6 +8287,7 @@ async function handleNewOrResetCommand(ctx: Context, kind: 'new' | 'reset'): Pro
|
|
|
7993
8287
|
args: { name, force: true, reason: `user: /${kind} from chat` },
|
|
7994
8288
|
})
|
|
7995
8289
|
if (hostdResp === 'not-configured') {
|
|
8290
|
+
warnLegacySpawnIfHostdDisabled('agent_restart')
|
|
7996
8291
|
spawnSwitchroomDetached(
|
|
7997
8292
|
['agent', 'restart', name, '--force'],
|
|
7998
8293
|
notifyDetachedFailure(chatId, threadId ?? null, `${kind} ${name}`),
|
|
@@ -8156,23 +8451,83 @@ bot.command('update', async ctx => {
|
|
|
8156
8451
|
await sweepBeforeSelfRestart()
|
|
8157
8452
|
const skipImages = passthrough.includes('--skip-images')
|
|
8158
8453
|
const rebuild = passthrough.includes('--rebuild')
|
|
8454
|
+
const updateRequestId = hostdRequestId('gw-update')
|
|
8159
8455
|
const hostdResp = await tryHostdDispatch(getMyAgentName(), {
|
|
8160
8456
|
v: 1,
|
|
8161
8457
|
op: 'update_apply',
|
|
8162
|
-
request_id:
|
|
8458
|
+
request_id: updateRequestId,
|
|
8163
8459
|
args: {
|
|
8164
8460
|
...(skipImages ? { skip_images: true } : {}),
|
|
8165
8461
|
...(rebuild ? { rebuild: true } : {}),
|
|
8166
8462
|
},
|
|
8167
8463
|
})
|
|
8168
8464
|
if (hostdResp === 'not-configured') {
|
|
8465
|
+
warnLegacySpawnIfHostdDisabled('update_apply')
|
|
8169
8466
|
spawnSwitchroomDetached(
|
|
8170
8467
|
['update', ...passthrough],
|
|
8171
8468
|
notifyDetachedFailure(chatId, threadId ?? null, 'update'),
|
|
8172
8469
|
)
|
|
8173
8470
|
return
|
|
8174
8471
|
}
|
|
8175
|
-
if (hostdResp.result === '
|
|
8472
|
+
if (hostdResp.result === 'completed') {
|
|
8473
|
+
return
|
|
8474
|
+
}
|
|
8475
|
+
if (hostdResp.result === 'started') {
|
|
8476
|
+
// RFC C §5.3: long-running mutation. Poll get_status until terminal
|
|
8477
|
+
// or until the recreate kills this gateway (whichever happens first).
|
|
8478
|
+
// The success signal is the post-restart greeting card edited into
|
|
8479
|
+
// ackId via the restart marker. The poll is here so that
|
|
8480
|
+
// *fail-before-recreate* (image pull error, scaffold regen crash)
|
|
8481
|
+
// doesn't leave the operator staring at the orphan "🚀 update started"
|
|
8482
|
+
// ack indefinitely. Live repro: PR #1305.
|
|
8483
|
+
void (async () => {
|
|
8484
|
+
// 60s budget: RFC C §5.3 specs `apply` at 30s and `update_apply`
|
|
8485
|
+
// at 60s. Image pulls + scaffold regeneration dominate the wall
|
|
8486
|
+
// clock for update_apply, hence the larger budget. The poll
|
|
8487
|
+
// resolves earlier on any terminal state from the daemon.
|
|
8488
|
+
const terminal = await pollHostdStatus(getMyAgentName(), updateRequestId, {
|
|
8489
|
+
timeoutMs: 60_000,
|
|
8490
|
+
})
|
|
8491
|
+
if (terminal === 'not-configured') return
|
|
8492
|
+
// completed → recreate is about to run / has run; let the post-
|
|
8493
|
+
// restart greeting card handle the success message.
|
|
8494
|
+
if (terminal.result === 'completed') return
|
|
8495
|
+
// Anything else means the daemon's mutation failed before it could
|
|
8496
|
+
// kill us. Edit the ack to surface the tail and clear the marker
|
|
8497
|
+
// so the next gateway boot doesn't render a false success card.
|
|
8498
|
+
clearRestartMarker()
|
|
8499
|
+
const errBody =
|
|
8500
|
+
terminal.error ??
|
|
8501
|
+
terminal.stderr_tail ??
|
|
8502
|
+
terminal.stdout_tail ??
|
|
8503
|
+
'(no error tail returned)'
|
|
8504
|
+
const editedText =
|
|
8505
|
+
`🚀 <b>update started</b> — <b>FAILED</b> via hostd ` +
|
|
8506
|
+
`(result=${escapeHtmlForTg(terminal.result)}):\n` +
|
|
8507
|
+
preBlock(errBody)
|
|
8508
|
+
if (ackId != null) {
|
|
8509
|
+
try {
|
|
8510
|
+
await robustApiCall(
|
|
8511
|
+
() =>
|
|
8512
|
+
lockedBot.api.editMessageText(chatId, ackId!, editedText, {
|
|
8513
|
+
parse_mode: 'HTML',
|
|
8514
|
+
link_preview_options: { is_disabled: true },
|
|
8515
|
+
}),
|
|
8516
|
+
{ verb: 'update.poll.editAck' },
|
|
8517
|
+
)
|
|
8518
|
+
} catch {
|
|
8519
|
+
// edit-failed (message deleted, parse error) — fall back to
|
|
8520
|
+
// a fresh reply so the failure isn't silent.
|
|
8521
|
+
try {
|
|
8522
|
+
await switchroomReply(ctx, editedText, { html: true })
|
|
8523
|
+
} catch {}
|
|
8524
|
+
}
|
|
8525
|
+
} else {
|
|
8526
|
+
try {
|
|
8527
|
+
await switchroomReply(ctx, editedText, { html: true })
|
|
8528
|
+
} catch {}
|
|
8529
|
+
}
|
|
8530
|
+
})()
|
|
8176
8531
|
return
|
|
8177
8532
|
}
|
|
8178
8533
|
clearRestartMarker()
|
|
@@ -8209,6 +8564,81 @@ bot.command('upgrade', async ctx => {
|
|
|
8209
8564
|
)
|
|
8210
8565
|
})
|
|
8211
8566
|
|
|
8567
|
+
// /audit hostd — tail/filter the hostd audit log. Mirrors `/vault audit`
// in spirit (operator observability over a privileged subsystem from any
// admin DM). Admin-gated via ADMIN_COMMAND_NAMES. Reads the audit JSONL
// at ~/.switchroom/host-control-audit.log directly — no hostd RPC needed
// because the file is shared via the host bind mount on docker installs.
//
// Syntax: /audit hostd [--tail N] [--agent <name>] [--op <verb>] [--error]
// Each operator-supplied value is validated before it is appended to the
// CLI argv; the first invalid token produces a reply and aborts the command.
bot.command('audit', async ctx => {
  if (!isAuthorizedSender(ctx)) return
  const arg = (ctx.match ?? '').trim()
  if (arg === '' || arg === 'help' || arg === '--help') {
    // The reply is sent in HTML mode, so the literal angle brackets around
    // the placeholders must be entity-escaped or Telegram treats them as
    // (unsupported) tags and rejects the message.
    await switchroomReply(
      ctx,
      'Usage: <code>/audit hostd [--tail N] [--agent &lt;name&gt;] [--op &lt;verb&gt;] [--error]</code>',
      { html: true },
    )
    return
  }
  const tokens = arg.split(/\s+/)
  const sub = tokens[0]
  if (sub !== 'hostd') {
    await switchroomReply(
      ctx,
      `Unknown audit target <code>${escapeHtmlForTg(sub ?? '')}</code>. ` +
        `Supported: <code>hostd</code>.`,
      { html: true },
    )
    return
  }
  // Build the CLI argv for switchroom hostd audit. Validate each
  // operator-supplied value to keep argv injection out of the picture.
  const ALLOWED_OPS = new Set([
    'agent_start', 'agent_stop', 'agent_restart', 'apply',
    'update_check', 'update_apply', 'update_status', 'upgrade_status',
    'get_status', 'doctor', 'fleet_state',
  ])
  const argv: string[] = ['hostd', 'audit']
  // tokens[0] is the 'hostd' sub-command; flags start at index 1.
  for (let i = 1; i < tokens.length; i++) {
    const t = tokens[i]!
    if (t === '--error') { argv.push('--error'); continue }
    if (t === '--tail' || t === '--agent' || t === '--op') {
      // Value-taking flag: consume the following token as its value.
      const v = tokens[++i]
      if (v == null) {
        await switchroomReply(ctx, `Flag <code>${t}</code> requires a value.`, { html: true })
        return
      }
      // Digits-only keeps the value inert as an argv element; the >= 1
      // check makes the validation agree with the 1-9999 range the error
      // message advertises (a bare `0` used to slip through).
      if (t === '--tail' && !(/^[0-9]{1,4}$/.test(v) && Number(v) >= 1)) {
        await switchroomReply(ctx, `<code>--tail</code> must be an integer (1-9999).`, { html: true })
        return
      }
      if (t === '--agent' && !/^[a-z][a-z0-9-]{0,62}$/i.test(v)) {
        await switchroomReply(ctx, `<code>--agent</code> name has an invalid shape.`, { html: true })
        return
      }
      if (t === '--op' && !ALLOWED_OPS.has(v)) {
        await switchroomReply(
          ctx,
          `Unknown hostd verb <code>${escapeHtmlForTg(v)}</code>. ` +
            `Known: ${[...ALLOWED_OPS].sort().map(o => `<code>${o}</code>`).join(', ')}.`,
          { html: true },
        )
        return
      }
      argv.push(t, v)
      continue
    }
    // Anything else is an unrecognised flag or stray positional.
    await switchroomReply(
      ctx,
      `Unknown flag <code>${escapeHtmlForTg(t)}</code>. ` +
        `Allowed: <code>--tail</code>, <code>--agent</code>, <code>--op</code>, <code>--error</code>.`,
      { html: true },
    )
    return
  }
  // The label gains an ellipsis whenever any filter flag was supplied.
  await runSwitchroomCommand(ctx, argv, `hostd audit${argv.length > 2 ? ' …' : ''}`)
})
|
|
8641
|
+
|
|
8212
8642
|
// ─── /approve, /deny, /pending ────────────────────────────────────────────
|
|
8213
8643
|
// Slash-command alternatives to the inline-button approval flow (useful for
|
|
8214
8644
|
// desktop-only sessions and power-users). Share pendingPermissions state
|
|
@@ -8272,6 +8702,59 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
|
|
|
8272
8702
|
bot.command('approve', async ctx => handlePermissionSlash(ctx, 'allow'))
|
|
8273
8703
|
bot.command('deny', async ctx => handlePermissionSlash(ctx, 'deny'))
|
|
8274
8704
|
|
|
8705
|
+
// ─── Drive folder picker (RFC E §4.1) ───────────────────────────────────
|
|
8706
|
+
// /folders — post a Telegram picker card listing this agent's top-level
|
|
8707
|
+
// Drive folders. Tap [Allow] on a folder to grant the agent
|
|
8708
|
+
// allow_always at doc:gdrive:folder/<id>/**; tap [Browse] to drill in.
|
|
8709
|
+
//
|
|
8710
|
+
// Authorisation: same dmCommandGate as the other operator slash
|
|
8711
|
+
// commands — only allowFrom users can post-trigger.
|
|
8712
|
+
|
|
8713
|
+
// Single module-level FolderListCache instance; buildFolderPickerDeps()
// passes this same object as `cache` in every deps bundle it returns.
const folderPickerCache = new FolderListCache()
|
|
8714
|
+
|
|
8715
|
+
/**
 * Assemble the dependency bundle passed to the folder-picker handler.
 *
 * The bundle carries this gateway's agent name, the shared
 * `folderPickerCache`, and thin adapters around `loadFromAuthBroker` /
 * `fetchFolderPage` and the `kernelApproval*` helpers.
 *
 * @returns a fresh `FolderPickerHandlerDeps` object on every call.
 */
function buildFolderPickerDeps(): FolderPickerHandlerDeps {
  const agentName = getMyAgentName()
  return {
    agentName,
    cache: folderPickerCache,

    // Fetch one page of folders using the access token from the auth broker.
    // Throws when the broker returns null.
    fetchPage: async ({ parent_id, page_token }) => {
      const brokerHandle = await loadFromAuthBroker()
      if (brokerHandle === null) {
        throw new Error(
          `auth-broker unreachable for agent ${agentName} — is the broker container running?`,
        )
      }
      // Optional fields are only included when the caller supplied them.
      const parentPart = parent_id !== undefined ? { parent_id } : {}
      const tokenPart = page_token !== undefined ? { page_token } : {}
      return fetchFolderPage({
        access_token: brokerHandle.access_token,
        ...parentPart,
        ...tokenPart,
      })
    },

    // Forward an approval request; null/undefined `why` and `ttl_ms` are
    // omitted from the payload. Resolves to null when the kernel returns
    // null or reports `rate_limited`.
    approvalRequest: async (req) => {
      const response = await kernelApprovalRequest({
        agent_unit: req.agent_unit,
        scope: req.scope,
        action: req.action,
        approver_set: req.approver_set,
        ...(req.why != null ? { why: req.why } : {}),
        ...(req.ttl_ms != null ? { ttl_ms: req.ttl_ms } : {}),
      })
      if (response !== null && response.state !== 'rate_limited') {
        return { request_id: response.request_id }
      }
      return null
    },

    // A null response from the kernel counts as "not consumed".
    approvalConsume: async (requestId) => {
      const response = await kernelApprovalConsume(requestId)
      if (response === null) return false
      return response.consumed
    },

    approvalRecord: async (record) => kernelApprovalRecord(record),
  }
}
|
|
8752
|
+
|
|
8753
|
+
// /folders — hand the update to handleFoldersCommand with a freshly built
// deps bundle. Senders that fail isAuthorizedSender are ignored silently.
bot.command('folders', async ctx => {
  if (isAuthorizedSender(ctx)) {
    const deps = buildFolderPickerDeps()
    await handleFoldersCommand(ctx, deps)
  }
})
|
|
8757
|
+
|
|
8275
8758
|
// /pending — list current pending permission prompts with their ids, so the
|
|
8276
8759
|
// user can target a specific one via /approve <id> or /deny <id>.
|
|
8277
8760
|
// Restricted to access.allowFrom DMs to match /approve and /deny — it
|
|
@@ -8303,16 +8786,12 @@ bot.command('interrupt', async ctx => {
|
|
|
8303
8786
|
await runSwitchroomCommand(ctx, ['agent', 'interrupt', name], `interrupt ${name}`)
|
|
8304
8787
|
})
|
|
8305
8788
|
|
|
8306
|
-
//
|
|
8307
|
-
//
|
|
8308
|
-
//
|
|
8309
|
-
//
|
|
8310
|
-
//
|
|
8311
|
-
//
|
|
8312
|
-
// quota-flap on the recovering slot could re-trigger fallback the
|
|
8313
|
-
// moment the gateway came back. We now seed from disk on first use
|
|
8314
|
-
// and persist on every transition. Errors are swallowed: losing the
|
|
8315
|
-
// lockout file just degrades to in-memory-only behaviour.
|
|
8789
|
+
// Persist-ops bundle for the legacy auto-fallback lockout file. The
|
|
8790
|
+
// only remaining reader is `isAutoFallbackCooldownActive` (line ~2030)
|
|
8791
|
+
// — used by the pending-restart drain cap to defer a forced restart
|
|
8792
|
+
// stacking on top of an in-flight slot rotation. The legacy poller
|
|
8793
|
+
// that USED to write this file was retired alongside this refactor;
|
|
8794
|
+
// existing on-disk lockouts age out via DEFAULT_FALLBACK_COOLDOWN_MS.
|
|
8316
8795
|
const lockoutOps: LockoutPersistOps = {
|
|
8317
8796
|
readFileSync: (p, enc) => readFileSync(p, enc),
|
|
8318
8797
|
writeFileSync: (p, data, opts) => writeFileSync(p, data, opts),
|
|
@@ -8320,24 +8799,6 @@ const lockoutOps: LockoutPersistOps = {
|
|
|
8320
8799
|
mkdirSync: (p, opts) => mkdirSync(p, opts),
|
|
8321
8800
|
joinPath: (...parts) => join(...parts),
|
|
8322
8801
|
}
|
|
8323
|
-
let autoFallbackLockout: LockoutRecord = emptyLockout()
|
|
8324
|
-
let autoFallbackLockoutSeeded = false
|
|
8325
|
-
function seedAutoFallbackLockoutIfNeeded(agentDir: string): void {
|
|
8326
|
-
if (autoFallbackLockoutSeeded) return
|
|
8327
|
-
autoFallbackLockoutSeeded = true
|
|
8328
|
-
try {
|
|
8329
|
-
autoFallbackLockout = loadLockout(agentDir, lockoutOps)
|
|
8330
|
-
} catch (err) {
|
|
8331
|
-
process.stderr.write(`telegram gateway: auto-fallback lockout seed failed (using empty): ${(err as Error).message}\n`)
|
|
8332
|
-
}
|
|
8333
|
-
}
|
|
8334
|
-
function persistLockout(agentDir: string): void {
|
|
8335
|
-
try {
|
|
8336
|
-
saveLockout(agentDir, autoFallbackLockout, lockoutOps)
|
|
8337
|
-
} catch (err) {
|
|
8338
|
-
process.stderr.write(`telegram gateway: auto-fallback lockout persist failed: ${(err as Error).message}\n`)
|
|
8339
|
-
}
|
|
8340
|
-
}
|
|
8341
8802
|
|
|
8342
8803
|
// Pinned slot-banner state (#421). One banner per gateway process,
|
|
8343
8804
|
// in the owner chat (access.allowFrom[0]). Per-topic forum support
|
|
@@ -8368,91 +8829,123 @@ async function refreshPinnedBanner(reason: string): Promise<void> {
|
|
|
8368
8829
|
}
|
|
8369
8830
|
}
|
|
8370
8831
|
|
|
8371
|
-
|
|
8372
|
-
|
|
8373
|
-
|
|
8374
|
-
|
|
8375
|
-
|
|
8832
|
+
/**
|
|
8833
|
+
* Re-entry guard + dedup window for `fireFleetAutoFallback`. The state
|
|
8834
|
+
* was lifted into `fleet-fallback-gate.ts` so it can be tested in
|
|
8835
|
+
* isolation (gateway.ts module state was unreachable from vitest). The
|
|
8836
|
+
* gate ALSO enforces the broker-reachability honesty contract: when the
|
|
8837
|
+
* broker is down, `wouldFire()` returns false so the model-unavailable
|
|
8838
|
+
* card stays honest instead of advertising a swap that would bail with
|
|
8839
|
+
* `reason=no-broker-client`.
|
|
8840
|
+
*/
|
|
8841
|
+
// 30_000 ms; handed to createFleetFallbackGate as its `dedupMs` option.
const FLEET_FALLBACK_DEDUP_MS = 30_000
|
|
8842
|
+
|
|
8843
|
+
/**
 * Cheap synchronous probe for the auth-broker Unix socket, supplied to the
 * fleet-fallback gate as its `brokerReachable` callback so the
 * model-unavailable card can fall back to the manual `/auth use <label>`
 * hint instead of advertising a swap that would bail with
 * `reason=no-broker-client`.
 *
 * This only tests that the path returned by `resolveAuthBrokerSocketPath()`
 * exists on disk; it does not attempt a connection. Any exception raised
 * while resolving or checking the path is treated as "not reachable".
 *
 * @returns true when the socket path exists, false otherwise.
 */
function isAuthBrokerSocketReachable(): boolean {
  let socketPresent: boolean
  try {
    const socketPath = resolveAuthBrokerSocketPath()
    socketPresent = existsSync(socketPath)
  } catch {
    socketPresent = false
  }
  return socketPresent
}
|
|
8855
|
+
|
|
8856
|
+
// Module-level gate shared by wouldFireFleetAutoFallback() and
// fireFleetAutoFallback(); configured with the dedup window and the
// broker-socket presence probe.
const fleetFallbackGate = createFleetFallbackGate({
  brokerReachable: isAuthBrokerSocketReachable,
  dedupMs: FLEET_FALLBACK_DEDUP_MS,
})
|
|
8860
|
+
|
|
8861
|
+
/**
 * Ask the shared fleet-fallback gate whether a fire would go ahead.
 *
 * @returns whatever `fleetFallbackGate.wouldFire()` reports.
 */
function wouldFireFleetAutoFallback(): boolean {
  const gateVerdict = fleetFallbackGate.wouldFire()
  return gateVerdict
}
|
|
8864
|
+
|
|
8865
|
+
/**
 * Fleet-wide auto-fallback entry point (RFC H follow-up).
 *
 * Called from the model-unavailable card render path so that a quota-out
 * event on ANY agent triggers a fleet-wide swap immediately (through
 * broker.setActive — the same path `/auth use` takes) rather than going
 * through the per-agent legacy `runAutoFallbackCheck`. Before this change
 * the card path invoked no fallback machinery at all; the scheduled poller
 * (60-min interval, utilization headers only) was the sole trigger and
 * missed hard-rejection events.
 *
 * Concurrency and dedup are delegated to `fleetFallbackGate`: the actual
 * work (`doFireFleetAutoFallback`) and an error reporter are both passed to
 * `fleetFallbackGate.fire`. Per the gate's contract as described by its
 * author, concurrent triggers collapse and calls inside
 * `FLEET_FALLBACK_DEDUP_MS` of a recent fire are dropped silently — the
 * broadcast announcement is the user-visible signal, so repeating it adds
 * nothing.
 *
 * Fire-and-forget from the caller's point of view: errors handed to the
 * reporter are written to stderr.
 *
 * @param triggerAgent name of the agent whose event triggered the fallback;
 *   used for the broker client lookup and in log lines.
 */
async function fireFleetAutoFallback(triggerAgent: string): Promise<void> {
  const runSwap = () => doFireFleetAutoFallback(triggerAgent)
  const reportError = (err: unknown): void => {
    process.stderr.write(
      `telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${(err as Error)?.message ?? err}\n`,
    )
  }
  return fleetFallbackGate.fire(runSwap, reportError)
}
|
|
8376
8896
|
|
|
8377
|
-
|
|
8378
|
-
|
|
8379
|
-
|
|
8380
|
-
|
|
8897
|
+
/**
 * Perform one fleet-wide fallback attempt on behalf of `triggerAgent`.
 *
 * Steps: obtain the auth-broker client, list broker state, probe live quota
 * for every account in parallel, hand everything to `runFleetAutoFallback`,
 * log the outcome to stderr, then send `outcome.announcement` to every chat
 * in `loadAccess().allowFrom`.
 *
 * The announcement sends are fire-and-forget (`void swallowingApiCall`) and
 * happen for every outcome kind, not only for a switch.
 *
 * @param triggerAgent agent name used for the broker client lookup, passed
 *   through to `runFleetAutoFallback`, and included in log lines.
 * @returns true only when the outcome kind is `'switched'`; false when no
 *   broker client is available, for any other outcome kind, or when
 *   anything in the sequence throws. The caller uses this to decide whether
 *   to arm the post-fire suppression window.
 */
async function doFireFleetAutoFallback(triggerAgent: string): Promise<boolean> {
  try {
    const client = await getAuthBrokerClient(triggerAgent)
    if (!client) {
      process.stderr.write(
        `telegram gateway: [fleet-fallback] skipped agent=${triggerAgent} reason=no-broker-client\n`,
      )
      return false
    }

    const brokerState = await client.listState()
    // force:true bypasses the 5-min in-process cache — the swap decision
    // should use the freshest quota data, not a stale cached read.
    const liveQuotas = await Promise.all(
      brokerState.accounts.map((account) => fetchAccountQuota(account.label, { force: true })),
    )
    const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'

    const outcome = await runFleetAutoFallback({
      state: brokerState,
      quotas: liveQuotas,
      setActive: (label) => client.setActive(label),
      triggerAgent,
      tz,
    })

    // Old/new labels are only appended to the log line for a real switch.
    const swapDetail =
      outcome.kind === 'switched' ? ` old=${outcome.oldLabel} new=${outcome.newLabel}` : ''
    process.stderr.write(
      `telegram gateway: [fleet-fallback] outcome=${outcome.kind} agent=${triggerAgent}${swapDetail}\n`,
    )

    // Post the announcement to every authorized chat. DM-only options (no
    // message_thread_id); still wrapped in swallowingApiCall per the
    // codebase rule. An empty allowFrom list simply sends nothing.
    const sendOpts = { parse_mode: 'HTML' as const }
    for (const chat_id of loadAccess().allowFrom) {
      void swallowingApiCall(
        () => bot.api.sendMessage(chat_id, outcome.announcement, sendOpts),
        { chat_id, verb: 'fleet-fallback:notify' },
      )
    }
    return outcome.kind === 'switched'
  } catch (err) {
    process.stderr.write(
      `telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${(err as Error)?.message ?? err}\n`,
    )
    return false
  }
}
|
|
8458
8951
|
|
|
@@ -8512,15 +9005,6 @@ async function runCreditWatch(): Promise<void> {
|
|
|
8512
9005
|
}
|
|
8513
9006
|
}
|
|
8514
9007
|
|
|
8515
|
-
// /authfallback was removed in v0.6.12 — it duplicated the work of
|
|
8516
|
-
// the dashboard's Switch primary picker (operator-facing surface) and
|
|
8517
|
-
// the auto-fallback poller (transparent on-quota-wall case).
|
|
8518
|
-
// Operators who want to manually shuffle the active credential now
|
|
8519
|
-
// use the picker. The `runAutoFallbackCheck` function and the
|
|
8520
|
-
// `case 'fallback':` callback dispatch stay in the codebase: any
|
|
8521
|
-
// pinned messages from earlier versions still work, and the
|
|
8522
|
-
// auto-fallback poller still calls runAutoFallbackCheck directly.
|
|
8523
|
-
|
|
8524
9008
|
bot.command("auth", async ctx => {
|
|
8525
9009
|
if (!isAuthorizedSender(ctx)) return
|
|
8526
9010
|
const text = ctx.message?.text ?? ""
|
|
@@ -8614,8 +9098,46 @@ bot.command("auth", async ctx => {
|
|
|
8614
9098
|
isAdmin,
|
|
8615
9099
|
client,
|
|
8616
9100
|
chatId,
|
|
9101
|
+
// Format 2 enricher — probe live quota for every account in
|
|
9102
|
+
// parallel so the snapshot reflects current Anthropic-side
|
|
9103
|
+
// utilization, not the broker's potentially-days-stale
|
|
9104
|
+
// disk-cached `quota.json`. force:true bypasses the 5-min
|
|
9105
|
+
// in-process cache for this call. ~500-800ms per account
|
|
9106
|
+
// serial; in parallel ~800ms total for typical 3-account
|
|
9107
|
+
// fleets — acceptable for an interactive command.
|
|
9108
|
+
liveQuotas: async (accounts) =>
|
|
9109
|
+
Promise.all(
|
|
9110
|
+
accounts.map((a) => fetchAccountQuota(a.label, { force: true })),
|
|
9111
|
+
),
|
|
9112
|
+
tz: process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ,
|
|
8617
9113
|
})
|
|
8618
|
-
|
|
9114
|
+
// Translate the handler's optional keyboard shape into grammy's
|
|
9115
|
+
// `reply_markup`. Buttons with `callbackData` become callback_data;
|
|
9116
|
+
// buttons with `insertText` become switch_inline_query_current_chat
|
|
9117
|
+
// (taps paste the slash-command into the user's input). Keep a
|
|
9118
|
+
// safe default for buttons missing both (shouldn't happen).
|
|
9119
|
+
if (reply.keyboard && reply.keyboard.length > 0) {
|
|
9120
|
+
// Build via grammy's InlineKeyboard so the type is correct
|
|
9121
|
+
// for switchroomReply's reply_markup field — no `as never`
|
|
9122
|
+
// cast needed.
|
|
9123
|
+
const kb = new InlineKeyboard()
|
|
9124
|
+
for (let i = 0; i < reply.keyboard.length; i++) {
|
|
9125
|
+
const row = reply.keyboard[i]!
|
|
9126
|
+
for (const b of row) {
|
|
9127
|
+
if (b.callbackData) kb.text(b.text, b.callbackData)
|
|
9128
|
+
else if (b.insertText) kb.switchInlineCurrent(b.text, b.insertText)
|
|
9129
|
+
else kb.text(b.text, 'auth:noop')
|
|
9130
|
+
}
|
|
9131
|
+
// grammy's row terminator — except after the last row.
|
|
9132
|
+
if (i < reply.keyboard.length - 1) kb.row()
|
|
9133
|
+
}
|
|
9134
|
+
await switchroomReply(ctx, reply.text, {
|
|
9135
|
+
html: reply.html,
|
|
9136
|
+
reply_markup: kb,
|
|
9137
|
+
})
|
|
9138
|
+
} else {
|
|
9139
|
+
await switchroomReply(ctx, reply.text, { html: reply.html })
|
|
9140
|
+
}
|
|
8619
9141
|
})
|
|
8620
9142
|
|
|
8621
9143
|
// Boot-card auth-row loader (issue #708, RFC H rewire). Queries the
|
|
@@ -10243,12 +10765,149 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
10243
10765
|
// stub so any stale pinned message that fires an `auth:*` tap is
|
|
10244
10766
|
// silently dismissed instead of crashing the gateway.
|
|
10245
10767
|
async function handleAuthDashboardCallback(ctx: Context): Promise<void> {
|
|
10768
|
+
const data = ctx.callbackQuery?.data ?? ''
|
|
10769
|
+
const currentAgent = getMyAgentName()
|
|
10770
|
+
|
|
10771
|
+
// auth:use:<label> — fleet-wide swap via broker.setActive (same path
|
|
10772
|
+
// /auth use takes from chat). Admin-gated via the broker's own
|
|
10773
|
+
// per-agent admin flag.
|
|
10774
|
+
if (data.startsWith('auth:use:')) {
|
|
10775
|
+
const label = data.slice('auth:use:'.length)
|
|
10776
|
+
if (!label) {
|
|
10777
|
+
try { await ctx.answerCallbackQuery({ text: 'Missing account label.', show_alert: false }) } catch { /* */ }
|
|
10778
|
+
return
|
|
10779
|
+
}
|
|
10780
|
+
try {
|
|
10781
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
10782
|
+
if (!client) {
|
|
10783
|
+
try { await ctx.answerCallbackQuery({ text: 'Broker unreachable.', show_alert: true }) } catch { /* */ }
|
|
10784
|
+
return
|
|
10785
|
+
}
|
|
10786
|
+
const result = await client.setActive(label)
|
|
10787
|
+
try {
|
|
10788
|
+
await ctx.answerCallbackQuery({
|
|
10789
|
+
text: `Switched fleet → ${result.active} (${result.fanned.length} agents)`,
|
|
10790
|
+
show_alert: false,
|
|
10791
|
+
})
|
|
10792
|
+
} catch { /* toast may fail on stale tap */ }
|
|
10793
|
+
// Edit the source message to reflect the new active. Leaving
|
|
10794
|
+
// the old keyboard intact would tempt a double-tap; we replace
|
|
10795
|
+
// the text + drop the keyboard so the user has to /auth again
|
|
10796
|
+
// to see fresh state.
|
|
10797
|
+
const msg = ctx.callbackQuery?.message
|
|
10798
|
+
if (msg) {
|
|
10799
|
+
// Wrap in swallowingApiCall per #1075 — stale callback-source
|
|
10800
|
+
// messages (deleted topic, expired) shouldn't crash the swap.
|
|
10801
|
+
await swallowingApiCall(
|
|
10802
|
+
() =>
|
|
10803
|
+
bot.api.editMessageText(
|
|
10804
|
+
msg.chat.id,
|
|
10805
|
+
msg.message_id,
|
|
10806
|
+
`<b>Active account →</b> <code>${escapeHtmlForTg(result.active)}</code>\n` +
|
|
10807
|
+
`<i>Re-mirrored credentials for ${result.fanned.length} agent${result.fanned.length === 1 ? '' : 's'}.</i>\n\n` +
|
|
10808
|
+
`<i>Tap /auth to see updated quota for the new active account.</i>`,
|
|
10809
|
+
{ parse_mode: 'HTML' },
|
|
10810
|
+
),
|
|
10811
|
+
{ chat_id: String(msg.chat.id), verb: 'auth:use:edit' },
|
|
10812
|
+
)
|
|
10813
|
+
}
|
|
10814
|
+
} catch (err) {
|
|
10815
|
+
const msg = (err as Error)?.message ?? String(err)
|
|
10816
|
+
try {
|
|
10817
|
+
await ctx.answerCallbackQuery({
|
|
10818
|
+
text: `Switch failed: ${msg.slice(0, 180)}`,
|
|
10819
|
+
show_alert: true,
|
|
10820
|
+
})
|
|
10821
|
+
} catch { /* */ }
|
|
10822
|
+
}
|
|
10823
|
+
return
|
|
10824
|
+
}
|
|
10825
|
+
|
|
10826
|
+
// auth:refresh — re-render the /auth snapshot in-place with a fresh
|
|
10827
|
+
// live probe. Replaces the message body; keyboard stays.
|
|
10828
|
+
if (data === 'auth:refresh') {
|
|
10829
|
+
// Freshness throttle: each refresh fan-fires N live api.anthropic.com
|
|
10830
|
+
// probes (one per account, force=true bypasses the 5-min cache).
|
|
10831
|
+
// Without this, a user double-tapping the ↻ button burns through
|
|
10832
|
+
// their account's RPM budget on duplicate work. Cap at one per
|
|
10833
|
+
// AUTH_REFRESH_THROTTLE_MS per (chat, message) pair.
|
|
10834
|
+
const refreshMsg = ctx.callbackQuery?.message
|
|
10835
|
+
if (refreshMsg) {
|
|
10836
|
+
const key = `${refreshMsg.chat.id}:${refreshMsg.message_id}`
|
|
10837
|
+
const lastAtMs = lastAuthRefreshAtMs.get(key) ?? 0
|
|
10838
|
+
const sinceLastMs = Date.now() - lastAtMs
|
|
10839
|
+
if (sinceLastMs < AUTH_REFRESH_THROTTLE_MS) {
|
|
10840
|
+
const waitS = Math.ceil((AUTH_REFRESH_THROTTLE_MS - sinceLastMs) / 1000)
|
|
10841
|
+
try {
|
|
10842
|
+
await ctx.answerCallbackQuery({
|
|
10843
|
+
text: `Just refreshed — try again in ${waitS}s`,
|
|
10844
|
+
show_alert: false,
|
|
10845
|
+
})
|
|
10846
|
+
} catch { /* */ }
|
|
10847
|
+
return
|
|
10848
|
+
}
|
|
10849
|
+
lastAuthRefreshAtMs.set(key, Date.now())
|
|
10850
|
+
}
|
|
10851
|
+
try {
|
|
10852
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
10853
|
+
if (!client) {
|
|
10854
|
+
try { await ctx.answerCallbackQuery({ text: 'Broker unreachable.', show_alert: true }) } catch { /* */ }
|
|
10855
|
+
return
|
|
10856
|
+
}
|
|
10857
|
+
const state = await client.listState()
|
|
10858
|
+
const quotas = await Promise.all(
|
|
10859
|
+
state.accounts.map((a) => fetchAccountQuota(a.label, { force: true })),
|
|
10860
|
+
)
|
|
10861
|
+
const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'
|
|
10862
|
+
const { renderAuthSnapshotFormat2, buildSnapshotsFromState, buildSnapshotKeyboard } = await import(
|
|
10863
|
+
'../auth-snapshot-format.js'
|
|
10864
|
+
)
|
|
10865
|
+
const snapshots = buildSnapshotsFromState(state, quotas)
|
|
10866
|
+
const text = renderAuthSnapshotFormat2(snapshots, {
|
|
10867
|
+
tz,
|
|
10868
|
+
now: new Date(),
|
|
10869
|
+
liveProbedAtMs: Date.now(),
|
|
10870
|
+
})
|
|
10871
|
+
const kbRows = buildSnapshotKeyboard(snapshots)
|
|
10872
|
+
const inline_keyboard = kbRows.map((row) =>
|
|
10873
|
+
row.map((b) => {
|
|
10874
|
+
if (b.callbackData) return { text: b.text, callback_data: b.callbackData }
|
|
10875
|
+
if (b.insertText) return { text: b.text, switch_inline_query_current_chat: b.insertText }
|
|
10876
|
+
return { text: b.text, callback_data: 'auth:noop' }
|
|
10877
|
+
}),
|
|
10878
|
+
)
|
|
10879
|
+
const msg = ctx.callbackQuery?.message
|
|
10880
|
+
if (msg) {
|
|
10881
|
+
await swallowingApiCall(
|
|
10882
|
+
() =>
|
|
10883
|
+
bot.api.editMessageText(msg.chat.id, msg.message_id, text, {
|
|
10884
|
+
parse_mode: 'HTML',
|
|
10885
|
+
reply_markup: { inline_keyboard },
|
|
10886
|
+
}),
|
|
10887
|
+
{ chat_id: String(msg.chat.id), verb: 'auth:refresh:edit' },
|
|
10888
|
+
)
|
|
10889
|
+
}
|
|
10890
|
+
try { await ctx.answerCallbackQuery({ text: 'Refreshed.', show_alert: false }) } catch { /* */ }
|
|
10891
|
+
} catch (err) {
|
|
10892
|
+
const msg = (err as Error)?.message ?? String(err)
|
|
10893
|
+
try {
|
|
10894
|
+
await ctx.answerCallbackQuery({
|
|
10895
|
+
text: `Refresh failed: ${msg.slice(0, 180)}`,
|
|
10896
|
+
show_alert: true,
|
|
10897
|
+
})
|
|
10898
|
+
} catch { /* */ }
|
|
10899
|
+
}
|
|
10900
|
+
return
|
|
10901
|
+
}
|
|
10902
|
+
|
|
10903
|
+
// Unknown auth:* — likely from a too-old message. Dismiss with a
|
|
10904
|
+
// hint pointing at the canonical re-render verb.
|
|
10246
10905
|
try {
|
|
10247
10906
|
await ctx.answerCallbackQuery({
|
|
10248
|
-
text:
|
|
10907
|
+
text: 'Unknown auth button. Send /auth for current state.',
|
|
10249
10908
|
show_alert: false,
|
|
10250
10909
|
})
|
|
10251
|
-
} catch { /*
|
|
10910
|
+
} catch { /* */ }
|
|
10252
10911
|
}
|
|
10253
10912
|
|
|
10254
10913
|
// /reauth was removed in v0.6.13 — the `/auth` dashboard's
|
|
@@ -10659,6 +11318,44 @@ bot.command('issues', async ctx => {
|
|
|
10659
11318
|
|
|
10660
11319
|
bot.command('usage', async ctx => {
|
|
10661
11320
|
if (!isAuthorizedSender(ctx)) return
|
|
11321
|
+
// Format 2 path: enumerate every account in the broker's known set,
|
|
11322
|
+
// probe live quota in parallel, render the health-grouped snapshot.
|
|
11323
|
+
// Falls back to the legacy single-agent shape when the broker is
|
|
11324
|
+
// unreachable, since /usage was historically callable against any
|
|
11325
|
+
// agent regardless of fleet state.
|
|
11326
|
+
const currentAgent = getMyAgentName()
|
|
11327
|
+
try {
|
|
11328
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
11329
|
+
if (client) {
|
|
11330
|
+
const state = await client.listState()
|
|
11331
|
+
if (state.accounts.length > 0) {
|
|
11332
|
+
const quotas = await Promise.all(
|
|
11333
|
+
state.accounts.map((a) => fetchAccountQuota(a.label, { force: true })),
|
|
11334
|
+
)
|
|
11335
|
+
const { renderAuthSnapshotFormat2, buildSnapshotsFromState } = await import(
|
|
11336
|
+
'../auth-snapshot-format.js'
|
|
11337
|
+
)
|
|
11338
|
+
const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'
|
|
11339
|
+
const snapshots = buildSnapshotsFromState(state, quotas)
|
|
11340
|
+
const text = renderAuthSnapshotFormat2(snapshots, {
|
|
11341
|
+
tz,
|
|
11342
|
+
now: new Date(),
|
|
11343
|
+
liveProbedAtMs: Date.now(),
|
|
11344
|
+
})
|
|
11345
|
+
await switchroomReply(ctx, text, { html: true })
|
|
11346
|
+
return
|
|
11347
|
+
}
|
|
11348
|
+
}
|
|
11349
|
+
} catch (err) {
|
|
11350
|
+
process.stderr.write(
|
|
11351
|
+
`telegram gateway: /usage Format 2 path failed agent=${currentAgent}: ${(err as Error)?.message ?? err}\n`,
|
|
11352
|
+
)
|
|
11353
|
+
// fall through to legacy single-agent path
|
|
11354
|
+
}
|
|
11355
|
+
|
|
11356
|
+
// Legacy single-agent path — kept as a graceful fallback when the
|
|
11357
|
+
// broker is unreachable (post-RFC-H rewire boot timing, broken
|
|
11358
|
+
// socket bind, etc.). Same shape /usage shipped with originally.
|
|
10662
11359
|
const agentDir = resolveAgentDirFromEnv()
|
|
10663
11360
|
if (!agentDir) {
|
|
10664
11361
|
await switchroomReply(ctx, '<b>/usage:</b> cannot resolve agent dir.', { html: true })
|
|
@@ -10783,6 +11480,29 @@ bot.on('callback_query:data', async ctx => {
|
|
|
10783
11480
|
return
|
|
10784
11481
|
}
|
|
10785
11482
|
|
|
11483
|
+
// RFC E §4.1: drvpick:<verb>:<agent>[:<...>] — folder-picker card taps.
|
|
11484
|
+
// open / enter / back / refresh re-render the card in place;
|
|
11485
|
+
// grant writes an allow_always kernel decision at
|
|
11486
|
+
// doc:gdrive:folder/<id>/** and edits the card to a confirmation.
|
|
11487
|
+
//
|
|
11488
|
+
// Auth gate: the picker grant is an OPERATOR action (mirrors the
|
|
11489
|
+
// `op:`/`vd:`/`vg:` family, not the `apv:` agent-approval shape).
|
|
11490
|
+
// Mirror those patterns — refuse callbacks from anyone outside
|
|
11491
|
+
// `access.allowFrom`. Without this, a group member who isn't in
|
|
11492
|
+
// the operator allowlist could still tap [✅ Allow "<folder>"] on
|
|
11493
|
+
// a card that landed in the group and write an `allow_always`
|
|
11494
|
+
// decision attributed to themselves.
|
|
11495
|
+
if (data.startsWith('drvpick:')) {
|
|
11496
|
+
const access = loadAccess()
|
|
11497
|
+
const senderId = String(ctx.from?.id ?? '')
|
|
11498
|
+
if (!access.allowFrom.includes(senderId)) {
|
|
11499
|
+
await ctx.answerCallbackQuery({ text: 'Not authorized.' })
|
|
11500
|
+
return
|
|
11501
|
+
}
|
|
11502
|
+
await handleFolderPickerCallback(ctx, data, buildFolderPickerDeps())
|
|
11503
|
+
return
|
|
11504
|
+
}
|
|
11505
|
+
|
|
10786
11506
|
// op:<action>:<encoded-agent> callbacks from operator-events.ts
|
|
10787
11507
|
// renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
|
|
10788
11508
|
// Actions: dismiss, restart, reauth, swap-slot, add-slot, logs.
|
|
@@ -12723,23 +13443,6 @@ void (async () => {
|
|
|
12723
13443
|
}
|
|
12724
13444
|
} catch {}
|
|
12725
13445
|
|
|
12726
|
-
// Auto-fallback on quota exhaustion. Periodically polls
|
|
12727
|
-
// the active slot's rate-limit headers; when utilization >= 99.5%
|
|
12728
|
-
// or a 429 is observed, marks the slot exhausted, swaps to the
|
|
12729
|
-
// next healthy slot via src/auth, restarts the agent, and posts
|
|
12730
|
-
// a notification to the owner chat. See telegram-plugin/auto-fallback.ts
|
|
12731
|
-
// for the pure decision logic + notification builder.
|
|
12732
|
-
//
|
|
12733
|
-
// Default poll cadence: every 60 minutes. Set
|
|
12734
|
-
// SWITCHROOM_AUTO_FALLBACK_POLL_MS=0 to disable the background
|
|
12735
|
-
// poller. Pre-v0.6.12 a manual `/authfallback` typed command
|
|
12736
|
-
// also ran the same check; that command was removed in favour
|
|
12737
|
-
// of the `/auth` dashboard's Switch primary picker.
|
|
12738
|
-
const AUTO_FALLBACK_POLL_MS = Number(process.env.SWITCHROOM_AUTO_FALLBACK_POLL_MS ?? 60 * 60_000)
|
|
12739
|
-
if (AUTO_FALLBACK_POLL_MS > 0) {
|
|
12740
|
-
setInterval(() => { void runAutoFallbackCheck({ trigger: 'scheduled' }) }, AUTO_FALLBACK_POLL_MS).unref()
|
|
12741
|
-
}
|
|
12742
|
-
|
|
12743
13446
|
// Credit-exhaustion watcher (#348). Reads `<agentDir>/.claude/.claude.json`
|
|
12744
13447
|
// for `cachedExtraUsageDisabledReason`. Fires a Telegram notification
|
|
12745
13448
|
// on transition into / out of fatal billing states (out_of_credits,
|