switchroom 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/agent-scheduler/index.js +2 -2
- package/dist/auth-broker/index.js +125 -3
- package/dist/cli/drive-write-pretool.mjs +5436 -0
- package/dist/cli/switchroom.js +231 -29
- package/dist/host-control/main.js +2 -2
- package/dist/vault/approvals/kernel-server.js +2 -2
- package/dist/vault/broker/server.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
- package/telegram-plugin/admin-commands/index.ts +2 -0
- package/telegram-plugin/auth-snapshot-format.ts +612 -0
- package/telegram-plugin/auto-fallback-fleet.ts +215 -0
- package/telegram-plugin/auto-fallback.ts +28 -301
- package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
- package/telegram-plugin/fleet-fallback-gate.ts +105 -0
- package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
- package/telegram-plugin/gateway/approval-callback.ts +31 -3
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
- package/telegram-plugin/gateway/auth-command.ts +131 -10
- package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
- package/telegram-plugin/gateway/boot-card.ts +1 -1
- package/telegram-plugin/gateway/boot-probes.ts +6 -9
- package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
- package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
- package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
- package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
- package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
- package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
- package/telegram-plugin/gateway/gateway.ts +903 -173
- package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
- package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
- package/telegram-plugin/gateway/ipc-server.ts +69 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
- package/telegram-plugin/model-unavailable.ts +28 -12
- package/telegram-plugin/silence-poke.ts +153 -1
- package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
- package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
- package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
- package/telegram-plugin/tests/boot-probes.test.ts +16 -18
- package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
- package/telegram-plugin/tests/silence-poke.test.ts +237 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
- package/telegram-plugin/turn-flush-safety.ts +55 -1
- package/telegram-plugin/uat/SETUP.md +16 -12
- package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
- package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
- package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
|
@@ -52,6 +52,7 @@ import { OutboundDedupCache } from '../recent-outbound-dedup.js'
|
|
|
52
52
|
import { createInboundCoalescer, inboundCoalesceKey } from './inbound-coalesce.js'
|
|
53
53
|
import { StatusReactionController } from '../status-reactions.js'
|
|
54
54
|
import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
|
|
55
|
+
import { toolLabel } from '../tool-labels.js'
|
|
55
56
|
import { createTypingWrapper } from '../typing-wrap.js'
|
|
56
57
|
import { type DraftStreamHandle } from '../draft-stream.js'
|
|
57
58
|
import { handlePtyPartialPure, type PtyHandlerState } from '../pty-partial-handler.js'
|
|
@@ -94,6 +95,8 @@ import {
|
|
|
94
95
|
import type { AuthBrokerClient } from './auth-command.js'
|
|
95
96
|
import type { ListStateData } from './auth-line.js'
|
|
96
97
|
import { getAuthBrokerClient, addAccountViaBroker } from './auth-broker-client.js'
|
|
98
|
+
import { resolveAuthBrokerSocketPath } from '../../src/auth/broker/client.js'
|
|
99
|
+
import { createFleetFallbackGate } from '../fleet-fallback-gate.js'
|
|
97
100
|
import {
|
|
98
101
|
pendingAuthAddFlows,
|
|
99
102
|
startAccountAuthSession,
|
|
@@ -124,6 +127,7 @@ import {
|
|
|
124
127
|
formatModelUnavailableCard,
|
|
125
128
|
resolveModelUnavailableFromOperatorEvent,
|
|
126
129
|
} from '../model-unavailable.js'
|
|
130
|
+
import { runFleetAutoFallback } from '../auto-fallback-fleet.js'
|
|
127
131
|
import { startRestartWatchdog } from './restart-watchdog.js'
|
|
128
132
|
import { validateStringArray } from './access-validator.js'
|
|
129
133
|
|
|
@@ -160,6 +164,11 @@ import {
|
|
|
160
164
|
TELEGRAM_SWITCHROOM_COMMANDS,
|
|
161
165
|
type AgentMetadata, type AuthSummary, type StatusProbeRow,
|
|
162
166
|
} from '../welcome-text.js'
|
|
167
|
+
import {
|
|
168
|
+
type BrokerStateView,
|
|
169
|
+
type ClaudeJsonView,
|
|
170
|
+
buildAuthSummaryFromBroker,
|
|
171
|
+
} from './auth-status-adapter.js'
|
|
163
172
|
import {
|
|
164
173
|
isContextExhaustionText,
|
|
165
174
|
shouldArmOrphanedReplyTimeout,
|
|
@@ -186,39 +195,52 @@ import {
|
|
|
186
195
|
import { sweepActiveReactions } from '../active-reactions-sweep.js'
|
|
187
196
|
import { flushOnAgentDisconnect } from './disconnect-flush.js'
|
|
188
197
|
import { PreambleSuppressor } from './preamble-suppressor.js'
|
|
198
|
+
import {
|
|
199
|
+
fetchFolderPage,
|
|
200
|
+
FolderListCache,
|
|
201
|
+
} from '../../src/drive/folder-list.js'
|
|
202
|
+
import { loadFromAuthBroker } from '../../src/drive/wrapper-broker.js'
|
|
203
|
+
import {
|
|
204
|
+
handleFoldersCommand,
|
|
205
|
+
handleFolderPickerCallback,
|
|
206
|
+
type FolderPickerHandlerDeps,
|
|
207
|
+
} from './folder-picker-handler.js'
|
|
208
|
+
import {
|
|
209
|
+
approvalConsume as kernelApprovalConsume,
|
|
210
|
+
approvalRecord as kernelApprovalRecord,
|
|
211
|
+
approvalRequest as kernelApprovalRequest,
|
|
212
|
+
} from '../../src/vault/approvals/client.js'
|
|
189
213
|
import {
|
|
190
214
|
fetchQuota,
|
|
191
215
|
formatQuotaBlock,
|
|
192
216
|
} from '../quota-check.js'
|
|
193
217
|
import {
|
|
194
|
-
evaluateFallbackTrigger,
|
|
195
|
-
performAutoFallback,
|
|
196
|
-
emptyLockout,
|
|
197
218
|
loadLockout,
|
|
198
|
-
nextLockout,
|
|
199
|
-
saveLockout,
|
|
200
219
|
DEFAULT_FALLBACK_COOLDOWN_MS,
|
|
201
|
-
type LockoutRecord,
|
|
202
220
|
type LockoutPersistOps,
|
|
203
221
|
} from '../auto-fallback.js'
|
|
204
|
-
import {
|
|
205
|
-
import {
|
|
222
|
+
import { DEFAULT_SLOT } from '../../src/auth/accounts.js'
|
|
223
|
+
import { currentActiveSlot, type AuthCodeOutcome } from '../../src/auth/manager.js'
|
|
206
224
|
import { injectSlashCommand as injectSlashCommandImpl } from '../../src/agents/inject.js'
|
|
207
225
|
import { handleInjectCommand } from './inject-handler.js'
|
|
208
226
|
import { type BannerState } from '../slot-banner.js'
|
|
209
227
|
import { refreshBanner } from '../slot-banner-driver.js'
|
|
210
|
-
import { dispatchFallbackNotification } from '../auto-fallback-dispatcher.js'
|
|
211
228
|
import { loadConfig as loadSwitchroomConfig } from '../../src/config/loader.js'; import { resolveAgentConfig } from '../../src/config/merge.js'
|
|
212
229
|
import {
|
|
213
230
|
tryHostdDispatch,
|
|
214
231
|
hostdRequestId,
|
|
215
232
|
hostdWillBeUsed,
|
|
233
|
+
pollHostdStatus,
|
|
234
|
+
warnLegacySpawnIfHostdDisabled,
|
|
216
235
|
_resetHostdEnabledCache,
|
|
217
236
|
} from './hostd-dispatch.js'
|
|
237
|
+
import type { HostdRequest } from '../../src/host-control/protocol.js'
|
|
218
238
|
import type { AgentAudit } from '../welcome-text.js'
|
|
219
239
|
import { shouldSweepChatAtBoot } from './boot-sweep-filter.js'
|
|
220
240
|
|
|
221
241
|
import { createIpcServer, type IpcClient, type IpcServer } from './ipc-server.js'
|
|
242
|
+
import { handleRequestDriveApproval } from './drive-write-approval.js'
|
|
243
|
+
import { buildDiffPreviewCard } from './diff-preview-card.js'
|
|
222
244
|
import { createPendingInboundBuffer } from './pending-inbound-buffer.js'
|
|
223
245
|
import {
|
|
224
246
|
buildVaultGrantApprovedInbound,
|
|
@@ -1086,6 +1108,14 @@ type CurrentTurn = {
|
|
|
1086
1108
|
gatewayReceiveAt: number
|
|
1087
1109
|
replyCalled: boolean
|
|
1088
1110
|
capturedText: string[]
|
|
1111
|
+
// #1291: snapshot of capturedText.length at the moment of the most
|
|
1112
|
+
// recent reply / stream_reply tool call. Used by decideTurnFlush to
|
|
1113
|
+
// isolate the post-reply tail (e.g. a soft-commit reply followed by
|
|
1114
|
+
// the real substantive answer in terminal text only) and flush it as
|
|
1115
|
+
// a follow-up message. Pre-#1291 the existence of ANY reply call
|
|
1116
|
+
// suppressed flush entirely — that lost long terminal-only answers
|
|
1117
|
+
// after a "let me check" interim reply.
|
|
1118
|
+
capturedTextLenAtLastReply: number
|
|
1089
1119
|
orphanedReplyTimeoutId: ReturnType<typeof setTimeout> | null
|
|
1090
1120
|
registryKey: string | null
|
|
1091
1121
|
// Last assistant outbound message id for the current turn — populated
|
|
@@ -1974,6 +2004,13 @@ const awaitingAuthCodeAt = new Map<string, number>()
|
|
|
1974
2004
|
const AUTH_CODE_CONTEXT_TTL_MS = 5 * 60_000 // 5 min — OAuth code lifetime
|
|
1975
2005
|
const DEFERRED_SECRET_TTL_MS = 24 * 60 * 60_000 // 24 h — ignored one-tap cards
|
|
1976
2006
|
|
|
2007
|
+
// Freshness throttle for `auth:refresh` taps. Keyed by `<chat_id>:<message_id>`
|
|
2008
|
+
// so two different snapshot messages throttle independently. Each refresh
|
|
2009
|
+
// fans out N live api.anthropic.com probes (one per account), so we cap
|
|
2010
|
+
// rapid re-taps to one per AUTH_REFRESH_THROTTLE_MS.
|
|
2011
|
+
const lastAuthRefreshAtMs = new Map<string, number>()
|
|
2012
|
+
const AUTH_REFRESH_THROTTLE_MS = 5_000
|
|
2013
|
+
|
|
1977
2014
|
// ─── TTL reaper ───────────────────────────────────────────────────────────
|
|
1978
2015
|
// Pending state maps above all grow whenever a flow starts and only shrink
|
|
1979
2016
|
// when the flow completes. Users abandoning a flow (closing Telegram, losing
|
|
@@ -2037,6 +2074,12 @@ const pendingStateReaper = setInterval(() => {
|
|
|
2037
2074
|
for (const [k, v] of awaitingAuthCodeAt) {
|
|
2038
2075
|
if (now - v > AUTH_CODE_CONTEXT_TTL_MS) awaitingAuthCodeAt.delete(k)
|
|
2039
2076
|
}
|
|
2077
|
+
// Auth-refresh throttle entries decay quickly (5s window); sweep
|
|
2078
|
+
// anything older than 60s so abandoned snapshot messages don't pin
|
|
2079
|
+
// their key forever.
|
|
2080
|
+
for (const [k, v] of lastAuthRefreshAtMs) {
|
|
2081
|
+
if (now - v > 60_000) lastAuthRefreshAtMs.delete(k)
|
|
2082
|
+
}
|
|
2040
2083
|
// /auth rm two-step confirm window — self-expires at `expiresAt`.
|
|
2041
2084
|
for (const [k, v] of pendingAuthRmFlows) {
|
|
2042
2085
|
if (now >= v.expiresAt) pendingAuthRmFlows.delete(k)
|
|
@@ -2241,11 +2284,33 @@ function emitGatewayOperatorEvent(event: OperatorEvent): void {
|
|
|
2241
2284
|
let renderedText: string
|
|
2242
2285
|
let renderedKeyboard: ReturnType<typeof renderOperatorEvent>['keyboard'] | undefined
|
|
2243
2286
|
if (modelUnavailable) {
|
|
2287
|
+
// Two questions, asked synchronously to avoid the "card promises
|
|
2288
|
+
// an announcement that never arrives" trap:
|
|
2289
|
+
// 1. Is this a kind that AUTO-fallback can address?
|
|
2290
|
+
// 2. Will the dispatcher actually fire (vs. dedup-drop)?
|
|
2291
|
+
// Card text branches on the AND. wouldFireFleetAutoFallback is a
|
|
2292
|
+
// pure read of the dedup state; calling fireFleetAutoFallback only
|
|
2293
|
+
// when both are true keeps the card honest.
|
|
2294
|
+
const isAutoKind =
|
|
2295
|
+
modelUnavailable.kind === 'quota_exhausted' || modelUnavailable.kind === 'overload'
|
|
2296
|
+
const willActuallyFire = isAutoKind && wouldFireFleetAutoFallback()
|
|
2244
2297
|
process.stderr.write(
|
|
2245
|
-
`telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind}\n`,
|
|
2298
|
+
`telegram gateway: operator-event suppressing-raw-stderr-for-model-unavailable agent=${agent} kind=${kind} detected=${modelUnavailable.kind} autoKind=${isAutoKind} willFire=${willActuallyFire}\n`,
|
|
2246
2299
|
)
|
|
2247
|
-
renderedText = formatModelUnavailableCard(modelUnavailable, agent
|
|
2300
|
+
renderedText = formatModelUnavailableCard(modelUnavailable, agent, {
|
|
2301
|
+
autoFallbackInFlight: willActuallyFire,
|
|
2302
|
+
})
|
|
2248
2303
|
renderedKeyboard = undefined
|
|
2304
|
+
// Trigger fleet-wide auto-fallback. Pre-fix this branch only
|
|
2305
|
+
// rendered the card; the fallback machinery was unreachable from
|
|
2306
|
+
// here. We fire-and-forget so card delivery is never blocked on
|
|
2307
|
+
// broker / API latency. The fallback's own announcement is sent
|
|
2308
|
+
// separately with the causal-shape headline ("5-hour limit on
|
|
2309
|
+
// ken" instead of generic "quota exhausted") — see
|
|
2310
|
+
// auth-snapshot-format.ts → renderFallbackAnnouncement.
|
|
2311
|
+
if (willActuallyFire) {
|
|
2312
|
+
void fireFleetAutoFallback(agent)
|
|
2313
|
+
}
|
|
2249
2314
|
} else {
|
|
2250
2315
|
try {
|
|
2251
2316
|
const r = renderOperatorEvent(event)
|
|
@@ -2513,6 +2578,7 @@ silencePoke.startTimer({
|
|
|
2513
2578
|
const text = silencePoke.formatFrameworkFallbackText(
|
|
2514
2579
|
ctx.fallbackKind,
|
|
2515
2580
|
ctx.silenceMs,
|
|
2581
|
+
ctx.inFlightTools,
|
|
2516
2582
|
)
|
|
2517
2583
|
try {
|
|
2518
2584
|
await robustApiCall(
|
|
@@ -2820,9 +2886,46 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2820
2886
|
const key = statusKey(currentTurn.sessionChatId, currentTurn.sessionThreadId)
|
|
2821
2887
|
if (ev.kind === 'thinking') {
|
|
2822
2888
|
silencePoke.noteThinking(key, Date.now())
|
|
2823
|
-
} else if (ev.kind === 'tool_use'
|
|
2824
|
-
|
|
2825
|
-
|
|
2889
|
+
} else if (ev.kind === 'tool_use') {
|
|
2890
|
+
if (ev.toolName === 'Task' || ev.toolName === 'Agent') {
|
|
2891
|
+
// Built-in claude sub-agent dispatch — extends soft threshold to 5min.
|
|
2892
|
+
silencePoke.noteSubagentDispatch(key)
|
|
2893
|
+
}
|
|
2894
|
+
// #1292: track in-flight tool calls so the 300s framework
|
|
2895
|
+
// fallback message can name the actual observable (e.g.
|
|
2896
|
+
// "running Grep \"foo\" for 4m") instead of the dishonest
|
|
2897
|
+
// generic "still working… no update in 5 min" when the agent
|
|
2898
|
+
// is clearly busy on tool calls. Telegram-surface tools are
|
|
2899
|
+
// excluded — their job IS the outbound message, the silence
|
|
2900
|
+
// clock resets via noteOutbound when they fire. Sub-agent
|
|
2901
|
+
// tool_use events (kind='sub_agent_tool_use') are intentionally
|
|
2902
|
+
// NOT tracked: the parent's Task tool_use is already on the
|
|
2903
|
+
// map and represents the user-observable wait.
|
|
2904
|
+
if (
|
|
2905
|
+
ev.toolUseId != null
|
|
2906
|
+
&& ev.toolUseId.length > 0
|
|
2907
|
+
&& !isTelegramSurfaceTool(ev.toolName)
|
|
2908
|
+
) {
|
|
2909
|
+
const label = toolLabel(
|
|
2910
|
+
ev.toolName,
|
|
2911
|
+
ev.input,
|
|
2912
|
+
/*preamble*/ undefined,
|
|
2913
|
+
ev.precomputedLabel,
|
|
2914
|
+
)
|
|
2915
|
+
silencePoke.noteToolStart(
|
|
2916
|
+
key,
|
|
2917
|
+
ev.toolUseId,
|
|
2918
|
+
ev.toolName,
|
|
2919
|
+
label.length > 0 ? label : null,
|
|
2920
|
+
Date.now(),
|
|
2921
|
+
)
|
|
2922
|
+
}
|
|
2923
|
+
} else if (ev.kind === 'tool_result') {
|
|
2924
|
+
// #1292: drain the in-flight entry. Idempotent on unknown ids
|
|
2925
|
+
// (covers Telegram-surface tools we skipped at start time).
|
|
2926
|
+
if (ev.toolUseId != null && ev.toolUseId.length > 0) {
|
|
2927
|
+
silencePoke.noteToolEnd(key, ev.toolUseId, Date.now())
|
|
2928
|
+
}
|
|
2826
2929
|
}
|
|
2827
2930
|
}
|
|
2828
2931
|
},
|
|
@@ -2959,6 +3062,69 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
2959
3062
|
* Logs every fire so an operator can correlate the agent's
|
|
2960
3063
|
* transcript turn against the scheduler's audit row by `prompt_key`.
|
|
2961
3064
|
*/
|
|
3065
|
+
async onRequestDriveApproval(client: IpcClient, msg) {
|
|
3066
|
+
// RFC E §4.2 Cut 2 — Drive-write PreToolUse hook is asking the
|
|
3067
|
+
// gateway to post a diff-preview card so the user can decide.
|
|
3068
|
+
await handleRequestDriveApproval(client, msg, {
|
|
3069
|
+
agentName: getMyAgentName(),
|
|
3070
|
+
loadAllowFrom: () => loadAccess().allowFrom,
|
|
3071
|
+
loadTargetChat: () => {
|
|
3072
|
+
const access = loadAccess()
|
|
3073
|
+
const operator = access.allowFrom[0]
|
|
3074
|
+
if (operator === undefined) return null
|
|
3075
|
+
// For DM-paired setups the target chat IS the operator's
|
|
3076
|
+
// user id. For group setups the gateway already has a topic
|
|
3077
|
+
// routing surface (see how /folders posts) — this picks the
|
|
3078
|
+
// DM path, which is the common case; a group-routing follow-up
|
|
3079
|
+
// can extend this.
|
|
3080
|
+
return { chatId: operator }
|
|
3081
|
+
},
|
|
3082
|
+
registerApproval: async (args) => {
|
|
3083
|
+
const r = await kernelApprovalRequest({
|
|
3084
|
+
agent_unit: args.agent_unit,
|
|
3085
|
+
scope: args.scope,
|
|
3086
|
+
action: args.action,
|
|
3087
|
+
approver_set: args.approver_set,
|
|
3088
|
+
why: args.why,
|
|
3089
|
+
ttl_ms: args.ttl_ms,
|
|
3090
|
+
})
|
|
3091
|
+
if (r === null || r.state === 'rate_limited') return null
|
|
3092
|
+
return {
|
|
3093
|
+
request_id: r.request_id,
|
|
3094
|
+
expires_at_ms: r.expires_at,
|
|
3095
|
+
}
|
|
3096
|
+
},
|
|
3097
|
+
postCard: async (args) => {
|
|
3098
|
+
try {
|
|
3099
|
+
const sent = await robustApiCall(
|
|
3100
|
+
() =>
|
|
3101
|
+
bot.api.sendMessage(args.chatId, args.text, {
|
|
3102
|
+
parse_mode: 'HTML',
|
|
3103
|
+
...(args.threadId !== undefined
|
|
3104
|
+
? { message_thread_id: args.threadId }
|
|
3105
|
+
: {}),
|
|
3106
|
+
reply_markup: args.replyMarkup as never,
|
|
3107
|
+
}),
|
|
3108
|
+
{
|
|
3109
|
+
chat_id: String(args.chatId),
|
|
3110
|
+
verb: 'drive-approval-card',
|
|
3111
|
+
...(args.threadId !== undefined ? { threadId: args.threadId } : {}),
|
|
3112
|
+
},
|
|
3113
|
+
)
|
|
3114
|
+
return { messageId: (sent as { message_id: number }).message_id }
|
|
3115
|
+
} catch (err) {
|
|
3116
|
+
process.stderr.write(
|
|
3117
|
+
`telegram gateway: drive-approval postCard failed: ${(err as Error).message}\n`,
|
|
3118
|
+
)
|
|
3119
|
+
return null
|
|
3120
|
+
}
|
|
3121
|
+
},
|
|
3122
|
+
buildCard: ({ preview, suggestRequestId }) =>
|
|
3123
|
+
buildDiffPreviewCard({ preview, suggestRequestId }),
|
|
3124
|
+
log: (m) => process.stderr.write(`telegram gateway: drive-approval — ${m}\n`),
|
|
3125
|
+
})
|
|
3126
|
+
},
|
|
3127
|
+
|
|
2962
3128
|
onInjectInbound(_client: IpcClient, msg: InjectInboundMessage) {
|
|
2963
3129
|
const promptKey = typeof msg.inbound.meta?.prompt_key === 'string'
|
|
2964
3130
|
? msg.inbound.meta.prompt_key
|
|
@@ -4638,6 +4804,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4638
4804
|
gatewayReceiveAt: startedAt,
|
|
4639
4805
|
replyCalled: false,
|
|
4640
4806
|
capturedText: [],
|
|
4807
|
+
capturedTextLenAtLastReply: 0,
|
|
4641
4808
|
orphanedReplyTimeoutId: null,
|
|
4642
4809
|
registryKey: null,
|
|
4643
4810
|
lastAssistantMsgId: null,
|
|
@@ -4734,6 +4901,12 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4734
4901
|
// placeholder-heartbeat label, which has been retired.
|
|
4735
4902
|
if (isTelegramReplyTool(name)) {
|
|
4736
4903
|
turn.replyCalled = true
|
|
4904
|
+
// #1291: pin the captured-text index at the moment of this reply
|
|
4905
|
+
// tool call. Anything pushed into capturedText after this point
|
|
4906
|
+
// is the post-reply tail (e.g. the substantive answer composed
|
|
4907
|
+
// in terminal text after a soft-commit "on it, back in a few").
|
|
4908
|
+
// decideTurnFlush slices from this index to flush the tail.
|
|
4909
|
+
turn.capturedTextLenAtLastReply = turn.capturedText.length
|
|
4737
4910
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
4738
4911
|
clearTimeout(turn.orphanedReplyTimeoutId)
|
|
4739
4912
|
turn.orphanedReplyTimeoutId = null
|
|
@@ -4993,8 +5166,20 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
4993
5166
|
chatId: turn.sessionChatId,
|
|
4994
5167
|
replyCalled: turn.replyCalled,
|
|
4995
5168
|
capturedText: turn.capturedText,
|
|
5169
|
+
capturedTextLenAtLastReply: turn.capturedTextLenAtLastReply,
|
|
4996
5170
|
flushEnabled: TURN_FLUSH_SAFETY_ENABLED,
|
|
4997
5171
|
})
|
|
5172
|
+
// #1291: when the model emitted a soft-commit reply followed by a
|
|
5173
|
+
// substantive terminal-only answer, decideTurnFlush returns
|
|
5174
|
+
// kind:'flush' with the post-reply tail. Log WARN so this case is
|
|
5175
|
+
// auditable — the model SHOULD have called reply for the tail, but
|
|
5176
|
+
// didn't, and the framework is covering for it.
|
|
5177
|
+
if (flushDecision.kind === 'flush' && turn.replyCalled) {
|
|
5178
|
+
process.stderr.write(
|
|
5179
|
+
`telegram gateway: WARN post-reply-tail flush (#1291) — model emitted ${flushDecision.text.length} chars after a prior reply call without a follow-up reply tool` +
|
|
5180
|
+
` chat=${chatId} turnStartedAt=${turn.startedAt}\n`,
|
|
5181
|
+
)
|
|
5182
|
+
}
|
|
4998
5183
|
if (flushDecision.kind === 'skip' && flushDecision.reason !== 'reply-called') {
|
|
4999
5184
|
process.stderr.write(
|
|
5000
5185
|
`telegram gateway: turn-flush skipped — reason=${flushDecision.reason}\n`,
|
|
@@ -5144,6 +5329,21 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
5144
5329
|
// backup; reset the preamble buffer (its content is already in
|
|
5145
5330
|
// the captured `capturedText`, which turn-flush is about to send).
|
|
5146
5331
|
preambleSuppressor.dropNow()
|
|
5332
|
+
// #1289 fix — drain silence-poke + signal-tracker state for this
|
|
5333
|
+
// turn. The three sibling turn_end exit branches (context-exhaust
|
|
5334
|
+
// at ~5098, silent-marker at ~5097-5098, default reply-called tail
|
|
5335
|
+
// at ~5348-5349) all call signalTracker.clear + silencePoke.endTurn.
|
|
5336
|
+
// The flush-backstop branch was retrofitted in #1067 to null
|
|
5337
|
+
// currentTurn early but never had this cleanup added — leaving the
|
|
5338
|
+
// silence-poke state in the Map, so 300s after the original turn
|
|
5339
|
+
// start the framework fallback fires and the user sees
|
|
5340
|
+
// "still working… (no update from agent in 5 min)" on a turn the
|
|
5341
|
+
// gateway already considers over.
|
|
5342
|
+
{
|
|
5343
|
+
const tKey = statusKey(chatId, threadId)
|
|
5344
|
+
signalTracker.clear(tKey)
|
|
5345
|
+
silencePoke.endTurn(tKey)
|
|
5346
|
+
}
|
|
5147
5347
|
|
|
5148
5348
|
void (async () => {
|
|
5149
5349
|
await new Promise<void>(resolve => setTimeout(resolve, 500))
|
|
@@ -7388,6 +7588,75 @@ async function executeVaultOp(ctx: Context, chatId: string, op: 'list' | 'get' |
|
|
|
7388
7588
|
}
|
|
7389
7589
|
}
|
|
7390
7590
|
|
|
7591
|
+
/**
|
|
7592
|
+
* Dispatch a short-running verb (agent_start, agent_stop, cross-agent
|
|
7593
|
+
* agent_restart) through hostd when available, else fall back to the
|
|
7594
|
+
* legacy in-container CLI shell-out.
|
|
7595
|
+
*
|
|
7596
|
+
* Why: on docker-mode hosts the agent container has no docker binary,
|
|
7597
|
+
* so the legacy `runSwitchroomCommand` path silently exits 127 for any
|
|
7598
|
+
* verb that touches compose (RFC C §1, #926). Hostd runs on the host
|
|
7599
|
+
* with the docker socket mounted, so the verb actually works.
|
|
7600
|
+
*
|
|
7601
|
+
* Result handling:
|
|
7602
|
+
* - `not-configured` → fall back to {@link runSwitchroomCommand}.
|
|
7603
|
+
* (Operator opted out; let the legacy path's existing error
|
|
7604
|
+
* surfacing handle the exit-127 case.)
|
|
7605
|
+
* - `completed` → reply with the stdout tail (mirrors the legacy
|
|
7606
|
+
* path's formatted-output reply).
|
|
7607
|
+
* - `started` → reply with a brief "🔄 dispatched" ack. Verbs that
|
|
7608
|
+
* return `started` (agent_restart) finish asynchronously on the
|
|
7609
|
+
* daemon; the audit log is the canonical record.
|
|
7610
|
+
* - `error` / `denied` → reply with the error tail inline. No
|
|
7611
|
+
* fallback (RFC §7 hard-fail contract — operator opted in).
|
|
7612
|
+
*/
|
|
7613
|
+
async function dispatchShortVerbViaHostd(
|
|
7614
|
+
ctx: Context,
|
|
7615
|
+
req: HostdRequest,
|
|
7616
|
+
label: string,
|
|
7617
|
+
legacyArgs: string[],
|
|
7618
|
+
): Promise<void> {
|
|
7619
|
+
const hostdResp = await tryHostdDispatch(getMyAgentName(), req)
|
|
7620
|
+
if (hostdResp === 'not-configured') {
|
|
7621
|
+
warnLegacySpawnIfHostdDisabled(req.op)
|
|
7622
|
+
await runSwitchroomCommand(ctx, legacyArgs, label)
|
|
7623
|
+
return
|
|
7624
|
+
}
|
|
7625
|
+
if (hostdResp.result === 'completed') {
|
|
7626
|
+
const body = hostdResp.stdout_tail?.trim() || `${label}: done (exit ${hostdResp.exit_code})`
|
|
7627
|
+
const formatted = formatSwitchroomOutput(stripAnsi(body))
|
|
7628
|
+
if (formatted) {
|
|
7629
|
+
await switchroomReply(ctx, preBlock(formatted), { html: true })
|
|
7630
|
+
} else {
|
|
7631
|
+
await switchroomReply(ctx, `${label}: done (no output)`)
|
|
7632
|
+
}
|
|
7633
|
+
return
|
|
7634
|
+
}
|
|
7635
|
+
if (hostdResp.result === 'started') {
|
|
7636
|
+
await switchroomReply(
|
|
7637
|
+
ctx,
|
|
7638
|
+
`🔄 <b>${escapeHtmlForTg(label)}</b> dispatched via hostd ` +
|
|
7639
|
+
`(request_id=<code>${escapeHtmlForTg(hostdResp.request_id)}</code>). ` +
|
|
7640
|
+
`Check audit log for completion.`,
|
|
7641
|
+
{ html: true },
|
|
7642
|
+
)
|
|
7643
|
+
return
|
|
7644
|
+
}
|
|
7645
|
+
// error / denied — surface inline. RFC §7 hard-fail: no spawn fallback.
|
|
7646
|
+
const errBody =
|
|
7647
|
+
hostdResp.error ??
|
|
7648
|
+
hostdResp.stderr_tail ??
|
|
7649
|
+
hostdResp.stdout_tail ??
|
|
7650
|
+
'(no error tail returned)'
|
|
7651
|
+
await switchroomReply(
|
|
7652
|
+
ctx,
|
|
7653
|
+
`❌ <b>${escapeHtmlForTg(label)} failed via hostd</b> ` +
|
|
7654
|
+
`(result=${escapeHtmlForTg(hostdResp.result)}):\n` +
|
|
7655
|
+
preBlock(stripAnsi(errBody)),
|
|
7656
|
+
{ html: true },
|
|
7657
|
+
)
|
|
7658
|
+
}
|
|
7659
|
+
|
|
7391
7660
|
async function runSwitchroomCommand(ctx: Context, args: string[], label: string): Promise<void> {
|
|
7392
7661
|
try {
|
|
7393
7662
|
const output = stripAnsi(switchroomExec(args))
|
|
@@ -7620,13 +7889,13 @@ function buildAgentAudit(agentName: string): AgentAudit | undefined {
|
|
|
7620
7889
|
}
|
|
7621
7890
|
|
|
7622
7891
|
// Build an AgentMetadata snapshot for the current agent by shelling out
|
|
7623
|
-
// to `switchroom agent list --json` and `switchroom auth
|
|
7624
|
-
// TODO(rfc-h): the `auth status` verb was retired by RFC H. The shell
|
|
7625
|
-
// fails silently and `authSummary` lands as null — /status renders
|
|
7626
|
-
// without auth detail. Replace with an `auth show --json` adapter that
|
|
7627
|
-
// maps the new fleet-broker shape to the per-agent AuthSummary fields.
|
|
7892
|
+
// to `switchroom agent list --json` and `switchroom auth show --json`.
|
|
7628
7893
|
// Best-effort — any missing piece renders as a placeholder in the text
|
|
7629
|
-
// templates rather than blocking the reply.
|
|
7894
|
+
// templates rather than blocking the reply. RFC H retired the per-agent
|
|
7895
|
+
// `auth status --json` shape; auth state is now derived from the
|
|
7896
|
+
// broker's fleet-wide `ListStateData` payload via
|
|
7897
|
+
// `buildAuthSummaryFromBroker`, with billingType pulled from the
|
|
7898
|
+
// agent's `.claude.json` (the broker doesn't track plan tier).
|
|
7630
7899
|
async function buildAgentMetadata(agentName: string): Promise<AgentMetadata> {
|
|
7631
7900
|
type AgentListResp = {
|
|
7632
7901
|
agents: Array<{
|
|
@@ -7636,24 +7905,22 @@ async function buildAgentMetadata(agentName: string): Promise<AgentMetadata> {
|
|
|
7636
7905
|
model?: string | null;
|
|
7637
7906
|
}>
|
|
7638
7907
|
}
|
|
7639
|
-
type AuthStatusResp = {
|
|
7640
|
-
agents: Array<{
|
|
7641
|
-
name: string; authenticated: boolean; auth_source: string | null;
|
|
7642
|
-
subscription_type: string | null; expires_in: string | null;
|
|
7643
|
-
}>
|
|
7644
|
-
}
|
|
7645
7908
|
const list = switchroomExecJson<AgentListResp>(['agent', 'list'])
|
|
7646
|
-
const
|
|
7909
|
+
const brokerState = switchroomExecJson<BrokerStateView>(['auth', 'show'])
|
|
7647
7910
|
const a = list?.agents?.find(x => x.name === agentName) ?? null
|
|
7648
|
-
|
|
7649
|
-
|
|
7650
|
-
|
|
7651
|
-
|
|
7652
|
-
|
|
7653
|
-
|
|
7654
|
-
|
|
7655
|
-
|
|
7656
|
-
|
|
7911
|
+
let claudeJson: ClaudeJsonView | null = null
|
|
7912
|
+
try {
|
|
7913
|
+
const agentDir = resolveAgentDirFromEnv()
|
|
7914
|
+
if (agentDir) {
|
|
7915
|
+
const raw = readFileSync(join(agentDir, '.claude', '.claude.json'), 'utf8')
|
|
7916
|
+
claudeJson = JSON.parse(raw) as ClaudeJsonView
|
|
7917
|
+
}
|
|
7918
|
+
} catch { /* leave null — billingType becomes null in the summary */ }
|
|
7919
|
+
const authSummary: AuthSummary | null = buildAuthSummaryFromBroker(
|
|
7920
|
+
brokerState,
|
|
7921
|
+
agentName,
|
|
7922
|
+
claudeJson,
|
|
7923
|
+
)
|
|
7657
7924
|
return {
|
|
7658
7925
|
agentName,
|
|
7659
7926
|
model: a?.model ?? null,
|
|
@@ -7798,14 +8065,24 @@ bot.command('agentstart', async ctx => {
|
|
|
7798
8065
|
if (!isAuthorizedSender(ctx)) return
|
|
7799
8066
|
const name = ctx.match?.trim() || getMyAgentName()
|
|
7800
8067
|
try { assertSafeAgentName(name) } catch { await switchroomReply(ctx, 'Invalid agent name.'); return }
|
|
7801
|
-
await
|
|
8068
|
+
await dispatchShortVerbViaHostd(
|
|
8069
|
+
ctx,
|
|
8070
|
+
{ v: 1, op: 'agent_start', request_id: hostdRequestId('gw-start'), args: { name } },
|
|
8071
|
+
`start ${name}`,
|
|
8072
|
+
['agent', 'start', name],
|
|
8073
|
+
)
|
|
7802
8074
|
})
|
|
7803
8075
|
|
|
7804
8076
|
bot.command('stop', async ctx => {
|
|
7805
8077
|
if (!isAuthorizedSender(ctx)) return
|
|
7806
8078
|
const name = ctx.match?.trim() || getMyAgentName()
|
|
7807
8079
|
try { assertSafeAgentName(name) } catch { await switchroomReply(ctx, 'Invalid agent name.'); return }
|
|
7808
|
-
await
|
|
8080
|
+
await dispatchShortVerbViaHostd(
|
|
8081
|
+
ctx,
|
|
8082
|
+
{ v: 1, op: 'agent_stop', request_id: hostdRequestId('gw-stop'), args: { name } },
|
|
8083
|
+
`stop ${name}`,
|
|
8084
|
+
['agent', 'stop', name],
|
|
8085
|
+
)
|
|
7809
8086
|
})
|
|
7810
8087
|
|
|
7811
8088
|
bot.command('restart', async ctx => {
|
|
@@ -7852,6 +8129,7 @@ bot.command('restart', async ctx => {
|
|
|
7852
8129
|
args: { name, force: true, reason: 'user: /restart from chat' },
|
|
7853
8130
|
})
|
|
7854
8131
|
if (hostdResp === 'not-configured') {
|
|
8132
|
+
warnLegacySpawnIfHostdDisabled('agent_restart')
|
|
7855
8133
|
spawnSwitchroomDetached(
|
|
7856
8134
|
['agent', 'restart', name, '--force'],
|
|
7857
8135
|
notifyDetachedFailure(chatId, threadId ?? null, `restart ${name}`),
|
|
@@ -7874,7 +8152,22 @@ bot.command('restart', async ctx => {
|
|
|
7874
8152
|
)
|
|
7875
8153
|
return
|
|
7876
8154
|
}
|
|
7877
|
-
|
|
8155
|
+
// Cross-agent /restart <other>. Same hostd-first shape as self-target,
|
|
8156
|
+
// but no restart marker / no self-kill: another agent's container is
|
|
8157
|
+
// about to bounce, not ours. The daemon spawns the work and returns
|
|
8158
|
+
// "started" (per handleAgentRestart at server.ts:466), so the user
|
|
8159
|
+
// sees a brief dispatch ack and the audit log carries the outcome.
|
|
8160
|
+
await dispatchShortVerbViaHostd(
|
|
8161
|
+
ctx,
|
|
8162
|
+
{
|
|
8163
|
+
v: 1,
|
|
8164
|
+
op: 'agent_restart',
|
|
8165
|
+
request_id: hostdRequestId('gw-restart-cross'),
|
|
8166
|
+
args: { name, force: true, reason: `user: /restart ${name} from chat` },
|
|
8167
|
+
},
|
|
8168
|
+
`restart ${name}`,
|
|
8169
|
+
['agent', 'restart', name],
|
|
8170
|
+
)
|
|
7878
8171
|
})
|
|
7879
8172
|
|
|
7880
8173
|
// ─── /new and /reset ──────────────────────────────────────────────────────
|
|
@@ -7993,6 +8286,7 @@ async function handleNewOrResetCommand(ctx: Context, kind: 'new' | 'reset'): Pro
|
|
|
7993
8286
|
args: { name, force: true, reason: `user: /${kind} from chat` },
|
|
7994
8287
|
})
|
|
7995
8288
|
if (hostdResp === 'not-configured') {
|
|
8289
|
+
warnLegacySpawnIfHostdDisabled('agent_restart')
|
|
7996
8290
|
spawnSwitchroomDetached(
|
|
7997
8291
|
['agent', 'restart', name, '--force'],
|
|
7998
8292
|
notifyDetachedFailure(chatId, threadId ?? null, `${kind} ${name}`),
|
|
@@ -8156,23 +8450,83 @@ bot.command('update', async ctx => {
|
|
|
8156
8450
|
await sweepBeforeSelfRestart()
|
|
8157
8451
|
const skipImages = passthrough.includes('--skip-images')
|
|
8158
8452
|
const rebuild = passthrough.includes('--rebuild')
|
|
8453
|
+
const updateRequestId = hostdRequestId('gw-update')
|
|
8159
8454
|
const hostdResp = await tryHostdDispatch(getMyAgentName(), {
|
|
8160
8455
|
v: 1,
|
|
8161
8456
|
op: 'update_apply',
|
|
8162
|
-
request_id:
|
|
8457
|
+
request_id: updateRequestId,
|
|
8163
8458
|
args: {
|
|
8164
8459
|
...(skipImages ? { skip_images: true } : {}),
|
|
8165
8460
|
...(rebuild ? { rebuild: true } : {}),
|
|
8166
8461
|
},
|
|
8167
8462
|
})
|
|
8168
8463
|
if (hostdResp === 'not-configured') {
|
|
8464
|
+
warnLegacySpawnIfHostdDisabled('update_apply')
|
|
8169
8465
|
spawnSwitchroomDetached(
|
|
8170
8466
|
['update', ...passthrough],
|
|
8171
8467
|
notifyDetachedFailure(chatId, threadId ?? null, 'update'),
|
|
8172
8468
|
)
|
|
8173
8469
|
return
|
|
8174
8470
|
}
|
|
8175
|
-
if (hostdResp.result === '
|
|
8471
|
+
if (hostdResp.result === 'completed') {
|
|
8472
|
+
return
|
|
8473
|
+
}
|
|
8474
|
+
if (hostdResp.result === 'started') {
|
|
8475
|
+
// RFC C §5.3: long-running mutation. Poll get_status until terminal
|
|
8476
|
+
// or until the recreate kills this gateway (whichever happens first).
|
|
8477
|
+
// The success signal is the post-restart greeting card edited into
|
|
8478
|
+
// ackId via the restart marker. The poll is here so that
|
|
8479
|
+
// *fail-before-recreate* (image pull error, scaffold regen crash)
|
|
8480
|
+
// doesn't leave the operator staring at the orphan "🚀 update started"
|
|
8481
|
+
// ack indefinitely. Live repro: PR #1305.
|
|
8482
|
+
void (async () => {
|
|
8483
|
+
// 60s budget: RFC C §5.3 specs `apply` at 30s and `update_apply`
|
|
8484
|
+
// at 60s. Image pulls + scaffold regeneration dominate the wall
|
|
8485
|
+
// clock for update_apply, hence the larger budget. The poll
|
|
8486
|
+
// resolves earlier on any terminal state from the daemon.
|
|
8487
|
+
const terminal = await pollHostdStatus(getMyAgentName(), updateRequestId, {
|
|
8488
|
+
timeoutMs: 60_000,
|
|
8489
|
+
})
|
|
8490
|
+
if (terminal === 'not-configured') return
|
|
8491
|
+
// completed → recreate is about to run / has run; let the post-
|
|
8492
|
+
// restart greeting card handle the success message.
|
|
8493
|
+
if (terminal.result === 'completed') return
|
|
8494
|
+
// Anything else means the daemon's mutation failed before it could
|
|
8495
|
+
// kill us. Edit the ack to surface the tail and clear the marker
|
|
8496
|
+
// so the next gateway boot doesn't render a false success card.
|
|
8497
|
+
clearRestartMarker()
|
|
8498
|
+
const errBody =
|
|
8499
|
+
terminal.error ??
|
|
8500
|
+
terminal.stderr_tail ??
|
|
8501
|
+
terminal.stdout_tail ??
|
|
8502
|
+
'(no error tail returned)'
|
|
8503
|
+
const editedText =
|
|
8504
|
+
`🚀 <b>update started</b> — <b>FAILED</b> via hostd ` +
|
|
8505
|
+
`(result=${escapeHtmlForTg(terminal.result)}):\n` +
|
|
8506
|
+
preBlock(errBody)
|
|
8507
|
+
if (ackId != null) {
|
|
8508
|
+
try {
|
|
8509
|
+
await robustApiCall(
|
|
8510
|
+
() =>
|
|
8511
|
+
lockedBot.api.editMessageText(chatId, ackId!, editedText, {
|
|
8512
|
+
parse_mode: 'HTML',
|
|
8513
|
+
link_preview_options: { is_disabled: true },
|
|
8514
|
+
}),
|
|
8515
|
+
{ verb: 'update.poll.editAck' },
|
|
8516
|
+
)
|
|
8517
|
+
} catch {
|
|
8518
|
+
// edit-failed (message deleted, parse error) — fall back to
|
|
8519
|
+
// a fresh reply so the failure isn't silent.
|
|
8520
|
+
try {
|
|
8521
|
+
await switchroomReply(ctx, editedText, { html: true })
|
|
8522
|
+
} catch {}
|
|
8523
|
+
}
|
|
8524
|
+
} else {
|
|
8525
|
+
try {
|
|
8526
|
+
await switchroomReply(ctx, editedText, { html: true })
|
|
8527
|
+
} catch {}
|
|
8528
|
+
}
|
|
8529
|
+
})()
|
|
8176
8530
|
return
|
|
8177
8531
|
}
|
|
8178
8532
|
clearRestartMarker()
|
|
@@ -8209,6 +8563,81 @@ bot.command('upgrade', async ctx => {
|
|
|
8209
8563
|
)
|
|
8210
8564
|
})
|
|
8211
8565
|
|
|
8566
|
+
// /audit hostd — tail/filter the hostd audit log. Mirrors `/vault audit`
|
|
8567
|
+
// in spirit (operator observability over a privileged subsystem from any
|
|
8568
|
+
// admin DM). Admin-gated via ADMIN_COMMAND_NAMES. Reads the audit JSONL
|
|
8569
|
+
// at ~/.switchroom/host-control-audit.log directly — no hostd RPC needed
|
|
8570
|
+
// because the file is shared via the host bind mount on docker installs.
|
|
8571
|
+
bot.command('audit', async ctx => {
|
|
8572
|
+
if (!isAuthorizedSender(ctx)) return
|
|
8573
|
+
const arg = (ctx.match ?? '').trim()
|
|
8574
|
+
if (arg === '' || arg === 'help' || arg === '--help') {
|
|
8575
|
+
await switchroomReply(
|
|
8576
|
+
ctx,
|
|
8577
|
+
'Usage: <code>/audit hostd [--tail N] [--agent <name>] [--op <verb>] [--error]</code>',
|
|
8578
|
+
{ html: true },
|
|
8579
|
+
)
|
|
8580
|
+
return
|
|
8581
|
+
}
|
|
8582
|
+
const tokens = arg.split(/\s+/)
|
|
8583
|
+
const sub = tokens[0]
|
|
8584
|
+
if (sub !== 'hostd') {
|
|
8585
|
+
await switchroomReply(
|
|
8586
|
+
ctx,
|
|
8587
|
+
`Unknown audit target <code>${escapeHtmlForTg(sub ?? '')}</code>. ` +
|
|
8588
|
+
`Supported: <code>hostd</code>.`,
|
|
8589
|
+
{ html: true },
|
|
8590
|
+
)
|
|
8591
|
+
return
|
|
8592
|
+
}
|
|
8593
|
+
// Build the CLI argv for switchroom hostd audit. Validate each
|
|
8594
|
+
// operator-supplied value to keep argv injection out of the picture.
|
|
8595
|
+
const ALLOWED_OPS = new Set([
|
|
8596
|
+
'agent_start', 'agent_stop', 'agent_restart', 'apply',
|
|
8597
|
+
'update_check', 'update_apply', 'update_status', 'upgrade_status',
|
|
8598
|
+
'get_status', 'doctor', 'fleet_state',
|
|
8599
|
+
])
|
|
8600
|
+
const argv: string[] = ['hostd', 'audit']
|
|
8601
|
+
for (let i = 1; i < tokens.length; i++) {
|
|
8602
|
+
const t = tokens[i]!
|
|
8603
|
+
if (t === '--error') { argv.push('--error'); continue }
|
|
8604
|
+
if (t === '--tail' || t === '--agent' || t === '--op') {
|
|
8605
|
+
const v = tokens[++i]
|
|
8606
|
+
if (v == null) {
|
|
8607
|
+
await switchroomReply(ctx, `Flag <code>${t}</code> requires a value.`, { html: true })
|
|
8608
|
+
return
|
|
8609
|
+
}
|
|
8610
|
+
if (t === '--tail' && !/^[0-9]{1,4}$/.test(v)) {
|
|
8611
|
+
await switchroomReply(ctx, `<code>--tail</code> must be an integer (1-9999).`, { html: true })
|
|
8612
|
+
return
|
|
8613
|
+
}
|
|
8614
|
+
if (t === '--agent' && !/^[a-z][a-z0-9-]{0,62}$/i.test(v)) {
|
|
8615
|
+
await switchroomReply(ctx, `<code>--agent</code> name has an invalid shape.`, { html: true })
|
|
8616
|
+
return
|
|
8617
|
+
}
|
|
8618
|
+
if (t === '--op' && !ALLOWED_OPS.has(v)) {
|
|
8619
|
+
await switchroomReply(
|
|
8620
|
+
ctx,
|
|
8621
|
+
`Unknown hostd verb <code>${escapeHtmlForTg(v)}</code>. ` +
|
|
8622
|
+
`Known: ${[...ALLOWED_OPS].sort().map(o => `<code>${o}</code>`).join(', ')}.`,
|
|
8623
|
+
{ html: true },
|
|
8624
|
+
)
|
|
8625
|
+
return
|
|
8626
|
+
}
|
|
8627
|
+
argv.push(t, v)
|
|
8628
|
+
continue
|
|
8629
|
+
}
|
|
8630
|
+
await switchroomReply(
|
|
8631
|
+
ctx,
|
|
8632
|
+
`Unknown flag <code>${escapeHtmlForTg(t)}</code>. ` +
|
|
8633
|
+
`Allowed: <code>--tail</code>, <code>--agent</code>, <code>--op</code>, <code>--error</code>.`,
|
|
8634
|
+
{ html: true },
|
|
8635
|
+
)
|
|
8636
|
+
return
|
|
8637
|
+
}
|
|
8638
|
+
await runSwitchroomCommand(ctx, argv, `hostd audit${argv.length > 2 ? ' …' : ''}`)
|
|
8639
|
+
})
|
|
8640
|
+
|
|
8212
8641
|
// ─── /approve, /deny, /pending ────────────────────────────────────────────
|
|
8213
8642
|
// Slash-command alternatives to the inline-button approval flow (useful for
|
|
8214
8643
|
// desktop-only sessions and power-users). Share pendingPermissions state
|
|
@@ -8272,6 +8701,59 @@ async function handlePermissionSlash(ctx: Context, behavior: 'allow' | 'deny'):
|
|
|
8272
8701
|
bot.command('approve', async ctx => handlePermissionSlash(ctx, 'allow'))
|
|
8273
8702
|
bot.command('deny', async ctx => handlePermissionSlash(ctx, 'deny'))
|
|
8274
8703
|
|
|
8704
|
+
// ─── Drive folder picker (RFC E §4.1) ───────────────────────────────────
|
|
8705
|
+
// /folders — post a Telegram picker card listing this agent's top-level
|
|
8706
|
+
// Drive folders. Tap [Allow] on a folder to grant the agent
|
|
8707
|
+
// allow_always at doc:gdrive:folder/<id>/**; tap [Browse] to drill in.
|
|
8708
|
+
//
|
|
8709
|
+
// Authorisation: same dmCommandGate as the other operator slash
|
|
8710
|
+
// commands — only allowFrom users can post-trigger.
|
|
8711
|
+
|
|
8712
|
+
const folderPickerCache = new FolderListCache()
|
|
8713
|
+
|
|
8714
|
+
function buildFolderPickerDeps(): FolderPickerHandlerDeps {
|
|
8715
|
+
const agentName = getMyAgentName()
|
|
8716
|
+
return {
|
|
8717
|
+
agentName,
|
|
8718
|
+
cache: folderPickerCache,
|
|
8719
|
+
fetchPage: async ({ parent_id, page_token }) => {
|
|
8720
|
+
const handle = await loadFromAuthBroker()
|
|
8721
|
+
if (handle === null) {
|
|
8722
|
+
throw new Error(
|
|
8723
|
+
`auth-broker unreachable for agent ${agentName} — is the broker container running?`,
|
|
8724
|
+
)
|
|
8725
|
+
}
|
|
8726
|
+
return fetchFolderPage({
|
|
8727
|
+
access_token: handle.access_token,
|
|
8728
|
+
...(parent_id !== undefined ? { parent_id } : {}),
|
|
8729
|
+
...(page_token !== undefined ? { page_token } : {}),
|
|
8730
|
+
})
|
|
8731
|
+
},
|
|
8732
|
+
approvalRequest: async (args) => {
|
|
8733
|
+
const r = await kernelApprovalRequest({
|
|
8734
|
+
agent_unit: args.agent_unit,
|
|
8735
|
+
scope: args.scope,
|
|
8736
|
+
action: args.action,
|
|
8737
|
+
approver_set: args.approver_set,
|
|
8738
|
+
...(args.why !== null && args.why !== undefined ? { why: args.why } : {}),
|
|
8739
|
+
...(args.ttl_ms !== null && args.ttl_ms !== undefined ? { ttl_ms: args.ttl_ms } : {}),
|
|
8740
|
+
})
|
|
8741
|
+
if (r === null || r.state === 'rate_limited') return null
|
|
8742
|
+
return { request_id: r.request_id }
|
|
8743
|
+
},
|
|
8744
|
+
approvalConsume: async (id) => {
|
|
8745
|
+
const r = await kernelApprovalConsume(id)
|
|
8746
|
+
return r !== null && r.consumed
|
|
8747
|
+
},
|
|
8748
|
+
approvalRecord: async (args) => kernelApprovalRecord(args),
|
|
8749
|
+
}
|
|
8750
|
+
}
|
|
8751
|
+
|
|
8752
|
+
bot.command('folders', async ctx => {
|
|
8753
|
+
if (!isAuthorizedSender(ctx)) return
|
|
8754
|
+
await handleFoldersCommand(ctx, buildFolderPickerDeps())
|
|
8755
|
+
})
|
|
8756
|
+
|
|
8275
8757
|
// /pending — list current pending permission prompts with their ids, so the
|
|
8276
8758
|
// user can target a specific one via /approve <id> or /deny <id>.
|
|
8277
8759
|
// Restricted to access.allowFrom DMs to match /approve and /deny — it
|
|
@@ -8303,16 +8785,12 @@ bot.command('interrupt', async ctx => {
|
|
|
8303
8785
|
await runSwitchroomCommand(ctx, ['agent', 'interrupt', name], `interrupt ${name}`)
|
|
8304
8786
|
})
|
|
8305
8787
|
|
|
8306
|
-
//
|
|
8307
|
-
//
|
|
8308
|
-
//
|
|
8309
|
-
//
|
|
8310
|
-
//
|
|
8311
|
-
//
|
|
8312
|
-
// quota-flap on the recovering slot could re-trigger fallback the
|
|
8313
|
-
// moment the gateway came back. We now seed from disk on first use
|
|
8314
|
-
// and persist on every transition. Errors are swallowed: losing the
|
|
8315
|
-
// lockout file just degrades to in-memory-only behaviour.
|
|
8788
|
+
// Persist-ops bundle for the legacy auto-fallback lockout file. The
|
|
8789
|
+
// only remaining reader is `isAutoFallbackCooldownActive` (line ~2030)
|
|
8790
|
+
// — used by the pending-restart drain cap to defer a forced restart
|
|
8791
|
+
// stacking on top of an in-flight slot rotation. The legacy poller
|
|
8792
|
+
// that USED to write this file was retired alongside this refactor;
|
|
8793
|
+
// existing on-disk lockouts age out via DEFAULT_FALLBACK_COOLDOWN_MS.
|
|
8316
8794
|
const lockoutOps: LockoutPersistOps = {
|
|
8317
8795
|
readFileSync: (p, enc) => readFileSync(p, enc),
|
|
8318
8796
|
writeFileSync: (p, data, opts) => writeFileSync(p, data, opts),
|
|
@@ -8320,24 +8798,6 @@ const lockoutOps: LockoutPersistOps = {
|
|
|
8320
8798
|
mkdirSync: (p, opts) => mkdirSync(p, opts),
|
|
8321
8799
|
joinPath: (...parts) => join(...parts),
|
|
8322
8800
|
}
|
|
8323
|
-
let autoFallbackLockout: LockoutRecord = emptyLockout()
|
|
8324
|
-
let autoFallbackLockoutSeeded = false
|
|
8325
|
-
function seedAutoFallbackLockoutIfNeeded(agentDir: string): void {
|
|
8326
|
-
if (autoFallbackLockoutSeeded) return
|
|
8327
|
-
autoFallbackLockoutSeeded = true
|
|
8328
|
-
try {
|
|
8329
|
-
autoFallbackLockout = loadLockout(agentDir, lockoutOps)
|
|
8330
|
-
} catch (err) {
|
|
8331
|
-
process.stderr.write(`telegram gateway: auto-fallback lockout seed failed (using empty): ${(err as Error).message}\n`)
|
|
8332
|
-
}
|
|
8333
|
-
}
|
|
8334
|
-
function persistLockout(agentDir: string): void {
|
|
8335
|
-
try {
|
|
8336
|
-
saveLockout(agentDir, autoFallbackLockout, lockoutOps)
|
|
8337
|
-
} catch (err) {
|
|
8338
|
-
process.stderr.write(`telegram gateway: auto-fallback lockout persist failed: ${(err as Error).message}\n`)
|
|
8339
|
-
}
|
|
8340
|
-
}
|
|
8341
8801
|
|
|
8342
8802
|
// Pinned slot-banner state (#421). One banner per gateway process,
|
|
8343
8803
|
// in the owner chat (access.allowFrom[0]). Per-topic forum support
|
|
@@ -8368,91 +8828,129 @@ async function refreshPinnedBanner(reason: string): Promise<void> {
|
|
|
8368
8828
|
}
|
|
8369
8829
|
}
|
|
8370
8830
|
|
|
8371
|
-
|
|
8372
|
-
|
|
8373
|
-
|
|
8374
|
-
|
|
8375
|
-
|
|
8831
|
+
/**
|
|
8832
|
+
* Re-entry guard + dedup window for `fireFleetAutoFallback`. The state
|
|
8833
|
+
* was lifted into `fleet-fallback-gate.ts` so it can be tested in
|
|
8834
|
+
* isolation (gateway.ts module state was unreachable from vitest). The
|
|
8835
|
+
* gate ALSO enforces the broker-reachability honesty contract: when the
|
|
8836
|
+
* broker is down, `wouldFire()` returns false so the model-unavailable
|
|
8837
|
+
* card stays honest instead of advertising a swap that would bail with
|
|
8838
|
+
* `reason=no-broker-client`.
|
|
8839
|
+
*/
|
|
8840
|
+
const FLEET_FALLBACK_DEDUP_MS = 30_000
|
|
8841
|
+
|
|
8842
|
+
/** Synchronous reachability check for the auth-broker UDS. Used by the
|
|
8843
|
+
* fleet-fallback gate to keep the model-unavailable card honest: if the
|
|
8844
|
+
* broker socket isn't bound, the dispatcher would bail with
|
|
8845
|
+
* `reason=no-broker-client`, so `wouldFire()` should return false and
|
|
8846
|
+
* the card should fall back to the manual `/auth use <label>` hint. */
|
|
8847
|
+
function isAuthBrokerSocketReachable(): boolean {
|
|
8848
|
+
try {
|
|
8849
|
+
return existsSync(resolveAuthBrokerSocketPath())
|
|
8850
|
+
} catch {
|
|
8851
|
+
return false
|
|
8852
|
+
}
|
|
8853
|
+
}
|
|
8854
|
+
|
|
8855
|
+
const fleetFallbackGate = createFleetFallbackGate({
|
|
8856
|
+
dedupMs: FLEET_FALLBACK_DEDUP_MS,
|
|
8857
|
+
brokerReachable: isAuthBrokerSocketReachable,
|
|
8858
|
+
})
|
|
8859
|
+
|
|
8860
|
+
function wouldFireFleetAutoFallback(): boolean {
|
|
8861
|
+
return fleetFallbackGate.wouldFire()
|
|
8862
|
+
}
|
|
8376
8863
|
|
|
8377
|
-
|
|
8378
|
-
|
|
8379
|
-
|
|
8380
|
-
|
|
8864
|
+
/**
|
|
8865
|
+
* Fleet-wide auto-fallback dispatcher (RFC H follow-up).
|
|
8866
|
+
*
|
|
8867
|
+
* Wired from the model-unavailable card render path so a quota-out
|
|
8868
|
+
* event on ANY agent immediately triggers a fleet-wide swap (via
|
|
8869
|
+
* broker.setActive — same path /auth use takes), not the per-agent
|
|
8870
|
+
* legacy `runAutoFallbackCheck`. Pre-fix, the card path never called
|
|
8871
|
+
* any fallback machinery; the scheduled poller (60-min interval, only
|
|
8872
|
+
* fires on utilization headers) was the only trigger and missed
|
|
8873
|
+
* hard-rejection events.
|
|
8874
|
+
*
|
|
8875
|
+
* Concurrency: collapses concurrent triggers via the in-flight
|
|
8876
|
+
* promise above. Subsequent calls within `FLEET_FALLBACK_DEDUP_MS` of
|
|
8877
|
+
* a recent fire are dropped silently — the broadcast announcement is
|
|
8878
|
+
* the user-visible signal that the swap happened, no need to repeat.
|
|
8879
|
+
*
|
|
8880
|
+
* Fire-and-forget: never throws into the caller's flow. Posts the
|
|
8881
|
+
* causal-shape announcement to every chat in `loadAccess().allowFrom`
|
|
8882
|
+
* so the user sees the outcome inline with the original "Model
|
|
8883
|
+
* unavailable" card.
|
|
8884
|
+
*/
|
|
8885
|
+
async function fireFleetAutoFallback(triggerAgent: string): Promise<void> {
|
|
8886
|
+
return fleetFallbackGate.fire(
|
|
8887
|
+
() => doFireFleetAutoFallback(triggerAgent),
|
|
8888
|
+
(err) => {
|
|
8889
|
+
process.stderr.write(
|
|
8890
|
+
`telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${(err as Error)?.message ?? err}\n`,
|
|
8891
|
+
)
|
|
8892
|
+
},
|
|
8893
|
+
)
|
|
8894
|
+
}
|
|
8895
|
+
|
|
8896
|
+
/** Returns true iff the dispatcher actually performed a swap (and the
|
|
8897
|
+
* user-visible announcement was broadcast). False on no-op /
|
|
8898
|
+
* error / idempotent-skip — caller uses this to decide whether to
|
|
8899
|
+
* arm the post-fire suppression window. */
|
|
8900
|
+
async function doFireFleetAutoFallback(triggerAgent: string): Promise<boolean> {
|
|
8381
8901
|
try {
|
|
8382
|
-
const
|
|
8383
|
-
if (!
|
|
8384
|
-
return { kind: 'no-action', reason: 'no agent dir', decision: 'noop' }
|
|
8385
|
-
}
|
|
8386
|
-
const agentName = getMyAgentName()
|
|
8387
|
-
seedAutoFallbackLockoutIfNeeded(agentDir)
|
|
8388
|
-
const active = currentActiveSlot(agentDir)
|
|
8389
|
-
const quota = await fetchQuota({ claudeConfigDir: join(agentDir, '.claude') })
|
|
8390
|
-
const decision = evaluateFallbackTrigger({
|
|
8391
|
-
quota,
|
|
8392
|
-
activeSlot: active,
|
|
8393
|
-
now: Date.now(),
|
|
8394
|
-
lockout: autoFallbackLockout,
|
|
8395
|
-
})
|
|
8396
|
-
if (decision.action !== 'fallback') {
|
|
8902
|
+
const client = await getAuthBrokerClient(triggerAgent)
|
|
8903
|
+
if (!client) {
|
|
8397
8904
|
process.stderr.write(
|
|
8398
|
-
`telegram gateway: [
|
|
8905
|
+
`telegram gateway: [fleet-fallback] skipped agent=${triggerAgent} reason=no-broker-client\n`,
|
|
8399
8906
|
)
|
|
8400
|
-
return
|
|
8907
|
+
return false
|
|
8401
8908
|
}
|
|
8402
|
-
|
|
8403
|
-
|
|
8404
|
-
|
|
8405
|
-
|
|
8406
|
-
|
|
8407
|
-
|
|
8408
|
-
|
|
8409
|
-
|
|
8909
|
+
const state = await client.listState()
|
|
8910
|
+
// Probe live quota via the broker (#1336). Pre-fix this read
|
|
8911
|
+
// credentials.json off the agent HOME, which is never populated
|
|
8912
|
+
// post-RFC-H — every account looked "no credentials" and the
|
|
8913
|
+
// fallback logic rolled blindly. Broker-routed probes use the
|
|
8914
|
+
// canonical stored tokens.
|
|
8915
|
+
const probeResp = state.accounts.length > 0
|
|
8916
|
+
? await client.probeQuota(state.accounts.map((a) => a.label)).catch(() => ({ results: [] }))
|
|
8917
|
+
: { results: [] }
|
|
8918
|
+
const quotas = state.accounts.map((a) => {
|
|
8919
|
+
const hit = probeResp.results.find((r) => r.label === a.label)
|
|
8920
|
+
return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
|
|
8410
8921
|
})
|
|
8411
|
-
const
|
|
8412
|
-
await
|
|
8413
|
-
|
|
8414
|
-
|
|
8415
|
-
|
|
8416
|
-
|
|
8417
|
-
|
|
8418
|
-
},
|
|
8922
|
+
const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'
|
|
8923
|
+
const outcome = await runFleetAutoFallback({
|
|
8924
|
+
state,
|
|
8925
|
+
quotas,
|
|
8926
|
+
setActive: (label) => client.setActive(label),
|
|
8927
|
+
triggerAgent,
|
|
8928
|
+
tz,
|
|
8419
8929
|
})
|
|
8420
|
-
if (plan.kind === 'executed') {
|
|
8421
|
-
try { assertSafeAgentName(plan.agentName) }
|
|
8422
|
-
catch {
|
|
8423
|
-
process.stderr.write(`telegram gateway: [autofallback] invalid-agent-name agent=${plan.agentName}\n`)
|
|
8424
|
-
return { kind: 'error', message: `invalid agent name: ${plan.agentName}` }
|
|
8425
|
-
}
|
|
8426
|
-
try {
|
|
8427
|
-
// Preemptive failover (utilization-over-threshold / explicit) waits
|
|
8428
|
-
// for the active turn to drain. Reactive failover (429-response)
|
|
8429
|
-
// hard-restarts because the request that triggered it has already
|
|
8430
|
-
// failed — there's no in-flight turn worth preserving. See #420.
|
|
8431
|
-
const restartArgs = ['agent', 'restart', plan.agentName]
|
|
8432
|
-
if (plan.triggerReason !== '429-response') {
|
|
8433
|
-
restartArgs.push('--graceful-restart')
|
|
8434
|
-
}
|
|
8435
|
-
process.stderr.write(
|
|
8436
|
-
`telegram gateway: [autofallback] executed agent=${plan.agentName} prev=${plan.previousSlot} next=${plan.newSlot} restart=${plan.triggerReason === '429-response' ? 'hard' : 'graceful'}\n`,
|
|
8437
|
-
)
|
|
8438
|
-
switchroomExec(restartArgs)
|
|
8439
|
-
} catch (err) {
|
|
8440
|
-
process.stderr.write(`telegram gateway: [autofallback] restart failed agent=${plan.agentName}: ${err}\n`)
|
|
8441
|
-
}
|
|
8442
|
-
autoFallbackLockout = nextLockout(plan.previousSlot, Date.now())
|
|
8443
|
-
persistLockout(agentDir)
|
|
8444
|
-
void refreshPinnedBanner('auto-fallback')
|
|
8445
|
-
return { kind: 'executed', previousSlot: plan.previousSlot, newSlot: plan.newSlot }
|
|
8446
|
-
}
|
|
8447
8930
|
process.stderr.write(
|
|
8448
|
-
`telegram gateway: [
|
|
8931
|
+
`telegram gateway: [fleet-fallback] outcome=${outcome.kind} agent=${triggerAgent}` +
|
|
8932
|
+
(outcome.kind === 'switched' ? ` old=${outcome.oldLabel} new=${outcome.newLabel}` : '') +
|
|
8933
|
+
'\n',
|
|
8449
8934
|
)
|
|
8450
|
-
|
|
8451
|
-
|
|
8452
|
-
|
|
8935
|
+
// Post the announcement to every authorized chat. Mirrors the
|
|
8936
|
+
// operator-event broadcast pattern (line ~2290) — DM-only opts
|
|
8937
|
+
// (no message_thread_id) so THREAD_NOT_FOUND can't fire here;
|
|
8938
|
+
// wrap in swallowingApiCall anyway per the codebase rule.
|
|
8939
|
+
const access = loadAccess()
|
|
8940
|
+
if (access.allowFrom.length === 0) return outcome.kind === 'switched'
|
|
8941
|
+
const opts = { parse_mode: 'HTML' as const }
|
|
8942
|
+
for (const chat_id of access.allowFrom) {
|
|
8943
|
+
void swallowingApiCall(
|
|
8944
|
+
() => bot.api.sendMessage(chat_id, outcome.announcement, opts),
|
|
8945
|
+
{ chat_id, verb: 'fleet-fallback:notify' },
|
|
8946
|
+
)
|
|
8947
|
+
}
|
|
8948
|
+
return outcome.kind === 'switched'
|
|
8453
8949
|
} catch (err) {
|
|
8454
|
-
process.stderr.write(
|
|
8455
|
-
|
|
8950
|
+
process.stderr.write(
|
|
8951
|
+
`telegram gateway: [fleet-fallback] error agent=${triggerAgent}: ${(err as Error)?.message ?? err}\n`,
|
|
8952
|
+
)
|
|
8953
|
+
return false
|
|
8456
8954
|
}
|
|
8457
8955
|
}
|
|
8458
8956
|
|
|
@@ -8512,15 +9010,6 @@ async function runCreditWatch(): Promise<void> {
|
|
|
8512
9010
|
}
|
|
8513
9011
|
}
|
|
8514
9012
|
|
|
8515
|
-
// /authfallback was removed in v0.6.12 — it duplicated the work of
|
|
8516
|
-
// the dashboard's Switch primary picker (operator-facing surface) and
|
|
8517
|
-
// the auto-fallback poller (transparent on-quota-wall case).
|
|
8518
|
-
// Operators who want to manually shuffle the active credential now
|
|
8519
|
-
// use the picker. The `runAutoFallbackCheck` function and the
|
|
8520
|
-
// `case 'fallback':` callback dispatch stay in the codebase: any
|
|
8521
|
-
// pinned messages from earlier versions still work, and the
|
|
8522
|
-
// auto-fallback poller still calls runAutoFallbackCheck directly.
|
|
8523
|
-
|
|
8524
9013
|
bot.command("auth", async ctx => {
|
|
8525
9014
|
if (!isAuthorizedSender(ctx)) return
|
|
8526
9015
|
const text = ctx.message?.text ?? ""
|
|
@@ -8614,8 +9103,60 @@ bot.command("auth", async ctx => {
|
|
|
8614
9103
|
isAdmin,
|
|
8615
9104
|
client,
|
|
8616
9105
|
chatId,
|
|
9106
|
+
// Format 2 enricher — live quota probe via the broker (#1336).
|
|
9107
|
+
// Pre-broker this read `~/.switchroom/accounts/<label>/credentials.json`
|
|
9108
|
+
// off the agent's HOME, which post-RFC-H is never populated (broker
|
|
9109
|
+
// writes only the per-agent .claude/.credentials.json mirror) — so
|
|
9110
|
+
// every account showed "no credentials.json or accessToken" in
|
|
9111
|
+
// /auth show. The broker is the source of truth for tokens and now
|
|
9112
|
+
// does the Anthropic probe server-side via `probe-quota`. Tokens
|
|
9113
|
+
// never leave the broker container.
|
|
9114
|
+
liveQuotas: async (accounts) => {
|
|
9115
|
+
try {
|
|
9116
|
+
const { results } = await client.probeQuota(accounts.map((a) => a.label))
|
|
9117
|
+
// Preserve input order (broker also preserves it, but be defensive).
|
|
9118
|
+
return accounts.map((a) => {
|
|
9119
|
+
const hit = results.find((r) => r.label === a.label)
|
|
9120
|
+
if (!hit) return { ok: false as const, reason: "broker returned no result for account" }
|
|
9121
|
+
return hit.result
|
|
9122
|
+
})
|
|
9123
|
+
} catch (err) {
|
|
9124
|
+
// Surface a uniform per-account failure so the dashboard renders
|
|
9125
|
+
// gracefully (label badge stays UNKNOWN) instead of falling back
|
|
9126
|
+
// to the legacy table.
|
|
9127
|
+
const reason = `broker probe-quota failed: ${(err as Error)?.message ?? String(err)}`
|
|
9128
|
+
return accounts.map(() => ({ ok: false as const, reason }))
|
|
9129
|
+
}
|
|
9130
|
+
},
|
|
9131
|
+
tz: process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ,
|
|
8617
9132
|
})
|
|
8618
|
-
|
|
9133
|
+
// Translate the handler's optional keyboard shape into grammy's
|
|
9134
|
+
// `reply_markup`. Buttons with `callbackData` become callback_data;
|
|
9135
|
+
// buttons with `insertText` become switch_inline_query_current_chat
|
|
9136
|
+
// (taps paste the slash-command into the user's input). Keep a
|
|
9137
|
+
// safe default for buttons missing both (shouldn't happen).
|
|
9138
|
+
if (reply.keyboard && reply.keyboard.length > 0) {
|
|
9139
|
+
// Build via grammy's InlineKeyboard so the type is correct
|
|
9140
|
+
// for switchroomReply's reply_markup field — no `as never`
|
|
9141
|
+
// cast needed.
|
|
9142
|
+
const kb = new InlineKeyboard()
|
|
9143
|
+
for (let i = 0; i < reply.keyboard.length; i++) {
|
|
9144
|
+
const row = reply.keyboard[i]!
|
|
9145
|
+
for (const b of row) {
|
|
9146
|
+
if (b.callbackData) kb.text(b.text, b.callbackData)
|
|
9147
|
+
else if (b.insertText) kb.switchInlineCurrent(b.text, b.insertText)
|
|
9148
|
+
else kb.text(b.text, 'auth:noop')
|
|
9149
|
+
}
|
|
9150
|
+
// grammy's row terminator — except after the last row.
|
|
9151
|
+
if (i < reply.keyboard.length - 1) kb.row()
|
|
9152
|
+
}
|
|
9153
|
+
await switchroomReply(ctx, reply.text, {
|
|
9154
|
+
html: reply.html,
|
|
9155
|
+
reply_markup: kb,
|
|
9156
|
+
})
|
|
9157
|
+
} else {
|
|
9158
|
+
await switchroomReply(ctx, reply.text, { html: reply.html })
|
|
9159
|
+
}
|
|
8619
9160
|
})
|
|
8620
9161
|
|
|
8621
9162
|
// Boot-card auth-row loader (issue #708, RFC H rewire). Queries the
|
|
@@ -10243,12 +10784,154 @@ async function handleOperatorEventCallback(ctx: Context, data: string): Promise<
|
|
|
10243
10784
|
// stub so any stale pinned message that fires an `auth:*` tap is
|
|
10244
10785
|
// silently dismissed instead of crashing the gateway.
|
|
10245
10786
|
async function handleAuthDashboardCallback(ctx: Context): Promise<void> {
|
|
10787
|
+
const data = ctx.callbackQuery?.data ?? ''
|
|
10788
|
+
const currentAgent = getMyAgentName()
|
|
10789
|
+
|
|
10790
|
+
// auth:use:<label> — fleet-wide swap via broker.setActive (same path
|
|
10791
|
+
// /auth use takes from chat). Admin-gated via the broker's own
|
|
10792
|
+
// per-agent admin flag.
|
|
10793
|
+
if (data.startsWith('auth:use:')) {
|
|
10794
|
+
const label = data.slice('auth:use:'.length)
|
|
10795
|
+
if (!label) {
|
|
10796
|
+
try { await ctx.answerCallbackQuery({ text: 'Missing account label.', show_alert: false }) } catch { /* */ }
|
|
10797
|
+
return
|
|
10798
|
+
}
|
|
10799
|
+
try {
|
|
10800
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
10801
|
+
if (!client) {
|
|
10802
|
+
try { await ctx.answerCallbackQuery({ text: 'Broker unreachable.', show_alert: true }) } catch { /* */ }
|
|
10803
|
+
return
|
|
10804
|
+
}
|
|
10805
|
+
const result = await client.setActive(label)
|
|
10806
|
+
try {
|
|
10807
|
+
await ctx.answerCallbackQuery({
|
|
10808
|
+
text: `Switched fleet → ${result.active} (${result.fanned.length} agents)`,
|
|
10809
|
+
show_alert: false,
|
|
10810
|
+
})
|
|
10811
|
+
} catch { /* toast may fail on stale tap */ }
|
|
10812
|
+
// Edit the source message to reflect the new active. Leaving
|
|
10813
|
+
// the old keyboard intact would tempt a double-tap; we replace
|
|
10814
|
+
// the text + drop the keyboard so the user has to /auth again
|
|
10815
|
+
// to see fresh state.
|
|
10816
|
+
const msg = ctx.callbackQuery?.message
|
|
10817
|
+
if (msg) {
|
|
10818
|
+
// Wrap in swallowingApiCall per #1075 — stale callback-source
|
|
10819
|
+
// messages (deleted topic, expired) shouldn't crash the swap.
|
|
10820
|
+
await swallowingApiCall(
|
|
10821
|
+
() =>
|
|
10822
|
+
bot.api.editMessageText(
|
|
10823
|
+
msg.chat.id,
|
|
10824
|
+
msg.message_id,
|
|
10825
|
+
`<b>Active account →</b> <code>${escapeHtmlForTg(result.active)}</code>\n` +
|
|
10826
|
+
`<i>Re-mirrored credentials for ${result.fanned.length} agent${result.fanned.length === 1 ? '' : 's'}.</i>\n\n` +
|
|
10827
|
+
`<i>Tap /auth to see updated quota for the new active account.</i>`,
|
|
10828
|
+
{ parse_mode: 'HTML' },
|
|
10829
|
+
),
|
|
10830
|
+
{ chat_id: String(msg.chat.id), verb: 'auth:use:edit' },
|
|
10831
|
+
)
|
|
10832
|
+
}
|
|
10833
|
+
} catch (err) {
|
|
10834
|
+
const msg = (err as Error)?.message ?? String(err)
|
|
10835
|
+
try {
|
|
10836
|
+
await ctx.answerCallbackQuery({
|
|
10837
|
+
text: `Switch failed: ${msg.slice(0, 180)}`,
|
|
10838
|
+
show_alert: true,
|
|
10839
|
+
})
|
|
10840
|
+
} catch { /* */ }
|
|
10841
|
+
}
|
|
10842
|
+
return
|
|
10843
|
+
}
|
|
10844
|
+
|
|
10845
|
+
// auth:refresh — re-render the /auth snapshot in-place with a fresh
|
|
10846
|
+
// live probe. Replaces the message body; keyboard stays.
|
|
10847
|
+
if (data === 'auth:refresh') {
|
|
10848
|
+
// Freshness throttle: each refresh fan-fires N live api.anthropic.com
|
|
10849
|
+
// probes (one per account, force=true bypasses the 5-min cache).
|
|
10850
|
+
// Without this, a user double-tapping the ↻ button burns through
|
|
10851
|
+
// their account's RPM budget on duplicate work. Cap at one per
|
|
10852
|
+
// AUTH_REFRESH_THROTTLE_MS per (chat, message) pair.
|
|
10853
|
+
const refreshMsg = ctx.callbackQuery?.message
|
|
10854
|
+
if (refreshMsg) {
|
|
10855
|
+
const key = `${refreshMsg.chat.id}:${refreshMsg.message_id}`
|
|
10856
|
+
const lastAtMs = lastAuthRefreshAtMs.get(key) ?? 0
|
|
10857
|
+
const sinceLastMs = Date.now() - lastAtMs
|
|
10858
|
+
if (sinceLastMs < AUTH_REFRESH_THROTTLE_MS) {
|
|
10859
|
+
const waitS = Math.ceil((AUTH_REFRESH_THROTTLE_MS - sinceLastMs) / 1000)
|
|
10860
|
+
try {
|
|
10861
|
+
await ctx.answerCallbackQuery({
|
|
10862
|
+
text: `Just refreshed — try again in ${waitS}s`,
|
|
10863
|
+
show_alert: false,
|
|
10864
|
+
})
|
|
10865
|
+
} catch { /* */ }
|
|
10866
|
+
return
|
|
10867
|
+
}
|
|
10868
|
+
lastAuthRefreshAtMs.set(key, Date.now())
|
|
10869
|
+
}
|
|
10870
|
+
try {
|
|
10871
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
10872
|
+
if (!client) {
|
|
10873
|
+
try { await ctx.answerCallbackQuery({ text: 'Broker unreachable.', show_alert: true }) } catch { /* */ }
|
|
10874
|
+
return
|
|
10875
|
+
}
|
|
10876
|
+
const state = await client.listState()
|
|
10877
|
+
// Broker-routed probe (#1336) — see gateway.ts:8910 for diagnosis.
|
|
10878
|
+
const probeResp = state.accounts.length > 0
|
|
10879
|
+
? await client.probeQuota(state.accounts.map((a) => a.label)).catch((probeErr) => { /* log the probe failure, then degrade to per-account "no result" rows */ process.stderr.write(`telegram gateway: auth:refresh probeQuota failed agent=${currentAgent}: ${(probeErr as Error)?.message ?? probeErr}\n`); return { results: [] } })
|
|
10880
|
+
: { results: [] }
|
|
10881
|
+
const quotas = state.accounts.map((a) => {
|
|
10882
|
+
const hit = probeResp.results.find((r) => r.label === a.label)
|
|
10883
|
+
return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
|
|
10884
|
+
})
|
|
10885
|
+
const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'
|
|
10886
|
+
const { renderAuthSnapshotFormat2, buildSnapshotsFromState, buildSnapshotKeyboard } = await import(
|
|
10887
|
+
'../auth-snapshot-format.js'
|
|
10888
|
+
)
|
|
10889
|
+
const snapshots = buildSnapshotsFromState(state, quotas)
|
|
10890
|
+
const text = renderAuthSnapshotFormat2(snapshots, {
|
|
10891
|
+
tz,
|
|
10892
|
+
now: new Date(),
|
|
10893
|
+
liveProbedAtMs: Date.now(),
|
|
10894
|
+
})
|
|
10895
|
+
const kbRows = buildSnapshotKeyboard(snapshots)
|
|
10896
|
+
const inline_keyboard = kbRows.map((row) =>
|
|
10897
|
+
row.map((b) => {
|
|
10898
|
+
if (b.callbackData) return { text: b.text, callback_data: b.callbackData }
|
|
10899
|
+
if (b.insertText) return { text: b.text, switch_inline_query_current_chat: b.insertText }
|
|
10900
|
+
return { text: b.text, callback_data: 'auth:noop' }
|
|
10901
|
+
}),
|
|
10902
|
+
)
|
|
10903
|
+
const msg = ctx.callbackQuery?.message
|
|
10904
|
+
if (msg) {
|
|
10905
|
+
await swallowingApiCall(
|
|
10906
|
+
() =>
|
|
10907
|
+
bot.api.editMessageText(msg.chat.id, msg.message_id, text, {
|
|
10908
|
+
parse_mode: 'HTML',
|
|
10909
|
+
reply_markup: { inline_keyboard },
|
|
10910
|
+
}),
|
|
10911
|
+
{ chat_id: String(msg.chat.id), verb: 'auth:refresh:edit' },
|
|
10912
|
+
)
|
|
10913
|
+
}
|
|
10914
|
+
try { await ctx.answerCallbackQuery({ text: 'Refreshed.', show_alert: false }) } catch { /* */ }
|
|
10915
|
+
} catch (err) {
|
|
10916
|
+
const msg = (err as Error)?.message ?? String(err)
|
|
10917
|
+
try {
|
|
10918
|
+
await ctx.answerCallbackQuery({
|
|
10919
|
+
text: `Refresh failed: ${msg.slice(0, 180)}`,
|
|
10920
|
+
show_alert: true,
|
|
10921
|
+
})
|
|
10922
|
+
} catch { /* */ }
|
|
10923
|
+
}
|
|
10924
|
+
return
|
|
10925
|
+
}
|
|
10926
|
+
|
|
10927
|
+
// Unknown auth:* — likely from a too-old message. Dismiss with a
|
|
10928
|
+
// hint pointing at the canonical re-render verb.
|
|
10246
10929
|
try {
|
|
10247
10930
|
await ctx.answerCallbackQuery({
|
|
10248
|
-
text:
|
|
10931
|
+
text: 'Unknown auth button. Send /auth for current state.',
|
|
10249
10932
|
show_alert: false,
|
|
10250
10933
|
})
|
|
10251
|
-
} catch { /*
|
|
10934
|
+
} catch { /* */ }
|
|
10252
10935
|
}
|
|
10253
10936
|
|
|
10254
10937
|
// /reauth was removed in v0.6.13 — the `/auth` dashboard's
|
|
@@ -10659,6 +11342,47 @@ bot.command('issues', async ctx => {
|
|
|
10659
11342
|
|
|
10660
11343
|
bot.command('usage', async ctx => {
|
|
10661
11344
|
if (!isAuthorizedSender(ctx)) return
|
|
11345
|
+
// Format 2 path: enumerate every account in the broker's known set,
|
|
11346
|
+
// probe live quota in parallel, render the health-grouped snapshot.
|
|
11347
|
+
// Falls back to the legacy single-agent shape when the broker is
|
|
11348
|
+
// unreachable, since /usage was historically callable against any
|
|
11349
|
+
// agent regardless of fleet state.
|
|
11350
|
+
const currentAgent = getMyAgentName()
|
|
11351
|
+
try {
|
|
11352
|
+
const client = await getAuthBrokerClient(currentAgent)
|
|
11353
|
+
if (client) {
|
|
11354
|
+
const state = await client.listState()
|
|
11355
|
+
if (state.accounts.length > 0) {
|
|
11356
|
+
// Broker-routed probe (#1336) — see gateway.ts:8910 for diagnosis.
|
|
11357
|
+
const probeResp = await client.probeQuota(state.accounts.map((a) => a.label)).catch(() => ({ results: [] }))
|
|
11358
|
+
const quotas = state.accounts.map((a) => {
|
|
11359
|
+
const hit = probeResp.results.find((r) => r.label === a.label)
|
|
11360
|
+
return hit?.result ?? { ok: false as const, reason: 'broker returned no result for account' }
|
|
11361
|
+
})
|
|
11362
|
+
const { renderAuthSnapshotFormat2, buildSnapshotsFromState } = await import(
|
|
11363
|
+
'../auth-snapshot-format.js'
|
|
11364
|
+
)
|
|
11365
|
+
const tz = process.env.SWITCHROOM_TIMEZONE ?? process.env.TZ ?? 'UTC'
|
|
11366
|
+
const snapshots = buildSnapshotsFromState(state, quotas)
|
|
11367
|
+
const text = renderAuthSnapshotFormat2(snapshots, {
|
|
11368
|
+
tz,
|
|
11369
|
+
now: new Date(),
|
|
11370
|
+
liveProbedAtMs: Date.now(),
|
|
11371
|
+
})
|
|
11372
|
+
await switchroomReply(ctx, text, { html: true })
|
|
11373
|
+
return
|
|
11374
|
+
}
|
|
11375
|
+
}
|
|
11376
|
+
} catch (err) {
|
|
11377
|
+
process.stderr.write(
|
|
11378
|
+
`telegram gateway: /usage Format 2 path failed agent=${currentAgent}: ${(err as Error)?.message ?? err}\n`,
|
|
11379
|
+
)
|
|
11380
|
+
// fall through to legacy single-agent path
|
|
11381
|
+
}
|
|
11382
|
+
|
|
11383
|
+
// Legacy single-agent path — kept as a graceful fallback when the
|
|
11384
|
+
// broker is unreachable (post-RFC-H rewire boot timing, broken
|
|
11385
|
+
// socket bind, etc.). Same shape /usage shipped with originally.
|
|
10662
11386
|
const agentDir = resolveAgentDirFromEnv()
|
|
10663
11387
|
if (!agentDir) {
|
|
10664
11388
|
await switchroomReply(ctx, '<b>/usage:</b> cannot resolve agent dir.', { html: true })
|
|
@@ -10783,6 +11507,29 @@ bot.on('callback_query:data', async ctx => {
|
|
|
10783
11507
|
return
|
|
10784
11508
|
}
|
|
10785
11509
|
|
|
11510
|
+
// RFC E §4.1: drvpick:<verb>:<agent>[:<...>] — folder-picker card taps.
|
|
11511
|
+
// open / enter / back / refresh re-render the card in place;
|
|
11512
|
+
// grant writes an allow_always kernel decision at
|
|
11513
|
+
// doc:gdrive:folder/<id>/** and edits the card to a confirmation.
|
|
11514
|
+
//
|
|
11515
|
+
// Auth gate: the picker grant is an OPERATOR action (mirrors the
|
|
11516
|
+
// `op:`/`vd:`/`vg:` family, not the `apv:` agent-approval shape).
|
|
11517
|
+
// Mirror those patterns — refuse callbacks from anyone outside
|
|
11518
|
+
// `access.allowFrom`. Without this, a group member who isn't in
|
|
11519
|
+
// the operator allowlist could still tap [✅ Allow "<folder>"] on
|
|
11520
|
+
// a card that landed in the group and write an `allow_always`
|
|
11521
|
+
// decision attributed to themselves.
|
|
11522
|
+
if (data.startsWith('drvpick:')) {
|
|
11523
|
+
const access = loadAccess()
|
|
11524
|
+
const senderId = String(ctx.from?.id ?? '')
|
|
11525
|
+
if (!access.allowFrom.includes(senderId)) {
|
|
11526
|
+
try { await ctx.answerCallbackQuery({ text: 'Not authorized.' }) } catch { /* stale/expired callback — the refusal below still applies */ }
|
|
11527
|
+
return
|
|
11528
|
+
}
|
|
11529
|
+
await handleFolderPickerCallback(ctx, data, buildFolderPickerDeps())
|
|
11530
|
+
return
|
|
11531
|
+
}
|
|
11532
|
+
|
|
10786
11533
|
// op:<action>:<encoded-agent> callbacks from operator-events.ts
|
|
10787
11534
|
// renderOperatorEvent(). Agent name is URL-encoded at emit (issue #24).
|
|
10788
11535
|
// Actions: dismiss, restart, reauth, swap-slot, add-slot, logs.
|
|
@@ -12723,23 +13470,6 @@ void (async () => {
|
|
|
12723
13470
|
}
|
|
12724
13471
|
} catch {}
|
|
12725
13472
|
|
|
12726
|
-
// Auto-fallback on quota exhaustion. Periodically polls
|
|
12727
|
-
// the active slot's rate-limit headers; when utilization >= 99.5%
|
|
12728
|
-
// or a 429 is observed, marks the slot exhausted, swaps to the
|
|
12729
|
-
// next healthy slot via src/auth, restarts the agent, and posts
|
|
12730
|
-
// a notification to the owner chat. See telegram-plugin/auto-fallback.ts
|
|
12731
|
-
// for the pure decision logic + notification builder.
|
|
12732
|
-
//
|
|
12733
|
-
// Default poll cadence: every 60 minutes. Set
|
|
12734
|
-
// SWITCHROOM_AUTO_FALLBACK_POLL_MS=0 to disable the background
|
|
12735
|
-
// poller. Pre-v0.6.12 a manual `/authfallback` typed command
|
|
12736
|
-
// also ran the same check; that command was removed in favour
|
|
12737
|
-
// of the `/auth` dashboard's Switch primary picker.
|
|
12738
|
-
const AUTO_FALLBACK_POLL_MS = Number(process.env.SWITCHROOM_AUTO_FALLBACK_POLL_MS ?? 60 * 60_000)
|
|
12739
|
-
if (AUTO_FALLBACK_POLL_MS > 0) {
|
|
12740
|
-
setInterval(() => { void runAutoFallbackCheck({ trigger: 'scheduled' }) }, AUTO_FALLBACK_POLL_MS).unref()
|
|
12741
|
-
}
|
|
12742
|
-
|
|
12743
13473
|
// Credit-exhaustion watcher (#348). Reads `<agentDir>/.claude/.claude.json`
|
|
12744
13474
|
// for `cachedExtraUsageDisabledReason`. Fires a Telegram notification
|
|
12745
13475
|
// on transition into / out of fatal billing states (out_of_credits,
|