switchroom 0.14.47 → 0.14.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +190 -90
- package/telegram-plugin/gateway/boot-card-msgid.ts +70 -0
- package/telegram-plugin/gateway/boot-card.ts +81 -14
- package/telegram-plugin/gateway/gateway.ts +88 -2
- package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +12 -0
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +17 -1
- package/telegram-plugin/gateway/resume-inbound-builder.ts +20 -4
- package/telegram-plugin/tests/boot-card-edit-in-place.test.ts +139 -0
- package/telegram-plugin/tests/boot-card-msgid.test.ts +88 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +27 -0
- package/telegram-plugin/tests/resume-inbound-builder.test.ts +19 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-reboot persistence for the boot card's Telegram message id.
|
|
3
|
+
*
|
|
4
|
+
* Why: a freshly *sent* Telegram message always bumps the chat's unread
|
|
5
|
+
* badge — `disable_notification: true` removes the sound/banner but not the
|
|
6
|
+
* badge (there is no Bot API flag for that). To make routine reboots produce
|
|
7
|
+
* ZERO notification (operator request, 2026-06-03), the gateway reuses the
|
|
8
|
+
* PRIOR boot card's message and EDITS it in place instead of sending a new
|
|
9
|
+
* one — and edits never touch the badge.
|
|
10
|
+
*
|
|
11
|
+
* That requires remembering the last boot card's `message_id` across gateway
|
|
12
|
+
* restarts, keyed by the chat (+ forum topic) it lives in. This module is the
|
|
13
|
+
* tiny JSON store for that, mirroring `config-snapshot.ts` /
|
|
14
|
+
* `boot-issue-cache.ts`: one file under the agent's (bind-mounted, reboot-
|
|
15
|
+
* surviving) state dir, read once on boot, written once after the id is
|
|
16
|
+
* established. All failures are non-fatal — a missing/corrupt file just means
|
|
17
|
+
* "no prior card", so the boot path falls back to a fresh (silent) send.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { readFileSync, writeFileSync } from 'node:fs'
|
|
21
|
+
|
|
22
|
+
/**
 * Builds the store key that identifies where a boot card lives.
 *
 * The key has the shape `<chatId>:<threadId>`. Without a forum topic the
 * thread part is empty, so a DM agent always resolves to `<chatId>:`, while a
 * supergroup agent gets one key per topic (a topic change starts a fresh card).
 *
 * @param chatId   Telegram chat id the card is posted to.
 * @param threadId Forum topic id, or `undefined` when the chat has no topic.
 * @returns The `<chatId>:<threadId>` key.
 */
export function bootCardChatKey(chatId: string, threadId: number | undefined): string {
  const topicPart = threadId == null ? '' : String(threadId)
  return chatId + ':' + topicPart
}
|
|
28
|
+
|
|
29
|
+
/** On-disk shape: boot-card chat key → Telegram message id. */
type Store = Record<string, number>

/**
 * Reads the persisted chat-key → message-id map from `path`.
 *
 * Never throws. A missing, unreadable or unparsable file, or a JSON document
 * that is not a plain object, yields an empty store. Entries whose value is
 * not a number are dropped, so the result is a real `Record<string, number>`
 * rather than an unchecked cast of whatever the file happened to contain.
 *
 * @param path Location of the JSON store file.
 * @returns The validated store; empty when nothing usable was found.
 */
function readStore(path: string): Store {
  let parsed: unknown
  try {
    parsed = JSON.parse(readFileSync(path, 'utf8'))
  } catch {
    // missing / corrupt → treat as empty
    return {}
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return {}
  }

  const store: Store = {}
  for (const [key, value] of Object.entries(parsed)) {
    // Keep only numeric ids; anything else is corruption and cannot be reused.
    if (typeof value === 'number') store[key] = value
  }
  return store
}
|
|
42
|
+
|
|
43
|
+
/**
 * Looks up the persisted boot-card message id for one chat+topic.
 *
 * @param path    Location of the JSON store file.
 * @param chatKey Key identifying the chat (+ forum topic), as produced by
 *                `bootCardChatKey`.
 * @returns The stored message id, or `null` when there is no prior boot card
 *          to reuse: first boot, missing/corrupt file, a different chat, or a
 *          stored value that is not a positive integer.
 */
export function loadBootCardMsgId(
  path: string,
  chatKey: string,
): number | null {
  const candidate: unknown = readStore(path)[chatKey]
  if (typeof candidate !== 'number') return null
  // Telegram message ids are positive integers; a fractional or out-of-range
  // value can only come from a damaged file, so treat it as "no prior card".
  return Number.isSafeInteger(candidate) && candidate > 0 ? candidate : null
}
|
|
52
|
+
|
|
53
|
+
/**
 * Records the current boot card's message id for one chat+topic.
 *
 * The id is merged into the existing store, so other chats' ids survive. The
 * file is left untouched when the stored id already matches. A write failure
 * is non-fatal: the worst case is that the next reboot sends a fresh card
 * (one badge).
 *
 * @param path      Location of the JSON store file.
 * @param chatKey   Key identifying the chat (+ forum topic).
 * @param messageId Telegram message id of the boot card; ignored unless it is
 *                  a positive integer.
 */
export function saveBootCardMsgId(
  path: string,
  chatKey: string,
  messageId: number,
): void {
  // Telegram message ids are positive integers; never persist anything else,
  // since a bad id would only make the next boot's edit attempt fail.
  if (!Number.isSafeInteger(messageId) || messageId <= 0) return
  try {
    const store = readStore(path)
    if (store[chatKey] === messageId) return // idempotent — no rewrite
    store[chatKey] = messageId
    writeFileSync(path, JSON.stringify(store), 'utf8')
  } catch {
    /* non-fatal */
  }
}
|
|
@@ -67,6 +67,7 @@ import {
|
|
|
67
67
|
type ConfigDiff,
|
|
68
68
|
} from './config-snapshot.js'
|
|
69
69
|
import { join } from 'path'
|
|
70
|
+
import { bootCardChatKey, loadBootCardMsgId, saveBootCardMsgId } from './boot-card-msgid.js'
|
|
70
71
|
import { loadConfig as _loadSwitchroomConfig } from '../../src/config/loader.js'
|
|
71
72
|
import { resolveAgentConfig as _resolveAgentConfig } from '../../src/config/merge.js'
|
|
72
73
|
|
|
@@ -134,6 +135,21 @@ export interface BotApiForBootCard {
|
|
|
134
135
|
text: string,
|
|
135
136
|
opts?: Record<string, unknown>,
|
|
136
137
|
): Promise<unknown>
|
|
138
|
+
/**
|
|
139
|
+
* Like `editMessageText`, but reports whether the target message still
|
|
140
|
+
* exists rather than swallowing a "message to edit not found" the way the
|
|
141
|
+
* shared retry policy does (retry-api-call.ts) — the boot path needs to
|
|
142
|
+
* know so it can fall back to a fresh send when the prior card was deleted.
|
|
143
|
+
* `'edited'` = the edit landed (or content was identical → message exists);
|
|
144
|
+
* `'gone'` = the message is missing (or any other error → send fresh).
|
|
145
|
+
* Optional so existing callers/tests without it fall back to always-send.
|
|
146
|
+
*/
|
|
147
|
+
editMessageTextStrict?(
|
|
148
|
+
chatId: string,
|
|
149
|
+
messageId: number,
|
|
150
|
+
text: string,
|
|
151
|
+
opts?: Record<string, unknown>,
|
|
152
|
+
): Promise<'edited' | 'gone'>
|
|
137
153
|
}
|
|
138
154
|
|
|
139
155
|
export interface BootCardHandle {
|
|
@@ -568,6 +584,17 @@ export interface RunProbesOpts {
|
|
|
568
584
|
* resolve the default memory collection label.
|
|
569
585
|
*/
|
|
570
586
|
configSnapshotPath?: string
|
|
587
|
+
/**
|
|
588
|
+
* Cross-reboot store for the boot card's Telegram message id (JSON,
|
|
589
|
+
* typically `<agentDir>/.boot-card-msgid.json`). When set AND the bot
|
|
590
|
+
* supports `editMessageTextStrict`, a routine reboot (no `ackMessageId`)
|
|
591
|
+
* EDITS the prior boot card in place instead of sending a new one — edits
|
|
592
|
+
* never bump the unread badge, so reboots produce zero notification
|
|
593
|
+
* (operator request 2026-06-03). Falls back to a fresh silent send when
|
|
594
|
+
* there's no prior card or it was deleted. Omit to keep the always-send
|
|
595
|
+
* behaviour.
|
|
596
|
+
*/
|
|
597
|
+
bootCardStatePath?: string
|
|
571
598
|
}
|
|
572
599
|
|
|
573
600
|
/** Run all six probes concurrently with their own per-probe timeouts.
|
|
@@ -641,20 +668,60 @@ export async function startBootCard(
|
|
|
641
668
|
// the chat is where you look, and nothing here warrants a push.
|
|
642
669
|
const silentBootCard = true
|
|
643
670
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
671
|
+
// Edit-in-place to produce ZERO notification (operator request 2026-06-03).
|
|
672
|
+
// A sent message always bumps the unread badge — `disable_notification`
|
|
673
|
+
// only kills the sound/banner. So for a ROUTINE reboot (no `ackMessageId`:
|
|
674
|
+
// operator update / cli rollout / crash / fresh) we EDIT the prior boot
|
|
675
|
+
// card in place — edits never touch the badge — instead of sending a new
|
|
676
|
+
// one. We only do this when the bot can tell us the prior message still
|
|
677
|
+
// exists (`editMessageTextStrict`); if it's gone, or this is a
|
|
678
|
+
// Telegram-initiated `/restart` (ackMessageId set — the operator asked and
|
|
679
|
+
// is watching, and the card should reply to their command), we fall back to
|
|
680
|
+
// a fresh silent send.
|
|
681
|
+
const chatKey = bootCardChatKey(chatId, threadId)
|
|
682
|
+
const reuseId =
|
|
683
|
+
ackMessageId == null && opts.bootCardStatePath != null && bot.editMessageTextStrict != null
|
|
684
|
+
? loadBootCardMsgId(opts.bootCardStatePath, chatKey)
|
|
685
|
+
: null
|
|
686
|
+
|
|
687
|
+
let messageId = -1
|
|
688
|
+
if (reuseId != null && bot.editMessageTextStrict != null) {
|
|
689
|
+
try {
|
|
690
|
+
const outcome = await bot.editMessageTextStrict(chatId, reuseId, ackText, {
|
|
691
|
+
parse_mode: 'HTML',
|
|
692
|
+
link_preview_options: { is_disabled: true },
|
|
693
|
+
...(threadId != null ? { message_thread_id: threadId } : {}),
|
|
694
|
+
})
|
|
695
|
+
if (outcome === 'edited') {
|
|
696
|
+
messageId = reuseId
|
|
697
|
+
logger(`telegram gateway: boot-card: reused msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} edit_in_place=true notify=none\n`)
|
|
698
|
+
}
|
|
699
|
+
} catch (err: unknown) {
|
|
700
|
+
logger(`telegram gateway: boot-card: edit-in-place probe failed (${(err as Error)?.message ?? String(err)}) — sending fresh\n`)
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
if (messageId < 0) {
|
|
705
|
+
try {
|
|
706
|
+
const sent = await bot.sendMessage(chatId, ackText, {
|
|
707
|
+
parse_mode: 'HTML',
|
|
708
|
+
link_preview_options: { is_disabled: true },
|
|
709
|
+
...(threadId != null ? { message_thread_id: threadId } : {}),
|
|
710
|
+
...(ackMessageId != null ? { reply_parameters: { message_id: ackMessageId } } : {}),
|
|
711
|
+
...(silentBootCard ? { disable_notification: true } : {}),
|
|
712
|
+
})
|
|
713
|
+
messageId = sent.message_id
|
|
714
|
+
logger(`telegram gateway: boot-card: posted msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} silent=${silentBootCard}\n`)
|
|
715
|
+
} catch (err: unknown) {
|
|
716
|
+
logger(`telegram gateway: boot-card: failed to post ack: ${(err as Error)?.message ?? String(err)}\n`)
|
|
717
|
+
return { messageId: -1, complete: () => {} }
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// Remember this card's id so the NEXT reboot can edit it in place (no
|
|
722
|
+
// notification). Idempotent on reuse; non-fatal on write failure.
|
|
723
|
+
if (opts.bootCardStatePath != null && messageId > 0) {
|
|
724
|
+
saveBootCardMsgId(opts.bootCardStatePath, chatKey, messageId)
|
|
658
725
|
}
|
|
659
726
|
|
|
660
727
|
// Determine the live window for agent-service status updates. Callers
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* is connected, inbound LLM messages get a "⏳ Agent is restarting…" reply.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { Bot, GrammyError, InlineKeyboard, InputFile, type Context } from 'grammy'
|
|
12
|
+
import { Bot, GrammyError, InlineKeyboard, InputFile, type Context, type Api } from 'grammy'
|
|
13
13
|
import { run, type RunnerHandle } from '@grammyjs/runner'
|
|
14
14
|
import type { ReactionTypeEmoji } from 'grammy/types'
|
|
15
15
|
import { randomBytes } from 'crypto'
|
|
@@ -1064,7 +1064,19 @@ try {
|
|
|
1064
1064
|
const pending = findLatestTurnIfInterrupted(turnsDb)
|
|
1065
1065
|
const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
1066
1066
|
if (pending != null && selfAgent) {
|
|
1067
|
-
|
|
1067
|
+
// 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
|
|
1068
|
+
// interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
|
|
1069
|
+
// downgrades a stale 'resume' to the passive 'report' so the user is told
|
|
1070
|
+
// ("I was working on X ~Nh ago") but nothing replays unprompted. Env
|
|
1071
|
+
// override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
|
|
1072
|
+
const RESUME_MAX_AGE_MS = (() => {
|
|
1073
|
+
const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
|
|
1074
|
+
return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
|
|
1075
|
+
})()
|
|
1076
|
+
const kind = selectResumeBuilder(pending.ended_via, {
|
|
1077
|
+
ageMs: Math.max(0, Date.now() - pending.started_at),
|
|
1078
|
+
maxAgeMs: RESUME_MAX_AGE_MS,
|
|
1079
|
+
})
|
|
1068
1080
|
if (kind === 'resume') {
|
|
1069
1081
|
bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
|
|
1070
1082
|
} else if (kind === 'report') {
|
|
@@ -1801,6 +1813,7 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
|
|
|
1801
1813
|
return d
|
|
1802
1814
|
},
|
|
1803
1815
|
inboundSpool,
|
|
1816
|
+
trackRedeliveredInbound,
|
|
1804
1817
|
)
|
|
1805
1818
|
if (fr.redelivered > 0) {
|
|
1806
1819
|
process.stderr.write(
|
|
@@ -1896,6 +1909,7 @@ function releaseTurnBufferGate(key: string): void {
|
|
|
1896
1909
|
return d
|
|
1897
1910
|
},
|
|
1898
1911
|
inboundSpool,
|
|
1912
|
+
trackRedeliveredInbound,
|
|
1899
1913
|
)
|
|
1900
1914
|
if (fr.redelivered > 0) {
|
|
1901
1915
|
process.stderr.write(
|
|
@@ -2589,6 +2603,26 @@ function wrapBootCardApi(
|
|
|
2589
2603
|
),
|
|
2590
2604
|
opts(cid),
|
|
2591
2605
|
) as Promise<unknown>,
|
|
2606
|
+
// Strict edit for the boot-card edit-in-place probe: distinguishes
|
|
2607
|
+
// "message gone" (→ 'gone', caller sends fresh) from a landed/identical
|
|
2608
|
+
// edit (→ 'edited'). robustApiCall SWALLOWS "message to edit not found"
|
|
2609
|
+
// to undefined (retry-api-call.ts), so this can't go through it — a
|
|
2610
|
+
// deliberate single-attempt raw edit that classifies the error itself.
|
|
2611
|
+
editMessageTextStrict: async (cid, mid, text, editOpts) => {
|
|
2612
|
+
type EditOpts = Parameters<Api['editMessageText']>[3]
|
|
2613
|
+
try {
|
|
2614
|
+
// allow-raw-bot-api: boot-card edit-in-place probe — must detect a deleted target, which the shared retry policy swallows.
|
|
2615
|
+
await lockedBot.api.editMessageText(cid, mid, text, editOpts as EditOpts)
|
|
2616
|
+
return 'edited'
|
|
2617
|
+
} catch (err) {
|
|
2618
|
+
const desc =
|
|
2619
|
+
err instanceof GrammyError ? err.description : err instanceof Error ? err.message : String(err)
|
|
2620
|
+
// Content identical → message still exists; reuse it.
|
|
2621
|
+
if (typeof desc === 'string' && desc.toLowerCase().includes('not modified')) return 'edited'
|
|
2622
|
+
// Not found, or any other error → fall back to a fresh silent send.
|
|
2623
|
+
return 'gone'
|
|
2624
|
+
}
|
|
2625
|
+
},
|
|
2592
2626
|
}
|
|
2593
2627
|
}
|
|
2594
2628
|
|
|
@@ -4134,6 +4168,7 @@ silencePoke.startTimer({
|
|
|
4134
4168
|
return d
|
|
4135
4169
|
},
|
|
4136
4170
|
inboundSpool,
|
|
4171
|
+
trackRedeliveredInbound,
|
|
4137
4172
|
)
|
|
4138
4173
|
process.stderr.write(
|
|
4139
4174
|
`telegram gateway: silence-poke framework-fallback ended wedged turn ` +
|
|
@@ -4163,6 +4198,45 @@ const _deliveryMachineTick = setInterval(() => {
|
|
|
4163
4198
|
}, DELIVERY_MACHINE_TICK_MS)
|
|
4164
4199
|
_deliveryMachineTick.unref?.()
|
|
4165
4200
|
|
|
4201
|
+
// Adds a buffer-redelivered inbound to the deliver-until-acked queue, so the
// existing sweep keeps re-delivering it until claude's `enqueue` ack lands.
//
// Every redelivery path calls this (bridgeUp drain, silence-poke fallback,
// flap/reply-gate flushes), because `send` returning true only means the bytes
// reached the bridge, NOT that claude consumed them. Right after a restart
// (esp. a slow MCP boot) the inject can hit a not-ready session and be silently
// dropped with nothing retrying it: the clerk 2026-06-03 lost-message incident.
//
// Keying mirrors the live-delivery tracking at the handleInbound site
// (chatKey + messageId), so DMs and supergroup forum topics are handled
// identically. Only real user inbounds are tracked: shouldTrackDelivery
// excludes steer/interrupt/synthetic-source/empty, which never produce an
// `enqueue` and would otherwise re-deliver forever.
function trackRedeliveredInbound(merged: InboundMessage): void {
  if (!DELIVERY_CONFIRM_ENABLED) return

  const trackable = shouldTrackDelivery({
    isSteering: false,
    isInterrupt: false,
    // Synthetic inbounds (cron / vault / handback / resume) carry a source and
    // are NOT tracked here — they enqueue under their own semantics. For the
    // resume synthetics, safe tracking first needs the resume builder to emit
    // meta.message_id so the deliver-until-acked ack matches its enqueue.
    // Tracked separately as a follow-up (see PR notes).
    hasSource: merged.meta?.source != null,
    effectiveText: merged.text,
  })
  if (!trackable) return

  const threadId = merged.threadId != null ? Number(merged.threadId) : null
  const key = chatKey(merged.chatId, threadId)
  const ackMessageId = merged.messageId != null ? String(merged.messageId) : null
  trackDelivery(deliveryQueue, key, merged, Date.now(), ackMessageId)
}
|
|
4239
|
+
|
|
4166
4240
|
// Re-deliver stranded inbounds until claude acks (the marko drop-wedge).
|
|
4167
4241
|
// Every few seconds, re-send any inbound that was handed to claude but never
|
|
4168
4242
|
// acked by an `enqueue` — it stranded unsubmitted in the composer. Re-clear
|
|
@@ -4400,6 +4474,11 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
4400
4474
|
inboundSpool: inboundSpool ?? null,
|
|
4401
4475
|
pendingPermissionBuffer,
|
|
4402
4476
|
client,
|
|
4477
|
+
// Enrol each drained user inbound in the deliver-until-acked queue
|
|
4478
|
+
// so the 5s sweep re-delivers until claude's `enqueue` ack lands —
|
|
4479
|
+
// a socket-write into a still-booting session is NOT consumption
|
|
4480
|
+
// (clerk lost-message incident, 2026-06-03).
|
|
4481
|
+
onUserInboundDelivered: trackRedeliveredInbound,
|
|
4403
4482
|
})
|
|
4404
4483
|
} else {
|
|
4405
4484
|
// Kill-switch fallback: imperative drain (parity with pre-cutover
|
|
@@ -4410,6 +4489,10 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
4410
4489
|
try {
|
|
4411
4490
|
client.send(msg)
|
|
4412
4491
|
inboundSpool?.ack(msg)
|
|
4492
|
+
// Same enrol as the cutover drain path: a socket-write success is
|
|
4493
|
+
// not proof claude consumed it — enrol so the sweep re-delivers
|
|
4494
|
+
// until `enqueue` (clerk lost-message incident, 2026-06-03).
|
|
4495
|
+
trackRedeliveredInbound(msg)
|
|
4413
4496
|
} catch (err) {
|
|
4414
4497
|
process.stderr.write(
|
|
4415
4498
|
`telegram gateway: pending-inbound drain failed agent=${client.agentName} ` +
|
|
@@ -4515,6 +4598,7 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
4515
4598
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
4516
4599
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
4517
4600
|
configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
|
|
4601
|
+
bootCardStatePath: join(resolvedAgentDirForCard, '.boot-card-msgid.json'),
|
|
4518
4602
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
4519
4603
|
}, ackMsgId).then(handle => {
|
|
4520
4604
|
activeBootCard = handle
|
|
@@ -5318,6 +5402,7 @@ if (!STATIC) {
|
|
|
5318
5402
|
return d
|
|
5319
5403
|
},
|
|
5320
5404
|
inboundSpool,
|
|
5405
|
+
trackRedeliveredInbound,
|
|
5321
5406
|
)
|
|
5322
5407
|
if (r != null && r.redelivered > 0) {
|
|
5323
5408
|
process.stderr.write(
|
|
@@ -18405,6 +18490,7 @@ void (async () => {
|
|
|
18405
18490
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
18406
18491
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
18407
18492
|
configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
|
|
18493
|
+
bootCardStatePath: join(resolvedAgentDirForBootCard, '.boot-card-msgid.json'),
|
|
18408
18494
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
18409
18495
|
}, ackMsgId)
|
|
18410
18496
|
activeBootCard = handle
|
|
@@ -45,6 +45,15 @@ export interface DispatchCtx {
|
|
|
45
45
|
readonly client?: IpcClient
|
|
46
46
|
/** Optional log sink — default stderr. Test hook. */
|
|
47
47
|
readonly log?: (line: string) => void
|
|
48
|
+
/**
|
|
49
|
+
* Optional: enrol a drained+redelivered inbound in the deliver-until-acked
|
|
50
|
+
* queue. The bridgeUp drain's socket-write "success" is NOT proof claude
|
|
51
|
+
* consumed the message — right after a restart (esp. with a slow MCP boot)
|
|
52
|
+
* the inject can hit a not-ready session and be dropped. Wiring this makes
|
|
53
|
+
* the existing 5s sweep re-deliver until claude's `enqueue` ack lands.
|
|
54
|
+
* (clerk lost-message incident, 2026-06-03.)
|
|
55
|
+
*/
|
|
56
|
+
readonly onUserInboundDelivered?: (merged: InboundMessage) => void
|
|
48
57
|
}
|
|
49
58
|
|
|
50
59
|
const enabled = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== '0'
|
|
@@ -103,6 +112,9 @@ function dispatchOne(effect: Effect, ctx: DispatchCtx): void {
|
|
|
103
112
|
ctx.selfAgent,
|
|
104
113
|
send,
|
|
105
114
|
ctx.inboundSpool ?? undefined,
|
|
115
|
+
ctx.onUserInboundDelivered
|
|
116
|
+
? (merged) => ctx.onUserInboundDelivered!(merged)
|
|
117
|
+
: undefined,
|
|
106
118
|
)
|
|
107
119
|
if (result.drained > 0) {
|
|
108
120
|
log(
|
|
@@ -87,6 +87,14 @@ export function redeliverBufferedInbound(
|
|
|
87
87
|
agent: string,
|
|
88
88
|
send: (msg: InboundMessage) => boolean,
|
|
89
89
|
spool?: InboundSpool,
|
|
90
|
+
// Called once per merged group on CONFIRMED delivery (after spool.ack).
|
|
91
|
+
// The caller uses it to enrol the redelivered inbound in the
|
|
92
|
+
// deliver-until-acked queue (`trackDelivery`) so it is re-sent until
|
|
93
|
+
// claude's `enqueue` ack lands — closing the restart boot-race where a
|
|
94
|
+
// socket-write "succeeds" into a not-ready session and the message is
|
|
95
|
+
// silently dropped (clerk 2026-06-03). `send` returning true only means
|
|
96
|
+
// the bytes reached the bridge, NOT that claude consumed them.
|
|
97
|
+
onDelivered?: (merged: InboundMessage, originals: InboundMessage[]) => void,
|
|
90
98
|
): { drained: number; redelivered: number; rebuffered: number } {
|
|
91
99
|
const pending = buffer.drain(agent)
|
|
92
100
|
let redelivered = 0
|
|
@@ -110,6 +118,10 @@ export function redeliverBufferedInbound(
|
|
|
110
118
|
// originals are, so we ack by original identity.
|
|
111
119
|
for (const o of originals) spool?.ack(o)
|
|
112
120
|
redelivered += originals.length
|
|
121
|
+
// Enrol in the deliver-until-acked queue (caller's hook). A bare
|
|
122
|
+
// socket-write success is NOT proof claude consumed it; the queue's
|
|
123
|
+
// sweep re-delivers until the `enqueue` ack lands.
|
|
124
|
+
onDelivered?.(merged, originals)
|
|
113
125
|
} else {
|
|
114
126
|
// Re-buffer the originals (not the merged synthetic) so the spool
|
|
115
127
|
// identity is preserved and the next drain re-merges them losslessly.
|
|
@@ -258,11 +270,15 @@ export function idleDrainTick(
|
|
|
258
270
|
isBridgeAlive: () => boolean,
|
|
259
271
|
send: (msg: InboundMessage) => boolean,
|
|
260
272
|
spool?: InboundSpool,
|
|
273
|
+
// Forwarded to redeliverBufferedInbound so the post-flap-settle drain also
|
|
274
|
+
// enrols redelivered inbounds in the deliver-until-acked queue (parity with
|
|
275
|
+
// the bridgeUp drain — clerk lost-message incident, 2026-06-03).
|
|
276
|
+
onDelivered?: (merged: InboundMessage, originals: InboundMessage[]) => void,
|
|
261
277
|
): { drained: number; redelivered: number; rebuffered: number } | null {
|
|
262
278
|
if (!agent) return null
|
|
263
279
|
if (buffer.depth(agent) === 0) return null
|
|
264
280
|
if (!isBridgeAlive()) return null
|
|
265
|
-
return redeliverBufferedInbound(buffer, agent, send, spool)
|
|
281
|
+
return redeliverBufferedInbound(buffer, agent, send, spool, onDelivered)
|
|
266
282
|
}
|
|
267
283
|
|
|
268
284
|
export function createPendingInboundBuffer(
|
|
@@ -172,9 +172,25 @@ export function buildResumeWatchdogReportInbound(
|
|
|
172
172
|
*/
|
|
173
173
|
export function selectResumeBuilder(
  endedVia: TurnEndedVia | null,
  // 3h staleness failsafe (operator spec, 2026-06-03). When both `ageMs` and
  // `maxAgeMs` are given and the interrupted turn is older than `maxAgeMs`, an
  // AUTO-resume is downgraded to the passive `report`: silently re-injecting
  // hours-old work could act on long-stale context (a tax figure, a "send it"
  // the user has moved on from). Omitting either keeps the legacy
  // blanket-resume behaviour.
  opts?: { ageMs?: number; maxAgeMs?: number },
): 'resume' | 'report' | null {
  // A watchdog timeout is only ever reported, never replayed.
  if (endedVia === 'timeout') return 'report'

  // Still-open at boot (no ended_via) means the turn was killed mid-flight.
  const wasInterrupted =
    endedVia == null ||
    endedVia === 'restart' ||
    endedVia === 'sigterm' ||
    endedVia === 'unknown'
  if (!wasInterrupted) return null

  const ageMs = opts?.ageMs
  const maxAgeMs = opts?.maxAgeMs
  const tooOld = ageMs != null && maxAgeMs != null && ageMs > maxAgeMs
  // Too old to safely auto-resume → passive notice only.
  return tooOld ? 'report' : 'resume'
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Edit-in-place boot card (zero-notification reboots, operator request
|
|
3
|
+
* 2026-06-03). A routine reboot must EDIT the prior boot card rather than
|
|
4
|
+
* send a new one — a sent message bumps the unread badge even with
|
|
5
|
+
* `disable_notification: true`; an edit never does.
|
|
6
|
+
*
|
|
7
|
+
* Pins the startBootCard contract:
|
|
8
|
+
* - first boot (no persisted id) → SEND + persist the id
|
|
9
|
+
* - next routine boot (persisted id exists) → EDIT in place, NO send
|
|
10
|
+
* - persisted id but message deleted ('gone') → fall back to SEND
|
|
11
|
+
* - Telegram-initiated /restart (ackMessageId set) → SEND fresh (replies to
|
|
12
|
+
* the operator's command; they asked and are watching)
|
|
13
|
+
* - no bootCardStatePath → always SEND (back-compat)
|
|
14
|
+
*
|
|
15
|
+
* State is an isolated mkdtemp file — NEVER ~/.switchroom (test discipline).
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
19
|
+
import { mkdtempSync, rmSync } from 'node:fs'
|
|
20
|
+
import { tmpdir } from 'node:os'
|
|
21
|
+
import { join } from 'node:path'
|
|
22
|
+
import { startBootCard } from '../gateway/boot-card.js'
|
|
23
|
+
import type { BotApiForBootCard } from '../gateway/boot-card.js'
|
|
24
|
+
|
|
25
|
+
// Per-test scratch directory and the boot-card state file inside it. Both are
// reassigned before every test so no state leaks between cases.
let dir: string
let statePath: string

/** Creates a fresh temp directory and points `statePath` at a file within it. */
function createScratchState(): void {
  dir = mkdtempSync(join(tmpdir(), 'boot-card-eip-'))
  statePath = join(dir, '.boot-card-msgid.json')
}

/** Removes the temp directory created for the test that just ran. */
function removeScratchState(): void {
  rmSync(dir, { recursive: true, force: true })
}

beforeEach(() => createScratchState())
afterEach(() => removeScratchState())
|
|
35
|
+
|
|
36
|
+
/**
 * Builds a recording fake bot for the boot-card tests.
 *
 * Every `sendMessage` call is appended to `sends` and answered with a fresh
 * `message_id` (1001, 1002, …). When `strictOutcome` is non-null the bot also
 * exposes `editMessageTextStrict`, which records the call in `strictEdits` and
 * resolves to that outcome. With `null` the method is absent altogether.
 */
function makeBot(strictOutcome: 'edited' | 'gone' | null) {
  const sends: Array<{ chatId: string; opts: Record<string, unknown> }> = []
  const strictEdits: Array<{ chatId: string; messageId: number }> = []
  let lastIssuedId = 1000

  const bot: BotApiForBootCard = {
    async sendMessage(chatId, _text, opts) {
      sends.push({ chatId, opts: opts ?? {} })
      lastIssuedId += 1
      return { message_id: lastIssuedId }
    },
    async editMessageText() {
      return {}
    },
  }

  if (strictOutcome != null) {
    const outcome = strictOutcome
    bot.editMessageTextStrict = async (chatId: string, messageId: number) => {
      strictEdits.push({ chatId, messageId })
      return outcome
    }
  }

  return { bot, sends, strictEdits }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Default `startBootCard` options for these tests, pointed at the per-test
 * scratch directory and state file. Any key in `overrides` replaces the
 * default of the same name.
 */
function mkOpts(overrides: Record<string, unknown> = {}) {
  const defaults = {
    agentName: 'TestAgent',
    agentSlug: 'test-agent',
    version: 'v0.0.0-test',
    agentDir: dir,
    gatewayInfo: { pid: 1, startedAtMs: Date.now() },
    restartReason: 'graceful' as const,
    agentLiveWindowMs: 0, // disable the live loop — we assert the initial post/edit only
    settleWindowMs: 1_000_000,
    bootCardStatePath: statePath,
  }
  return { ...defaults, ...overrides }
}
|
|
73
|
+
|
|
74
|
+
// Pins the startBootCard send-vs-edit contract. Each case runs against a fresh
// state file, and "boot N" is simulated by calling startBootCard again with the
// same state path and a new recording bot.
describe('boot card — edit-in-place (zero-notification reboots)', () => {
  it('first boot with no persisted id → sends, and persists the id', async () => {
    const { bot, sends, strictEdits } = makeBot('edited')
    await startBootCard('chat1', undefined, bot, mkOpts())
    expect(sends).toHaveLength(1) // first boot must send (one badge, ever)
    expect(strictEdits).toHaveLength(0)
    expect(sends[0]!.opts.disable_notification).toBe(true) // still silent
    // The title promises persistence, so check the state file directly: the
    // fake bot's first send returns message_id 1001, stored under the
    // `<chatId>:` key used when there is no forum topic.
    const { readFileSync } = await import('node:fs')
    expect(JSON.parse(readFileSync(statePath, 'utf8'))).toEqual({ 'chat1:': 1001 })
  })

  it('second routine boot → edits the prior card in place, NO new send', async () => {
    // Boot 1 sends + persists.
    const first = makeBot('edited')
    await startBootCard('chat1', undefined, first.bot, mkOpts())
    expect(first.sends).toHaveLength(1)

    // Boot 2 (same state file) → reuse via strict edit, no send.
    const second = makeBot('edited')
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    expect(second.sends).toHaveLength(0) // ← zero notification: no new message
    expect(second.strictEdits).toHaveLength(1)
    // Boot 1's send returned message_id 1001 (nextId starts at 1000, ++ first);
    // boot 2 must edit exactly that persisted id.
    expect(second.strictEdits[0]!.messageId).toBe(1001)
  })

  it('persisted id but message was deleted (gone) → falls back to a fresh send', async () => {
    const first = makeBot('edited')
    await startBootCard('chat1', undefined, first.bot, mkOpts())

    const second = makeBot('gone')
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    expect(second.strictEdits).toHaveLength(1) // probed the old id
    expect(second.sends).toHaveLength(1) // and fell back to a fresh send
    expect(second.sends[0]!.opts.disable_notification).toBe(true)
  })

  it('Telegram-initiated /restart (ackMessageId set) → sends fresh, never edits', async () => {
    // Seed a persisted id from a routine boot.
    const seed = makeBot('edited')
    await startBootCard('chat1', undefined, seed.bot, mkOpts())

    // A /restart passes ackMessageId → must reply with a fresh card, not edit.
    const restart = makeBot('edited')
    await startBootCard('chat1', undefined, restart.bot, mkOpts(), 555)
    expect(restart.strictEdits).toHaveLength(0) // no reuse on user-initiated restart
    expect(restart.sends).toHaveLength(1)
    expect(restart.sends[0]!.opts.reply_parameters).toEqual({ message_id: 555 })
  })

  it('no bootCardStatePath → always sends (back-compat, unchanged)', async () => {
    const { bot, sends, strictEdits } = makeBot('edited')
    await startBootCard('chat1', undefined, bot, mkOpts({ bootCardStatePath: undefined }))
    expect(sends).toHaveLength(1)
    expect(strictEdits).toHaveLength(0)
  })

  it('bot without editMessageTextStrict → always sends (graceful degrade)', async () => {
    const { bot, sends } = makeBot(null) // no strict method
    await startBootCard('chat1', undefined, bot, mkOpts())
    const second = makeBot(null)
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    // Both boots send — no strict method means no in-place reuse path.
    expect(sends).toHaveLength(1)
    expect(second.sends).toHaveLength(1)
  })
})
|