switchroom 0.14.48 → 0.14.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +154 -76
- package/telegram-plugin/gateway/boot-card-msgid.ts +70 -0
- package/telegram-plugin/gateway/boot-card.ts +81 -14
- package/telegram-plugin/gateway/gateway.ts +31 -6
- package/telegram-plugin/gateway/inbound-delivery-confirm.ts +26 -0
- package/telegram-plugin/gateway/resume-inbound-builder.ts +27 -2
- package/telegram-plugin/tests/boot-card-edit-in-place.test.ts +139 -0
- package/telegram-plugin/tests/boot-card-msgid.test.ts +88 -0
- package/telegram-plugin/tests/inbound-delivery-confirm.test.ts +61 -0
- package/telegram-plugin/tests/resume-inbound-builder.test.ts +33 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-reboot persistence for the boot card's Telegram message id.
|
|
3
|
+
*
|
|
4
|
+
* Why: a freshly *sent* Telegram message always bumps the chat's unread
|
|
5
|
+
* badge — `disable_notification: true` removes the sound/banner but not the
|
|
6
|
+
* badge (there is no Bot API flag for that). To make routine reboots produce
|
|
7
|
+
* ZERO notification (operator request, 2026-06-03), the gateway reuses the
|
|
8
|
+
* PRIOR boot card's message and EDITS it in place instead of sending a new
|
|
9
|
+
* one — and edits never touch the badge.
|
|
10
|
+
*
|
|
11
|
+
* That requires remembering the last boot card's `message_id` across gateway
|
|
12
|
+
* restarts, keyed by the chat (+ forum topic) it lives in. This module is the
|
|
13
|
+
* tiny JSON store for that, mirroring `config-snapshot.ts` /
|
|
14
|
+
* `boot-issue-cache.ts`: one file under the agent's (bind-mounted, reboot-
|
|
15
|
+
* surviving) state dir, read once on boot, written once after the id is
|
|
16
|
+
* established. All failures are non-fatal — a missing/corrupt file just means
|
|
17
|
+
* "no prior card", so the boot path falls back to a fresh (silent) send.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { readFileSync, writeFileSync } from 'node:fs'
|
|
21
|
+
|
|
22
|
+
/**
 * Build the store key that identifies where a boot card lives.
 *
 * The key has the form `<chatId>:<threadId>`. When there is no forum topic
 * the part after the colon is left empty (e.g. `12345:`), so a chat without a
 * topic and each topic of a supergroup get distinct keys.
 *
 * @param chatId - Telegram chat id, already in string form.
 * @param threadId - Forum topic (thread) id, or `undefined` when there is none.
 * @returns The `<chatId>:<threadId>` key.
 */
export function bootCardChatKey(chatId: string, threadId: number | undefined): string {
  const topicSuffix = threadId == null ? '' : String(threadId)
  return `${chatId}:${topicSuffix}`
}
|
|
28
|
+
|
|
29
|
+
/** On-disk shape of the store: boot-card chat key → Telegram message id. */
type Store = Record<string, number>

/**
 * Read the JSON store at `path` and return only its well-formed entries.
 *
 * A missing file, unparseable JSON, or a top-level value that is not a plain
 * object all yield an empty store. Individual entries are kept only when the
 * value is a finite, positive number; anything else (strings, nulls,
 * negatives, `Infinity` from an oversized literal, …) is dropped, so the
 * returned object really is a `Record<string, number>` and invalid entries
 * are not carried into the next write.
 *
 * @param path - Location of the JSON store file.
 * @returns The validated store; `{}` when nothing usable could be read.
 */
function readStore(path: string): Store {
  let parsed: unknown
  try {
    parsed = JSON.parse(readFileSync(path, 'utf8'))
  } catch {
    /* missing / corrupt → treat as empty */
    return {}
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return {}
  }
  // Object.fromEntries defines own data properties, so even an odd key such
  // as "__proto__" in the file cannot touch the result's prototype.
  return Object.fromEntries(
    Object.entries(parsed).filter((entry): entry is [string, number] => {
      const value: unknown = entry[1]
      return typeof value === 'number' && Number.isFinite(value) && value > 0
    }),
  )
}
|
|
42
|
+
|
|
43
|
+
/**
 * Look up the persisted boot-card message id for one chat key.
 *
 * @param path - Location of the JSON store file.
 * @param chatKey - Key to look up (see `bootCardChatKey`).
 * @returns The stored id when it is a finite, positive number; otherwise
 *   `null`. Because `readStore` yields an empty store for a missing or
 *   corrupt file, those cases also come back as `null`.
 */
export function loadBootCardMsgId(
  path: string,
  chatKey: string,
): number | null {
  const stored: unknown = readStore(path)[chatKey]
  if (typeof stored !== 'number') return null
  if (!Number.isFinite(stored)) return null
  return stored > 0 ? stored : null
}
|
|
52
|
+
|
|
53
|
+
/**
 * Persist the boot card's message id under `chatKey`.
 *
 * The existing store is read first and the new id merged into it, so entries
 * for other keys are written back unchanged. Nothing is written when
 * `messageId` is not a finite, positive number, or when the store already
 * holds that exact id for the key. Any error thrown while reading or writing
 * is swallowed — the function never throws.
 *
 * @param path - Location of the JSON store file.
 * @param chatKey - Key to store the id under (see `bootCardChatKey`).
 * @param messageId - Telegram message id of the current boot card.
 */
export function saveBootCardMsgId(
  path: string,
  chatKey: string,
  messageId: number,
): void {
  const usable = Number.isFinite(messageId) && messageId > 0
  if (!usable) return
  try {
    const entries = readStore(path)
    // Only touch the file when the value actually changes.
    if (entries[chatKey] !== messageId) {
      entries[chatKey] = messageId
      writeFileSync(path, JSON.stringify(entries), 'utf8')
    }
  } catch {
    /* non-fatal */
  }
}
|
|
@@ -67,6 +67,7 @@ import {
|
|
|
67
67
|
type ConfigDiff,
|
|
68
68
|
} from './config-snapshot.js'
|
|
69
69
|
import { join } from 'path'
|
|
70
|
+
import { bootCardChatKey, loadBootCardMsgId, saveBootCardMsgId } from './boot-card-msgid.js'
|
|
70
71
|
import { loadConfig as _loadSwitchroomConfig } from '../../src/config/loader.js'
|
|
71
72
|
import { resolveAgentConfig as _resolveAgentConfig } from '../../src/config/merge.js'
|
|
72
73
|
|
|
@@ -134,6 +135,21 @@ export interface BotApiForBootCard {
|
|
|
134
135
|
text: string,
|
|
135
136
|
opts?: Record<string, unknown>,
|
|
136
137
|
): Promise<unknown>
|
|
138
|
+
/**
|
|
139
|
+
* Like `editMessageText`, but reports whether the target message still
|
|
140
|
+
* exists rather than swallowing a "message to edit not found" the way the
|
|
141
|
+
* shared retry policy does (retry-api-call.ts) — the boot path needs to
|
|
142
|
+
* know so it can fall back to a fresh send when the prior card was deleted.
|
|
143
|
+
* `'edited'` = the edit landed (or content was identical → message exists);
|
|
144
|
+
* `'gone'` = the message is missing (or any other error → send fresh).
|
|
145
|
+
* Optional so existing callers/tests without it fall back to always-send.
|
|
146
|
+
*/
|
|
147
|
+
editMessageTextStrict?(
|
|
148
|
+
chatId: string,
|
|
149
|
+
messageId: number,
|
|
150
|
+
text: string,
|
|
151
|
+
opts?: Record<string, unknown>,
|
|
152
|
+
): Promise<'edited' | 'gone'>
|
|
137
153
|
}
|
|
138
154
|
|
|
139
155
|
export interface BootCardHandle {
|
|
@@ -568,6 +584,17 @@ export interface RunProbesOpts {
|
|
|
568
584
|
* resolve the default memory collection label.
|
|
569
585
|
*/
|
|
570
586
|
configSnapshotPath?: string
|
|
587
|
+
/**
|
|
588
|
+
* Cross-reboot store for the boot card's Telegram message id (JSON,
|
|
589
|
+
* typically `<agentDir>/.boot-card-msgid.json`). When set AND the bot
|
|
590
|
+
* supports `editMessageTextStrict`, a routine reboot (no `ackMessageId`)
|
|
591
|
+
* EDITS the prior boot card in place instead of sending a new one — edits
|
|
592
|
+
* never bump the unread badge, so reboots produce zero notification
|
|
593
|
+
* (operator request 2026-06-03). Falls back to a fresh silent send when
|
|
594
|
+
* there's no prior card or it was deleted. Omit to keep the always-send
|
|
595
|
+
* behaviour.
|
|
596
|
+
*/
|
|
597
|
+
bootCardStatePath?: string
|
|
571
598
|
}
|
|
572
599
|
|
|
573
600
|
/** Run all six probes concurrently with their own per-probe timeouts.
|
|
@@ -641,20 +668,60 @@ export async function startBootCard(
|
|
|
641
668
|
// the chat is where you look, and nothing here warrants a push.
|
|
642
669
|
const silentBootCard = true
|
|
643
670
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
671
|
+
// Edit-in-place to produce ZERO notification (operator request 2026-06-03).
|
|
672
|
+
// A sent message always bumps the unread badge — `disable_notification`
|
|
673
|
+
// only kills the sound/banner. So for a ROUTINE reboot (no `ackMessageId`:
|
|
674
|
+
// operator update / cli rollout / crash / fresh) we EDIT the prior boot
|
|
675
|
+
// card in place — edits never touch the badge — instead of sending a new
|
|
676
|
+
// one. We only do this when the bot can tell us the prior message still
|
|
677
|
+
// exists (`editMessageTextStrict`); if it's gone, or this is a
|
|
678
|
+
// Telegram-initiated `/restart` (ackMessageId set — the operator asked and
|
|
679
|
+
// is watching, and the card should reply to their command), we fall back to
|
|
680
|
+
// a fresh silent send.
|
|
681
|
+
const chatKey = bootCardChatKey(chatId, threadId)
|
|
682
|
+
const reuseId =
|
|
683
|
+
ackMessageId == null && opts.bootCardStatePath != null && bot.editMessageTextStrict != null
|
|
684
|
+
? loadBootCardMsgId(opts.bootCardStatePath, chatKey)
|
|
685
|
+
: null
|
|
686
|
+
|
|
687
|
+
let messageId = -1
|
|
688
|
+
if (reuseId != null && bot.editMessageTextStrict != null) {
|
|
689
|
+
try {
|
|
690
|
+
const outcome = await bot.editMessageTextStrict(chatId, reuseId, ackText, {
|
|
691
|
+
parse_mode: 'HTML',
|
|
692
|
+
link_preview_options: { is_disabled: true },
|
|
693
|
+
...(threadId != null ? { message_thread_id: threadId } : {}),
|
|
694
|
+
})
|
|
695
|
+
if (outcome === 'edited') {
|
|
696
|
+
messageId = reuseId
|
|
697
|
+
logger(`telegram gateway: boot-card: reused msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} edit_in_place=true notify=none\n`)
|
|
698
|
+
}
|
|
699
|
+
} catch (err: unknown) {
|
|
700
|
+
logger(`telegram gateway: boot-card: edit-in-place probe failed (${(err as Error)?.message ?? String(err)}) — sending fresh\n`)
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
if (messageId < 0) {
|
|
705
|
+
try {
|
|
706
|
+
const sent = await bot.sendMessage(chatId, ackText, {
|
|
707
|
+
parse_mode: 'HTML',
|
|
708
|
+
link_preview_options: { is_disabled: true },
|
|
709
|
+
...(threadId != null ? { message_thread_id: threadId } : {}),
|
|
710
|
+
...(ackMessageId != null ? { reply_parameters: { message_id: ackMessageId } } : {}),
|
|
711
|
+
...(silentBootCard ? { disable_notification: true } : {}),
|
|
712
|
+
})
|
|
713
|
+
messageId = sent.message_id
|
|
714
|
+
logger(`telegram gateway: boot-card: posted msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} silent=${silentBootCard}\n`)
|
|
715
|
+
} catch (err: unknown) {
|
|
716
|
+
logger(`telegram gateway: boot-card: failed to post ack: ${(err as Error)?.message ?? String(err)}\n`)
|
|
717
|
+
return { messageId: -1, complete: () => {} }
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// Remember this card's id so the NEXT reboot can edit it in place (no
|
|
722
|
+
// notification). Idempotent on reuse; non-fatal on write failure.
|
|
723
|
+
if (opts.bootCardStatePath != null && messageId > 0) {
|
|
724
|
+
saveBootCardMsgId(opts.bootCardStatePath, chatKey, messageId)
|
|
658
725
|
}
|
|
659
726
|
|
|
660
727
|
// Determine the live window for agent-service status updates. Callers
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* is connected, inbound LLM messages get a "⏳ Agent is restarting…" reply.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { Bot, GrammyError, InlineKeyboard, InputFile, type Context } from 'grammy'
|
|
12
|
+
import { Bot, GrammyError, InlineKeyboard, InputFile, type Context, type Api } from 'grammy'
|
|
13
13
|
import { run, type RunnerHandle } from '@grammyjs/runner'
|
|
14
14
|
import type { ReactionTypeEmoji } from 'grammy/types'
|
|
15
15
|
import { randomBytes } from 'crypto'
|
|
@@ -287,6 +287,7 @@ import {
|
|
|
287
287
|
sweep as sweepDeliveryQueue,
|
|
288
288
|
forgetDelivery,
|
|
289
289
|
shouldTrackDelivery,
|
|
290
|
+
isTrackableResumeSynthetic,
|
|
290
291
|
type PendingDelivery,
|
|
291
292
|
} from './inbound-delivery-confirm.js'
|
|
292
293
|
import { createPendingPermissionBuffer } from './pending-permission-decisions.js'
|
|
@@ -2603,6 +2604,26 @@ function wrapBootCardApi(
|
|
|
2603
2604
|
),
|
|
2604
2605
|
opts(cid),
|
|
2605
2606
|
) as Promise<unknown>,
|
|
2607
|
+
// Strict edit used by the boot-card edit-in-place probe. It makes exactly one
// raw `editMessageText` call and classifies the result itself:
//   - call succeeds                         → 'edited'
//   - error text contains "not modified"    → 'edited' (content identical, so
//                                              the message still exists)
//   - any other error (incl. "not found")   → 'gone'   (caller sends fresh)
// Per the original note, robustApiCall swallows "message to edit not found"
// to undefined (retry-api-call.ts), which is why this deliberately bypasses it.
editMessageTextStrict: async (cid, mid, text, editOpts) => {
  type EditOpts = Parameters<Api['editMessageText']>[3]
  try {
    // allow-raw-bot-api: boot-card edit-in-place probe — must detect a deleted target, which the shared retry policy swallows.
    await lockedBot.api.editMessageText(cid, mid, text, editOpts as EditOpts)
  } catch (err) {
    // Pick the most specific human-readable text available for the failure.
    let reason: unknown
    if (err instanceof GrammyError) {
      reason = err.description
    } else if (err instanceof Error) {
      reason = err.message
    } else {
      reason = String(err)
    }
    const unchanged =
      typeof reason === 'string' && reason.toLowerCase().includes('not modified')
    return unchanged ? 'edited' : 'gone'
  }
  return 'edited'
},
|
|
2606
2627
|
}
|
|
2607
2628
|
}
|
|
2608
2629
|
|
|
@@ -4192,15 +4213,17 @@ _deliveryMachineTick.unref?.()
|
|
|
4192
4213
|
// re-deliver forever.
|
|
4193
4214
|
function trackRedeliveredInbound(merged: InboundMessage): void {
|
|
4194
4215
|
if (!DELIVERY_CONFIRM_ENABLED) return
|
|
4216
|
+
// The boot-resume synthetic ('resume_interrupted') is the ONE synthetic we DO
|
|
4217
|
+
// enrol: a restart can drop it into a not-ready session exactly like a user
|
|
4218
|
+
// inbound, leaving the interrupted work silently un-resumed. Safe iff it
|
|
4219
|
+
// carries the chat_id + message_id that make its enqueue ack-able — see
|
|
4220
|
+
// isTrackableResumeSynthetic. Every other synthetic stays excluded below.
|
|
4221
|
+
const isTrackableResume = isTrackableResumeSynthetic(merged.meta)
|
|
4195
4222
|
if (
|
|
4223
|
+
!isTrackableResume &&
|
|
4196
4224
|
!shouldTrackDelivery({
|
|
4197
4225
|
isSteering: false,
|
|
4198
4226
|
isInterrupt: false,
|
|
4199
|
-
// Synthetic inbounds (cron / vault / handback / resume) carry a source
|
|
4200
|
-
// and are NOT tracked here — they enqueue under their own semantics, and
|
|
4201
|
-
// (for the resume synthetics) tracking them safely first needs the
|
|
4202
|
-
// resume builder to emit meta.message_id so the deliver-until-acked ack
|
|
4203
|
-
// matches its enqueue. Tracked separately as a follow-up (see PR notes).
|
|
4204
4227
|
hasSource: merged.meta?.source != null,
|
|
4205
4228
|
effectiveText: merged.text,
|
|
4206
4229
|
})
|
|
@@ -4578,6 +4601,7 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
4578
4601
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
4579
4602
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
4580
4603
|
configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
|
|
4604
|
+
bootCardStatePath: join(resolvedAgentDirForCard, '.boot-card-msgid.json'),
|
|
4581
4605
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
4582
4606
|
}, ackMsgId).then(handle => {
|
|
4583
4607
|
activeBootCard = handle
|
|
@@ -18469,6 +18493,7 @@ void (async () => {
|
|
|
18469
18493
|
tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
|
|
18470
18494
|
dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
|
|
18471
18495
|
configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
|
|
18496
|
+
bootCardStatePath: join(resolvedAgentDirForBootCard, '.boot-card-msgid.json'),
|
|
18472
18497
|
...(updateOutcomeLine ? { updateOutcomeLine } : {}),
|
|
18473
18498
|
}, ackMsgId)
|
|
18474
18499
|
activeBootCard = handle
|
|
@@ -158,3 +158,29 @@ export function shouldTrackDelivery(input: {
|
|
|
158
158
|
if (input.effectiveText !== undefined && input.effectiveText.trim().length === 0) return false
|
|
159
159
|
return true
|
|
160
160
|
}
|
|
161
|
+
|
|
162
|
+
/**
 * Decide whether a synthetic inbound may be enrolled in the
 * deliver-until-acked queue as a boot-resume synthetic.
 *
 * Returns `true` only when BOTH hold:
 *   - `meta.source` is exactly `'resume_interrupted'`, and
 *   - `meta.message_id` is present and not the empty string.
 *
 * `meta.chat_id` is NOT inspected here. The intent recorded with this helper
 * is that a round-trippable `message_id` lets the enqueue's ack match this
 * synthetic and stop the re-delivery sweep; without one, tracking would
 * re-deliver forever. Any other source (cron / vault / handback / the
 * watchdog report) yields `false` regardless of what else it carries.
 *
 * @param meta - The inbound's metadata map, or `undefined` when it has none.
 * @returns Whether the inbound is a trackable resume synthetic.
 */
export function isTrackableResumeSynthetic(
  meta: Record<string, string> | undefined,
): boolean {
  if (meta == null || meta.source !== 'resume_interrupted') return false
  const enqueueId = meta.message_id
  return enqueueId != null && enqueueId !== ''
}
|
|
@@ -78,14 +78,33 @@ function promptClause(turn: Turn): string {
|
|
|
78
78
|
export function buildResumeInterruptedInbound(ctx: ResumeInboundContext): InboundMessage {
|
|
79
79
|
const ts = ctx.nowMs ?? Date.now()
|
|
80
80
|
const elapsed = humanizeElapsed(ts - ctx.turn.started_at)
|
|
81
|
+
const threadId = threadIdNum(ctx.turn)
|
|
81
82
|
const meta: Record<string, string> = {
|
|
82
83
|
source: 'resume_interrupted',
|
|
84
|
+
// Carry the originating chat/topic as model-visible channel attributes
|
|
85
|
+
// (mirrors the real-inbound + subagent_handback shapes — see
|
|
86
|
+
// gateway.ts:10753 and subagent-handback-inbound-builder.ts:115-121).
|
|
87
|
+
// Without meta.chat_id the enqueue's channel XML has no chat_id, so the
|
|
88
|
+
// gateway's enqueue handler (gateway.ts `if (ev.chatId)`) never builds a
|
|
89
|
+
// currentTurn for the resume turn — meaning no progress card, no
|
|
90
|
+
// silence-poke protection, and the reply falling back to the agent's
|
|
91
|
+
// default chat instead of the topic the interrupted work lived in.
|
|
92
|
+
chat_id: ctx.turn.chat_id,
|
|
93
|
+
...(threadId != null ? { message_thread_id: String(threadId) } : {}),
|
|
94
|
+
// message_id round-trips the fabricated `ts` through the enqueue's
|
|
95
|
+
// channel XML so the deliver-until-acked queue can ack THIS synthetic by
|
|
96
|
+
// its own enqueue id (gateway trackRedeliveredInbound carve-in). It is
|
|
97
|
+
// never used as a Telegram reply_to: the model's reply tool quotes the
|
|
98
|
+
// real prior user message via getLatestInboundMessageId (role='user',
|
|
99
|
+
// synthetics aren't in history), and the activity feed anchors with
|
|
100
|
+
// allow_sending_without_reply. Required so tracking the resume can ack and
|
|
101
|
+
// never re-delivers forever.
|
|
102
|
+
message_id: String(ts),
|
|
83
103
|
resume_turn_key: ctx.turn.turn_key,
|
|
84
104
|
interrupted_via: ctx.turn.ended_via ?? 'restart',
|
|
85
105
|
started_at: String(ctx.turn.started_at),
|
|
86
106
|
}
|
|
87
107
|
if (ctx.turn.user_prompt_preview) meta.original_prompt = ctx.turn.user_prompt_preview
|
|
88
|
-
const threadId = threadIdNum(ctx.turn)
|
|
89
108
|
return {
|
|
90
109
|
type: 'inbound',
|
|
91
110
|
chatId: ctx.turn.chat_id,
|
|
@@ -127,8 +146,15 @@ export function buildResumeWatchdogReportInbound(
|
|
|
127
146
|
ctx.turn.tool_call_count != null && ctx.turn.tool_call_count > 0
|
|
128
147
|
? ` You'd run ${ctx.turn.tool_call_count} tool call${ctx.turn.tool_call_count === 1 ? '' : 's'} before it stalled.`
|
|
129
148
|
: ''
|
|
149
|
+
const threadId = threadIdNum(ctx.turn)
|
|
130
150
|
const meta: Record<string, string> = {
|
|
131
151
|
source: 'resume_watchdog_timeout',
|
|
152
|
+
// Origin chat/topic as channel attributes so the report turn gets a
|
|
153
|
+
// currentTurn (progress card + silence-poke) and the "your last turn was
|
|
154
|
+
// interrupted" notice lands in the topic the work lived in, not the
|
|
155
|
+
// agent's default chat. Same rationale as buildResumeInterruptedInbound.
|
|
156
|
+
chat_id: ctx.turn.chat_id,
|
|
157
|
+
...(threadId != null ? { message_thread_id: String(threadId) } : {}),
|
|
132
158
|
resume_turn_key: ctx.turn.turn_key,
|
|
133
159
|
interrupted_via: 'timeout',
|
|
134
160
|
idle_ms: String(ctx.idleMs),
|
|
@@ -136,7 +162,6 @@ export function buildResumeWatchdogReportInbound(
|
|
|
136
162
|
}
|
|
137
163
|
if (ctx.turn.tool_call_count != null) meta.tool_call_count = String(ctx.turn.tool_call_count)
|
|
138
164
|
if (ctx.turn.user_prompt_preview) meta.original_prompt = ctx.turn.user_prompt_preview
|
|
139
|
-
const threadId = threadIdNum(ctx.turn)
|
|
140
165
|
return {
|
|
141
166
|
type: 'inbound',
|
|
142
167
|
chatId: ctx.turn.chat_id,
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Edit-in-place boot card (zero-notification reboots, operator request
|
|
3
|
+
* 2026-06-03). A routine reboot must EDIT the prior boot card rather than
|
|
4
|
+
* send a new one — a sent message bumps the unread badge even with
|
|
5
|
+
* `disable_notification: true`; an edit never does.
|
|
6
|
+
*
|
|
7
|
+
* Pins the startBootCard contract:
|
|
8
|
+
* - first boot (no persisted id) → SEND + persist the id
|
|
9
|
+
* - next routine boot (persisted id exists) → EDIT in place, NO send
|
|
10
|
+
* - persisted id but message deleted ('gone') → fall back to SEND
|
|
11
|
+
* - Telegram-initiated /restart (ackMessageId set) → SEND fresh (replies to
|
|
12
|
+
* the operator's command; they asked and are watching)
|
|
13
|
+
* - no bootCardStatePath → always SEND (back-compat)
|
|
14
|
+
*
|
|
15
|
+
* State is an isolated mkdtemp file — NEVER ~/.switchroom (test discipline).
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
19
|
+
import { mkdtempSync, rmSync } from 'node:fs'
|
|
20
|
+
import { tmpdir } from 'node:os'
|
|
21
|
+
import { join } from 'node:path'
|
|
22
|
+
import { startBootCard } from '../gateway/boot-card.js'
|
|
23
|
+
import type { BotApiForBootCard } from '../gateway/boot-card.js'
|
|
24
|
+
|
|
25
|
+
let dir: string
|
|
26
|
+
let statePath: string
|
|
27
|
+
|
|
28
|
+
beforeEach(() => {
|
|
29
|
+
dir = mkdtempSync(join(tmpdir(), 'boot-card-eip-'))
|
|
30
|
+
statePath = join(dir, '.boot-card-msgid.json')
|
|
31
|
+
})
|
|
32
|
+
afterEach(() => {
|
|
33
|
+
rmSync(dir, { recursive: true, force: true })
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
/** Capturing bot with a configurable strict-edit outcome. */
|
|
37
|
+
function makeBot(strictOutcome: 'edited' | 'gone' | null) {
|
|
38
|
+
const sends: Array<{ chatId: string; opts: Record<string, unknown> }> = []
|
|
39
|
+
const strictEdits: Array<{ chatId: string; messageId: number }> = []
|
|
40
|
+
let nextId = 1000
|
|
41
|
+
const bot: BotApiForBootCard = {
|
|
42
|
+
sendMessage: async (chatId, _text, opts) => {
|
|
43
|
+
sends.push({ chatId, opts: opts ?? {} })
|
|
44
|
+
return { message_id: ++nextId }
|
|
45
|
+
},
|
|
46
|
+
editMessageText: async () => ({}),
|
|
47
|
+
...(strictOutcome != null
|
|
48
|
+
? {
|
|
49
|
+
editMessageTextStrict: async (chatId: string, messageId: number) => {
|
|
50
|
+
strictEdits.push({ chatId, messageId })
|
|
51
|
+
return strictOutcome
|
|
52
|
+
},
|
|
53
|
+
}
|
|
54
|
+
: {}),
|
|
55
|
+
}
|
|
56
|
+
return { bot, sends, strictEdits }
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function mkOpts(overrides: Record<string, unknown> = {}) {
|
|
60
|
+
return {
|
|
61
|
+
agentName: 'TestAgent',
|
|
62
|
+
agentSlug: 'test-agent',
|
|
63
|
+
version: 'v0.0.0-test',
|
|
64
|
+
agentDir: dir,
|
|
65
|
+
gatewayInfo: { pid: 1, startedAtMs: Date.now() },
|
|
66
|
+
restartReason: 'graceful' as const,
|
|
67
|
+
agentLiveWindowMs: 0, // disable the live loop — we assert the initial post/edit only
|
|
68
|
+
settleWindowMs: 1_000_000,
|
|
69
|
+
bootCardStatePath: statePath,
|
|
70
|
+
...overrides,
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
describe('boot card — edit-in-place (zero-notification reboots)', () => {
|
|
75
|
+
it('first boot with no persisted id → sends, and persists the id', async () => {
|
|
76
|
+
const { bot, sends, strictEdits } = makeBot('edited')
|
|
77
|
+
await startBootCard('chat1', undefined, bot, mkOpts())
|
|
78
|
+
expect(sends).toHaveLength(1) // first boot must send (one badge, ever)
|
|
79
|
+
expect(strictEdits).toHaveLength(0)
|
|
80
|
+
expect(sends[0]!.opts.disable_notification).toBe(true) // still silent
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('second routine boot → edits the prior card in place, NO new send', async () => {
|
|
84
|
+
// Boot 1 sends + persists.
|
|
85
|
+
const first = makeBot('edited')
|
|
86
|
+
await startBootCard('chat1', undefined, first.bot, mkOpts())
|
|
87
|
+
expect(first.sends).toHaveLength(1)
|
|
88
|
+
|
|
89
|
+
// Boot 2 (same state file) → reuse via strict edit, no send.
|
|
90
|
+
const second = makeBot('edited')
|
|
91
|
+
await startBootCard('chat1', undefined, second.bot, mkOpts())
|
|
92
|
+
expect(second.sends).toHaveLength(0) // ← zero notification: no new message
|
|
93
|
+
expect(second.strictEdits).toHaveLength(1)
|
|
94
|
+
// Boot 1's send returned message_id 1001 (nextId starts at 1000, ++ first);
|
|
95
|
+
// boot 2 must edit exactly that persisted id.
|
|
96
|
+
expect(second.strictEdits[0]!.messageId).toBe(1001)
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
it('persisted id but message was deleted (gone) → falls back to a fresh send', async () => {
|
|
100
|
+
const first = makeBot('edited')
|
|
101
|
+
await startBootCard('chat1', undefined, first.bot, mkOpts())
|
|
102
|
+
|
|
103
|
+
const second = makeBot('gone')
|
|
104
|
+
await startBootCard('chat1', undefined, second.bot, mkOpts())
|
|
105
|
+
expect(second.strictEdits).toHaveLength(1) // probed the old id
|
|
106
|
+
expect(second.sends).toHaveLength(1) // and fell back to a fresh send
|
|
107
|
+
expect(second.sends[0]!.opts.disable_notification).toBe(true)
|
|
108
|
+
})
|
|
109
|
+
|
|
110
|
+
it('Telegram-initiated /restart (ackMessageId set) → sends fresh, never edits', async () => {
|
|
111
|
+
// Seed a persisted id from a routine boot.
|
|
112
|
+
const seed = makeBot('edited')
|
|
113
|
+
await startBootCard('chat1', undefined, seed.bot, mkOpts())
|
|
114
|
+
|
|
115
|
+
// A /restart passes ackMessageId → must reply with a fresh card, not edit.
|
|
116
|
+
const restart = makeBot('edited')
|
|
117
|
+
await startBootCard('chat1', undefined, restart.bot, mkOpts(), 555)
|
|
118
|
+
expect(restart.strictEdits).toHaveLength(0) // no reuse on user-initiated restart
|
|
119
|
+
expect(restart.sends).toHaveLength(1)
|
|
120
|
+
expect(restart.sends[0]!.opts.reply_parameters).toEqual({ message_id: 555 })
|
|
121
|
+
})
|
|
122
|
+
|
|
123
|
+
it('no bootCardStatePath → always sends (back-compat, unchanged)', async () => {
|
|
124
|
+
const { bot, sends, strictEdits } = makeBot('edited')
|
|
125
|
+
await startBootCard('chat1', undefined, bot, mkOpts({ bootCardStatePath: undefined }))
|
|
126
|
+
expect(sends).toHaveLength(1)
|
|
127
|
+
expect(strictEdits).toHaveLength(0)
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
it('bot without editMessageTextStrict → always sends (graceful degrade)', async () => {
|
|
131
|
+
const { bot, sends } = makeBot(null) // no strict method
|
|
132
|
+
await startBootCard('chat1', undefined, bot, mkOpts())
|
|
133
|
+
const second = makeBot(null)
|
|
134
|
+
await startBootCard('chat1', undefined, second.bot, mkOpts())
|
|
135
|
+
// Both boots send — no strict method means no in-place reuse path.
|
|
136
|
+
expect(sends).toHaveLength(1)
|
|
137
|
+
expect(second.sends).toHaveLength(1)
|
|
138
|
+
})
|
|
139
|
+
})
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the cross-reboot boot-card message-id store
|
|
3
|
+
* (gateway/boot-card-msgid.ts) — the persistence that lets a routine reboot
|
|
4
|
+
* EDIT the prior boot card in place (zero notification) instead of sending a
|
|
5
|
+
* new one.
|
|
6
|
+
*
|
|
7
|
+
* All I/O is to an isolated mkdtemp dir — NEVER ~/.switchroom (test discipline).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
11
|
+
import { mkdtempSync, rmSync, writeFileSync, existsSync } from 'node:fs'
|
|
12
|
+
import { tmpdir } from 'node:os'
|
|
13
|
+
import { join } from 'node:path'
|
|
14
|
+
import {
|
|
15
|
+
bootCardChatKey,
|
|
16
|
+
loadBootCardMsgId,
|
|
17
|
+
saveBootCardMsgId,
|
|
18
|
+
} from '../gateway/boot-card-msgid.js'
|
|
19
|
+
|
|
20
|
+
let dir: string
|
|
21
|
+
let path: string
|
|
22
|
+
|
|
23
|
+
beforeEach(() => {
|
|
24
|
+
dir = mkdtempSync(join(tmpdir(), 'boot-card-msgid-'))
|
|
25
|
+
path = join(dir, '.boot-card-msgid.json')
|
|
26
|
+
})
|
|
27
|
+
afterEach(() => {
|
|
28
|
+
rmSync(dir, { recursive: true, force: true })
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
describe('bootCardChatKey', () => {
|
|
32
|
+
it('keys a DM (no topic) distinctly from a supergroup topic', () => {
|
|
33
|
+
expect(bootCardChatKey('12345', undefined)).toBe('12345:')
|
|
34
|
+
expect(bootCardChatKey('-1001234567890', 4)).toBe('-1001234567890:4')
|
|
35
|
+
// Different topics in the same supergroup are distinct cards.
|
|
36
|
+
expect(bootCardChatKey('-100', 3)).not.toBe(bootCardChatKey('-100', 4))
|
|
37
|
+
})
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
describe('loadBootCardMsgId / saveBootCardMsgId', () => {
|
|
41
|
+
it('returns null when the file does not exist (first boot)', () => {
|
|
42
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBeNull()
|
|
43
|
+
})
|
|
44
|
+
|
|
45
|
+
it('round-trips a saved id', () => {
|
|
46
|
+
saveBootCardMsgId(path, 'dm:', 353)
|
|
47
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBe(353)
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
it('keeps ids for different chats independent', () => {
|
|
51
|
+
saveBootCardMsgId(path, 'dm:', 100)
|
|
52
|
+
saveBootCardMsgId(path, '-100:4', 200)
|
|
53
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBe(100)
|
|
54
|
+
expect(loadBootCardMsgId(path, '-100:4')).toBe(200)
|
|
55
|
+
// Updating one leaves the other intact.
|
|
56
|
+
saveBootCardMsgId(path, 'dm:', 101)
|
|
57
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBe(101)
|
|
58
|
+
expect(loadBootCardMsgId(path, '-100:4')).toBe(200)
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('returns null for an unknown chat key', () => {
|
|
62
|
+
saveBootCardMsgId(path, 'dm:', 1)
|
|
63
|
+
expect(loadBootCardMsgId(path, 'other:')).toBeNull()
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
it('rejects non-positive / non-finite ids on save and load', () => {
|
|
67
|
+
saveBootCardMsgId(path, 'dm:', 0)
|
|
68
|
+
saveBootCardMsgId(path, 'dm:', -5)
|
|
69
|
+
saveBootCardMsgId(path, 'dm:', Number.NaN)
|
|
70
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBeNull()
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
it('treats a corrupt file as empty (falls back to fresh send)', () => {
|
|
74
|
+
writeFileSync(path, 'not json{', 'utf8')
|
|
75
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBeNull()
|
|
76
|
+
// And a subsequent save still works (overwrites the garbage).
|
|
77
|
+
saveBootCardMsgId(path, 'dm:', 7)
|
|
78
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBe(7)
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
it('does not rewrite the file when the id is unchanged (idempotent)', () => {
|
|
82
|
+
saveBootCardMsgId(path, 'dm:', 42)
|
|
83
|
+
expect(existsSync(path)).toBe(true)
|
|
84
|
+
// Saving the same value again is a no-op; the value is still readable.
|
|
85
|
+
saveBootCardMsgId(path, 'dm:', 42)
|
|
86
|
+
expect(loadBootCardMsgId(path, 'dm:')).toBe(42)
|
|
87
|
+
})
|
|
88
|
+
})
|
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
ackDelivery,
|
|
5
5
|
createDeliveryQueue,
|
|
6
6
|
forgetDelivery,
|
|
7
|
+
isTrackableResumeSynthetic,
|
|
7
8
|
shouldTrackDelivery,
|
|
8
9
|
sweep,
|
|
9
10
|
trackDelivery,
|
|
@@ -178,3 +179,63 @@ describe('shouldTrackDelivery — only fresh-turn messages are tracked', () => {
|
|
|
178
179
|
expect(shouldTrackDelivery({ isSteering: false, isInterrupt: false, effectiveText: 'draft the email' })).toBe(true)
|
|
179
180
|
})
|
|
180
181
|
})
|
|
182
|
+
|
|
183
|
+
// Of all synthetic inbounds, only the boot-resume wake is ENROLLED for
// re-delivery, so a restart cannot silently lose the "pick up your interrupted
// work" nudge by dropping it into a not-ready session. Enrolment is safe ONLY
// because the synthetic carries a message_id that its own enqueue round-trips
// and acks; without one it would be re-delivered forever (the storm the
// hasSource exclusion otherwise prevents).
describe('isTrackableResumeSynthetic — resume carve-in to the deliver-until-acked queue', () => {
  it('tracks a resume_interrupted synthetic that carries a message_id', () => {
    const tracked = isTrackableResumeSynthetic({ source: 'resume_interrupted', message_id: '1700000000000' })
    expect(tracked).toBe(true)
  })

  it('does NOT track a resume_interrupted WITHOUT a message_id (would never ack → storm)', () => {
    // A missing id and an empty id are both rejected.
    const withoutId = isTrackableResumeSynthetic({ source: 'resume_interrupted' })
    expect(withoutId).toBe(false)
    const withEmptyId = isTrackableResumeSynthetic({ source: 'resume_interrupted', message_id: '' })
    expect(withEmptyId).toBe(false)
  })

  it('does NOT track the watchdog report (untracked by design) even though it carries chat_id', () => {
    const tracked = isTrackableResumeSynthetic({ source: 'resume_watchdog_timeout', chat_id: '123' })
    expect(tracked).toBe(false)
  })

  it('does NOT track other synthetics (cron / vault / handback) even if they somehow had a message_id', () => {
    // Same order as the individual checks: cron, handback, vault grant.
    for (const source of ['cron', 'subagent_handback', 'vault_grant_approved'] as const) {
      expect(isTrackableResumeSynthetic({ source, message_id: '1' })).toBe(false)
    }
  })

  it('does NOT track a real (no source) inbound here — that path is shouldTrackDelivery', () => {
    const realInbound = isTrackableResumeSynthetic({ chat_id: '1', message_id: '2' })
    expect(realInbound).toBe(false)
    const noMeta = isTrackableResumeSynthetic(undefined)
    expect(noMeta).toBe(false)
  })
})
|
|
209
|
+
|
|
210
|
+
// Queue contract for the resume synthetic, end to end: it is tracked under its
// fabricated id, acked by its OWN enqueue (which round-trips that same id), and
// — crucially — it can never keep re-delivering forever (the storm the original
// deferral feared).
describe('resume synthetic: tracked, acked by own enqueue, never storms', () => {
  const RESUME_ID = '1700000000000' // String(ts), the fabricated message_id
  const CHAT_KEY = 'chat:_'
  const TRACKED_AT = 1_000

  // Builds a fresh queue with the resume synthetic already tracked at TRACKED_AT.
  const queueWithResume = () => {
    const queue = fresh()
    trackDelivery(queue, CHAT_KEY, { text: 'resume…' }, TRACKED_AT, RESUME_ID)
    return queue
  }

  it('acks when its own enqueue arrives with the matching id (no storm)', () => {
    const queue = queueWithResume()
    // The resume turn starts, so its enqueue fires carrying the round-tripped id.
    const acked = ackDelivery(queue, CHAT_KEY, RESUME_ID)
    expect(acked).toBe(true)
    expect(queue.pending.size).toBe(0)
    // After the ack nothing is left to re-deliver, however late the sweep runs.
    const laterSweep = sweep(queue, TRACKED_AT + 999_999, TIMEOUT)
    expect(laterSweep).toHaveLength(0)
  })

  it('strands then re-delivers until acked (rescues a drop into a not-ready session)', () => {
    const queue = queueWithResume()
    // No enqueue arrived within the timeout, so the sweep reports it as stranded.
    const stranded = sweep(queue, TRACKED_AT + TIMEOUT + 1, TIMEOUT)
    expect(stranded).toHaveLength(1)
    const [first] = stranded
    expect(first!.messageId).toBe(RESUME_ID)
    // Once the session is ready its enqueue acks the entry and the loop stops.
    expect(ackDelivery(queue, CHAT_KEY, RESUME_ID)).toBe(true)
    expect(sweep(queue, TRACKED_AT + TIMEOUT * 10, TIMEOUT)).toHaveLength(0)
  })

  it('a DIFFERENT enqueue id (e.g. a racing real user msg) does NOT false-ack the resume', () => {
    const queue = queueWithResume()
    const ackedByOtherId = ackDelivery(queue, CHAT_KEY, '999') // not our id
    expect(ackedByOtherId).toBe(false)
    expect(queue.pending.size).toBe(1) // still pending, will re-deliver
  })
})
|