switchroom 0.14.47 → 0.14.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Cross-reboot persistence for the boot card's Telegram message id.
3
+ *
4
+ * Why: a freshly *sent* Telegram message always bumps the chat's unread
5
+ * badge — `disable_notification: true` removes the sound/banner but not the
6
+ * badge (there is no Bot API flag for that). To make routine reboots produce
7
+ * ZERO notification (operator request, 2026-06-03), the gateway reuses the
8
+ * PRIOR boot card's message and EDITS it in place instead of sending a new
9
+ * one — and edits never touch the badge.
10
+ *
11
+ * That requires remembering the last boot card's `message_id` across gateway
12
+ * restarts, keyed by the chat (+ forum topic) it lives in. This module is the
13
+ * tiny JSON store for that, mirroring `config-snapshot.ts` /
14
+ * `boot-issue-cache.ts`: one file under the agent's (bind-mounted, reboot-
15
+ * surviving) state dir, read once on boot, written once after the id is
16
+ * established. All failures are non-fatal — a missing/corrupt file just means
17
+ * "no prior card", so the boot path falls back to a fresh (silent) send.
18
+ */
19
+
20
+ import { readFileSync, writeFileSync } from 'node:fs'
21
+
22
/**
 * Build the stable store key for a boot-card target.
 *
 * The key is the chat id followed by a colon and the forum topic id. When no
 * topic applies the suffix is empty, so a DM agent always maps to `<chatId>:`,
 * while a supergroup agent maps to `<chatId>:<threadId>` and therefore starts
 * a fresh card when the topic changes.
 *
 * @param chatId Telegram chat id the boot card lives in.
 * @param threadId Forum topic id, or `undefined` when the chat has no topic.
 * @returns The `<chatId>:<threadId-or-empty>` key.
 */
export function bootCardChatKey(chatId: string, threadId: number | undefined): string {
  const topicPart = threadId == null ? '' : String(threadId)
  return `${chatId}:${topicPart}`
}
28
+
29
/** On-disk shape of the store: boot-card chat key → Telegram message id. */
type Store = Record<string, number>

/** True when `value` is usable as a Telegram message id: a positive integer. */
function isMessageId(value: unknown): value is number {
  return typeof value === 'number' && Number.isSafeInteger(value) && value > 0
}

/**
 * Read and sanitize the store file.
 *
 * Only entries whose value is a valid message id are kept, so the returned
 * object really is a `Store` (the JSON on disk is untrusted: it may be
 * hand-edited, truncated, or written by a different version). The result has
 * no prototype, so a lookup can never resolve to an inherited member such as
 * `constructor`.
 *
 * @param path Location of the JSON store file.
 * @returns The valid entries, or an empty store when the file is missing,
 *   unreadable, not valid JSON, or not a JSON object.
 */
function readStore(path: string): Store {
  const store = Object.create(null) as Store
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'))
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      for (const [key, value] of Object.entries(parsed)) {
        if (isMessageId(value)) store[key] = value
      }
    }
  } catch {
    /* missing / corrupt → treat as empty */
  }
  return store
}

/**
 * Look up the persisted boot-card message id for a chat+topic key.
 *
 * @param path Location of the JSON store file.
 * @param chatKey Key produced by `bootCardChatKey`.
 * @returns The stored id, or `null` when there is no prior boot card to reuse
 *   (first boot, corrupt file, different chat, or an invalid stored value).
 */
export function loadBootCardMsgId(
  path: string,
  chatKey: string,
): number | null {
  const id = readStore(path)[chatKey]
  return isMessageId(id) ? id : null
}

/**
 * Record the current boot card's message id for a chat+topic key.
 *
 * Merges into the existing store, so ids recorded for other chats survive.
 * Invalid ids (non-integer, non-positive, non-finite) are ignored. Any
 * read/write failure is swallowed on purpose: the worst case is that the next
 * reboot finds no prior card and sends a fresh one.
 *
 * @param path Location of the JSON store file.
 * @param chatKey Key produced by `bootCardChatKey`.
 * @param messageId Telegram message id of the boot card just sent or reused.
 */
export function saveBootCardMsgId(
  path: string,
  chatKey: string,
  messageId: number,
): void {
  if (!isMessageId(messageId)) return
  try {
    const store = readStore(path)
    if (store[chatKey] === messageId) return // idempotent — no rewrite
    store[chatKey] = messageId
    writeFileSync(path, JSON.stringify(store), 'utf8')
  } catch {
    /* non-fatal */
  }
}
@@ -67,6 +67,7 @@ import {
67
67
  type ConfigDiff,
68
68
  } from './config-snapshot.js'
69
69
  import { join } from 'path'
70
+ import { bootCardChatKey, loadBootCardMsgId, saveBootCardMsgId } from './boot-card-msgid.js'
70
71
  import { loadConfig as _loadSwitchroomConfig } from '../../src/config/loader.js'
71
72
  import { resolveAgentConfig as _resolveAgentConfig } from '../../src/config/merge.js'
72
73
 
@@ -134,6 +135,21 @@ export interface BotApiForBootCard {
134
135
  text: string,
135
136
  opts?: Record<string, unknown>,
136
137
  ): Promise<unknown>
138
+ /**
139
+ * Like `editMessageText`, but reports whether the target message still
140
+ * exists rather than swallowing a "message to edit not found" the way the
141
+ * shared retry policy does (retry-api-call.ts) — the boot path needs to
142
+ * know so it can fall back to a fresh send when the prior card was deleted.
143
+ * `'edited'` = the edit landed (or content was identical → message exists);
144
+ * `'gone'` = the message is missing (or any other error → send fresh).
145
+ * Optional so existing callers/tests without it fall back to always-send.
146
+ */
147
+ editMessageTextStrict?(
148
+ chatId: string,
149
+ messageId: number,
150
+ text: string,
151
+ opts?: Record<string, unknown>,
152
+ ): Promise<'edited' | 'gone'>
137
153
  }
138
154
 
139
155
  export interface BootCardHandle {
@@ -568,6 +584,17 @@ export interface RunProbesOpts {
568
584
  * resolve the default memory collection label.
569
585
  */
570
586
  configSnapshotPath?: string
587
+ /**
588
+ * Cross-reboot store for the boot card's Telegram message id (JSON,
589
+ * typically `<agentDir>/.boot-card-msgid.json`). When set AND the bot
590
+ * supports `editMessageTextStrict`, a routine reboot (no `ackMessageId`)
591
+ * EDITS the prior boot card in place instead of sending a new one — edits
592
+ * never bump the unread badge, so reboots produce zero notification
593
+ * (operator request 2026-06-03). Falls back to a fresh silent send when
594
+ * there's no prior card or it was deleted. Omit to keep the always-send
595
+ * behaviour.
596
+ */
597
+ bootCardStatePath?: string
571
598
  }
572
599
 
573
600
  /** Run all six probes concurrently with their own per-probe timeouts.
@@ -641,20 +668,60 @@ export async function startBootCard(
641
668
  // the chat is where you look, and nothing here warrants a push.
642
669
  const silentBootCard = true
643
670
 
644
- let messageId: number
645
- try {
646
- const sent = await bot.sendMessage(chatId, ackText, {
647
- parse_mode: 'HTML',
648
- link_preview_options: { is_disabled: true },
649
- ...(threadId != null ? { message_thread_id: threadId } : {}),
650
- ...(ackMessageId != null ? { reply_parameters: { message_id: ackMessageId } } : {}),
651
- ...(silentBootCard ? { disable_notification: true } : {}),
652
- })
653
- messageId = sent.message_id
654
- logger(`telegram gateway: boot-card: posted msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} silent=${silentBootCard}\n`)
655
- } catch (err: unknown) {
656
- logger(`telegram gateway: boot-card: failed to post ack: ${(err as Error)?.message ?? String(err)}\n`)
657
- return { messageId: -1, complete: () => {} }
671
+ // Edit-in-place to produce ZERO notification (operator request 2026-06-03).
672
+ // A sent message always bumps the unread badge — `disable_notification`
673
+ // only kills the sound/banner. So for a ROUTINE reboot (no `ackMessageId`:
674
+ // operator update / cli rollout / crash / fresh) we EDIT the prior boot
675
+ // card in place — edits never touch the badge — instead of sending a new
676
+ // one. We only do this when the bot can tell us the prior message still
677
+ // exists (`editMessageTextStrict`); if it's gone, or this is a
678
+ // Telegram-initiated `/restart` (ackMessageId set the operator asked and
679
+ // is watching, and the card should reply to their command), we fall back to
680
+ // a fresh silent send.
681
+ const chatKey = bootCardChatKey(chatId, threadId)
682
+ const reuseId =
683
+ ackMessageId == null && opts.bootCardStatePath != null && bot.editMessageTextStrict != null
684
+ ? loadBootCardMsgId(opts.bootCardStatePath, chatKey)
685
+ : null
686
+
687
+ let messageId = -1
688
+ if (reuseId != null && bot.editMessageTextStrict != null) {
689
+ try {
690
+ const outcome = await bot.editMessageTextStrict(chatId, reuseId, ackText, {
691
+ parse_mode: 'HTML',
692
+ link_preview_options: { is_disabled: true },
693
+ ...(threadId != null ? { message_thread_id: threadId } : {}),
694
+ })
695
+ if (outcome === 'edited') {
696
+ messageId = reuseId
697
+ logger(`telegram gateway: boot-card: reused msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} edit_in_place=true notify=none\n`)
698
+ }
699
+ } catch (err: unknown) {
700
+ logger(`telegram gateway: boot-card: edit-in-place probe failed (${(err as Error)?.message ?? String(err)}) — sending fresh\n`)
701
+ }
702
+ }
703
+
704
+ if (messageId < 0) {
705
+ try {
706
+ const sent = await bot.sendMessage(chatId, ackText, {
707
+ parse_mode: 'HTML',
708
+ link_preview_options: { is_disabled: true },
709
+ ...(threadId != null ? { message_thread_id: threadId } : {}),
710
+ ...(ackMessageId != null ? { reply_parameters: { message_id: ackMessageId } } : {}),
711
+ ...(silentBootCard ? { disable_notification: true } : {}),
712
+ })
713
+ messageId = sent.message_id
714
+ logger(`telegram gateway: boot-card: posted msgId=${messageId} chatId=${chatId} reason=${opts.restartReason ?? '-'} reason_detail=${opts.restartReasonDetail ?? '-'} silent=${silentBootCard}\n`)
715
+ } catch (err: unknown) {
716
+ logger(`telegram gateway: boot-card: failed to post ack: ${(err as Error)?.message ?? String(err)}\n`)
717
+ return { messageId: -1, complete: () => {} }
718
+ }
719
+ }
720
+
721
+ // Remember this card's id so the NEXT reboot can edit it in place (no
722
+ // notification). Idempotent on reuse; non-fatal on write failure.
723
+ if (opts.bootCardStatePath != null && messageId > 0) {
724
+ saveBootCardMsgId(opts.bootCardStatePath, chatKey, messageId)
658
725
  }
659
726
 
660
727
  // Determine the live window for agent-service status updates. Callers
@@ -9,7 +9,7 @@
9
9
  * is connected, inbound LLM messages get a "⏳ Agent is restarting…" reply.
10
10
  */
11
11
 
12
- import { Bot, GrammyError, InlineKeyboard, InputFile, type Context } from 'grammy'
12
+ import { Bot, GrammyError, InlineKeyboard, InputFile, type Context, type Api } from 'grammy'
13
13
  import { run, type RunnerHandle } from '@grammyjs/runner'
14
14
  import type { ReactionTypeEmoji } from 'grammy/types'
15
15
  import { randomBytes } from 'crypto'
@@ -1064,7 +1064,19 @@ try {
1064
1064
  const pending = findLatestTurnIfInterrupted(turnsDb)
1065
1065
  const selfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
1066
1066
  if (pending != null && selfAgent) {
1067
- const kind = selectResumeBuilder(pending.ended_via)
1067
+ // 3h staleness failsafe (operator spec, 2026-06-03): never AUTO-resume
1068
+ // interrupted work older than RESUME_MAX_AGE_MS — selectResumeBuilder
1069
+ // downgrades a stale 'resume' to the passive 'report' so the user is told
1070
+ // ("I was working on X ~Nh ago") but nothing replays unprompted. Env
1071
+ // override SWITCHROOM_RESUME_MAX_AGE_MS (ms); set very high to disable.
1072
+ const RESUME_MAX_AGE_MS = (() => {
1073
+ const v = Number(process.env.SWITCHROOM_RESUME_MAX_AGE_MS)
1074
+ return Number.isFinite(v) && v > 0 ? v : 10_800_000 // 3h
1075
+ })()
1076
+ const kind = selectResumeBuilder(pending.ended_via, {
1077
+ ageMs: Math.max(0, Date.now() - pending.started_at),
1078
+ maxAgeMs: RESUME_MAX_AGE_MS,
1079
+ })
1068
1080
  if (kind === 'resume') {
1069
1081
  bootResumeInbound = { agent: selfAgent, msg: buildResumeInterruptedInbound({ turn: pending }) }
1070
1082
  } else if (kind === 'report') {
@@ -1801,6 +1813,7 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1801
1813
  return d
1802
1814
  },
1803
1815
  inboundSpool,
1816
+ trackRedeliveredInbound,
1804
1817
  )
1805
1818
  if (fr.redelivered > 0) {
1806
1819
  process.stderr.write(
@@ -1896,6 +1909,7 @@ function releaseTurnBufferGate(key: string): void {
1896
1909
  return d
1897
1910
  },
1898
1911
  inboundSpool,
1912
+ trackRedeliveredInbound,
1899
1913
  )
1900
1914
  if (fr.redelivered > 0) {
1901
1915
  process.stderr.write(
@@ -2589,6 +2603,26 @@ function wrapBootCardApi(
2589
2603
  ),
2590
2604
  opts(cid),
2591
2605
  ) as Promise<unknown>,
2606
+ // Strict edit for the boot-card edit-in-place probe: distinguishes
2607
+ // "message gone" (→ 'gone', caller sends fresh) from a landed/identical
2608
+ // edit (→ 'edited'). robustApiCall SWALLOWS "message to edit not found"
2609
+ // to undefined (retry-api-call.ts), so this can't go through it — a
2610
+ // deliberate single-attempt raw edit that classifies the error itself.
2611
+ editMessageTextStrict: async (cid, mid, text, editOpts) => {
2612
+ type EditOpts = Parameters<Api['editMessageText']>[3]
2613
+ try {
2614
+ // allow-raw-bot-api: boot-card edit-in-place probe — must detect a deleted target, which the shared retry policy swallows.
2615
+ await lockedBot.api.editMessageText(cid, mid, text, editOpts as EditOpts)
2616
+ return 'edited'
2617
+ } catch (err) {
2618
+ const desc =
2619
+ err instanceof GrammyError ? err.description : err instanceof Error ? err.message : String(err)
2620
+ // Content identical → message still exists; reuse it.
2621
+ if (typeof desc === 'string' && desc.toLowerCase().includes('not modified')) return 'edited'
2622
+ // Not found, or any other error → fall back to a fresh silent send.
2623
+ return 'gone'
2624
+ }
2625
+ },
2592
2626
  }
2593
2627
  }
2594
2628
 
@@ -4134,6 +4168,7 @@ silencePoke.startTimer({
4134
4168
  return d
4135
4169
  },
4136
4170
  inboundSpool,
4171
+ trackRedeliveredInbound,
4137
4172
  )
4138
4173
  process.stderr.write(
4139
4174
  `telegram gateway: silence-poke framework-fallback ended wedged turn ` +
@@ -4163,6 +4198,45 @@ const _deliveryMachineTick = setInterval(() => {
4163
4198
  }, DELIVERY_MACHINE_TICK_MS)
4164
4199
  _deliveryMachineTick.unref?.()
4165
4200
 
4201
/**
 * Enrol a buffer-redelivered inbound in the deliver-until-acked queue, so the
 * existing sweep keeps re-delivering it until claude's `enqueue` ack lands.
 *
 * This hook is wired into every redelivery path (bridgeUp drain, silence-poke
 * fallback, flap/reply-gate flushes). A `send` that returns true only means
 * the bytes reached the bridge, NOT that claude consumed them: right after a
 * restart (especially with a slow MCP boot) the inject can hit a not-ready
 * session and be silently dropped with nothing retrying it — the clerk
 * lost-message incident of 2026-06-03.
 *
 * Tracking mirrors the live-delivery tracking at the handleInbound site
 * (chat key + message id), so DMs and supergroup forum topics are handled
 * identically. Only real user inbounds are tracked: `shouldTrackDelivery`
 * excludes steer / interrupt / synthetic-source / empty inbounds, which never
 * produce an `enqueue` and would otherwise be re-delivered forever.
 */
function trackRedeliveredInbound(merged: InboundMessage): void {
  if (!DELIVERY_CONFIRM_ENABLED) return

  const trackable = shouldTrackDelivery({
    isSteering: false,
    isInterrupt: false,
    // Synthetic inbounds (cron / vault / handback / resume) carry a source and
    // are NOT tracked here — they enqueue under their own semantics. For the
    // resume synthetics, safe tracking first needs the resume builder to emit
    // meta.message_id so the deliver-until-acked ack matches its enqueue;
    // that is tracked separately as a follow-up (see PR notes).
    hasSource: merged.meta?.source != null,
    effectiveText: merged.text,
  })
  if (!trackable) return

  const threadId = merged.threadId != null ? Number(merged.threadId) : null
  const key = chatKey(merged.chatId, threadId)
  const ackMessageId = merged.messageId != null ? String(merged.messageId) : null
  trackDelivery(deliveryQueue, key, merged, Date.now(), ackMessageId)
}
4239
+
4166
4240
  // Re-deliver stranded inbounds until claude acks (the marko drop-wedge).
4167
4241
  // Every few seconds, re-send any inbound that was handed to claude but never
4168
4242
  // acked by an `enqueue` — it stranded unsubmitted in the composer. Re-clear
@@ -4400,6 +4474,11 @@ const ipcServer: IpcServer = createIpcServer({
4400
4474
  inboundSpool: inboundSpool ?? null,
4401
4475
  pendingPermissionBuffer,
4402
4476
  client,
4477
+ // Enrol each drained user inbound in the deliver-until-acked queue
4478
+ // so the 5s sweep re-delivers until claude's `enqueue` ack lands —
4479
+ // a socket-write into a still-booting session is NOT consumption
4480
+ // (clerk lost-message incident, 2026-06-03).
4481
+ onUserInboundDelivered: trackRedeliveredInbound,
4403
4482
  })
4404
4483
  } else {
4405
4484
  // Kill-switch fallback: imperative drain (parity with pre-cutover
@@ -4410,6 +4489,10 @@ const ipcServer: IpcServer = createIpcServer({
4410
4489
  try {
4411
4490
  client.send(msg)
4412
4491
  inboundSpool?.ack(msg)
4492
+ // Same enrol as the cutover drain path: a socket-write success is
4493
+ // not proof claude consumed it — enrol so the sweep re-delivers
4494
+ // until `enqueue` (clerk lost-message incident, 2026-06-03).
4495
+ trackRedeliveredInbound(msg)
4413
4496
  } catch (err) {
4414
4497
  process.stderr.write(
4415
4498
  `telegram gateway: pending-inbound drain failed agent=${client.agentName} ` +
@@ -4515,6 +4598,7 @@ const ipcServer: IpcServer = createIpcServer({
4515
4598
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
4516
4599
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
4517
4600
  configSnapshotPath: join(resolvedAgentDirForCard, '.config-snapshot.json'),
4601
+ bootCardStatePath: join(resolvedAgentDirForCard, '.boot-card-msgid.json'),
4518
4602
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
4519
4603
  }, ackMsgId).then(handle => {
4520
4604
  activeBootCard = handle
@@ -5318,6 +5402,7 @@ if (!STATIC) {
5318
5402
  return d
5319
5403
  },
5320
5404
  inboundSpool,
5405
+ trackRedeliveredInbound,
5321
5406
  )
5322
5407
  if (r != null && r.redelivered > 0) {
5323
5408
  process.stderr.write(
@@ -18405,6 +18490,7 @@ void (async () => {
18405
18490
  tmuxSupervisor: process.env.SWITCHROOM_TMUX_SUPERVISOR === '1',
18406
18491
  dockerMode: process.env.SWITCHROOM_RUNTIME === 'docker',
18407
18492
  configSnapshotPath: join(resolvedAgentDirForBootCard, '.config-snapshot.json'),
18493
+ bootCardStatePath: join(resolvedAgentDirForBootCard, '.boot-card-msgid.json'),
18408
18494
  ...(updateOutcomeLine ? { updateOutcomeLine } : {}),
18409
18495
  }, ackMsgId)
18410
18496
  activeBootCard = handle
@@ -45,6 +45,15 @@ export interface DispatchCtx {
45
45
  readonly client?: IpcClient
46
46
  /** Optional log sink — default stderr. Test hook. */
47
47
  readonly log?: (line: string) => void
48
+ /**
49
+ * Optional: enrol a drained+redelivered inbound in the deliver-until-acked
50
+ * queue. The bridgeUp drain's socket-write "success" is NOT proof claude
51
+ * consumed the message — right after a restart (esp. with a slow MCP boot)
52
+ * the inject can hit a not-ready session and be dropped. Wiring this makes
53
+ * the existing 5s sweep re-deliver until claude's `enqueue` ack lands.
54
+ * (clerk lost-message incident, 2026-06-03.)
55
+ */
56
+ readonly onUserInboundDelivered?: (merged: InboundMessage) => void
48
57
  }
49
58
 
50
59
  const enabled = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== '0'
@@ -103,6 +112,9 @@ function dispatchOne(effect: Effect, ctx: DispatchCtx): void {
103
112
  ctx.selfAgent,
104
113
  send,
105
114
  ctx.inboundSpool ?? undefined,
115
+ ctx.onUserInboundDelivered
116
+ ? (merged) => ctx.onUserInboundDelivered!(merged)
117
+ : undefined,
106
118
  )
107
119
  if (result.drained > 0) {
108
120
  log(
@@ -87,6 +87,14 @@ export function redeliverBufferedInbound(
87
87
  agent: string,
88
88
  send: (msg: InboundMessage) => boolean,
89
89
  spool?: InboundSpool,
90
+ // Called once per merged group on CONFIRMED delivery (after spool.ack).
91
+ // The caller uses it to enrol the redelivered inbound in the
92
+ // deliver-until-acked queue (`trackDelivery`) so it is re-sent until
93
+ // claude's `enqueue` ack lands — closing the restart boot-race where a
94
+ // socket-write "succeeds" into a not-ready session and the message is
95
+ // silently dropped (clerk 2026-06-03). `send` returning true only means
96
+ // the bytes reached the bridge, NOT that claude consumed them.
97
+ onDelivered?: (merged: InboundMessage, originals: InboundMessage[]) => void,
90
98
  ): { drained: number; redelivered: number; rebuffered: number } {
91
99
  const pending = buffer.drain(agent)
92
100
  let redelivered = 0
@@ -110,6 +118,10 @@ export function redeliverBufferedInbound(
110
118
  // originals are, so we ack by original identity.
111
119
  for (const o of originals) spool?.ack(o)
112
120
  redelivered += originals.length
121
+ // Enrol in the deliver-until-acked queue (caller's hook). A bare
122
+ // socket-write success is NOT proof claude consumed it; the queue's
123
+ // sweep re-delivers until the `enqueue` ack lands.
124
+ onDelivered?.(merged, originals)
113
125
  } else {
114
126
  // Re-buffer the originals (not the merged synthetic) so the spool
115
127
  // identity is preserved and the next drain re-merges them losslessly.
@@ -258,11 +270,15 @@ export function idleDrainTick(
258
270
  isBridgeAlive: () => boolean,
259
271
  send: (msg: InboundMessage) => boolean,
260
272
  spool?: InboundSpool,
273
+ // Forwarded to redeliverBufferedInbound so the post-flap-settle drain also
274
+ // enrols redelivered inbounds in the deliver-until-acked queue (parity with
275
+ // the bridgeUp drain — clerk lost-message incident, 2026-06-03).
276
+ onDelivered?: (merged: InboundMessage, originals: InboundMessage[]) => void,
261
277
  ): { drained: number; redelivered: number; rebuffered: number } | null {
262
278
  if (!agent) return null
263
279
  if (buffer.depth(agent) === 0) return null
264
280
  if (!isBridgeAlive()) return null
265
- return redeliverBufferedInbound(buffer, agent, send, spool)
281
+ return redeliverBufferedInbound(buffer, agent, send, spool, onDelivered)
266
282
  }
267
283
 
268
284
  export function createPendingInboundBuffer(
@@ -172,9 +172,25 @@ export function buildResumeWatchdogReportInbound(
172
172
  */
173
173
export function selectResumeBuilder(
  endedVia: TurnEndedVia | null,
  // 3h staleness failsafe (operator spec, 2026-06-03). When BOTH `ageMs` and
  // `maxAgeMs` are supplied and the interrupted turn is older than `maxAgeMs`,
  // an auto-resume is downgraded to the passive `report`: silently replaying
  // hours-old work could act on long-stale context. Omitting either value
  // keeps the legacy blanket-resume behaviour.
  opts?: { ageMs?: number; maxAgeMs?: number },
): 'resume' | 'report' | null {
  // A watchdog timeout is only ever reported, never resumed.
  if (endedVia === 'timeout') return 'report'

  // Resumable endings: an explicit restart/sigterm/unknown, or a turn that was
  // still open at boot (killed mid-flight). Anything else needs no follow-up.
  const resumable =
    endedVia == null || endedVia === 'restart' || endedVia === 'sigterm' || endedVia === 'unknown'
  if (!resumable) return null

  const { ageMs, maxAgeMs } = opts ?? {}
  const tooOld = ageMs != null && maxAgeMs != null && ageMs > maxAgeMs
  // Too old to safely auto-resume → passive notice only.
  return tooOld ? 'report' : 'resume'
}
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Edit-in-place boot card (zero-notification reboots, operator request
3
+ * 2026-06-03). A routine reboot must EDIT the prior boot card rather than
4
+ * send a new one — a sent message bumps the unread badge even with
5
+ * `disable_notification: true`; an edit never does.
6
+ *
7
+ * Pins the startBootCard contract:
8
+ * - first boot (no persisted id) → SEND + persist the id
9
+ * - next routine boot (persisted id exists) → EDIT in place, NO send
10
+ * - persisted id but message deleted ('gone') → fall back to SEND
11
+ * - Telegram-initiated /restart (ackMessageId set) → SEND fresh (replies to
12
+ * the operator's command; they asked and are watching)
13
+ * - no bootCardStatePath → always SEND (back-compat)
14
+ *
15
+ * State is an isolated mkdtemp file — NEVER ~/.switchroom (test discipline).
16
+ */
17
+
18
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
import { mkdtempSync, readFileSync, rmSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { startBootCard } from '../gateway/boot-card.js'
import type { BotApiForBootCard } from '../gateway/boot-card.js'
24
+
25
// Per-test scratch directory and the boot-card state file inside it. Both are
// reassigned before every test so no state leaks between cases.
let dir: string
let statePath: string

beforeEach(() => {
  const scratch = mkdtempSync(join(tmpdir(), 'boot-card-eip-'))
  dir = scratch
  statePath = join(scratch, '.boot-card-msgid.json')
})

// Remove the scratch directory (and the state file within) after each test.
afterEach(() => {
  rmSync(dir, { recursive: true, force: true })
})
35
+
36
/**
 * Build a fake bot that records what the boot card does with it.
 *
 * `sendMessage` logs every call in `sends` and hands out ids 1001, 1002, …
 * When `strictOutcome` is non-null the bot also exposes
 * `editMessageTextStrict`, which logs the call in `strictEdits` and resolves
 * to that outcome; when it is `null` the method is absent altogether.
 */
function makeBot(strictOutcome: 'edited' | 'gone' | null) {
  const sends: Array<{ chatId: string; opts: Record<string, unknown> }> = []
  const strictEdits: Array<{ chatId: string; messageId: number }> = []
  let lastId = 1000

  const bot: BotApiForBootCard = {
    async sendMessage(chatId, _text, opts) {
      sends.push({ chatId, opts: opts ?? {} })
      lastId += 1
      return { message_id: lastId }
    },
    async editMessageText() {
      return {}
    },
  }

  if (strictOutcome != null) {
    const outcome = strictOutcome
    bot.editMessageTextStrict = async (chatId: string, messageId: number) => {
      strictEdits.push({ chatId, messageId })
      return outcome
    }
  }

  return { bot, sends, strictEdits }
}
58
+
59
/**
 * Default `startBootCard` options for these tests, with per-test overrides
 * applied last. The state path points at the current test's scratch file.
 */
function mkOpts(overrides: Record<string, unknown> = {}) {
  const defaults = {
    agentName: 'TestAgent',
    agentSlug: 'test-agent',
    version: 'v0.0.0-test',
    agentDir: dir,
    gatewayInfo: { pid: 1, startedAtMs: Date.now() },
    restartReason: 'graceful' as const,
    // Disable the live loop — only the initial post/edit is asserted.
    agentLiveWindowMs: 0,
    settleWindowMs: 1_000_000,
    bootCardStatePath: statePath,
  }
  return { ...defaults, ...overrides }
}
73
+
74
// Pins the edit-in-place contract of startBootCard: send on first boot, edit
// the persisted card on later routine boots, and fall back to a fresh silent
// send whenever reuse is not possible or not wanted.
describe('boot card — edit-in-place (zero-notification reboots)', () => {
  it('first boot with no persisted id → sends, and persists the id', async () => {
    const { bot, sends, strictEdits } = makeBot('edited')
    await startBootCard('chat1', undefined, bot, mkOpts())
    expect(sends).toHaveLength(1) // first boot must send (one badge, ever)
    expect(strictEdits).toHaveLength(0)
    expect(sends[0]!.opts.disable_notification).toBe(true) // still silent
    // The id returned by the send (1001) must be on disk under the DM key
    // `<chatId>:` so the next boot can find and edit it.
    const persisted: unknown = JSON.parse(readFileSync(statePath, 'utf8'))
    expect(persisted).toEqual({ 'chat1:': 1001 })
  })

  it('second routine boot → edits the prior card in place, NO new send', async () => {
    // Boot 1 sends + persists.
    const first = makeBot('edited')
    await startBootCard('chat1', undefined, first.bot, mkOpts())
    expect(first.sends).toHaveLength(1)

    // Boot 2 (same state file) → reuse via strict edit, no send.
    const second = makeBot('edited')
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    expect(second.sends).toHaveLength(0) // ← zero notification: no new message
    expect(second.strictEdits).toHaveLength(1)
    // Boot 1's send returned message_id 1001 (ids start after 1000);
    // boot 2 must edit exactly that persisted id.
    expect(second.strictEdits[0]!.messageId).toBe(1001)
  })

  it('persisted id but message was deleted (gone) → falls back to a fresh send', async () => {
    const first = makeBot('edited')
    await startBootCard('chat1', undefined, first.bot, mkOpts())

    const second = makeBot('gone')
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    expect(second.strictEdits).toHaveLength(1) // probed the old id
    expect(second.sends).toHaveLength(1) // and fell back to a fresh send
    expect(second.sends[0]!.opts.disable_notification).toBe(true)
  })

  it('Telegram-initiated /restart (ackMessageId set) → sends fresh, never edits', async () => {
    // Seed a persisted id from a routine boot.
    const seed = makeBot('edited')
    await startBootCard('chat1', undefined, seed.bot, mkOpts())

    // A /restart passes ackMessageId → must reply with a fresh card, not edit.
    const restart = makeBot('edited')
    await startBootCard('chat1', undefined, restart.bot, mkOpts(), 555)
    expect(restart.strictEdits).toHaveLength(0) // no reuse on user-initiated restart
    expect(restart.sends).toHaveLength(1)
    expect(restart.sends[0]!.opts.reply_parameters).toEqual({ message_id: 555 })
  })

  it('no bootCardStatePath → always sends (back-compat, unchanged)', async () => {
    const { bot, sends, strictEdits } = makeBot('edited')
    await startBootCard('chat1', undefined, bot, mkOpts({ bootCardStatePath: undefined }))
    expect(sends).toHaveLength(1)
    expect(strictEdits).toHaveLength(0)
  })

  it('bot without editMessageTextStrict → always sends (graceful degrade)', async () => {
    const { bot, sends } = makeBot(null) // no strict method
    await startBootCard('chat1', undefined, bot, mkOpts())
    const second = makeBot(null)
    await startBootCard('chat1', undefined, second.bot, mkOpts())
    // Both boots send — no strict method means no in-place reuse path.
    expect(sends).toHaveLength(1)
    expect(second.sends).toHaveLength(1)
  })
})