switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/agent-scheduler/index.js +56 -15
  2. package/dist/auth-broker/index.js +383 -97
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +7 -4
  5. package/dist/cli/notion-write-pretool.mjs +35 -4
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/switchroom.js +2894 -841
  9. package/dist/host-control/main.js +2685 -207
  10. package/dist/vault/approvals/kernel-server.js +7453 -7413
  11. package/dist/vault/broker/server.js +11428 -11388
  12. package/examples/minimal.yaml +1 -0
  13. package/examples/switchroom.yaml +1 -0
  14. package/package.json +3 -3
  15. package/profiles/_base/start.sh.hbs +97 -1
  16. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  17. package/profiles/default/CLAUDE.md.hbs +0 -19
  18. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  19. package/telegram-plugin/answer-stream-flag.ts +12 -49
  20. package/telegram-plugin/answer-stream.ts +5 -150
  21. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  22. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  23. package/telegram-plugin/context-exhaustion.ts +12 -0
  24. package/telegram-plugin/demo-mask.ts +154 -0
  25. package/telegram-plugin/dist/bridge/bridge.js +55 -12
  26. package/telegram-plugin/dist/gateway/gateway.js +2938 -977
  27. package/telegram-plugin/dist/server.js +55 -12
  28. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  29. package/telegram-plugin/draft-stream.ts +47 -410
  30. package/telegram-plugin/final-answer-detect.ts +17 -12
  31. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  32. package/telegram-plugin/format.ts +56 -19
  33. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  34. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  35. package/telegram-plugin/gateway/auth-command.ts +70 -14
  36. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  37. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  38. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  39. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  40. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  41. package/telegram-plugin/gateway/effort-command.ts +8 -3
  42. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  43. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  44. package/telegram-plugin/gateway/gateway.ts +1857 -292
  45. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  46. package/telegram-plugin/gateway/model-command.ts +115 -4
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-command.test.ts +134 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -34,6 +34,7 @@ import {
34
34
  renderAuthSnapshotFormat2,
35
35
  buildSnapshotKeyboard,
36
36
  } from '../auth-snapshot-format.js'
37
+ import { maskEmail } from '../demo-mask.js'
37
38
 
38
39
  // ─── Parser ────────────────────────────────────────────────────────────────
39
40
 
@@ -237,7 +238,19 @@ export interface AuthBrokerClient {
237
238
  probeQuota(
238
239
  accounts: readonly string[],
239
240
  timeoutMs?: number,
240
- ): Promise<{ results: Array<{ label: string; result: import('../quota-check.js').QuotaResult }> }>
241
+ /** #2495 Change 2/3 — bypass the broker's probe-on-open TTL (quota-watch
242
+ * alarm corroboration). Normal card opens omit it (TTL-gated). */
243
+ forceLive?: boolean,
244
+ ): Promise<{
245
+ results: Array<{
246
+ label: string
247
+ result: import('../quota-check.js').QuotaResult
248
+ /** #2495 Change 2 — how this result was sourced ("live" vs "cache"). */
249
+ served?: 'live' | 'cache'
250
+ /** Unix ms the served snapshot was captured (set when served==="cache"). */
251
+ capturedAt?: number
252
+ }>
253
+ }>
241
254
  /**
242
255
  * Fleet notification-dedup claim (quota-watch). First caller of `key`
243
256
  * inside `windowMs` gets `granted: true` and sends; everyone else
@@ -280,9 +293,29 @@ export interface AuthCommandContext {
280
293
  */
281
294
  liveQuotas?: (
282
295
  accounts: AccountState[],
283
- ) => Promise<import('../quota-check.js').QuotaResult[]>
296
+ ) => Promise<LiveQuotasResult>
284
297
  /** Operator timezone forwarded to the Format 2 renderer. */
285
298
  tz?: string
299
+ /**
300
+ * Demo mode (the `/auth demo` suffix). Forwarded to the Format 2 renderer
301
+ * so the fleet snapshot masks account-email labels for a screen recording.
302
+ * Off by default; only the default dashboard view (`show`/`list`) honors
303
+ * it — destructive verbs are unaffected. Scope is the email-label PII tier.
304
+ */
305
+ demo?: boolean
306
+ }
307
+
308
+ /**
309
+ * #2495 Change 2 — the enriched probe result. `quotas` stays parallel to
310
+ * `state.accounts` (the contract `buildSnapshotsFromState` relies on);
311
+ * `staleCachedAtMs` is set when ANY account was served from the durable cache
312
+ * (probe-on-open TTL hit OR a failed live probe falling back to cache) — it
313
+ * carries the OLDEST such snapshot's `capturedAt` so the footer stamps
314
+ * "⚠ cached Nm ago" instead of a false "Live · refreshed 0s ago".
315
+ */
316
+ export interface LiveQuotasResult {
317
+ quotas: import('../quota-check.js').QuotaResult[]
318
+ staleCachedAtMs?: number
286
319
  }
287
320
 
288
321
  export interface AuthCommandReply {
@@ -345,10 +378,18 @@ export async function handleAuthCommand(
345
378
  const state = await ctx.client.listState()
346
379
  let liveQuotas: import('../quota-check.js').QuotaResult[] | undefined
347
380
  let liveProbedAtMs: number | undefined
381
+ let staleCachedAtMs: number | undefined
348
382
  if (ctx.liveQuotas && state.accounts.length > 0) {
349
383
  try {
350
- liveQuotas = await ctx.liveQuotas(state.accounts)
351
- liveProbedAtMs = Date.now()
384
+ const enriched = await ctx.liveQuotas(state.accounts)
385
+ liveQuotas = enriched.quotas
386
+ // #2495 Change 2 — only claim a live "refreshed Ns ago" stamp when
387
+ // NOTHING was served from cache; otherwise show "⚠ cached Nm ago".
388
+ if (enriched.staleCachedAtMs != null) {
389
+ staleCachedAtMs = enriched.staleCachedAtMs
390
+ } else {
391
+ liveProbedAtMs = Date.now()
392
+ }
352
393
  } catch {
353
394
  // Live probe failed — fall back to legacy table silently.
354
395
  liveQuotas = undefined
@@ -361,13 +402,15 @@ export async function handleAuthCommand(
361
402
  let keyboard: AuthCommandReply['keyboard']
362
403
  if (liveQuotas && liveQuotas.length === state.accounts.length) {
363
404
  const snapshots = buildSnapshotsFromState(state, liveQuotas)
364
- keyboard = buildSnapshotKeyboard(snapshots)
405
+ keyboard = buildSnapshotKeyboard(snapshots, { now: new Date() })
365
406
  }
366
407
  return {
367
408
  text: renderShowText(state, Date.now(), {
368
409
  liveQuotas,
369
410
  tz: ctx.tz,
370
411
  liveProbedAtMs,
412
+ staleCachedAtMs,
413
+ demo: ctx.demo,
371
414
  }),
372
415
  html: true,
373
416
  keyboard,
@@ -709,6 +752,13 @@ export interface RenderShowOpts {
709
752
  /** Wall-clock ms when the live probes returned, used for "refreshed
710
753
  * Ns ago" footer. Omit to suppress that footer line. */
711
754
  liveProbedAtMs?: number
755
+ /** #2495 Change 2 — set when the render was served from the durable cache
756
+ * (probe-on-open TTL hit or failed probe). Renders "⚠ cached Nm ago" in
757
+ * the footer (takes precedence over liveProbedAtMs). */
758
+ staleCachedAtMs?: number
759
+ /** Demo mode (the `/auth demo` suffix). Masks account-email labels in both
760
+ * the Format 2 snapshot and the legacy accounts table. Off by default. */
761
+ demo?: boolean
712
762
  }
713
763
 
714
764
  /**
@@ -745,6 +795,8 @@ export function renderShowText(
745
795
  tz: opts.tz,
746
796
  now: new Date(now),
747
797
  liveProbedAtMs: opts.liveProbedAtMs,
798
+ staleCachedAtMs: opts.staleCachedAtMs,
799
+ demo: opts.demo,
748
800
  }),
749
801
  )
750
802
  } else {
@@ -753,7 +805,7 @@ export function renderShowText(
753
805
  lines.push('')
754
806
  lines.push('<b>Accounts</b>')
755
807
  lines.push('<pre>')
756
- lines.push(formatAccountsTable(state, now))
808
+ lines.push(formatAccountsTable(state, now, opts.demo ?? false))
757
809
  lines.push('</pre>')
758
810
  }
759
811
  }
@@ -762,7 +814,7 @@ export function renderShowText(
762
814
  if (state.agents.length > 0) {
763
815
  lines.push('<b>Agents</b>')
764
816
  lines.push('<pre>')
765
- lines.push(formatAgentsTable(state))
817
+ lines.push(formatAgentsTable(state, opts.demo ?? false))
766
818
  lines.push('</pre>')
767
819
  }
768
820
 
@@ -770,7 +822,7 @@ export function renderShowText(
770
822
  if (state.consumers.length > 0) {
771
823
  lines.push('<b>Consumers</b>')
772
824
  lines.push('<pre>')
773
- lines.push(formatConsumersTable(state, now))
825
+ lines.push(formatConsumersTable(state, now, opts.demo ?? false))
774
826
  lines.push('</pre>')
775
827
  }
776
828
 
@@ -784,7 +836,7 @@ export function renderShowText(
784
836
  return lines.join('\n')
785
837
  }
786
838
 
787
- function formatAccountsTable(state: ListStateData, now: number): string {
839
+ function formatAccountsTable(state: ListStateData, now: number, demo = false): string {
788
840
  const rows: string[][] = [['ACCOUNT', 'STATUS', 'EXPIRES', 'QUOTA 5h·7d', 'QUOTA-RESET']]
789
841
  for (const acc of state.accounts) {
790
842
  const isActive = acc.label === state.active
@@ -800,7 +852,7 @@ function formatAccountsTable(state: ListStateData, now: number): string {
800
852
  ? formatRelativeMs(acc.exhausted_until - now)
801
853
  : '—'
802
854
  rows.push([
803
- `${marker} ${escapeHtml(acc.label)}`,
855
+ `${marker} ${escapeHtml(demo ? maskEmail(acc.label) : acc.label)}`,
804
856
  status,
805
857
  expires,
806
858
  formatQuotaUtilCell(acc, now),
@@ -830,7 +882,9 @@ export function formatQuotaUtilCell(
830
882
  return `${Math.round(lq.fiveHourUtilizationPct)}%·${Math.round(lq.sevenDayUtilizationPct)}% (${ageStr} ago)`
831
883
  }
832
884
 
833
- function formatAgentsTable(state: ListStateData): string {
885
+ function formatAgentsTable(state: ListStateData, demo = false): string {
886
+ // The ACTIVE column is an account-email label (in-scope PII); the AGENT
887
+ // name is topology (out of scope) and is never masked.
834
888
  const rows: string[][] = [['AGENT', 'ACTIVE', 'SOURCE']]
835
889
  for (const a of state.agents) {
836
890
  const source = a.override
@@ -838,7 +892,7 @@ function formatAgentsTable(state: ListStateData): string {
838
892
  : a.account === state.active
839
893
  ? 'fleet-active'
840
894
  : 'pinned'
841
- rows.push([escapeHtml(a.name), escapeHtml(a.account), source])
895
+ rows.push([escapeHtml(a.name), escapeHtml(demo ? maskEmail(a.account) : a.account), source])
842
896
  }
843
897
  return alignTable(rows)
844
898
  }
@@ -885,14 +939,16 @@ export function renderAgentDetail(
885
939
  return lines.join('\n')
886
940
  }
887
941
 
888
- function formatConsumersTable(state: ListStateData, now: number): string {
942
+ function formatConsumersTable(state: ListStateData, now: number, demo = false): string {
943
+ // ACTIVE is an account-email label (in-scope PII); CONSUMER name is
944
+ // topology (out of scope) and is never masked.
889
945
  const rows: string[][] = [['CONSUMER', 'ACTIVE', 'STATUS']]
890
946
  for (const c of state.consumers) {
891
947
  const status =
892
948
  c.last_seen_at == null
893
949
  ? 'socket bound'
894
950
  : `socket bound (last seen ${formatRelativeMs(now - c.last_seen_at)} ago)`
895
- rows.push([escapeHtml(c.name), escapeHtml(c.account), status])
951
+ rows.push([escapeHtml(c.name), escapeHtml(demo ? maskEmail(c.account) : c.account), status])
896
952
  }
897
953
  return alignTable(rows)
898
954
  }
@@ -181,3 +181,47 @@ export function resolveShutdownMarker(
181
181
  }
182
182
  return { ts: now, signal, reason: EXTERNAL_RESTART_FALLBACK_REASON };
183
183
  }
184
+
185
+ /**
186
+ * Pure decision: should the boot-resume path SUPPRESS the active
187
+ * resume_interrupted inbound because the prior shutdown was clean?
188
+ *
189
+ * A clean marker present and fresh (< maxAgeMs, default 60s) means the
190
+ * prior shutdown was operator/roll/CLI-initiated — NOT a crash. In that
191
+ * case auto-resuming interrupted work is wasteful: the agent was asked to
192
+ * stop, it stopped cleanly, and the "interrupted" turn was implicitly
193
+ * abandoned by that decision. Burning a full model turn to replay it on
194
+ * every operator restart wastes subscription quota for no user benefit.
195
+ *
196
+ * Returns true ONLY when:
197
+ * - a clean marker is present, AND
198
+ * - the marker is younger than maxAgeMs (default 60s), AND
199
+ * - the SWITCHROOM_BOOT_RESUME_ALWAYS escape hatch is not set.
200
+ *
201
+ * Returns false when:
202
+ * - no marker (crash / OOM / unexpected kill — resume normally), OR
203
+ * - marker is stale (>= maxAgeMs — something stalled; treat as crash), OR
204
+ * - forceAlways is true (the escape hatch is active).
205
+ *
206
+ * The forceAlways parameter maps to the env var
207
+ * SWITCHROOM_BOOT_RESUME_ALWAYS=1, which restores the pre-gate behaviour
208
+ * unconditionally. Pass it in as a parsed boolean so this function stays
209
+ * pure and testable without touching process.env.
210
+ *
211
+ * Keeping this pure makes the decision unit-testable in bun test without
212
+ * spinning up the gateway.
213
+ */
214
+ export function shouldSuppressBootResume(
215
+ marker: CleanShutdownMarker | null,
216
+ now: number,
217
+ { maxAgeMs = DEFAULT_MAX_AGE_MS, forceAlways = false }: {
218
+ maxAgeMs?: number;
219
+ forceAlways?: boolean;
220
+ } = {},
221
+ ): boolean {
222
+ if (forceAlways) return false;
223
+ if (marker === null) return false;
224
+ const age = now - marker.ts;
225
+ if (age < 0) return false; // clock skew defence — treat as stale
226
+ return age < maxAgeMs;
227
+ }
@@ -44,13 +44,28 @@ function fakeDeps(overrides: Partial<Parameters<typeof handleRequestConfigApprov
44
44
  chatId: number | string;
45
45
  messageId: number;
46
46
  text: string;
47
+ stripKeyboard?: boolean;
47
48
  }> = [];
48
49
  const deps = {
49
50
  agentName: "klanker",
50
51
  loadTargetChat: () => ({ chatId: 42 }),
51
52
  postCard: vi.fn(async () => ({ messageId: 1001 })),
52
- buildKeyboard: () => ({ inline_keyboard: [] }),
53
- editCard: async (a: { chatId: number | string; messageId: number; text: string }) => {
53
+ // Deterministic epoch so callback_data is assertable in tests.
54
+ mintEpoch: () => "cafe1234",
55
+ buildKeyboard: (requestId: string, epoch: string) => ({
56
+ inline_keyboard: [
57
+ [
58
+ { text: "✅ Approve", callback_data: `cfg:${requestId}:${epoch}:approve` },
59
+ { text: "🚫 Deny", callback_data: `cfg:${requestId}:${epoch}:deny` },
60
+ ],
61
+ ],
62
+ }),
63
+ editCard: async (a: {
64
+ chatId: number | string;
65
+ messageId: number;
66
+ text: string;
67
+ stripKeyboard?: boolean;
68
+ }) => {
54
69
  editCalls.push(a);
55
70
  },
56
71
  log: () => {},
@@ -227,9 +242,11 @@ describe("resolvePendingConfigApproval — double-tap and verdict propagation",
227
242
  const verdicts = sent.filter((s) => s.type === "config_approval_resolved");
228
243
  expect(verdicts.length).toBe(1);
229
244
  expect(verdicts[0]!.verdict).toBe("approve");
230
- // Card edited once to the interim 'Applying' state.
245
+ // Card edited once to the interim 'Applying' state, with the keyboard
246
+ // stripped so the buttons stop being tappable.
231
247
  expect(editCalls.length).toBe(1);
232
248
  expect(editCalls[0]!.text).toMatch(/Applying/);
249
+ expect(editCalls[0]!.stripKeyboard).toBe(true);
233
250
  });
234
251
 
235
252
  it("returns false when no entry exists (unknown requestId)", async () => {
@@ -261,6 +278,8 @@ describe("timeout path", () => {
261
278
  expect(verdicts.length).toBe(1);
262
279
  expect(verdicts[0]!.verdict).toBe("timeout");
263
280
  expect(editCalls[0]!.text).toMatch(/Expired/);
281
+ // Expired card must also strip the keyboard (no stale tappable buttons).
282
+ expect(editCalls[0]!.stripKeyboard).toBe(true);
264
283
  } finally {
265
284
  vi.useRealTimers();
266
285
  }
@@ -437,7 +456,20 @@ describe("oversize diff → attachment fallback (#1762)", () => {
437
456
  });
438
457
 
439
458
  describe("parseConfigApprovalCallback", () => {
440
- it("parses well-formed callbacks", () => {
459
+ it("parses the new epoch-bearing form cfg:<requestId>:<epoch>:<choice>", () => {
460
+ expect(parseConfigApprovalCallback("cfg:abc:cafe1234:approve")).toEqual({
461
+ requestId: "abc",
462
+ epoch: "cafe1234",
463
+ choice: "approve",
464
+ });
465
+ expect(parseConfigApprovalCallback("cfg:deadbeef:00ff:deny")).toEqual({
466
+ requestId: "deadbeef",
467
+ epoch: "00ff",
468
+ choice: "deny",
469
+ });
470
+ });
471
+
472
+ it("still parses the legacy 3-segment form (no epoch, back-compat)", () => {
441
473
  expect(parseConfigApprovalCallback("cfg:abc:approve")).toEqual({
442
474
  requestId: "abc",
443
475
  choice: "approve",
@@ -455,3 +487,58 @@ describe("parseConfigApprovalCallback", () => {
455
487
  expect(parseConfigApprovalCallback("cfg::approve")).toBeNull();
456
488
  });
457
489
  });
490
+
491
+ describe("stale-tap rejection via per-card epoch", () => {
492
+ it("bakes the minted epoch into the posted card's callback_data", async () => {
493
+ const { client, deps } = fakeDeps();
494
+ await handleRequestConfigApproval(client, baseMsg, deps);
495
+ const postCard = deps.postCard as ReturnType<typeof vi.fn>;
496
+ const kb = postCard.mock.calls[0]![0].replyMarkup as {
497
+ inline_keyboard: Array<Array<{ callback_data: string }>>;
498
+ };
499
+ expect(kb.inline_keyboard[0]![0]!.callback_data).toBe(
500
+ "cfg:req-1:cafe1234:approve",
501
+ );
502
+ expect(kb.inline_keyboard[0]![1]!.callback_data).toBe(
503
+ "cfg:req-1:cafe1234:deny",
504
+ );
505
+ });
506
+
507
+ it("resolves when the tap epoch matches the live card", async () => {
508
+ const { client, deps } = fakeDeps();
509
+ await handleRequestConfigApproval(client, baseMsg, deps);
510
+ const ok = await resolvePendingConfigApproval(
511
+ "req-1",
512
+ "approve",
513
+ deps,
514
+ "cafe1234",
515
+ );
516
+ expect(ok).toBe(true);
517
+ });
518
+
519
+ it("rejects a stale tap whose epoch does NOT match the live card", async () => {
520
+ const { client, sent, deps, editCalls } = fakeDeps();
521
+ await handleRequestConfigApproval(client, baseMsg, deps);
522
+ // A tap carrying a DIFFERENT (stale) epoch must be a no-op — no verdict
523
+ // crosses the wire, no card edit happens, and the request stays live.
524
+ const stale = await resolvePendingConfigApproval(
525
+ "req-1",
526
+ "approve",
527
+ deps,
528
+ "deadbeef",
529
+ );
530
+ expect(stale).toBe(false);
531
+ expect(sent.filter((s) => s.type === "config_approval_resolved")).toEqual(
532
+ [],
533
+ );
534
+ expect(editCalls).toEqual([]);
535
+ // The correct (live) epoch still resolves it.
536
+ const live = await resolvePendingConfigApproval(
537
+ "req-1",
538
+ "approve",
539
+ deps,
540
+ "cafe1234",
541
+ );
542
+ expect(live).toBe(true);
543
+ });
544
+ });
@@ -2,6 +2,7 @@
2
2
  // card, resolves the verdict back to hostd over IPC, and flips the card to
3
3
  // a terminal state on finalize.
4
4
 
5
+ import { randomBytes } from "node:crypto";
5
6
  import type { IpcClient } from "./ipc-server.js";
6
7
  import type {
7
8
  RequestConfigApprovalMessage,
@@ -12,6 +13,15 @@ import { truncateRawToFit } from "./oversize-card-body.js";
12
13
  /** Pending approval state — in-memory only (no SQLite per RFC §3.4). */
13
14
  interface PendingConfigApproval {
14
15
  requestId: string;
16
+ /**
17
+ * Per-card nonce embedded in the callback_data (`cfg:<requestId>:<epoch>:
18
+ * <choice>`). hostd's `approvalId` is only randomBytes(4)=32 bits and the
19
+ * callback_data carried no epoch, so a stale tap on a never-stripped old
20
+ * card could in principle resolve a same-id LIVE request. A tap whose epoch
21
+ * doesn't match the live pending entry is rejected as stale (see
22
+ * `resolvePendingConfigApproval`).
23
+ */
24
+ epoch: string;
15
25
  client: Pick<IpcClient, "send">;
16
26
  chatId: number | string;
17
27
  threadId?: number;
@@ -25,6 +35,12 @@ interface PendingConfigApproval {
25
35
 
26
36
  const pending = new Map<string, PendingConfigApproval>();
27
37
 
38
+ /** Default per-card nonce: 8 hex chars (32 bits) — enough to make a stale
39
+ * tap's epoch effectively never collide with a live card's. */
40
+ function defaultEpoch(): string {
41
+ return randomBytes(4).toString("hex");
42
+ }
43
+
28
44
  // Injected deps — gateway.ts wires these from the existing surface.
29
45
 
30
46
  export interface ConfigApprovalHandlerDeps {
@@ -43,13 +59,26 @@ export interface ConfigApprovalHandlerDeps {
43
59
  /** grammy InlineKeyboard, passed through verbatim. */
44
60
  replyMarkup: unknown;
45
61
  }) => Promise<{ messageId: number } | null>;
46
- /** Build the inline keyboard with [✅ Approve] [🚫 Deny] buttons. */
47
- buildKeyboard: (requestId: string) => unknown;
48
- /** Edit a posted card to a new body. Best-effort failures logged. */
62
+ /**
63
+ * Build the inline keyboard with [✅ Approve] [🚫 Deny] buttons. The
64
+ * `epoch` is a per-card nonce baked into the callback_data so a stale tap
65
+ * on an old card can never match a live request (see PendingConfigApproval).
66
+ */
67
+ buildKeyboard: (requestId: string, epoch: string) => unknown;
68
+ /** Mint a per-card nonce. Default: a short random hex (test seam). */
69
+ mintEpoch?: () => string;
70
+ /**
71
+ * Edit a posted card to a new body. Best-effort — failures logged.
72
+ * When `stripKeyboard` is set, the inline keyboard is removed so the
73
+ * [✅ Approve] [🚫 Deny] buttons stop being tappable on a card that has
74
+ * reached a terminal/interim state — a stale tap must never resolve a
75
+ * request (defense-in-depth alongside the per-card epoch in callback_data).
76
+ */
49
77
  editCard: (args: {
50
78
  chatId: number | string;
51
79
  messageId: number;
52
80
  text: string;
81
+ stripKeyboard?: boolean;
53
82
  }) => Promise<void>;
54
83
  /**
55
84
  * Send the full diff as a `.patch` document attachment when the
@@ -278,7 +307,11 @@ export async function handleRequestConfigApproval(
278
307
  // builder's structured `truncated` flag instead of substring-
279
308
  // matching the sentinel string (#1767 nit).
280
309
  const oversize = prelim !== msg.unifiedDiff || built.truncated;
281
- const replyMarkup = deps.buildKeyboard(msg.requestId);
310
+ // Per-card nonce — baked into callback_data so a stale tap on a previously
311
+ // posted card (e.g. one already finalized/expired) can never match a live
312
+ // pending request even if hostd minted the same 32-bit requestId.
313
+ const epoch = (deps.mintEpoch ?? defaultEpoch)();
314
+ const replyMarkup = deps.buildKeyboard(msg.requestId, epoch);
282
315
 
283
316
  const posted = await deps.postCard({
284
317
  chatId: target.chatId,
@@ -296,6 +329,7 @@ export async function handleRequestConfigApproval(
296
329
 
297
330
  const entry: PendingConfigApproval = {
298
331
  requestId: msg.requestId,
332
+ epoch,
299
333
  client,
300
334
  chatId: target.chatId,
301
335
  ...(target.threadId !== undefined ? { threadId: target.threadId } : {}),
@@ -328,14 +362,30 @@ export async function handleRequestConfigApproval(
328
362
  *
329
363
  * Returns true if THIS call resolved the request (first call wins),
330
364
  * false if it was already resolved.
365
+ *
366
+ * `expectedEpoch` guards an OPERATOR tap: the callback_data carries the
367
+ * per-card nonce, which must match the live pending entry's `epoch`. A
368
+ * mismatch means the tap came from a STALE card (already finalized/expired,
369
+ * keyboard should have been stripped) — reject it as a no-op so it can never
370
+ * resolve a different live request that happens to share the 32-bit
371
+ * requestId. Internal callers (the per-request timeout timer, finalize) pass
372
+ * `undefined` to skip the check — they already hold the authoritative entry.
331
373
  */
332
374
  export async function resolvePendingConfigApproval(
333
375
  requestId: string,
334
376
  verdict: "approve" | "deny" | "timeout",
335
377
  deps: Pick<ConfigApprovalHandlerDeps, "editCard" | "log">,
378
+ expectedEpoch?: string,
336
379
  ): Promise<boolean> {
337
380
  const entry = pending.get(requestId);
338
381
  if (!entry || entry.resolved) return false;
382
+ if (expectedEpoch !== undefined && entry.epoch !== expectedEpoch) {
383
+ deps.log?.(
384
+ `config approval stale-tap rejected (requestId=${requestId}): ` +
385
+ `epoch mismatch (tap=${expectedEpoch} live=${entry.epoch})`,
386
+ );
387
+ return false;
388
+ }
339
389
  entry.resolved = true;
340
390
  if (entry.timer !== null) {
341
391
  clearTimeout(entry.timer);
@@ -364,10 +414,14 @@ export async function resolvePendingConfigApproval(
364
414
  ? "🚫 <b>Denied</b>"
365
415
  : "⏱ <b>Expired</b>";
366
416
  try {
417
+ // Strip the keyboard: once resolved (approve/deny/timeout) the buttons
418
+ // must not stay tappable — a stale tap could otherwise re-hit the
419
+ // callback path.
367
420
  await deps.editCard({
368
421
  chatId: entry.chatId,
369
422
  messageId: entry.messageId,
370
423
  text: interim,
424
+ stripKeyboard: true,
371
425
  });
372
426
  } catch (err) {
373
427
  deps.log?.(
@@ -425,10 +479,12 @@ export async function handleRequestConfigFinalize(
425
479
  ? `✅ <b>Applied</b>${msg.detail ? `\n${escapeHtml(msg.detail)}` : ""}${liveNote}`
426
480
  : `⚠️ <b>Reconcile failed; rolled back</b>${msg.detail ? `\n${escapeHtml(msg.detail)}` : ""}`;
427
481
  try {
482
+ // Finalize is terminal — strip the keyboard so the buttons are gone.
428
483
  await deps.editCard({
429
484
  chatId: entry.chatId,
430
485
  messageId: entry.messageId,
431
486
  text: body,
487
+ stripKeyboard: true,
432
488
  });
433
489
  } catch (err) {
434
490
  deps.log?.(
@@ -483,20 +539,45 @@ export function _peekPendingConfigApprovalForTest(
483
539
  }
484
540
 
485
541
  /**
486
- * Parse `cfg:<requestId>:<choice>` callback data. Returns null on
542
+ * Parse `cfg:<requestId>:<epoch>:<choice>` callback data. Returns null on
487
543
  * malformed input. The callback handler in gateway.ts uses this +
488
- * resolvePendingConfigApproval to drive the tap resolve flow.
544
+ * resolvePendingConfigApproval (passing the parsed `epoch`) to drive the
545
+ * tap → resolve flow; the epoch is verified against the live pending entry
546
+ * so a stale tap can never resolve a same-id live request.
547
+ *
548
+ * The 3-segment legacy form `cfg:<requestId>:<choice>` (no epoch) is still
549
+ * parsed for back-compat with cards posted before this change — `epoch` is
550
+ * undefined there, so the resolver skips the epoch check (degrades to the
551
+ * keyboard-strip protection alone). New cards always carry an epoch.
489
552
  */
490
553
  export function parseConfigApprovalCallback(
491
554
  data: string,
492
- ): { requestId: string; choice: "approve" | "deny" } | null {
555
+ ): { requestId: string; epoch?: string; choice: "approve" | "deny" } | null {
493
556
  if (!data.startsWith("cfg:")) return null;
494
557
  const rest = data.slice(4);
495
- const colon = rest.lastIndexOf(":");
496
- if (colon < 0) return null;
497
- const requestId = rest.slice(0, colon);
498
- const choice = rest.slice(colon + 1);
499
- if (requestId.length === 0 || requestId.length > 64) return null;
558
+ const lastColon = rest.lastIndexOf(":");
559
+ if (lastColon < 0) return null;
560
+ const choice = rest.slice(lastColon + 1);
500
561
  if (choice !== "approve" && choice !== "deny") return null;
501
- return { requestId, choice };
562
+ const head = rest.slice(0, lastColon);
563
+ // New form carries an epoch as the segment before the choice:
564
+ // <requestId>:<epoch>. The epoch is hex (no colon), so split on the LAST
565
+ // remaining colon to separate it from a requestId (which is also hex).
566
+ const epochColon = head.lastIndexOf(":");
567
+ let requestId: string;
568
+ let epoch: string | undefined;
569
+ if (epochColon >= 0) {
570
+ requestId = head.slice(0, epochColon);
571
+ epoch = head.slice(epochColon + 1);
572
+ if (epoch.length === 0 || epoch.length > 32 || !/^[0-9a-fA-F]+$/.test(epoch)) {
573
+ // Not a well-formed epoch — treat the whole head as the legacy
574
+ // requestId (back-compat for ids that themselves contain a colon).
575
+ requestId = head;
576
+ epoch = undefined;
577
+ }
578
+ } else {
579
+ requestId = head;
580
+ }
581
+ if (requestId.length === 0 || requestId.length > 64) return null;
582
+ return { requestId, ...(epoch !== undefined ? { epoch } : {}), choice };
502
583
  }