switchroom 0.13.12 → 0.13.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ /**
2
+ * Unit tests for cross-turn pending-async progress (#1445).
3
+ *
4
+ * Pins the deterministic state machine + edit cadence in isolation
5
+ * from the gateway. The integration with gateway hooks is exercised
6
+ * by the UAT scenario `silence-poke-debug-dm.test.ts`.
7
+ */
8
+
9
+ import { afterEach, beforeEach, describe, expect, it } from 'vitest'
10
+
11
+ import {
12
+ EDIT_INTERVAL_MS,
13
+ MAX_LIFETIME_MS,
14
+ TELEGRAM_MSG_CAP,
15
+ __getStateForTests,
16
+ __resetAllForTests,
17
+ __setDepsForTests,
18
+ __tickForTests,
19
+ clearPending,
20
+ noteAsyncDispatch,
21
+ noteOutbound,
22
+ noteTurnEnd,
23
+ startTurn,
24
+ type PendingProgressEditCtx,
25
+ type PendingProgressMetric,
26
+ } from '../pending-work-progress.js'
27
+
28
+ const KEY = '12345:_'
29
+
30
+ interface Capture {
31
+ edits: PendingProgressEditCtx[]
32
+ metrics: PendingProgressMetric[]
33
+ now: number
34
+ }
35
+
36
+ function setup(): Capture {
37
+ const cap: Capture = { edits: [], metrics: [], now: 0 }
38
+ __resetAllForTests()
39
+ __setDepsForTests({
40
+ editMessage: async (ctx) => {
41
+ cap.edits.push(ctx)
42
+ },
43
+ emitMetric: (e) => {
44
+ cap.metrics.push(e)
45
+ },
46
+ nowMs: () => cap.now,
47
+ })
48
+ return cap
49
+ }
50
+
51
+ async function flush(): Promise<void> {
52
+ // Allow the fire-and-forget promise chain in tick() to settle.
53
+ await Promise.resolve()
54
+ await Promise.resolve()
55
+ }
56
+
57
+ describe('pending-work-progress', () => {
58
+ beforeEach(() => {
59
+ delete process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
60
+ })
61
+ afterEach(() => {
62
+ __resetAllForTests()
63
+ })
64
+
65
+ it('does nothing on turns without an async dispatch', () => {
66
+ const cap = setup()
67
+ startTurn(KEY)
68
+ noteOutbound(KEY, { messageId: 100, text: 'simple reply' })
69
+ noteTurnEnd(KEY)
70
+ expect(__getStateForTests(KEY)).toBeUndefined()
71
+ cap.now = 60_000
72
+ __tickForTests(cap.now)
73
+ expect(cap.edits).toHaveLength(0)
74
+ expect(cap.metrics).toHaveLength(0)
75
+ })
76
+
77
+ it('activates when turn ends with async dispatch + anchor', () => {
78
+ const cap = setup()
79
+ startTurn(KEY)
80
+ noteAsyncDispatch(KEY)
81
+ noteOutbound(KEY, { messageId: 100, text: 'worker dispatched' })
82
+ cap.now = 1_000
83
+ noteTurnEnd(KEY)
84
+ const s = __getStateForTests(KEY)
85
+ expect(s).toBeDefined()
86
+ expect(s?.activatedAt).toBe(1_000)
87
+ expect(s?.anchorMessageId).toBe(100)
88
+ expect(s?.anchorOriginalText).toBe('worker dispatched')
89
+ expect(cap.metrics).toContainEqual({
90
+ kind: 'pending_progress_started',
91
+ chatKey: KEY,
92
+ })
93
+ })
94
+
95
+ it('does not activate when async dispatch happened but no anchor was captured', () => {
96
+ const cap = setup()
97
+ startTurn(KEY)
98
+ noteAsyncDispatch(KEY)
99
+ // no noteOutbound — model never sent a reply (silent end)
100
+ noteTurnEnd(KEY)
101
+ expect(__getStateForTests(KEY)).toBeUndefined()
102
+ cap.now = 60_000
103
+ __tickForTests(cap.now)
104
+ expect(cap.edits).toHaveLength(0)
105
+ })
106
+
107
+ it('does not activate when an anchor exists but no async dispatch happened', () => {
108
+ const cap = setup()
109
+ startTurn(KEY)
110
+ noteOutbound(KEY, { messageId: 100, text: 'just chatting' })
111
+ noteTurnEnd(KEY)
112
+ expect(__getStateForTests(KEY)).toBeUndefined()
113
+ cap.now = 60_000
114
+ __tickForTests(cap.now)
115
+ expect(cap.edits).toHaveLength(0)
116
+ })
117
+
118
+ it('edits anchor with elapsed-time suffix at EDIT_INTERVAL_MS cadence', async () => {
119
+ const cap = setup()
120
+ startTurn(KEY)
121
+ noteAsyncDispatch(KEY)
122
+ noteOutbound(KEY, {
123
+ messageId: 100,
124
+ text: 'Background sleep running; awaiting completion.',
125
+ })
126
+ cap.now = 0
127
+ noteTurnEnd(KEY)
128
+
129
+ // Tick at half-interval — no edit yet.
130
+ cap.now = EDIT_INTERVAL_MS / 2
131
+ __tickForTests(cap.now)
132
+ await flush()
133
+ expect(cap.edits).toHaveLength(0)
134
+
135
+ // Tick at full interval — first edit fires, "1m" suffix.
136
+ cap.now = EDIT_INTERVAL_MS
137
+ __tickForTests(cap.now)
138
+ await flush()
139
+ expect(cap.edits).toHaveLength(1)
140
+ expect(cap.edits[0].messageId).toBe(100)
141
+ expect(cap.edits[0].newText).toBe(
142
+ 'Background sleep running; awaiting completion.\n\n— still working (1m)',
143
+ )
144
+
145
+ // Tick at 3 intervals total — second edit, "3m".
146
+ cap.now = EDIT_INTERVAL_MS * 3
147
+ __tickForTests(cap.now)
148
+ await flush()
149
+ expect(cap.edits).toHaveLength(2)
150
+ expect(cap.edits[1].newText).toBe(
151
+ 'Background sleep running; awaiting completion.\n\n— still working (3m)',
152
+ )
153
+ })
154
+
155
+ it('strips prior suffix before re-appending so anchor never accumulates', async () => {
156
+ const cap = setup()
157
+ startTurn(KEY)
158
+ noteAsyncDispatch(KEY)
159
+ // Simulate a noteOutbound for text that already carries a stale
160
+ // suffix from an earlier round (defence in depth).
161
+ noteOutbound(KEY, {
162
+ messageId: 100,
163
+ text: 'worker dispatched\n\n— still working (12m)',
164
+ })
165
+ noteTurnEnd(KEY)
166
+ cap.now = EDIT_INTERVAL_MS
167
+ __tickForTests(cap.now)
168
+ await flush()
169
+ // The new edit should be based on 'worker dispatched' alone.
170
+ expect(cap.edits[0].newText).toBe(
171
+ 'worker dispatched\n\n— still working (1m)',
172
+ )
173
+ })
174
+
175
+ it("clears on 'inbound' reason — user re-engaged", () => {
176
+ const cap = setup()
177
+ startTurn(KEY)
178
+ noteAsyncDispatch(KEY)
179
+ noteOutbound(KEY, { messageId: 100, text: 'wd' })
180
+ noteTurnEnd(KEY)
181
+ cap.now = EDIT_INTERVAL_MS * 2
182
+ clearPending(KEY, 'inbound')
183
+ expect(__getStateForTests(KEY)).toBeUndefined()
184
+ expect(cap.metrics).toContainEqual({
185
+ kind: 'pending_progress_cleared',
186
+ chatKey: KEY,
187
+ elapsedMs: EDIT_INTERVAL_MS * 2,
188
+ reason: 'inbound',
189
+ })
190
+ // No further edits after clear.
191
+ cap.now = EDIT_INTERVAL_MS * 3
192
+ __tickForTests(cap.now)
193
+ expect(cap.edits).toHaveLength(0)
194
+ })
195
+
196
+ it("clears on 'handback' reason — model is about to re-engage", () => {
197
+ const cap = setup()
198
+ startTurn(KEY)
199
+ noteAsyncDispatch(KEY)
200
+ noteOutbound(KEY, { messageId: 100, text: 'wd' })
201
+ noteTurnEnd(KEY)
202
+ clearPending(KEY, 'handback')
203
+ expect(__getStateForTests(KEY)).toBeUndefined()
204
+ expect(cap.metrics.some((m) => m.kind === 'pending_progress_cleared' && m.reason === 'handback')).toBe(true)
205
+ })
206
+
207
+ it('times out at MAX_LIFETIME_MS', async () => {
208
+ const cap = setup()
209
+ startTurn(KEY)
210
+ noteAsyncDispatch(KEY)
211
+ noteOutbound(KEY, { messageId: 100, text: 'wd' })
212
+ cap.now = 0
213
+ noteTurnEnd(KEY)
214
+ // Halfway — still active.
215
+ cap.now = MAX_LIFETIME_MS / 2
216
+ __tickForTests(cap.now)
217
+ await flush()
218
+ expect(__getStateForTests(KEY)).toBeDefined()
219
+ // Past the budget — auto-cleared.
220
+ cap.now = MAX_LIFETIME_MS + 1
221
+ __tickForTests(cap.now)
222
+ await flush()
223
+ expect(__getStateForTests(KEY)).toBeUndefined()
224
+ expect(cap.metrics.some((m) => m.kind === 'pending_progress_cleared' && m.reason === 'timeout')).toBe(true)
225
+ })
226
+
227
+ it('skips edit (but advances cadence) if total would exceed Telegram message cap', async () => {
228
+ const cap = setup()
229
+ startTurn(KEY)
230
+ noteAsyncDispatch(KEY)
231
+ // Anchor text long enough that even the smallest suffix overflows.
232
+ const bigText = 'x'.repeat(TELEGRAM_MSG_CAP - 5)
233
+ noteOutbound(KEY, { messageId: 100, text: bigText })
234
+ cap.now = 0
235
+ noteTurnEnd(KEY)
236
+ cap.now = EDIT_INTERVAL_MS
237
+ __tickForTests(cap.now)
238
+ await flush()
239
+ expect(cap.edits).toHaveLength(0)
240
+ // lastEditAt still advanced — we won't spin retrying every tick.
241
+ const s = __getStateForTests(KEY)
242
+ expect(s?.lastEditAt).toBe(EDIT_INTERVAL_MS)
243
+ })
244
+
245
+ it('honors the kill switch — no state, no edits, no metrics', async () => {
246
+ const cap = setup()
247
+ process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS = '1'
248
+ try {
249
+ startTurn(KEY)
250
+ noteAsyncDispatch(KEY)
251
+ noteOutbound(KEY, { messageId: 100, text: 'wd' })
252
+ noteTurnEnd(KEY)
253
+ expect(__getStateForTests(KEY)).toBeUndefined()
254
+ cap.now = EDIT_INTERVAL_MS * 3
255
+ __tickForTests(cap.now)
256
+ await flush()
257
+ expect(cap.edits).toHaveLength(0)
258
+ expect(cap.metrics).toHaveLength(0)
259
+ } finally {
260
+ delete process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
261
+ }
262
+ })
263
+
264
+ it('startTurn resets per-turn fields but NOT cross-turn activation', () => {
265
+ const cap = setup()
266
+ // Turn 1: dispatches async, ends, pending-progress active.
267
+ startTurn(KEY)
268
+ noteAsyncDispatch(KEY)
269
+ noteOutbound(KEY, { messageId: 100, text: 'wd' })
270
+ cap.now = 1_000
271
+ noteTurnEnd(KEY)
272
+ expect(__getStateForTests(KEY)?.activatedAt).toBe(1_000)
273
+ // Turn 2 starts (e.g. via the gateway's inbound path that already
274
+ // called clearPending). startTurn resets per-turn fields but the
275
+ // map entry has been deleted by clearPending, so this should
276
+ // simply do nothing dangerous if called against an absent key.
277
+ clearPending(KEY, 'inbound')
278
+ startTurn(KEY)
279
+ expect(__getStateForTests(KEY)).toBeUndefined()
280
+ })
281
+
282
+ it('no stale carryover: turn 1 activates, clearPending fires, turn 2 (no async) does not re-activate', async () => {
283
+ // Reproduces the reviewer's blocker #2 path: turn 1 with async
284
+ // dispatch activates pending-progress; an arriving turn 2 (real
285
+ // inbound OR synthesised wake) must clear state so a turn 2 that
286
+ // does NOT itself dispatch async never inherits the prior turn's
287
+ // `pending=true` and re-activates against turn 2's anchor.
288
+ const cap = setup()
289
+ // ── Turn 1: dispatch async, reply, end — activates.
290
+ noteAsyncDispatch(KEY)
291
+ noteOutbound(KEY, { messageId: 100, text: 'worker dispatched' })
292
+ cap.now = 1_000
293
+ noteTurnEnd(KEY)
294
+ expect(__getStateForTests(KEY)?.activatedAt).toBe(1_000)
295
+
296
+ // ── Inbound (or handback / cron / vault grant) for turn 2.
297
+ // Gateway clears state — exactly what the inbound/enqueue hooks
298
+ // wire up at handleInbound + handleSessionEvent.enqueue.
299
+ cap.now = 90_000
300
+ clearPending(KEY, 'inbound')
301
+ expect(__getStateForTests(KEY)).toBeUndefined()
302
+
303
+ // ── Turn 2: reply only, NO async dispatch this turn.
304
+ noteOutbound(KEY, { messageId: 200, text: 'just answering' })
305
+ cap.now = 91_000
306
+ noteTurnEnd(KEY)
307
+
308
+ // Turn 2 must NOT activate — no async was dispatched in this turn.
309
+ // Pre-fix this assertion would fail because the prior turn's
310
+ // `pending=true` was never reset and `noteTurnEnd` re-activated
311
+ // against turn 2's fresh anchor.
312
+ expect(__getStateForTests(KEY)).toBeUndefined()
313
+
314
+ // Confirm: no edits fire over the next several poll intervals.
315
+ cap.now = 91_000 + EDIT_INTERVAL_MS * 3
316
+ __tickForTests(cap.now)
317
+ await flush()
318
+ expect(cap.edits).toHaveLength(0)
319
+ })
320
+
321
+ it('multiple chats — independent state', async () => {
322
+ const cap = setup()
323
+ const KEY_A = 'A:_'
324
+ const KEY_B = 'B:42'
325
+ startTurn(KEY_A)
326
+ noteAsyncDispatch(KEY_A)
327
+ noteOutbound(KEY_A, { messageId: 10, text: 'wd-A' })
328
+ cap.now = 0
329
+ noteTurnEnd(KEY_A)
330
+
331
+ startTurn(KEY_B)
332
+ noteAsyncDispatch(KEY_B)
333
+ noteOutbound(KEY_B, { messageId: 20, text: 'wd-B' })
334
+ noteTurnEnd(KEY_B)
335
+
336
+ cap.now = EDIT_INTERVAL_MS
337
+ __tickForTests(cap.now)
338
+ await flush()
339
+ expect(cap.edits).toHaveLength(2)
340
+ const byMsg = new Map(cap.edits.map((e) => [e.messageId, e]))
341
+ expect(byMsg.get(10)?.chatId).toBe('A')
342
+ expect(byMsg.get(10)?.threadId).toBe(null)
343
+ expect(byMsg.get(20)?.chatId).toBe('B')
344
+ expect(byMsg.get(20)?.threadId).toBe(42)
345
+
346
+ // Clear A only; B should keep ticking.
347
+ clearPending(KEY_A, 'inbound')
348
+ cap.now = EDIT_INTERVAL_MS * 2
349
+ __tickForTests(cap.now)
350
+ await flush()
351
+ expect(cap.edits.filter((e) => e.messageId === 10)).toHaveLength(1)
352
+ expect(cap.edits.filter((e) => e.messageId === 20)).toHaveLength(2)
353
+ })
354
+ })
@@ -8,8 +8,10 @@ import {
8
8
  clearSilentEndState,
9
9
  readSilentEndState,
10
10
  recordSilentTurnEnd,
11
+ recordUndeliveredTurnEnd,
11
12
  SILENT_END_MAX_RETRIES,
12
13
  } from '../silent-end.js'
14
+ import { isFinalAnswerReply } from '../final-answer-detect.js'
13
15
 
14
16
  let stateDir: string
15
17
  const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
@@ -187,6 +189,118 @@ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
187
189
  })
188
190
  })
189
191
 
192
+ describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
193
+ it('is the same function as recordSilentTurnEnd (semantic alias)', () => {
194
+ expect(recordUndeliveredTurnEnd).toBe(recordSilentTurnEnd)
195
+ })
196
+
197
+ // The gateway computes `finalAnswerDelivered` by OR-ing isFinalAnswerReply
198
+ // across every reply landed this turn, then engages the re-prompt iff the
199
+ // flag is still false at turn_end. These tests reproduce that exact
200
+ // decision: classify the turn's replies, then call recordUndeliveredTurnEnd
201
+ // only when no reply qualified.
202
+ function simulateTurnEnd(
203
+ replies: Array<{ text: string; disableNotification: boolean; done?: boolean }>,
204
+ turnKey: string,
205
+ ): { finalAnswerDelivered: boolean; rePromptEngaged: boolean } {
206
+ const finalAnswerDelivered = replies.some((r) =>
207
+ isFinalAnswerReply(r),
208
+ )
209
+ let rePromptEngaged = false
210
+ if (finalAnswerDelivered === false) {
211
+ recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey })
212
+ rePromptEngaged = true
213
+ }
214
+ return { finalAnswerDelivered, rePromptEngaged }
215
+ }
216
+
217
+ it('#1664 regression: ack reply + answer-as-transcript → re-prompt fires', () => {
218
+ // The exact #1664 shape: the model sent a short interim ack via the
219
+ // reply tool (disable_notification:true), then ended the turn with its
220
+ // real answer as plain transcript text — which the gateway renders into
221
+ // an ephemeral draft and retracts at turn_end, never finalized. No
222
+ // reply qualified as the final answer, so the turn is undelivered.
223
+ const r = simulateTurnEnd(
224
+ [{ text: 'On it — give me a moment.', disableNotification: true }],
225
+ 'c:1664',
226
+ )
227
+ expect(r.finalAnswerDelivered).toBe(false)
228
+ expect(r.rePromptEngaged).toBe(true)
229
+ // State file written so silent-end-interrupt-stop.mjs blocks the stop.
230
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'c:1664', retryCount: 0 })
231
+ })
232
+
233
+ it('a turn with a final-answer reply (notification-bearing) → re-prompt NOT engaged', () => {
234
+ const r = simulateTurnEnd(
235
+ [{ text: 'Here is the answer.', disableNotification: false }],
236
+ 'c:final',
237
+ )
238
+ expect(r.finalAnswerDelivered).toBe(true)
239
+ expect(r.rePromptEngaged).toBe(false)
240
+ expect(readSilentEndState()).toBeNull()
241
+ })
242
+
243
+ it('a long reply mis-marked interim → re-prompt NOT engaged (length backstop)', () => {
244
+ const r = simulateTurnEnd(
245
+ [{ text: 'x'.repeat(500), disableNotification: true }],
246
+ 'c:long',
247
+ )
248
+ expect(r.finalAnswerDelivered).toBe(true)
249
+ expect(r.rePromptEngaged).toBe(false)
250
+ expect(readSilentEndState()).toBeNull()
251
+ })
252
+
253
+ it('zero-outbound turn → re-prompt still engaged (regression of the original case)', () => {
254
+ // No replies at all — the original #1122 silent-end case is now just
255
+ // the subset of "no final answer delivered" where nothing landed.
256
+ const r = simulateTurnEnd([], 'c:zero')
257
+ expect(r.finalAnswerDelivered).toBe(false)
258
+ expect(r.rePromptEngaged).toBe(true)
259
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'c:zero', retryCount: 0 })
260
+ })
261
+
262
+ it('interim ack followed by a final-answer reply in the same turn → NOT engaged', () => {
263
+ // The model ack'd first then properly delivered — finalAnswerDelivered
264
+ // latches true on the second reply; the turn is answered.
265
+ const r = simulateTurnEnd(
266
+ [
267
+ { text: 'Looking into it…', disableNotification: true },
268
+ { text: 'Done — the result is 42.', disableNotification: false },
269
+ ],
270
+ 'c:ack-then-final',
271
+ )
272
+ expect(r.finalAnswerDelivered).toBe(true)
273
+ expect(r.rePromptEngaged).toBe(false)
274
+ expect(readSilentEndState()).toBeNull()
275
+ })
276
+
277
+ it('stream_reply done=true counts as the final answer → NOT engaged', () => {
278
+ const r = simulateTurnEnd(
279
+ [{ text: 'ok', disableNotification: true, done: true }],
280
+ 'c:stream-done',
281
+ )
282
+ expect(r.finalAnswerDelivered).toBe(true)
283
+ expect(r.rePromptEngaged).toBe(false)
284
+ expect(readSilentEndState()).toBeNull()
285
+ })
286
+
287
+ it('exhaustion still applies on the #1664 path after the Stop-hook re-prompt', () => {
288
+ // First undelivered turn-end writes state.
289
+ expect(simulateTurnEnd(
290
+ [{ text: 'one sec', disableNotification: true }],
291
+ 'c:exhaust',
292
+ ).rePromptEngaged).toBe(true)
293
+ // Stop hook blocks once and bumps retryCount (simulated).
294
+ const path = join(stateDir, 'silent-end-pending.json')
295
+ const s = readSilentEndState()!
296
+ writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
297
+ // Re-prompted turn STILL ends with only an interim ack → exhausted.
298
+ const second = recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:exhaust' })
299
+ expect(second.exhausted).toBe(true)
300
+ expect(readSilentEndState()).toBeNull()
301
+ })
302
+ })
303
+
190
304
  describe('silent-end-interrupt-stop hook — integration', () => {
191
305
  const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
192
306
 
@@ -222,6 +336,10 @@ describe('silent-end-interrupt-stop hook — integration', () => {
222
336
  const out = JSON.parse(r.stdout.trim())
223
337
  expect(out.decision).toBe('block')
224
338
  expect(out.reason).toContain('reply')
339
+ // #1664 — the re-prompt must offer the NO_REPLY escape hatch so a
340
+ // model that already delivered (or intentionally has nothing to add)
341
+ // can end the turn cleanly instead of being forced to re-send.
342
+ expect(out.reason).toContain('NO_REPLY')
225
343
  // retryCount must have been incremented to 1
226
344
  expect(readSilentEndState()!.retryCount).toBe(1)
227
345
  })