switchroom 0.13.12 → 0.13.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +60 -5
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +290 -88
- package/telegram-plugin/final-answer-detect.ts +83 -0
- package/telegram-plugin/gateway/gateway.ts +213 -11
- package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +17 -5
- package/telegram-plugin/pending-work-progress.ts +377 -0
- package/telegram-plugin/runtime-metrics.ts +20 -0
- package/telegram-plugin/silent-end.ts +37 -11
- package/telegram-plugin/tests/final-answer-detect.test.ts +89 -0
- package/telegram-plugin/tests/pending-work-progress.test.ts +354 -0
- package/telegram-plugin/tests/silent-end.test.ts +118 -0
- package/telegram-plugin/uat/scenarios/cross-turn-pending-progress-dm.test.ts +237 -0
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for cross-turn pending-async progress (#1445).
|
|
3
|
+
*
|
|
4
|
+
* Pins the deterministic state machine + edit cadence in isolation
|
|
5
|
+
* from the gateway. The integration with gateway hooks is exercised
|
|
6
|
+
* by the UAT scenario `cross-turn-pending-progress-dm.test.ts`.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
EDIT_INTERVAL_MS,
|
|
13
|
+
MAX_LIFETIME_MS,
|
|
14
|
+
TELEGRAM_MSG_CAP,
|
|
15
|
+
__getStateForTests,
|
|
16
|
+
__resetAllForTests,
|
|
17
|
+
__setDepsForTests,
|
|
18
|
+
__tickForTests,
|
|
19
|
+
clearPending,
|
|
20
|
+
noteAsyncDispatch,
|
|
21
|
+
noteOutbound,
|
|
22
|
+
noteTurnEnd,
|
|
23
|
+
startTurn,
|
|
24
|
+
type PendingProgressEditCtx,
|
|
25
|
+
type PendingProgressMetric,
|
|
26
|
+
} from '../pending-work-progress.js'
|
|
27
|
+
|
|
28
|
+
const KEY = '12345:_'
|
|
29
|
+
|
|
30
|
+
/** What the fakes installed by `setup()` record, plus the controllable clock. */
interface Capture {
  /** Value returned by the injected `nowMs`; tests assign it to move time. */
  now: number
  /** Every context handed to the injected `editMessage`, in call order. */
  edits: PendingProgressEditCtx[]
  /** Every event handed to the injected `emitMetric`, in call order. */
  metrics: PendingProgressMetric[]
}
|
|
35
|
+
|
|
36
|
+
/**
 * Reset the module under test and install recording fakes for its deps.
 *
 * @returns the capture object the fakes write into; assign `now` to change
 *   the value the module reads through `nowMs`.
 */
function setup(): Capture {
  __resetAllForTests()
  const capture: Capture = { edits: [], metrics: [], now: 0 }

  const recordEdit = async (ctx: PendingProgressEditCtx): Promise<void> => {
    capture.edits.push(ctx)
  }
  const recordMetric = (event: PendingProgressMetric): void => {
    capture.metrics.push(event)
  }
  const readClock = (): number => capture.now

  __setDepsForTests({
    editMessage: recordEdit,
    emitMetric: recordMetric,
    nowMs: readClock,
  })
  return capture
}
|
|
50
|
+
|
|
51
|
+
/**
 * Yield to the microtask queue twice so the fire-and-forget promise chain
 * started in tick() can settle before the caller asserts.
 */
async function flush(): Promise<void> {
  for (let hop = 0; hop < 2; hop += 1) {
    await Promise.resolve()
  }
}
|
|
56
|
+
|
|
57
|
+
describe('pending-work-progress', () => {
|
|
58
|
+
// Every case starts with the kill-switch env var unset.
beforeEach(function unsetKillSwitch() {
  delete process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
})
// Reset the module under test once each case has finished.
afterEach(function resetModule() {
  __resetAllForTests()
})
|
|
64
|
+
|
|
65
|
+
// A turn that never dispatched async work must leave no pending state and
// must produce no edits or metrics after the clock moves to 60 s.
it('does nothing on turns without an async dispatch', async () => {
  const cap = setup()
  startTurn(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'simple reply' })
  noteTurnEnd(KEY)
  expect(__getStateForTests(KEY)).toBeUndefined()
  cap.now = 60_000
  __tickForTests(cap.now)
  // Let any fire-and-forget edit started by the tick settle first, so the
  // "no edits" assertion cannot pass merely because it ran too early.
  await flush()
  expect(cap.edits).toHaveLength(0)
  expect(cap.metrics).toHaveLength(0)
})
|
|
76
|
+
|
|
77
|
+
// Async dispatch plus an outbound anchor in the same turn: ending the turn
// creates pending state stamped with the clock value at turn end.
it('activates when turn ends with async dispatch + anchor', () => {
  const cap = setup()
  const anchor = { messageId: 100, text: 'worker dispatched' }

  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, anchor)
  cap.now = 1_000
  noteTurnEnd(KEY)

  const state = __getStateForTests(KEY)
  expect(state).toBeDefined()
  expect(state?.activatedAt).toBe(1_000)
  expect(state?.anchorMessageId).toBe(100)
  expect(state?.anchorOriginalText).toBe('worker dispatched')

  const startedMetric = { kind: 'pending_progress_started', chatKey: KEY }
  expect(cap.metrics).toContainEqual(startedMetric)
})
|
|
94
|
+
|
|
95
|
+
// Async work was dispatched but the turn ended with no outbound message,
// so there is no anchor to edit and no pending state may be created.
it('does not activate when async dispatch happened but no anchor was captured', async () => {
  const cap = setup()
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  // no noteOutbound — model never sent a reply (silent end)
  noteTurnEnd(KEY)
  expect(__getStateForTests(KEY)).toBeUndefined()
  cap.now = 60_000
  __tickForTests(cap.now)
  // Wait for any fire-and-forget edit to settle before asserting none fired.
  await flush()
  expect(cap.edits).toHaveLength(0)
})
|
|
106
|
+
|
|
107
|
+
// An outbound reply alone (no async dispatch in the turn) must not create
// pending state or trigger edits.
it('does not activate when an anchor exists but no async dispatch happened', async () => {
  const cap = setup()
  startTurn(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'just chatting' })
  noteTurnEnd(KEY)
  expect(__getStateForTests(KEY)).toBeUndefined()
  cap.now = 60_000
  __tickForTests(cap.now)
  // Wait for any fire-and-forget edit to settle before asserting none fired.
  await flush()
  expect(cap.edits).toHaveLength(0)
})
|
|
117
|
+
|
|
118
|
+
// Walks the fake clock through half, one and three edit intervals and checks
// which ticks produce an edit and what suffix each edit carries.
it('edits anchor with elapsed-time suffix at EDIT_INTERVAL_MS cadence', async () => {
  const cap = setup()
  const anchor = {
    messageId: 100,
    text: 'Background sleep running; awaiting completion.',
  }
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, anchor)
  cap.now = 0
  noteTurnEnd(KEY)

  // Half an interval in: too early for an edit.
  cap.now = EDIT_INTERVAL_MS / 2
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(0)

  // One full interval: the first edit targets the anchor with a "1m" suffix.
  cap.now = EDIT_INTERVAL_MS
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(1)
  const firstEdit = cap.edits[0]
  expect(firstEdit.messageId).toBe(100)
  expect(firstEdit.newText).toBe(
    'Background sleep running; awaiting completion.\n\n— still working (1m)',
  )

  // Three intervals in total: a second edit, suffix now "3m".
  cap.now = EDIT_INTERVAL_MS * 3
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(2)
  const secondEdit = cap.edits[1]
  expect(secondEdit.newText).toBe(
    'Background sleep running; awaiting completion.\n\n— still working (3m)',
  )
})
|
|
154
|
+
|
|
155
|
+
// Defence in depth: the anchor text handed to noteOutbound already ends in a
// stale "still working" suffix; the next edit must replace it, not stack on it.
it('strips prior suffix before re-appending so anchor never accumulates', async () => {
  const cap = setup()
  const staleAnchorText = 'worker dispatched\n\n— still working (12m)'

  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: staleAnchorText })
  noteTurnEnd(KEY)

  cap.now = EDIT_INTERVAL_MS
  __tickForTests(cap.now)
  await flush()

  // Only 'worker dispatched' survives as the base of the new text.
  const [firstEdit] = cap.edits
  expect(firstEdit.newText).toBe(
    'worker dispatched\n\n— still working (1m)',
  )
})
|
|
174
|
+
|
|
175
|
+
// clearPending(KEY, 'inbound') drops the state, emits a cleared metric that
// carries the elapsed time, and stops any further edits.
it("clears on 'inbound' reason — user re-engaged", async () => {
  const cap = setup()
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'wd' })
  noteTurnEnd(KEY)
  cap.now = EDIT_INTERVAL_MS * 2
  clearPending(KEY, 'inbound')
  expect(__getStateForTests(KEY)).toBeUndefined()
  expect(cap.metrics).toContainEqual({
    kind: 'pending_progress_cleared',
    chatKey: KEY,
    elapsedMs: EDIT_INTERVAL_MS * 2,
    reason: 'inbound',
  })
  // No further edits after clear.
  cap.now = EDIT_INTERVAL_MS * 3
  __tickForTests(cap.now)
  // Let any fire-and-forget edit started by the tick settle first, so the
  // "no edits" assertion cannot pass merely because it ran too early.
  await flush()
  expect(cap.edits).toHaveLength(0)
})
|
|
195
|
+
|
|
196
|
+
// Same as the inbound case, but with the 'handback' reason: state is removed
// and a cleared metric tagged with that reason is emitted.
it("clears on 'handback' reason — model is about to re-engage", () => {
  const cap = setup()
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'wd' })
  noteTurnEnd(KEY)

  clearPending(KEY, 'handback')

  expect(__getStateForTests(KEY)).toBeUndefined()
  const sawHandbackClear = cap.metrics.some(
    (metric) => metric.kind === 'pending_progress_cleared' && metric.reason === 'handback',
  )
  expect(sawHandbackClear).toBe(true)
})
|
|
206
|
+
|
|
207
|
+
// State survives a tick at half of MAX_LIFETIME_MS and is removed, with a
// 'timeout' cleared metric, by a tick just past MAX_LIFETIME_MS.
it('times out at MAX_LIFETIME_MS', async () => {
  const cap = setup()
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'wd' })
  cap.now = 0
  noteTurnEnd(KEY)

  // Midway through the budget the entry is still there.
  cap.now = MAX_LIFETIME_MS / 2
  __tickForTests(cap.now)
  await flush()
  expect(__getStateForTests(KEY)).toBeDefined()

  // One millisecond over the budget the entry is gone.
  cap.now = MAX_LIFETIME_MS + 1
  __tickForTests(cap.now)
  await flush()
  expect(__getStateForTests(KEY)).toBeUndefined()

  const sawTimeoutClear = cap.metrics.some(
    (metric) => metric.kind === 'pending_progress_cleared' && metric.reason === 'timeout',
  )
  expect(sawTimeoutClear).toBe(true)
})
|
|
226
|
+
|
|
227
|
+
// With an anchor only five characters short of TELEGRAM_MSG_CAP no edit is
// sent, yet lastEditAt still moves forward to the tick time.
it('skips edit (but advances cadence) if total would exceed Telegram message cap', async () => {
  const cap = setup()
  // Long enough that even the smallest suffix overflows the cap.
  const nearCapText = 'x'.repeat(TELEGRAM_MSG_CAP - 5)

  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: nearCapText })
  cap.now = 0
  noteTurnEnd(KEY)

  cap.now = EDIT_INTERVAL_MS
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(0)

  // The cadence marker advanced, so later ticks will not retry immediately.
  const state = __getStateForTests(KEY)
  expect(state?.lastEditAt).toBe(EDIT_INTERVAL_MS)
})
|
|
244
|
+
|
|
245
|
+
// With SWITCHROOM_DISABLE_PENDING_PROGRESS set to '1', a turn that would
// otherwise activate leaves no state and produces no edits or metrics.
it('honors the kill switch — no state, no edits, no metrics', async () => {
  const cap = setup()
  process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS = '1'
  try {
    startTurn(KEY)
    noteAsyncDispatch(KEY)
    noteOutbound(KEY, { messageId: 100, text: 'wd' })
    noteTurnEnd(KEY)
    expect(__getStateForTests(KEY)).toBeUndefined()

    cap.now = EDIT_INTERVAL_MS * 3
    __tickForTests(cap.now)
    await flush()

    expect(cap.edits).toHaveLength(0)
    expect(cap.metrics).toHaveLength(0)
  } finally {
    // Always unset the switch, even when an assertion above throws.
    delete process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
  }
})
|
|
263
|
+
|
|
264
|
+
// NOTE(review): the title says startTurn leaves cross-turn activation alone,
// but this body only checks that startTurn on an already-cleared key does
// not bring state back. Confirm whether a startTurn-while-active assertion
// is wanted here.
it('startTurn resets per-turn fields but NOT cross-turn activation', () => {
  const cap = setup()

  // Turn 1 dispatches async work, replies, and ends: pending state exists.
  startTurn(KEY)
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'wd' })
  cap.now = 1_000
  noteTurnEnd(KEY)
  const afterTurnOne = __getStateForTests(KEY)
  expect(afterTurnOne?.activatedAt).toBe(1_000)

  // Turn 2 begins after the entry was removed by clearPending (as the
  // gateway's inbound path does); startTurn against the absent key must
  // leave it absent.
  clearPending(KEY, 'inbound')
  startTurn(KEY)
  const afterTurnTwoStart = __getStateForTests(KEY)
  expect(afterTurnTwoStart).toBeUndefined()
})
|
|
281
|
+
|
|
282
|
+
// Regression for reviewer blocker #2: after turn 1 activates and the state
// is cleared for an arriving turn 2, a turn 2 that dispatches no async work
// must not re-activate against its own anchor.
it('no stale carryover: turn 1 activates, clearPending fires, turn 2 (no async) does not re-activate', async () => {
  const cap = setup()

  // Turn 1: async dispatch, a reply, then turn end; pending state is created.
  noteAsyncDispatch(KEY)
  noteOutbound(KEY, { messageId: 100, text: 'worker dispatched' })
  cap.now = 1_000
  noteTurnEnd(KEY)
  const afterTurnOne = __getStateForTests(KEY)
  expect(afterTurnOne?.activatedAt).toBe(1_000)

  // Something arrives for turn 2 (inbound, handback, cron, vault grant) and
  // the state is cleared, as the gateway's inbound/enqueue hooks do.
  cap.now = 90_000
  clearPending(KEY, 'inbound')
  expect(__getStateForTests(KEY)).toBeUndefined()

  // Turn 2: a plain reply, no async dispatch.
  noteOutbound(KEY, { messageId: 200, text: 'just answering' })
  cap.now = 91_000
  noteTurnEnd(KEY)

  // Before the fix, turn 1's pending flag survived and noteTurnEnd
  // re-activated against the new anchor, so this would have been defined.
  expect(__getStateForTests(KEY)).toBeUndefined()

  // No edit fires three edit intervals later either.
  cap.now = 91_000 + EDIT_INTERVAL_MS * 3
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(0)
})
|
|
320
|
+
|
|
321
|
+
// Two chat keys are activated side by side; each gets its own edit, and
// clearing one leaves the other ticking.
it('multiple chats — independent state', async () => {
  const cap = setup()
  const KEY_A = 'A:_'
  const KEY_B = 'B:42'

  startTurn(KEY_A)
  noteAsyncDispatch(KEY_A)
  noteOutbound(KEY_A, { messageId: 10, text: 'wd-A' })
  cap.now = 0
  noteTurnEnd(KEY_A)

  startTurn(KEY_B)
  noteAsyncDispatch(KEY_B)
  noteOutbound(KEY_B, { messageId: 20, text: 'wd-B' })
  noteTurnEnd(KEY_B)

  cap.now = EDIT_INTERVAL_MS
  __tickForTests(cap.now)
  await flush()
  expect(cap.edits).toHaveLength(2)

  // Index the edits by message id (a later edit for the same id wins).
  const byMsg = new Map<number, PendingProgressEditCtx>()
  for (const edit of cap.edits) {
    byMsg.set(edit.messageId, edit)
  }
  const editA = byMsg.get(10)
  const editB = byMsg.get(20)
  expect(editA?.chatId).toBe('A')
  expect(editA?.threadId).toBe(null)
  expect(editB?.chatId).toBe('B')
  expect(editB?.threadId).toBe(42)

  // Clear A only; the next interval must add an edit for B and none for A.
  clearPending(KEY_A, 'inbound')
  cap.now = EDIT_INTERVAL_MS * 2
  __tickForTests(cap.now)
  await flush()
  const editsForA = cap.edits.filter((edit) => edit.messageId === 10)
  const editsForB = cap.edits.filter((edit) => edit.messageId === 20)
  expect(editsForA).toHaveLength(1)
  expect(editsForB).toHaveLength(2)
})
|
|
354
|
+
})
|
|
@@ -8,8 +8,10 @@ import {
|
|
|
8
8
|
clearSilentEndState,
|
|
9
9
|
readSilentEndState,
|
|
10
10
|
recordSilentTurnEnd,
|
|
11
|
+
recordUndeliveredTurnEnd,
|
|
11
12
|
SILENT_END_MAX_RETRIES,
|
|
12
13
|
} from '../silent-end.js'
|
|
14
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
13
15
|
|
|
14
16
|
let stateDir: string
|
|
15
17
|
const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
|
|
@@ -187,6 +189,118 @@ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
|
|
|
187
189
|
})
|
|
188
190
|
})
|
|
189
191
|
|
|
192
|
+
describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
|
|
193
|
+
// The new export must be the very same function object as the old one.
it('is the same function as recordSilentTurnEnd (semantic alias)', () => {
  const alias = recordUndeliveredTurnEnd
  expect(alias).toBe(recordSilentTurnEnd)
})
|
|
196
|
+
|
|
197
|
+
// The gateway computes `finalAnswerDelivered` by OR-ing isFinalAnswerReply
|
|
198
|
+
// across every reply landed this turn, then engages the re-prompt iff the
|
|
199
|
+
// flag is still false at turn_end. These tests reproduce that exact
|
|
200
|
+
// decision: classify the turn's replies, then call recordUndeliveredTurnEnd
|
|
201
|
+
// only when no reply qualified.
|
|
202
|
+
/**
 * Classify a turn's replies with isFinalAnswerReply and, when none of them
 * qualifies, record an undelivered turn end for chat 'c' (no thread).
 *
 * @param replies the replies that landed during the turn, in order
 * @param turnKey turn key forwarded to recordUndeliveredTurnEnd
 * @returns whether any reply qualified, and whether the re-prompt path ran
 */
function simulateTurnEnd(
  replies: Array<{ text: string; disableNotification: boolean; done?: boolean }>,
  turnKey: string,
): { finalAnswerDelivered: boolean; rePromptEngaged: boolean } {
  let finalAnswerDelivered = false
  for (const reply of replies) {
    if (isFinalAnswerReply(reply)) {
      finalAnswerDelivered = true
      break
    }
  }
  if (finalAnswerDelivered) {
    return { finalAnswerDelivered, rePromptEngaged: false }
  }
  recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey })
  return { finalAnswerDelivered, rePromptEngaged: true }
}
|
|
216
|
+
|
|
217
|
+
// The #1664 shape: the only reply-tool message is a short interim ack sent
// with disable_notification:true; the real answer went out as plain
// transcript text, which the gateway shows as an ephemeral draft and
// retracts at turn_end. No reply qualifies, so the turn is undelivered.
it('#1664 regression: ack reply + answer-as-transcript → re-prompt fires', () => {
  const interimAck = { text: 'On it — give me a moment.', disableNotification: true }
  const outcome = simulateTurnEnd([interimAck], 'c:1664')

  expect(outcome.finalAnswerDelivered).toBe(false)
  expect(outcome.rePromptEngaged).toBe(true)
  // The state file exists, which is what silent-end-interrupt-stop.mjs
  // needs in order to block the stop.
  const pendingState = readSilentEndState()
  expect(pendingState).toMatchObject({ turnKey: 'c:1664', retryCount: 0 })
})
|
|
232
|
+
|
|
233
|
+
// A reply sent with notifications enabled counts as the final answer, so
// nothing is recorded.
it('a turn with a final-answer reply (notification-bearing) → re-prompt NOT engaged', () => {
  const finalReply = { text: 'Here is the answer.', disableNotification: false }
  const outcome = simulateTurnEnd([finalReply], 'c:final')

  expect(outcome.finalAnswerDelivered).toBe(true)
  expect(outcome.rePromptEngaged).toBe(false)
  expect(readSilentEndState()).toBeNull()
})
|
|
242
|
+
|
|
243
|
+
// A 500-character reply still counts as the final answer even though it was
// sent with notifications disabled.
it('a long reply mis-marked interim → re-prompt NOT engaged (length backstop)', () => {
  const longSilentReply = { text: 'x'.repeat(500), disableNotification: true }
  const outcome = simulateTurnEnd([longSilentReply], 'c:long')

  expect(outcome.finalAnswerDelivered).toBe(true)
  expect(outcome.rePromptEngaged).toBe(false)
  expect(readSilentEndState()).toBeNull()
})
|
|
252
|
+
|
|
253
|
+
// The original #1122 silent-end case: with no replies at all, "no final
// answer delivered" holds trivially and the re-prompt path still runs.
it('zero-outbound turn → re-prompt still engaged (regression of the original case)', () => {
  const outcome = simulateTurnEnd([], 'c:zero')

  expect(outcome.finalAnswerDelivered).toBe(false)
  expect(outcome.rePromptEngaged).toBe(true)
  const pendingState = readSilentEndState()
  expect(pendingState).toMatchObject({ turnKey: 'c:zero', retryCount: 0 })
})
|
|
261
|
+
|
|
262
|
+
// An ack followed by a properly delivered answer: the second reply
// qualifies, so the turn counts as answered.
it('interim ack followed by a final-answer reply in the same turn → NOT engaged', () => {
  const interimAck = { text: 'Looking into it…', disableNotification: true }
  const finalReply = { text: 'Done — the result is 42.', disableNotification: false }
  const outcome = simulateTurnEnd([interimAck, finalReply], 'c:ack-then-final')

  expect(outcome.finalAnswerDelivered).toBe(true)
  expect(outcome.rePromptEngaged).toBe(false)
  expect(readSilentEndState()).toBeNull()
})
|
|
276
|
+
|
|
277
|
+
// A short, notification-less reply still qualifies when it carries done: true.
it('stream_reply done=true counts as the final answer → NOT engaged', () => {
  const streamedDone = { text: 'ok', disableNotification: true, done: true }
  const outcome = simulateTurnEnd([streamedDone], 'c:stream-done')

  expect(outcome.finalAnswerDelivered).toBe(true)
  expect(outcome.rePromptEngaged).toBe(false)
  expect(readSilentEndState()).toBeNull()
})
|
|
286
|
+
|
|
287
|
+
// Two undelivered turn ends for the same turn key, with the Stop hook's
// retryCount bump simulated in between: the second one reports exhaustion
// and leaves no state behind.
it('exhaustion still applies on the #1664 path after the Stop-hook re-prompt', () => {
  // The first undelivered turn end writes the state file.
  const first = simulateTurnEnd(
    [{ text: 'one sec', disableNotification: true }],
    'c:exhaust',
  )
  expect(first.rePromptEngaged).toBe(true)

  // Simulate the Stop hook blocking once: rewrite the file with retryCount + 1.
  const statePath = join(stateDir, 'silent-end-pending.json')
  const current = readSilentEndState()!
  const bumped = { ...current, retryCount: current.retryCount + 1 }
  writeFileSync(statePath, JSON.stringify(bumped))

  // The re-prompted turn again ends with only an interim ack.
  const second = recordUndeliveredTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:exhaust' })
  expect(second.exhausted).toBe(true)
  expect(readSilentEndState()).toBeNull()
})
|
|
302
|
+
})
|
|
303
|
+
|
|
190
304
|
describe('silent-end-interrupt-stop hook — integration', () => {
|
|
191
305
|
const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
|
|
192
306
|
|
|
@@ -222,6 +336,10 @@ describe('silent-end-interrupt-stop hook — integration', () => {
|
|
|
222
336
|
const out = JSON.parse(r.stdout.trim())
|
|
223
337
|
expect(out.decision).toBe('block')
|
|
224
338
|
expect(out.reason).toContain('reply')
|
|
339
|
+
// #1664 — the re-prompt must offer the NO_REPLY escape hatch so a
|
|
340
|
+
// model that already delivered (or intentionally has nothing to add)
|
|
341
|
+
// can end the turn cleanly instead of being forced to re-send.
|
|
342
|
+
expect(out.reason).toContain('NO_REPLY')
|
|
225
343
|
// retryCount must have been incremented to 1
|
|
226
344
|
expect(readSilentEndState()!.retryCount).toBe(1)
|
|
227
345
|
})
|