switchroom 0.13.12 → 0.13.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ /**
2
+ * Cross-turn pending-async progress — issue #1445.
3
+ *
4
+ * When a turn ends with pending background async work (the model
5
+ * dispatched `Agent` / `Task` and ended its turn before the worker
6
+ * returned), keep editing the model's last reply *in place* at
7
+ * intervals so the user sees ambient liveness during the wait — without
8
+ * any new pinged messages and without re-introducing the retired
9
+ * progress card.
10
+ *
11
+ * Background data justifying this module (2026-05-23 forensic + UAT):
12
+ *
13
+ * - silence-poke success rate is 0–7% across hundreds of fires
14
+ * (finn: 0/78, clerk: 6/91, klanker: 5/158) — the polite levels
15
+ * reach the model as `<system-reminder>`s piggybacked on the next
16
+ * tool result, so they (a) only land if the model is actively
17
+ * cycling tools, (b) compete with hundreds of other tokens, and (c)
18
+ * only ever exist while the turn is open. The 300s framework
19
+ * fallback is the only user-visible silence-poke output, and its
20
+ * first job is to *kill the wedged turn*.
21
+ *
22
+ * - The dominant user-visible failure mode (issue #1445) is in fact
23
+ * cross-turn: the model calls `Agent` (or `Bash` with
24
+ * `run_in_background:true`), sends one ack reply that pings, then
25
+ * ends the turn. The silence-poke ladder is *gone* the moment
26
+ * endTurn() fires. The user then sees nothing for 10–30+ minutes
27
+ * until the worker returns. A live UAT confirmed: a deliberate
28
+ * `sleep 350` prompt produced one `[PING] Background sleep running;
29
+ * awaiting completion notification.` at +19s and the turn ended.
30
+ *
31
+ * Mechanism:
32
+ *
33
+ * tool_use(Agent|Task) → mark chat key `pending=true`
34
+ * outbound reply → capture anchor (messageId, text)
35
+ * turn_end with pending+anchor → activate the timer for the key
36
+ * tick (polled every 5s; edits are → editMessageText against the anchor,
37
+ * spaced EDIT_INTERVAL_MS apart) appending/refreshing the suffix
38
+ * "\n\n— still working (Nm)"
39
+ * inbound user message → clear (user re-engaged or moved on)
40
+ * subagent_handback inject → clear (model about to re-engage)
41
+ * MAX_LIFETIME_MS budget cap → clear (give up; 30 min default)
42
+ *
43
+ * Single shared timer for the whole gateway — like silence-poke's
44
+ * `tick()`, the per-key cost is O(map size) per poll. The poll
45
+ * interval is short (5s) but edits are spaced at EDIT_INTERVAL_MS so
46
+ * the Telegram bot.api editMessageText rate stays well under limits.
47
+ *
48
+ * Edits are plain text (no parseMode). The suffix is appended to the
49
+ * model's authored text; on subsequent edits the prior suffix is
50
+ * stripped before re-appending so the message never accumulates
51
+ * duplicate suffixes.
52
+ *
53
+ * Kill switch: `SWITCHROOM_DISABLE_PENDING_PROGRESS=1` disables the
54
+ * whole subsystem. The conversational-pacing prompt is unaffected.
55
+ */
56
+
57
/** Minimum spacing between two in-place edits of the same anchor message. */
export const EDIT_INTERVAL_MS = 60_000

/** Default period of the shared poll timer (overridable via `pollIntervalMs`). */
export const POLL_INTERVAL_MS = 5_000

/** Budget after which an activated entry is cleared with reason `timeout`
 *  (30 minutes). */
export const MAX_LIFETIME_MS = 30 * 60 * 1_000

/**
 * Upper bound on the edited text length. Telegram's message limit is 4096;
 * the gap leaves headroom for the suffix and any escape expansion. When the
 * anchor text plus suffix would exceed this, the edit is skipped (the user
 * keeps seeing the original) instead of truncating the model's prose.
 */
export const TELEGRAM_MSG_CAP = 4000
65
+
66
/**
 * Matches the progress suffix this module appends: a blank line, then
 * "— still working (<digits>m)". It is anchored to end-of-string so that it
 * only strips a trailing suffix of ours and leaves any look-alike text in the
 * middle of the model's reply untouched.
 */
const SUFFIX_RE = /\n\n— still working \(\d+m\)$/
73
+
74
/** Everything `editMessage` needs to rewrite one anchor message in place. */
export interface PendingProgressEditCtx {
  /** Chat portion of the state key (text before the first `:`). */
  chatId: string
  /** Thread portion of the state key, or `null` when the key carries none. */
  threadId: number | null
  /** Id of the captured anchor message being edited. */
  messageId: number
  /** Full replacement text: the anchor's original text plus the suffix. */
  newText: string
}
80
+
81
/**
 * Metric events emitted by this module, discriminated on `kind`.
 *
 * The shape mirrors the `pending_progress_*` variants of
 * `RuntimeMetricEvent` in `runtime-metrics.ts`, so the gateway can wire
 * `emitMetric: emitRuntimeMetric` without a cast.
 *
 *  - `pending_progress_started` — carries only the chat key.
 *  - `pending_progress_edited`  — always carries the cumulative elapsed time.
 *  - `pending_progress_cleared` — optional elapsed time plus the reason
 *    (`inbound` | `handback` | `timeout` | `manual`).
 */
export type PendingProgressMetric =
  | {
      kind: 'pending_progress_started'
      chatKey: string
    }
  | {
      kind: 'pending_progress_edited'
      chatKey: string
      elapsedMs: number
    }
  | {
      kind: 'pending_progress_cleared'
      chatKey: string
      elapsedMs?: number
      reason?: string
    }
99
+
100
/** Collaborators injected through `startTimer` (or the test-only setter). */
export interface PendingProgressDeps {
  /** Performs the in-place edit. A rejection is logged to stderr by the
   *  tick loop and never propagates. */
  editMessage: (ctx: PendingProgressEditCtx) => Promise<void>
  /** Optional metrics sink for started / edited / cleared events. */
  emitMetric?: (event: PendingProgressMetric) => void
  /** Optional clock override for tests; defaults to `Date.now()`. */
  nowMs?: () => number
  /** Optional poll interval override for tests; defaults to
   *  `POLL_INTERVAL_MS`. */
  pollIntervalMs?: number
}
108
+
109
/** Per-chat-key bookkeeping held in the module-level state map. */
interface State {
  /** Set once an async dispatch was noted for this key in the current turn;
   *  reset to false by `startTurn`. */
  pending: boolean
  /** Message id of the most recently captured outbound reply (the anchor),
   *  or null when none has been captured this turn. */
  anchorMessageId: number | null
  /** Anchor text as the model wrote it, with any trailing pending-progress
   *  suffix already stripped. Every edit is rebuilt from this base. */
  anchorOriginalText: string
  /** Wall-clock ms at which cross-turn progress was activated (turn end with
   *  pending + anchor). null before activation. */
  activatedAt: number | null
  /** Wall-clock ms used to gate the EDIT_INTERVAL_MS cadence. null before
   *  activation; seeded with the activation time at turn end and then
   *  advanced on every edit attempt. */
  lastEditAt: number | null
}
126
+
127
/** Per-chat-key state, keyed by the `<chatId>:<threadIdOrEmpty>` string. */
const stateByKey: Map<string, State> = new Map()

/** Handle of the single shared poll timer; null while stopped. */
let timer: ReturnType<typeof setInterval> | null = null

/** Dependencies currently installed; null while the timer is stopped. */
let activeDeps: PendingProgressDeps | null = null
130
+
131
/**
 * Kill-switch check. The subsystem is disabled when
 * `SWITCHROOM_DISABLE_PENDING_PROGRESS` is exactly `1` or `true`; any other
 * value (or an unset variable) leaves it enabled.
 */
function enabled(): boolean {
  const flag = process.env.SWITCHROOM_DISABLE_PENDING_PROGRESS
  const disabled = flag === '1' || flag === 'true'
  return !disabled
}
135
+
136
/** Current time in ms: the injected clock when deps provide one, otherwise
 *  `Date.now()`. */
function nowMs(): number {
  if (activeDeps?.nowMs) {
    return activeDeps.nowMs()
  }
  return Date.now()
}
139
+
140
/** Return the state entry for `key`, inserting a blank (not pending, no
 *  anchor, not activated) entry first when none exists. */
function ensure(key: string): State {
  const existing = stateByKey.get(key)
  if (existing) return existing

  const fresh: State = {
    pending: false,
    anchorMessageId: null,
    anchorOriginalText: '',
    activatedAt: null,
    lastEditAt: null,
  }
  stateByKey.set(key, fresh)
  return fresh
}
154
+
155
/**
 * Begin a fresh turn for `key`: reset the per-turn `pending` flag and the
 * per-turn anchor (message id and text).
 *
 * `activatedAt` / `lastEditAt` are deliberately left alone. They describe
 * the PRIOR turn's cross-turn progress and are removed only by
 * `clearPending`, never by a new turn. Per the design notes, the gateway
 * calls `clearPending` from two sites for full coverage:
 *
 *   1. `handleInbound` (a real user message) → `clearPending('inbound')`,
 *      the fast path, fired before the new turn atom is built.
 *   2. the `enqueue` case of `handleSessionEvent` (every fresh turn atom)
 *      → `clearPending('handback')`, the backstop for synthesised wakes
 *      (subagent-handback, cron, vault grant, restart marker) that push
 *      straight to `pendingInboundBuffer` and bypass `handleInbound`.
 *      It is idempotent with respect to the first clear.
 *
 * Because those clears delete the entry, this function is expected to find
 * nothing to reset in practice; it is kept for test ergonomics and as
 * defence-in-depth.
 *
 * No-op when the kill switch is set or when `key` has no entry.
 */
export function startTurn(key: string): void {
  if (enabled()) {
    const entry = stateByKey.get(key)
    if (entry != null) {
      // Per-turn fields only; activation bookkeeping belongs to the prior
      // turn and is cleared separately.
      entry.pending = false
      entry.anchorMessageId = null
      entry.anchorOriginalText = ''
    }
  }
}
185
+
186
/**
 * Record that the current turn for `key` dispatched async background work.
 * Intended to be called when the gateway observes a `tool_use` for `Agent`
 * or `Task`. Idempotent; creates the state entry if needed. No-op when the
 * kill switch is set.
 */
export function noteAsyncDispatch(key: string): void {
  if (enabled()) {
    const entry = ensure(key)
    entry.pending = true
  }
}
195
+
196
/**
 * Remember an outbound reply as the candidate anchor for cross-turn edits.
 * Intended to be called on every successful bot reply send; the latest call
 * wins.
 *
 * Should the text already end with a pending-progress suffix (rare — only
 * if we sent something to ourselves), it is stripped before storing so later
 * edits never double-suffix. No-op when the kill switch is set.
 *
 * @param key  Chat key the reply belongs to.
 * @param opts The sent message's id and text.
 */
export function noteOutbound(
  key: string,
  opts: { messageId: number; text: string },
): void {
  if (!enabled()) return
  const { messageId, text } = opts
  const entry = ensure(key)
  entry.anchorMessageId = messageId
  entry.anchorOriginalText = text.replace(SUFFIX_RE, '')
}
212
+
213
/**
 * Turn-end hook. When the turn both dispatched async work (`pending`) and
 * captured an anchor, activate cross-turn progress for `key` so the timer
 * starts editing, and emit `pending_progress_started`.
 *
 * In every other case (not pending, or no anchor) the entry is dropped —
 * there is nothing to do. No-op when the kill switch is set or when `key`
 * has no entry.
 */
export function noteTurnEnd(key: string): void {
  if (!enabled()) return
  const entry = stateByKey.get(key)
  if (entry == null) return

  const shouldActivate = entry.pending && entry.anchorMessageId != null
  if (!shouldActivate) {
    stateByKey.delete(key)
    return
  }

  const activatedAt = nowMs()
  entry.activatedAt = activatedAt
  // Seed lastEditAt with the activation time so the first edit fires one
  // full EDIT_INTERVAL_MS after activation — not immediately.
  entry.lastEditAt = activatedAt
  activeDeps?.emitMetric?.({
    kind: 'pending_progress_started',
    chatKey: key,
  })
}
238
+
239
/**
 * Drop all pending-progress state for a chat and emit
 * `pending_progress_cleared`. Does nothing when `key` has no entry.
 *
 * Reasons:
 *   'inbound'  — the user sent a new message; they have re-engaged
 *   'handback' — switchroom injected a subagent_handback channel turn
 *   'timeout'  — MAX_LIFETIME_MS was exceeded
 *   'manual'   — test / debug
 *
 * The emitted `elapsedMs` is the time since activation, or 0 when the entry
 * was never activated.
 */
export function clearPending(
  key: string,
  reason: 'inbound' | 'handback' | 'timeout' | 'manual',
): void {
  const entry = stateByKey.get(key)
  if (entry === undefined) return

  let elapsedMs = 0
  if (entry.activatedAt != null) {
    elapsedMs = nowMs() - entry.activatedAt
  }
  stateByKey.delete(key)
  activeDeps?.emitMetric?.({
    kind: 'pending_progress_cleared',
    chatKey: key,
    elapsedMs,
    reason,
  })
}
261
+
262
/**
 * Install `deps` and start the single shared poll timer. Idempotent: when a
 * timer is already running the call is ignored (and the previously
 * installed deps stay in place). Honours the kill switch — no-op when
 * disabled.
 *
 * The timer is unref'd where the runtime supports it.
 */
export function startTimer(deps: PendingProgressDeps): void {
  if (!enabled() || timer != null) return

  activeDeps = deps
  const periodMs = deps.pollIntervalMs ?? POLL_INTERVAL_MS
  const handle = setInterval(() => tick(nowMs()), periodMs)
  if (typeof handle.unref === 'function') handle.unref()
  timer = handle
}
274
+
275
/** Stop the shared poll timer (if running) and uninstall the deps.
 *  Idempotent. */
export function stopTimer(): void {
  const handle = timer
  if (handle != null) {
    clearInterval(handle)
    timer = null
  }
  activeDeps = null
}
283
+
284
/**
 * Parse `<chatId>:<threadIdOrEmpty>` back into structured fields, matching
 * the `statusKey` shape used throughout the gateway.
 *
 * The key is split at the FIRST `:`. A key without a `:` is treated as a
 * bare chat id. The tail becomes `threadId` only when it is a plain decimal
 * integer; anything else — empty, `undefined`, whitespace, fractions, hex —
 * yields `null`. (`Number()` alone would coerce a whitespace tail to `0` and
 * accept `1.5` / `0x10`, producing a bogus thread id.)
 *
 * @param key Chat key as stored in the state map.
 * @returns The chat id and the numeric thread id, or `null` for no thread.
 */
function parseKey(key: string): { chatId: string; threadId: number | null } {
  const sep = key.indexOf(':')
  if (sep < 0) return { chatId: key, threadId: null }

  const chatId = key.slice(0, sep)
  const tail = key.slice(sep + 1)
  if (!/^-?\d+$/.test(tail)) return { chatId, threadId: null }

  const n = Number(tail)
  return { chatId, threadId: Number.isSafeInteger(n) ? n : null }
}
297
+
298
/**
 * One poll of the shared timer: walk every state entry and, for each
 * activated one, either expire it or (at most once per EDIT_INTERVAL_MS)
 * fire an in-place edit that appends/refreshes the progress suffix.
 *
 * Per entry, in order:
 *   1. not activated                 → skip
 *   2. lifetime ≥ MAX_LIFETIME_MS    → `clearPending(key, 'timeout')`
 *   3. no anchor captured            → skip (still subject to step 2, so an
 *                                      activated entry whose anchor was
 *                                      reset cannot linger forever)
 *   4. last edit < EDIT_INTERVAL_MS  → skip
 *   5. text + suffix over the cap    → record the attempt, skip the edit
 *   6. otherwise                     → fire-and-forget `editMessage`
 *
 * @param now Wall-clock ms to evaluate against (injected for determinism).
 */
function tick(now: number): void {
  // Capture deps once. The edit chain below settles asynchronously, and
  // stopTimer() may null `activeDeps` in the meantime.
  const deps = activeDeps
  if (deps == null) return

  // Deleting the current key from a Map while iterating it is safe.
  for (const [key, s] of stateByKey.entries()) {
    if (s.activatedAt == null) continue

    const elapsed = now - s.activatedAt
    if (elapsed >= MAX_LIFETIME_MS) {
      clearPending(key, 'timeout')
      continue
    }

    const anchorMessageId = s.anchorMessageId
    if (anchorMessageId == null) continue

    // Fall back to the activation time when no edit has been recorded, so
    // the cadence is always measured from a real timestamp.
    const sinceEdit = now - (s.lastEditAt ?? s.activatedAt)
    if (sinceEdit < EDIT_INTERVAL_MS) continue

    // Build suffix from elapsed wall-clock. Always at least 1m so the
    // user-visible counter reads honestly (we only edit at intervals
    // ≥ EDIT_INTERVAL_MS = 60s).
    const minutes = Math.max(1, Math.round(elapsed / 60_000))
    const suffix = `\n\n— still working (${minutes}m)`
    const newText = s.anchorOriginalText + suffix

    // Count this as an attempt whether or not the edit is sent, so an
    // over-long message is re-checked once per interval, not once per poll.
    s.lastEditAt = now

    if (newText.length > TELEGRAM_MSG_CAP) {
      // Don't truncate the model's prose — just skip this edit.
      // The previous edit (or the original) is still visible.
      continue
    }

    const { chatId, threadId } = parseKey(key)
    const editCtx: PendingProgressEditCtx = {
      chatId,
      threadId,
      messageId: anchorMessageId,
      newText,
    }

    // Fire-and-forget so a slow edit doesn't block the tick loop.
    // Errors are logged but never bubble (a 429 / "message not modified"
    // / chat-deleted is a soft failure).
    void Promise.resolve()
      .then(() => deps.editMessage(editCtx))
      .then(() => {
        deps.emitMetric?.({
          kind: 'pending_progress_edited',
          chatKey: key,
          elapsedMs: elapsed,
        })
      })
      .catch((err: unknown) => {
        // Rejections are not guaranteed to be Error instances.
        const detail = err instanceof Error ? err.message : String(err)
        process.stderr.write(
          `pending-work-progress: edit failed key=${key} ` +
            `msg=${editCtx.messageId}: ${detail}\n`,
        )
      })
  }
}
355
+
356
+ // ─── Test helpers ─────────────────────────────────────────────────────────
357
+
358
/** Test-only: run exactly one poll at the supplied timestamp, bypassing the
 *  real interval timer. */
export function __tickForTests(now: number): void {
  return tick(now)
}
362
+
363
/** Test-only: install (or, with `null`, remove) the deps without starting
 *  the real timer. */
export function __setDepsForTests(deps: PendingProgressDeps | null): void {
  activeDeps = deps
}
367
+
368
/** Test-only: return the live state entry for `key` (not a copy), or
 *  `undefined` when there is none. */
export function __getStateForTests(key: string): State | undefined {
  const entry = stateByKey.get(key)
  return entry
}
372
+
373
/** Test-only: full reset — forget every chat's state, then stop the timer
 *  and uninstall the deps. */
export function __resetAllForTests(): void {
  stateByKey.clear()
  stopTimer()
}
@@ -104,6 +104,26 @@ export type RuntimeMetricEvent =
104
104
  fallback_kind: 'working' | 'thinking'
105
105
  silence_ms: number
106
106
  }
107
+ /**
108
+ * #1445 cross-turn pending-async ambient lifecycle. `started` fires
109
+ * when a turn ends with a captured anchor AND a pending Agent/Task/
110
+ * Bash-background dispatch — i.e. the framework will now edit the
111
+ * model's last reply in place every ~60s until cleared. `edited`
112
+ * fires on each successful in-place edit; `elapsedMs` is how long
113
+ * ambient has been running for this chat. `cleared` fires when
114
+ * ambient stops — `reason` says why (inbound / handback / timeout / manual).
115
+ * Targets: edited/started ratio is the "still alive minutes per
116
+ * activation" health proxy; cleared.reason='inbound' should
117
+ * dominate (model + user resolving naturally).
118
+ */
119
+ | { kind: 'pending_progress_started'; chatKey: string }
120
+ | { kind: 'pending_progress_edited'; chatKey: string; elapsedMs: number }
121
+ | {
122
+ kind: 'pending_progress_cleared'
123
+ chatKey: string
124
+ elapsedMs?: number
125
+ reason?: string
126
+ }
107
127
 
108
128
  /**
109
129
  * The JSONL sink lives under the runtime state dir so it's per-agent
@@ -182,22 +182,39 @@ export function readSilentEndState(deps?: SilentEndDeps): SilentEndState | null
182
182
  }
183
183
 
184
184
  /**
185
- * Record a user-message turn that ended with zero outbound messages and
186
- * report whether the deterministic re-prompt has been exhausted. This is
187
- * the gateway's single entry point for the main turn-end path.
185
+ * Record a user-message turn that ended WITHOUT the model delivering a
186
+ * final answer, and report whether the deterministic re-prompt has been
187
+ * exhausted. This is the gateway's single entry point for the main
188
+ * turn-end path.
188
189
  *
189
- * - First silent-end of a turn (no prior state, or prior `retryCount`
190
+ * #1664 the trigger generalized from "zero outbound" to "no final
191
+ * answer delivered". Two cases reach here now:
192
+ * 1. Zero outbound — the turn ended with nothing sent at all (the
193
+ * original #1122/#1161 silent-end case).
194
+ * 2. Interim-ack only — the model sent an ack via reply/stream_reply
195
+ * but ended the turn with its real answer as plain transcript text
196
+ * (rendered into an ephemeral answer-lane draft that gets retracted
197
+ * at turn_end, never finalized). The gateway tracks this via
198
+ * `CurrentTurn.finalAnswerDelivered`; case 1 is just the subset
199
+ * where that flag is false because nothing landed.
200
+ * In both cases the model still owes the user an answer, so the same
201
+ * re-prompt safety net applies — the framework re-prompts; the model
202
+ * re-delivers via the reply tool (never the framework materializing a
203
+ * message from the draft — see `reference/principles.md`).
204
+ *
205
+ * - First undelivered turn-end (no prior state, or prior `retryCount`
190
206
  * still below `SILENT_END_MAX_RETRIES`) → writes the state file via
191
207
  * `writeSilentEndState`, so `silent-end-interrupt-stop.mjs` blocks
192
208
  * the stop and re-prompts the agent. Returns `{ exhausted: false }`.
193
209
  *
194
- * - A silent-end where the prior state for the SAME turn already shows
195
- * `retryCount >= SILENT_END_MAX_RETRIES` → the Stop hook already
196
- * spent its re-prompt and the agent is STILL silent. Recovery has
197
- * failed. Clears the state file (so the Stop hook on this final turn
198
- * finds nothing pending and allows the stop cleanly) and returns
199
- * `{ exhausted: true }` — the caller MUST then deliver a user-facing
200
- * fallback so the turn never just vanishes (#1161).
210
+ * - An undelivered turn-end where the prior state for the SAME turn
211
+ * already shows `retryCount >= SILENT_END_MAX_RETRIES` → the Stop
212
+ * hook already spent its re-prompt and the agent is STILL
213
+ * undelivered. Recovery has failed. Clears the state file (so the
214
+ * Stop hook on this final turn finds nothing pending and allows the
215
+ * stop cleanly) and returns `{ exhausted: true }` — the caller MUST
216
+ * then deliver a user-facing fallback so the turn never just
217
+ * vanishes (#1161).
201
218
  *
202
219
  * Chat-less autonomous wakeup turns never reach here: the gateway only
203
220
  * creates a `currentTurn` (and therefore only runs a turn-end handler)
@@ -228,3 +245,12 @@ export function recordSilentTurnEnd(
228
245
  writeSilentEndState(args, deps)
229
246
  return { exhausted: false }
230
247
  }
248
+
249
+ /**
250
+ * #1664 — semantic alias for `recordSilentTurnEnd`. The trigger is now
251
+ * "no final answer delivered", of which "zero outbound" is one case; new
252
+ * callsites should prefer this name so the intent reads correctly. The
253
+ * behaviour, retry semantics, and `{exhausted}` contract are identical (this
254
+ * binds the very same function, not a wrapper) — `recordSilentTurnEnd` is
255
+ * kept for the existing callers and tests.
+ */
256
+ export const recordUndeliveredTurnEnd = recordSilentTurnEnd
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Unit coverage for the #1664 final-answer detection predicate.
3
+ *
4
+ * `isFinalAnswerReply` is the finer signal the silent-end re-prompt needs:
5
+ * the gateway's `replyCalled` flag flips on the first reply / stream_reply
6
+ * tool use and cannot tell an interim ack from the real answer. This
7
+ * predicate classifies each reply so a turn whose every reply was "interim"
8
+ * (and whose real answer ended up as plain transcript text) ends with
9
+ * `finalAnswerDelivered === false` and triggers the re-prompt — the #1664
10
+ * bug (streamed answers rendered to a draft, retracted at turn_end, lost).
11
+ *
12
+ * These tests pin the pure predicate. The gateway wires it into
13
+ * executeReply / executeStreamReply (covered by the gateway integration
14
+ * surface); pinning the policy here keeps it auditable without importing
15
+ * the multi-thousand-line gateway module.
16
+ */
17
+
18
+ import { describe, it, expect } from 'vitest'
19
+ import { isFinalAnswerReply, FINAL_ANSWER_MIN_CHARS } from '../final-answer-detect.js'
20
+
21
describe('isFinalAnswerReply — #1664 final-answer classification', () => {
  it('classifies a notification-bearing reply as the final answer', () => {
    // disable_notification:false is the pacing contract's "final answer"
    // signal — interim updates pass disable_notification:true.
    expect(
      isFinalAnswerReply({ text: 'short answer', disableNotification: false }),
    ).toBe(true)
  })

  it('classifies a short interim ack (disable_notification:true) as NOT final', () => {
    expect(
      isFinalAnswerReply({ text: 'on it…', disableNotification: true }),
    ).toBe(false)
  })

  it('length backstop: a long reply mis-marked interim still counts as final', () => {
    // Deliberately well ABOVE the threshold. The exact-threshold boundary is
    // pinned by the next test, so this one covers the "longer than" case
    // instead of repeating the same input.
    const longText = 'x'.repeat(FINAL_ANSWER_MIN_CHARS * 2)
    expect(
      isFinalAnswerReply({ text: longText, disableNotification: true }),
    ).toBe(true)
  })

  it('length backstop is inclusive at exactly FINAL_ANSWER_MIN_CHARS', () => {
    expect(
      isFinalAnswerReply({
        text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS),
        disableNotification: true,
      }),
    ).toBe(true)
    // One char under the threshold and marked interim → still interim.
    expect(
      isFinalAnswerReply({
        text: 'x'.repeat(FINAL_ANSWER_MIN_CHARS - 1),
        disableNotification: true,
      }),
    ).toBe(false)
  })

  it('stream_reply done=true is always the final answer, even short + interim', () => {
    // A done=true call explicitly closes the stream — it IS the answer,
    // regardless of length or the notification flag.
    expect(
      isFinalAnswerReply({ text: 'ok', disableNotification: true, done: true }),
    ).toBe(true)
  })

  it('a non-terminal stream_reply chunk (done=false) is classified like a plain reply', () => {
    // Short interim chunk → not final.
    expect(
      isFinalAnswerReply({ text: 'thinking…', disableNotification: true, done: false }),
    ).toBe(false)
    // Notification-bearing chunk → final.
    expect(
      isFinalAnswerReply({ text: 'here it is', disableNotification: false, done: false }),
    ).toBe(true)
  })

  it('an empty reply marked interim is NOT the final answer', () => {
    expect(
      isFinalAnswerReply({ text: '', disableNotification: true }),
    ).toBe(false)
  })

  it('FINAL_ANSWER_MIN_CHARS is the documented 200-char backstop', () => {
    // Guards the constant against silent drift — the value is referenced
    // in the CurrentTurn doc-comment and the Stop-hook rationale.
    expect(FINAL_ANSWER_MIN_CHARS).toBe(200)
  })
})