switchroom 0.14.10 → 0.14.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,13 @@
1
- import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
2
2
  import {
3
3
  startTurn,
4
4
  noteOutbound,
5
- noteSubagentDispatch,
6
5
  noteThinking,
7
6
  noteToolStart,
8
7
  noteToolEnd,
9
8
  noteToolLabel,
10
- consumeArmedPoke,
11
9
  endTurn,
12
10
  silencePokeEnabled,
13
- formatPokeText,
14
11
  formatFrameworkFallbackText,
15
12
  __tickForTests,
16
13
  __setDepsForTests,
@@ -26,27 +23,15 @@ const ORIGINAL_KILL_SWITCH = process.env.SWITCHROOM_DISABLE_SILENCE_POKE
26
23
  interface TestFixtures {
27
24
  emitted: SilencePokeMetric[]
28
25
  fallbacks: FrameworkFallbackContext[]
29
- awarenessPings: FrameworkFallbackContext[]
30
26
  }
31
27
 
32
28
  function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }): TestFixtures {
33
- const fixtures: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
29
+ const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
34
30
  __setDepsForTests({
35
31
  emitMetric: (e) => fixtures.emitted.push(e),
36
32
  onFrameworkFallback: (ctx) => { fixtures.fallbacks.push(ctx) },
37
- onAwarenessPing: (ctx) => { fixtures.awarenessPings.push(ctx) },
38
- // The ack budget (a new poke that fires *earlier* than `soft`) is
39
- // disabled by default in this fixture so the soft/firm/fallback
40
- // ladder tests stay isolated from it. The 'ack budget' describe
41
- // block opts back in with a real value.
42
- //
43
- // The 60s awarenessPing is also disabled by default so the existing
44
- // soft/firm/fallback ladder tests don't see the new sibling event;
45
- // the 'awareness ping' describe block opts back in.
46
33
  thresholdsMs: {
47
34
  ...DEFAULT_THRESHOLDS,
48
- ack: Number.MAX_SAFE_INTEGER,
49
- awarenessPing: Number.MAX_SAFE_INTEGER,
50
35
  ...(opts?.thresholds ?? {}),
51
36
  },
52
37
  })
@@ -85,44 +70,24 @@ describe('silence-poke — kill switch', () => {
85
70
  })
86
71
  })
87
72
 
88
- describe('silence-poke — escalation ladder', () => {
89
- it('soft poke fires at 75s', () => {
73
+ // Post-retirement: the model-targeted nudge ladder (ack/soft/firm) and
74
+ // the 60s awareness ping are gone. The ONLY framework action left is the
75
+ // 300s fallback, which the gateway turns into a user-visible "still
76
+ // working…" message AND an unwedge. These tests pin that single terminal
77
+ // action.
78
+ describe('silence-poke — framework fallback (the only remaining action)', () => {
79
+ it('does not fire before the 300s threshold', () => {
90
80
  const fx = setupDeps()
91
81
  startTurn('chat:0', 0)
92
-
93
- __tickForTests(70_000) // before threshold
94
- expect(consumeArmedPoke()).toBeNull()
82
+ __tickForTests(120_000)
83
+ __tickForTests(299_000)
84
+ expect(fx.fallbacks).toHaveLength(0)
95
85
  expect(fx.emitted).toHaveLength(0)
96
-
97
- __tickForTests(75_000) // at threshold
98
- expect(fx.emitted).toEqual([
99
- expect.objectContaining({ kind: 'silence_poke_fired', level: 'soft', subagent_wait: false }),
100
- ])
101
- const text = consumeArmedPoke()
102
- expect(text).toContain('[silence-poke]')
103
- expect(text).toContain('75s')
104
- })
105
-
106
- it('firm poke fires at 180s after soft', () => {
107
- const fx = setupDeps()
108
- startTurn('chat:0', 0)
109
- __tickForTests(75_000)
110
- consumeArmedPoke() // drain the soft
111
- __tickForTests(180_000)
112
- expect(fx.emitted.map((e) => e.kind)).toEqual([
113
- 'silence_poke_fired',
114
- 'silence_poke_fired',
115
- ])
116
- expect(fx.emitted[1]).toMatchObject({ level: 'firm' })
117
- const firm = consumeArmedPoke()
118
- expect(firm).toContain('3 minutes silent')
119
86
  })
120
87
 
121
- it('framework fallback fires at 300s with kind=working when no thinking signal', () => {
88
+ it('fires at 300s with kind=working when no thinking signal', () => {
122
89
  const fx = setupDeps()
123
90
  startTurn('chatX:42', 0)
124
- __tickForTests(75_000)
125
- __tickForTests(180_000)
126
91
  __tickForTests(300_000)
127
92
  expect(fx.fallbacks).toEqual([
128
93
  expect.objectContaining({ chatId: 'chatX', threadId: 42, fallbackKind: 'working' }),
@@ -130,23 +95,19 @@ describe('silence-poke — escalation ladder', () => {
130
95
  expect(fx.emitted.at(-1)).toMatchObject({ kind: 'silence_fallback_sent', fallback_kind: 'working' })
131
96
  })
132
97
 
133
- it('framework fallback fires with kind=thinking if a thinking event landed within 30s', () => {
98
+ it('fires with kind=thinking if a thinking event landed within 30s', () => {
134
99
  const fx = setupDeps()
135
100
  startTurn('c:0', 0)
136
101
  noteThinking('c:0', 280_000)
137
- __tickForTests(75_000)
138
- __tickForTests(180_000)
139
102
  __tickForTests(300_000)
140
103
  expect(fx.fallbacks).toEqual([
141
104
  expect.objectContaining({ fallbackKind: 'thinking' }),
142
105
  ])
143
106
  })
144
107
 
145
- it('framework fallback fires at most once per turn', () => {
108
+ it('fires at most once per turn', () => {
146
109
  const fx = setupDeps()
147
110
  startTurn('c:0', 0)
148
- __tickForTests(75_000)
149
- __tickForTests(180_000)
150
111
  __tickForTests(300_000)
151
112
  __tickForTests(450_000) // continued silence
152
113
  __tickForTests(600_000)
@@ -154,245 +115,17 @@ describe('silence-poke — escalation ladder', () => {
154
115
  })
155
116
  })
156
117
 
157
- // PR1 (human-feel UX epic): the ack budget. A person you message
158
- // answers in a beat — the framework enforces that baseline by arming an
159
- // 'ack' poke if NOTHING has been sent within `thresholds.ack` of turn
160
- // start. It is a one-shot nudge (the model still authors every word),
161
- // deliberately OUTSIDE the soft/firm/fallback `pokesFired` ladder: if
162
- // the model never acks, the ladder still escalates on its own schedule.
163
- // See `reference/conversational-pacing.md` and the "Open with an
164
- // acknowledgement" bullet in `profiles/_shared/telegram-style.md.hbs`.
165
- //
166
- // NB: `setupDeps` disables the ack budget by default (ack = MAX_SAFE);
167
- // every test here opts back in with a real `ack` threshold.
168
- describe('silence-poke — ack budget (PR1 human-feel UX)', () => {
169
- it('arms an ack poke at the ack threshold when nothing has been sent', () => {
170
- const fx = setupDeps({ thresholds: { ack: 10_000 } })
171
- startTurn('chat:0', 0)
172
-
173
- __tickForTests(9_000) // before the ack budget
174
- expect(consumeArmedPoke()).toBeNull()
175
- expect(fx.emitted).toHaveLength(0)
176
-
177
- __tickForTests(10_000) // at the ack budget
178
- expect(fx.emitted).toEqual([
179
- expect.objectContaining({ kind: 'silence_poke_fired', level: 'ack' }),
180
- ])
181
- const text = consumeArmedPoke()
182
- expect(text).toContain('[silence-poke]')
183
- expect(text).toContain('reply')
184
- })
185
-
186
- it('does NOT arm an ack poke if an outbound landed before the budget', () => {
187
- const fx = setupDeps({ thresholds: { ack: 10_000 } })
188
- startTurn('chat:0', 0)
189
- noteOutbound('chat:0', 3_000) // model acked fast — inside the budget
190
- __tickForTests(10_000)
191
- __tickForTests(20_000)
192
- expect(consumeArmedPoke()).toBeNull()
193
- expect(
194
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'ack'),
195
- ).toHaveLength(0)
196
- })
197
-
198
- it('is one-shot — never re-arms even if the model goes quiet again', () => {
199
- const fx = setupDeps({ thresholds: { ack: 10_000 } })
200
- startTurn('chat:0', 0)
201
- __tickForTests(10_000) // ack fires
202
- consumeArmedPoke() // drain it
203
- noteOutbound('chat:0', 12_000) // model finally acks
204
- // The model goes quiet again. The ack poke is specifically about the
205
- // FIRST outbound — it must not fire twice. A later silence is the
206
- // soft poke's job, not the ack budget's.
207
- __tickForTests(40_000)
208
- expect(
209
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'ack'),
210
- ).toHaveLength(1)
211
- })
212
-
213
- it('ackPokeFired resets across turns even when endTurn was skipped (CC-5 invariant)', () => {
214
- // Mirrors the subagentDispatchActive CC-5 guard: `ackPokeFired` is a
215
- // turn-scoped one-shot flag, and the only thing that keeps it from
216
- // leaking into the next turn (when an abnormal abort skips endTurn)
217
- // is startTurn's unconditional state overwrite. Pin that here so a
218
- // future read-modify-write refactor of startTurn fails loud.
219
- setupDeps({ thresholds: { ack: 10_000 } })
220
- startTurn('k', 0)
221
- __tickForTests(10_000) // ack fires
222
- expect(__getStateForTests('k')?.ackPokeFired).toBe(true)
223
- // Turn 2 in the same key, no endTurn — startTurn MUST clear the flag.
224
- startTurn('k', 1_000_000)
225
- expect(__getStateForTests('k')?.ackPokeFired).toBe(false)
226
- })
227
-
228
- it('does not advance the ladder — soft still requires a full 75s of silence', () => {
229
- // The ack poke is deliberately outside `pokesFired`. After it fires,
230
- // a soft poke must still wait the normal 75s.
231
- const fx = setupDeps({ thresholds: { ack: 10_000 } })
232
- startTurn('chat:0', 0)
233
- __tickForTests(10_000) // ack
234
- consumeArmedPoke()
235
- __tickForTests(70_000) // 70s total — under the 75s soft threshold
236
- expect(
237
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'soft'),
238
- ).toHaveLength(0)
239
- __tickForTests(75_000)
240
- expect(
241
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'soft'),
242
- ).toHaveLength(1)
243
- })
244
-
245
- it('still escalates ack -> soft -> firm -> fallback on a turn that never acks', () => {
246
- const fx = setupDeps({ thresholds: { ack: 10_000 } })
247
- startTurn('chat:0', 0)
248
- __tickForTests(10_000) // ack
249
- consumeArmedPoke()
250
- __tickForTests(75_000) // soft
251
- consumeArmedPoke()
252
- __tickForTests(180_000) // firm
253
- consumeArmedPoke()
254
- __tickForTests(300_000) // fallback
255
- const trail = fx.emitted.map((e) =>
256
- e.kind === 'silence_poke_fired'
257
- ? `poke:${e.level}`
258
- : e.kind === 'silence_fallback_sent'
259
- ? `fallback:${e.fallback_kind}`
260
- : e.kind,
261
- )
262
- expect(trail).toEqual([
263
- 'poke:ack',
264
- 'poke:soft',
265
- 'poke:firm',
266
- 'fallback:working',
267
- ])
268
- })
269
-
270
- it('formatPokeText("ack") nudges for a human acknowledgement via reply', () => {
271
- const text = formatPokeText('ack')
272
- expect(text).toContain('[silence-poke]')
273
- expect(text.toLowerCase()).toContain('acknowledg')
274
- expect(text).toContain('reply')
275
- })
276
- })
277
-
278
- describe('silence-poke — outbound resets clock + success measurement', () => {
279
- it('noteOutbound resets the silence clock', () => {
280
- setupDeps()
281
- startTurn('k', 0)
282
- noteOutbound('k', 50_000)
283
- __tickForTests(120_000) // 70s after outbound — under 75s soft threshold
284
- expect(consumeArmedPoke()).toBeNull()
285
- })
286
-
287
- it('emits silence_poke_succeeded when outbound lands within success window after a poke', () => {
118
+ describe('silence-poke — outbound resets the silence clock', () => {
119
+ it('noteOutbound pushes the fallback measurement to the last outbound', () => {
288
120
  const fx = setupDeps()
289
121
  startTurn('k', 0)
290
- __tickForTests(75_000) // soft poke armed
291
- noteOutbound('k', 80_000) // 5s later — within 15s success window
292
- expect(fx.emitted.map((e) => e.kind)).toContain('silence_poke_succeeded')
293
- const success = fx.emitted.find((e) => e.kind === 'silence_poke_succeeded')!
294
- expect(success).toMatchObject({ level: 'soft', latency_ms: 5_000 })
295
- })
296
-
297
- it('does NOT emit silence_poke_succeeded if outbound is later than the success window', () => {
298
- const fx = setupDeps()
299
- startTurn('k', 0)
300
- __tickForTests(75_000)
301
- noteOutbound('k', 95_000) // 20s later — outside 15s window
302
- expect(fx.emitted.filter((e) => e.kind === 'silence_poke_succeeded')).toHaveLength(0)
303
- })
304
-
305
- it('outbound resets pokesFired so the next 75s silence can re-arm', () => {
306
- const fx = setupDeps()
307
- startTurn('k', 0)
308
- __tickForTests(75_000) // soft fires
309
- noteOutbound('k', 100_000) // reset
310
- __tickForTests(180_000) // 80s since outbound — under threshold
311
- __tickForTests(180_000 + 50_000) // would be 130s if not reset; still no fire because clock zero = 100_000, so silence = 130s
312
- // Actually 230 - 100 = 130s past outbound, > 75s soft threshold:
313
- expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(2)
314
- expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired').at(-1)).toMatchObject({ level: 'soft' })
315
- })
316
- })
317
-
318
- describe('silence-poke — subagent dispatch extension', () => {
319
- it('extends soft threshold to 300s when noteSubagentDispatch was called', () => {
320
- const fx = setupDeps()
321
- startTurn('k', 0)
322
- noteSubagentDispatch('k')
323
- __tickForTests(120_000) // past 75s but under 300s subagent threshold
324
- expect(fx.emitted).toHaveLength(0)
122
+ noteOutbound('k', 250_000)
123
+ // 300s from turn start, but only 50s since the outbound — no fire.
325
124
  __tickForTests(300_000)
326
- expect(fx.emitted).toHaveLength(1)
327
- expect(fx.emitted[0]).toMatchObject({ level: 'soft', subagent_wait: true })
328
- })
329
-
330
- it('subagent flag PERSISTS through narrating outbound (PR4 fix)', () => {
331
- // Reviewer note from PR2 #1125 — the parent's "spinning up @reviewer"
332
- // narration is the outbound that opens the wait. Clearing the
333
- // subagent flag at that moment would defeat the extended-threshold
334
- // guarantee for the wait that follows. The flag must persist until
335
- // endTurn().
336
- const fx = setupDeps()
337
- startTurn('k', 0)
338
- noteSubagentDispatch('k')
339
- noteOutbound('k', 60_000) // parent narrates "spinning up @reviewer"
340
- // Subagent wait continues. With the flag persistent, soft threshold
341
- // is still 300s, so a 90s gap should NOT fire.
342
- __tickForTests(60_000 + 90_000)
343
- expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(0)
344
- // At 300s past the outbound, the soft poke fires (subagent wait
345
- // is genuinely long).
346
- __tickForTests(60_000 + 300_000)
347
- expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(1)
348
- expect(fx.emitted[0]).toMatchObject({ level: 'soft', subagent_wait: true })
349
- })
350
-
351
- it('subagent flag clears on endTurn', () => {
352
- setupDeps()
353
- startTurn('k', 0)
354
- noteSubagentDispatch('k')
355
- // Take snapshot
356
- const before = __getStateForTests('k')
357
- expect(before?.subagentDispatchActive).toBe(true)
358
- endTurn('k')
359
- expect(__getStateForTests('k')).toBeUndefined()
360
- })
361
-
362
- // CC-5 defensive invariant (`docs/status-ask-cause-classes.md`):
363
- // the original catalog claim was that `subagentDispatchActive` can
364
- // leak across turns if `endTurn` is skipped (turn dies abnormally,
365
- // gateway crashes between turn_end signal and cleanup). Investigation
366
- // shows the claim doesn't hold — `startTurn` calls `state.set(key, ...)`
367
- // unconditionally with `subagentDispatchActive: false`, so the next
368
- // turn's startTurn wipes any stale flag.
369
- //
370
- // We're pinning that invariant here as a regression guard. If a future
371
- // refactor changes `startTurn` to a read-modify-write (merge instead
372
- // of overwrite), this test breaks immediately. Keeps the catalog's
373
- // worry productive: even though it's not currently a bug, the
374
- // invariant that makes it not-a-bug is now load-bearing.
375
- it('startTurn overwrites stale subagentDispatchActive when endTurn was skipped (CC-5 invariant)', () => {
376
- const fx = setupDeps()
377
- // Turn 1: dispatch a subagent, then SKIP endTurn (simulating an
378
- // abnormal abort path — context-exhaustion, gateway crash mid-turn,
379
- // etc).
380
- startTurn('k', 0)
381
- noteSubagentDispatch('k')
382
- expect(__getStateForTests('k')?.subagentDispatchActive).toBe(true)
383
-
384
- // Turn 2 in the same key: startTurn MUST clear the flag.
385
- startTurn('k', 1_000_000)
386
- expect(__getStateForTests('k')?.subagentDispatchActive).toBe(false)
387
-
388
- // Verify the soft poke fires at the normal 75s threshold, not at
389
- // the extended 300s subagentSoft threshold. If the flag had leaked,
390
- // ticking at 75s after the new turn start would find subagentSoft
391
- // active and skip the fire.
392
- __tickForTests(1_000_000 + 75_000)
393
- const fired = fx.emitted.filter((e) => e.kind === 'silence_poke_fired')
394
- expect(fired).toHaveLength(1)
395
- expect(fired[0]).toMatchObject({ level: 'soft', subagent_wait: false })
125
+ expect(fx.fallbacks).toHaveLength(0)
126
+ // 300s after the outbound — now it fires.
127
+ __tickForTests(550_000)
128
+ expect(fx.fallbacks).toHaveLength(1)
396
129
  })
397
130
  })
398
131
 
@@ -406,24 +139,16 @@ describe('silence-poke — subagent dispatch extension', () => {
406
139
  // contradicts the gateway's earlier "⚠️ Context window full" / etc.
407
140
  //
408
141
  // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`).
409
- // The fix lives in the gateway (context-exhaust path adds the
410
- // endTurn call); these tests pin the invariant at the silence-poke
411
- // level so the contract is verifiable in isolation of the gateway.
412
142
  describe('silence-poke — abnormal turn-end invariants (CC-5 follow-up)', () => {
413
143
  it('endTurn before the 300s fallback threshold prevents the fallback from firing', () => {
414
144
  const fx = setupDeps()
415
145
  startTurn('k', 0)
416
- // Soft + firm pokes arm; turn is alive and the model could still
417
- // recover.
418
- __tickForTests(75_000)
419
- __tickForTests(180_000)
146
+ __tickForTests(180_000) // turn alive; model could still recover
420
147
  // Gateway aborts the turn at t=250s (context exhaust, wedge,
421
- // crash teardown — any abnormal bail). The contract: endTurn
422
- // gets called BEFORE the 300s threshold.
148
+ // crash teardown — any abnormal bail) BEFORE the 300s threshold.
423
149
  endTurn('k')
424
- // Five minutes total elapse from the original turn start. If
425
- // endTurn left the state in the Map, the framework fallback
426
- // would fire here. The contract is: it MUST NOT.
150
+ // Five minutes total elapse. If endTurn left the state in the Map,
151
+ // the framework fallback would fire here. The contract: it MUST NOT.
427
152
  __tickForTests(300_000)
428
153
  expect(fx.fallbacks).toHaveLength(0)
429
154
  expect(
@@ -431,47 +156,22 @@ describe('silence-poke — abnormal turn-end invariants (CC-5 follow-up)', () =>
431
156
  ).toHaveLength(0)
432
157
  })
433
158
 
434
- it('endTurn after a soft poke fired does not later emit a stale fallback', () => {
435
- const fx = setupDeps()
436
- startTurn('k', 0)
437
- __tickForTests(75_000) // soft fires
438
- expect(
439
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired'),
440
- ).toHaveLength(1)
441
- // Turn aborts well before firm/fallback thresholds.
442
- endTurn('k')
443
- __tickForTests(180_000)
444
- __tickForTests(300_000)
445
- // No firm, no fallback after the turn-abort.
446
- expect(
447
- fx.emitted.filter((e) => e.kind === 'silence_poke_fired'),
448
- ).toHaveLength(1) // unchanged: only the original soft
449
- expect(fx.fallbacks).toHaveLength(0)
450
- })
451
-
452
159
  // #1289: the flush-backstop turn-end branch in the gateway (the path
453
160
  // taken when the agent emits assistant text but never calls the reply
454
161
  // tool) was retrofitted in #1067 to null `currentTurn` early but never
455
- // had `silencePoke.endTurn` added — leaving state2 populated so the
162
+ // had `silencePoke.endTurn` added — leaving state populated so the
456
163
  // 300s framework fallback fired after the gateway already flushed the
457
- // captured prose and considered the turn over. Pin the contract at
458
- // the silence-poke level: a turn that records an outbound (the
459
- // flushed message) and then calls endTurn must not later fire a
460
- // fallback even if 300s elapses from the original turn start.
164
+ // captured prose and considered the turn over. Pin the contract: a
165
+ // turn that records an outbound (the flushed message) and then calls
166
+ // endTurn must not later fire a fallback even if 300s elapses from the
167
+ // original turn start.
461
168
  it('#1289: flush-backstop turn-end (outbound + endTurn) suppresses the 300s fallback', () => {
462
169
  const fx = setupDeps()
463
170
  startTurn('k', 0)
464
- // Some time passes while the agent generates prose without calling
465
- // the reply tool. No soft/firm armed yet.
466
171
  __tickForTests(60_000)
467
- // Gateway turn-flush fires: captured text is sent as an outbound,
468
- // then the flush branch nulls currentTurn AND (post-fix) calls
469
- // signalTracker.clear + silencePoke.endTurn.
470
172
  noteOutbound('k', 60_000)
471
173
  endTurn('k')
472
- // 300s elapses from the original turn start. Pre-fix: the framework
473
- // fallback fired here. Post-fix: the state is drained, no fallback.
474
- __tickForTests(240_000)
174
+ __tickForTests(360_000)
475
175
  expect(fx.fallbacks).toHaveLength(0)
476
176
  expect(
477
177
  fx.emitted.filter((e) => e.kind === 'silence_fallback_sent'),
@@ -493,8 +193,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
493
193
  const fx = setupDeps()
494
194
  startTurn('k', 0)
495
195
  noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
496
- __tickForTests(75_000)
497
- __tickForTests(180_000)
498
196
  __tickForTests(305_000)
499
197
  expect(fx.fallbacks).toHaveLength(1)
500
198
  const ctx = fx.fallbacks[0]!
@@ -548,8 +246,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
548
246
  })
549
247
 
550
248
  it('raw mcp__ tool name with NO label falls back to the bare name (no leak-but-no-better-option)', () => {
551
- // If the label table doesn't recognise an MCP tool, we have nothing
552
- // better to show than the raw name. Better honest-ugly than silent.
553
249
  const text = formatFrameworkFallbackText('working', 305_000, [
554
250
  { name: 'mcp__some-third-party__do_thing', label: null, durationMs: 305_000 },
555
251
  ])
@@ -559,9 +255,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
559
255
  })
560
256
 
561
257
  it('built-in tool (Grep) with a label keeps the prior "running Name label" shape — name is already human-readable', () => {
562
- // Regression guard: don't accidentally drop the built-in tool name
563
- // when generalising the MCP rule. "Grep" is human-readable; the
564
- // label ("foo") is supplementary detail like the search pattern.
565
258
  const text = formatFrameworkFallbackText('working', 305_000, [
566
259
  { name: 'Grep', label: 'foo', durationMs: 305_000 },
567
260
  ])
@@ -577,7 +270,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
577
270
  expect(
578
271
  formatFrameworkFallbackText('thinking', 305_000, []),
579
272
  ).toBe('still thinking… (no update from agent in 5 min)')
580
- // No third arg → same as empty array.
581
273
  expect(
582
274
  formatFrameworkFallbackText('working', 305_000),
583
275
  ).toBe('still working… (no update from agent in 5 min)')
@@ -596,8 +288,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
596
288
  startTurn('k', 0)
597
289
  noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
598
290
  noteToolEnd('k', 'T1', 200_000)
599
- __tickForTests(75_000)
600
- __tickForTests(180_000)
601
291
  __tickForTests(305_000)
602
292
  expect(fx.fallbacks).toHaveLength(1)
603
293
  expect(fx.fallbacks[0]!.inFlightTools).toHaveLength(0)
@@ -608,8 +298,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
608
298
  startTurn('k', 0)
609
299
  noteToolStart('k', 'T1', 'Grep', null, 30_000)
610
300
  noteToolLabel('k', 'T1', '"refined-from-sidecar"')
611
- __tickForTests(75_000)
612
- __tickForTests(180_000)
613
301
  __tickForTests(305_000)
614
302
  expect(fx.fallbacks[0]!.inFlightTools[0]!.label).toBe('"refined-from-sidecar"')
615
303
  })
@@ -620,7 +308,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
620
308
  noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
621
309
  expect(__getStateForTests('k')!.inFlightTools.size).toBe(1)
622
310
  endTurn('k')
623
- // A fresh turn under the same key has an empty map.
624
311
  startTurn('k', 1_000_000)
625
312
  expect(__getStateForTests('k')!.inFlightTools.size).toBe(0)
626
313
  })
@@ -632,8 +319,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
632
319
  noteToolStart('k', 'T-late', 'Read', 'recent.ts', 250_000)
633
320
  noteToolStart('k', 'T-early', 'Grep', '"oldest"', 20_000)
634
321
  noteToolStart('k', 'T-mid', 'Bash', null, 100_000)
635
- __tickForTests(75_000)
636
- __tickForTests(180_000)
637
322
  __tickForTests(305_000)
638
323
  const snap = fx.fallbacks[0]!.inFlightTools
639
324
  expect(snap.map(t => t.name)).toEqual(['Grep', 'Bash', 'Read'])
@@ -642,7 +327,7 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
642
327
  it('tool churn does NOT reset the silence clock (header invariant preserved)', () => {
643
328
  // The whole point of #1292 (b) over (a) is that we enrich the
644
329
  // fallback TEXT, never the timing. Tool activity must not delay
645
- // or suppress the soft/firm/fallback escalation ladder.
330
+ // or suppress the 300s fallback.
646
331
  const fx = setupDeps()
647
332
  startTurn('k', 0)
648
333
  // A constant stream of tool churn through the entire 5min window —
@@ -651,35 +336,17 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
651
336
  noteToolStart('k', `T-${t}`, 'Grep', 'foo', t)
652
337
  noteToolEnd('k', `T-${t}`, t + 500)
653
338
  }
654
- __tickForTests(75_000) // soft
655
- __tickForTests(180_000) // firm
656
- __tickForTests(305_000) // fallback
657
- expect(
658
- fx.emitted.filter(e => e.kind === 'silence_poke_fired'),
659
- ).toHaveLength(2)
339
+ __tickForTests(305_000)
660
340
  expect(fx.fallbacks).toHaveLength(1)
661
341
  })
662
342
 
663
- it('Task tool sets subagentDispatchActive AND populates inFlightTools', () => {
664
- // Two flags are independent: the soft-threshold extension still
665
- // works for sub-agent waits (existing behaviour), AND the fallback
666
- // message names the Task tool as the actual observable.
343
+ it('Task tool populates inFlightTools so the fallback names it as the observable', () => {
667
344
  const fx = setupDeps()
668
345
  startTurn('k', 0)
669
- // Gateway calls both for a Task tool_use (mirrors the wiring at
670
- // gateway.ts onSessionEvent).
671
- noteSubagentDispatch('k')
346
+ // Gateway calls noteToolStart for a Task tool_use (mirrors the
347
+ // wiring at gateway.ts onSessionEvent).
672
348
  noteToolStart('k', 'T1', 'Task', 'spinning up @researcher', 10_000)
673
- // Soft threshold extends to 300s under subagent — so no soft poke
674
- // fires at 75s and no firm fires at 180s (firm requires pokesFired===1,
675
- // i.e. soft must fire first). Once we cross the 300s subagent-soft,
676
- // soft fires; each tick fires one level via the `continue` in tick(),
677
- // so we need three ticks to walk soft → firm → fallback.
678
- __tickForTests(75_000) // suppressed by subagent
679
- __tickForTests(180_000) // still suppressed
680
- __tickForTests(305_000) // soft fires (subagent soft = 300s)
681
- __tickForTests(305_001) // firm fires
682
- __tickForTests(305_002) // fallback fires
349
+ __tickForTests(305_000)
683
350
  expect(fx.fallbacks).toHaveLength(1)
684
351
  const snap = fx.fallbacks[0]!.inFlightTools
685
352
  expect(snap[0]!.name).toBe('Task')
@@ -688,7 +355,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
688
355
 
689
356
  it('noteToolStart on an unknown key is a no-op (no crash, no state)', () => {
690
357
  setupDeps()
691
- // No startTurn first — silence-poke ignores the call.
692
358
  noteToolStart('k-never-started', 'T1', 'Grep', 'foo', 30_000)
693
359
  expect(__getStateForTests('k-never-started')).toBeUndefined()
694
360
  })
@@ -714,38 +380,11 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
714
380
  const text = formatFrameworkFallbackText('working', 305_000, [
715
381
  { name: 'Grep', label: longLabel, durationMs: 305_000 },
716
382
  ])
717
- // 60-char cap (with trailing ellipsis) — verify clipping without
718
- // pinning exact bytes.
719
383
  expect(text.length).toBeLessThan(120)
720
384
  expect(text).toContain('…')
721
385
  })
722
386
  })
723
387
 
724
- describe('silence-poke — consumeArmedPoke draining', () => {
725
- it('drains the armed flag so the next call returns null', () => {
726
- setupDeps()
727
- startTurn('k', 0)
728
- __tickForTests(75_000)
729
- expect(consumeArmedPoke()).not.toBeNull()
730
- expect(consumeArmedPoke()).toBeNull()
731
- })
732
-
733
- it('returns null when nothing is armed', () => {
734
- setupDeps()
735
- startTurn('k', 0)
736
- expect(consumeArmedPoke()).toBeNull()
737
- })
738
-
739
- it('returns the matching level text', () => {
740
- setupDeps()
741
- startTurn('k', 0)
742
- __tickForTests(75_000)
743
- expect(consumeArmedPoke()).toContain('75s')
744
- __tickForTests(180_000)
745
- expect(consumeArmedPoke()).toContain('3 minutes')
746
- })
747
- })
748
-
749
388
  describe('silence-poke — endTurn cleanup', () => {
750
389
  it('endTurn drops state', () => {
751
390
  setupDeps()
@@ -766,104 +405,50 @@ describe('silence-poke — independence across turns', () => {
766
405
  const fx = setupDeps()
767
406
  startTurn('a:0', 0)
768
407
  startTurn('b:0', 0)
769
- noteOutbound('a:0', 50_000)
770
- __tickForTests(75_000)
771
- // a's clock was reset to 50_000, silence=25s — no fire.
772
- // b's clock is still at 0, silence=75s — soft fires.
773
- expect(fx.emitted).toHaveLength(1)
774
- expect(fx.emitted[0]).toMatchObject({ key: 'b:0', level: 'soft' })
408
+ noteOutbound('a:0', 250_000)
409
+ __tickForTests(300_000)
410
+ // a's clock was reset to 250_000 (silence=50s) — no fire.
411
+ // b's clock is still at 0 (silence=300s) — fallback fires.
412
+ expect(fx.fallbacks).toHaveLength(1)
413
+ expect(fx.fallbacks[0]).toMatchObject({ key: 'b:0' })
775
414
  })
776
415
  })
777
416
 
778
417
  describe('silence-poke — fallback handler errors do not break timer', () => {
779
418
  it('continues to function if onFrameworkFallback throws', () => {
780
- const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
419
+ const fx: TestFixtures = { emitted: [], fallbacks: [] }
781
420
  __setDepsForTests({
782
421
  emitMetric: (e) => fx.emitted.push(e),
783
422
  onFrameworkFallback: () => { throw new Error('oh no') },
784
- onAwarenessPing: () => {},
785
- // ack + awareness-ping out of the way — this test exercises the
786
- // soft/firm/fallback ladder under a throwing fallback handler.
787
- thresholdsMs: {
788
- ...DEFAULT_THRESHOLDS,
789
- ack: Number.MAX_SAFE_INTEGER,
790
- awarenessPing: Number.MAX_SAFE_INTEGER,
791
- },
423
+ thresholdsMs: { ...DEFAULT_THRESHOLDS },
792
424
  })
793
425
  startTurn('k', 0)
794
426
  expect(() => {
795
- __tickForTests(75_000)
796
- __tickForTests(180_000)
797
427
  __tickForTests(300_000)
798
428
  }).not.toThrow()
799
- // Telemetry still emitted
800
429
  expect(fx.emitted.some((e) => e.kind === 'silence_fallback_sent')).toBe(true)
801
430
  })
802
431
 
803
432
  it('continues to function if onFrameworkFallback returns a rejected promise', async () => {
804
- const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
433
+ const fx: TestFixtures = { emitted: [], fallbacks: [] }
805
434
  __setDepsForTests({
806
435
  emitMetric: (e) => fx.emitted.push(e),
807
436
  onFrameworkFallback: () => Promise.reject(new Error('async fail')),
808
- onAwarenessPing: () => {},
809
- // ack + awareness-ping out of the way — see the throwing-handler test above.
810
- thresholdsMs: {
811
- ...DEFAULT_THRESHOLDS,
812
- ack: Number.MAX_SAFE_INTEGER,
813
- awarenessPing: Number.MAX_SAFE_INTEGER,
814
- },
437
+ thresholdsMs: { ...DEFAULT_THRESHOLDS },
815
438
  })
816
439
  startTurn('k', 0)
817
- __tickForTests(75_000)
818
- __tickForTests(180_000)
819
440
  __tickForTests(300_000)
820
- // Allow microtasks for the rejection-catch to fire
821
441
  await new Promise((r) => setTimeout(r, 0))
822
442
  expect(fx.emitted.some((e) => e.kind === 'silence_fallback_sent')).toBe(true)
823
443
  })
824
444
  })
825
445
 
826
- describe('silence-poke — system reminder text', () => {
827
- it('soft poke text references the 75s threshold and contains the system-reminder marker', () => {
828
- setupDeps()
829
- startTurn('k', 0)
830
- __tickForTests(75_000)
831
- const text = consumeArmedPoke()
832
- expect(text).toContain('[silence-poke]')
833
- expect(text).toContain('75s')
834
- expect(text).toContain('about to finish')
835
- })
836
-
837
- it('firm poke text references the 3-minute threshold', () => {
838
- setupDeps()
839
- startTurn('k', 0)
840
- __tickForTests(75_000)
841
- consumeArmedPoke()
842
- __tickForTests(180_000)
843
- const text = consumeArmedPoke()
844
- expect(text).toContain('3 minutes')
845
- expect(text).toContain('stuck')
846
- })
847
- })
848
-
849
446
  // CC-4 from `docs/status-ask-cause-classes.md`: wording is load-bearing
850
- // (`reference/conversational-pacing.md` § Silence-poke ladder). Snapshot
851
- // the exact strings here so a refactor that drops a key phrase fails
852
- // loud at test time. If you genuinely need to change the wording,
853
- // update the snapshot AND the design doc together.
447
+ // (`reference/conversational-pacing.md` § Safety net). Snapshot the exact
448
+ // strings here so a refactor that drops a key phrase fails loud at test
449
+ // time. If you genuinely need to change the wording, update the snapshot
450
+ // AND the design doc together.
854
451
  describe('silence-poke — wording snapshots (CC-4)', () => {
855
- it('soft poke text is unchanged', () => {
856
- expect(formatPokeText('soft')).toMatchInlineSnapshot(
857
- `"[silence-poke] You've been silent to the user for 75s. If you're still working on this, send one short conversational reply — e.g. "still going, working through X" — so they know you're alive. Keep it brief; don't restate the task. If you're about to finish within the next few seconds, skip the update."`,
858
- )
859
- })
860
-
861
- it('firm poke text is unchanged', () => {
862
- expect(formatPokeText('firm')).toMatchInlineSnapshot(
863
- `"[silence-poke] 3 minutes silent. Please send an update now — what you're working on, or whether you're stuck. If something is taking unusually long (slow tool, network, waiting on a sub-agent), say so explicitly."`,
864
- )
865
- })
866
-
867
452
  it('framework fallback — working at 300s', () => {
868
453
  expect(formatFrameworkFallbackText('working', 300_000)).toMatchInlineSnapshot(
869
454
  `"still working… (no update from agent in 5 min)"`,
@@ -877,9 +462,6 @@ describe('silence-poke — wording snapshots (CC-4)', () => {
877
462
  })
878
463
 
879
464
  it('framework fallback — minutes derived from silenceMs, not hard-coded', () => {
880
- // The "N min" suffix MUST track ctx.silenceMs so the wording stays
881
- // honest if the 300s threshold is tuned. If a refactor accidentally
882
- // hard-codes "5 min", these cases break.
883
465
  expect(formatFrameworkFallbackText('working', 360_000)).toBe(
884
466
  'still working… (no update from agent in 6 min)',
885
467
  )
@@ -889,8 +471,6 @@ describe('silence-poke — wording snapshots (CC-4)', () => {
889
471
  })
890
472
 
891
473
  it('framework fallback — minutes floor at 1 even when silenceMs is small', () => {
892
- // Defensive: a future caller might invoke with sub-minute silenceMs.
893
- // Rendering "0 min" reads as nonsense; floor at 1.
894
474
  expect(formatFrameworkFallbackText('working', 30_000)).toBe(
895
475
  'still working… (no update from agent in 1 min)',
896
476
  )
@@ -907,103 +487,8 @@ describe('silence-poke — performance', () => {
907
487
  startTurn(`chat${i}:0`, 0)
908
488
  }
909
489
  const start = performance.now()
910
- __tickForTests(75_000)
490
+ __tickForTests(75_000) // under fallback threshold — pure iteration cost
911
491
  const elapsed = performance.now() - start
912
- // 1000 turns should tick in well under 50ms — guards against an
913
- // accidentally-quadratic implementation.
914
492
  expect(elapsed).toBeLessThan(50)
915
493
  })
916
494
  })
917
-
918
- describe('silence-poke — awareness ping (early framework-owned user-visible status)', () => {
919
- it('fires once at 60s when no outbound has happened', () => {
920
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
921
- startTurn('k', 0)
922
- __tickForTests(59_000)
923
- expect(fx.awarenessPings.length).toBe(0)
924
- __tickForTests(60_000)
925
- expect(fx.awarenessPings.length).toBe(1)
926
- expect(fx.awarenessPings[0]!.silenceMs).toBeGreaterThanOrEqual(60_000)
927
- expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
928
- })
929
-
930
- it('is one-shot per turn — does not re-fire as silence continues', () => {
931
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
932
- startTurn('k', 0)
933
- __tickForTests(60_000)
934
- __tickForTests(120_000)
935
- __tickForTests(180_000)
936
- expect(fx.awarenessPings.length).toBe(1)
937
- })
938
-
939
- it('is suppressed by an early outbound', () => {
940
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
941
- startTurn('k', 0)
942
- noteOutbound('k', 30_000)
943
- __tickForTests(90_000)
944
- expect(fx.awarenessPings.length).toBe(0)
945
- })
946
-
947
- it('is suppressed when subagentDispatchActive is true', () => {
948
- // Sub-agent dispatch already widens soft to 300s; the awareness-ping
949
- // should also defer so we don't pre-empt the sub-agent's natural
950
- // progress signal.
951
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
952
- startTurn('k', 0)
953
- noteSubagentDispatch('k')
954
- __tickForTests(120_000)
955
- expect(fx.awarenessPings.length).toBe(0)
956
- })
957
-
958
- it('does NOT advance the soft/firm/fallback ladder', () => {
959
- // Awareness ping is a sibling signal; soft/firm/fallback continue
960
- // to escalate on their own schedule (and the model-targeted ack-poke
961
- // similarly remains independent).
962
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
963
- startTurn('k', 0)
964
- __tickForTests(60_000) // awareness fires
965
- __tickForTests(75_000) // soft fires
966
- __tickForTests(180_000) // firm fires
967
- __tickForTests(300_000) // fallback fires
968
- expect(fx.awarenessPings.length).toBe(1)
969
- expect(fx.fallbacks.length).toBe(1)
970
- expect(fx.emitted.filter(e => e.kind === 'silence_poke_fired').map(e => (e as { level: string }).level))
971
- .toEqual(['soft', 'firm'])
972
- expect(fx.emitted.some(e => e.kind === 'silence_fallback_sent')).toBe(true)
973
- })
974
-
975
- it('carries fallbackKind=thinking when a recent thinking event landed', () => {
976
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
977
- startTurn('k', 0)
978
- noteThinking('k', 45_000)
979
- __tickForTests(60_000)
980
- expect(fx.awarenessPings.length).toBe(1)
981
- expect(fx.awarenessPings[0]!.fallbackKind).toBe('thinking')
982
- })
983
-
984
- it('does not fire if turn ends before the threshold', () => {
985
- const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
986
- startTurn('k', 0)
987
- endTurn('k')
988
- __tickForTests(120_000)
989
- expect(fx.awarenessPings.length).toBe(0)
990
- })
991
-
992
- it('handler errors do not break the timer', () => {
993
- const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
994
- __setDepsForTests({
995
- emitMetric: (e) => fx.emitted.push(e),
996
- onFrameworkFallback: () => {},
997
- onAwarenessPing: () => { throw new Error('awareness handler boom') },
998
- thresholdsMs: {
999
- ...DEFAULT_THRESHOLDS,
1000
- ack: Number.MAX_SAFE_INTEGER,
1001
- awarenessPing: 60_000,
1002
- },
1003
- })
1004
- startTurn('k', 0)
1005
- expect(() => __tickForTests(60_000)).not.toThrow()
1006
- // Telemetry still emitted
1007
- expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
1008
- })
1009
- })