switchroom 0.14.10 → 0.14.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +81 -80
- package/dist/auth-broker/index.js +81 -80
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +83 -82
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +958 -912
- package/dist/host-control/main.js +149 -148
- package/dist/vault/approvals/kernel-server.js +83 -82
- package/dist/vault/broker/server.js +84 -83
- package/package.json +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +195 -356
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/boot-card.ts +15 -11
- package/telegram-plugin/gateway/gateway.ts +9 -67
- package/telegram-plugin/runtime-metrics.ts +8 -52
- package/telegram-plugin/silence-poke.ts +39 -312
- package/telegram-plugin/tests/boot-card-silent-on-operator.test.ts +27 -30
- package/telegram-plugin/tests/silence-poke.test.ts +54 -569
- package/telegram-plugin/uat/scenarios/jtbd-fast-ack-dm.test.ts +21 -23
- package/telegram-plugin/uat/scenarios/silence-poke-soft-dm.test.ts +0 -155
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
2
2
|
import {
|
|
3
3
|
startTurn,
|
|
4
4
|
noteOutbound,
|
|
5
|
-
noteSubagentDispatch,
|
|
6
5
|
noteThinking,
|
|
7
6
|
noteToolStart,
|
|
8
7
|
noteToolEnd,
|
|
9
8
|
noteToolLabel,
|
|
10
|
-
consumeArmedPoke,
|
|
11
9
|
endTurn,
|
|
12
10
|
silencePokeEnabled,
|
|
13
|
-
formatPokeText,
|
|
14
11
|
formatFrameworkFallbackText,
|
|
15
12
|
__tickForTests,
|
|
16
13
|
__setDepsForTests,
|
|
@@ -26,27 +23,15 @@ const ORIGINAL_KILL_SWITCH = process.env.SWITCHROOM_DISABLE_SILENCE_POKE
|
|
|
26
23
|
interface TestFixtures {
|
|
27
24
|
emitted: SilencePokeMetric[]
|
|
28
25
|
fallbacks: FrameworkFallbackContext[]
|
|
29
|
-
awarenessPings: FrameworkFallbackContext[]
|
|
30
26
|
}
|
|
31
27
|
|
|
32
28
|
function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }): TestFixtures {
|
|
33
|
-
const fixtures: TestFixtures = { emitted: [], fallbacks: []
|
|
29
|
+
const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
|
|
34
30
|
__setDepsForTests({
|
|
35
31
|
emitMetric: (e) => fixtures.emitted.push(e),
|
|
36
32
|
onFrameworkFallback: (ctx) => { fixtures.fallbacks.push(ctx) },
|
|
37
|
-
onAwarenessPing: (ctx) => { fixtures.awarenessPings.push(ctx) },
|
|
38
|
-
// The ack budget (a new poke that fires *earlier* than `soft`) is
|
|
39
|
-
// disabled by default in this fixture so the soft/firm/fallback
|
|
40
|
-
// ladder tests stay isolated from it. The 'ack budget' describe
|
|
41
|
-
// block opts back in with a real value.
|
|
42
|
-
//
|
|
43
|
-
// The 60s awarenessPing is also disabled by default so the existing
|
|
44
|
-
// soft/firm/fallback ladder tests don't see the new sibling event;
|
|
45
|
-
// the 'awareness ping' describe block opts back in.
|
|
46
33
|
thresholdsMs: {
|
|
47
34
|
...DEFAULT_THRESHOLDS,
|
|
48
|
-
ack: Number.MAX_SAFE_INTEGER,
|
|
49
|
-
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
50
35
|
...(opts?.thresholds ?? {}),
|
|
51
36
|
},
|
|
52
37
|
})
|
|
@@ -85,44 +70,24 @@ describe('silence-poke — kill switch', () => {
|
|
|
85
70
|
})
|
|
86
71
|
})
|
|
87
72
|
|
|
88
|
-
|
|
89
|
-
|
|
73
|
+
// Post-retirement: the model-targeted nudge ladder (ack/soft/firm) and
|
|
74
|
+
// the 60s awareness ping are gone. The ONLY framework action left is the
|
|
75
|
+
// 300s fallback, which the gateway turns into a user-visible "still
|
|
76
|
+
// working…" message AND an unwedge. These tests pin that single terminal
|
|
77
|
+
// action.
|
|
78
|
+
describe('silence-poke — framework fallback (the only remaining action)', () => {
|
|
79
|
+
it('does not fire before the 300s threshold', () => {
|
|
90
80
|
const fx = setupDeps()
|
|
91
81
|
startTurn('chat:0', 0)
|
|
92
|
-
|
|
93
|
-
__tickForTests(
|
|
94
|
-
expect(
|
|
82
|
+
__tickForTests(120_000)
|
|
83
|
+
__tickForTests(299_000)
|
|
84
|
+
expect(fx.fallbacks).toHaveLength(0)
|
|
95
85
|
expect(fx.emitted).toHaveLength(0)
|
|
96
|
-
|
|
97
|
-
__tickForTests(75_000) // at threshold
|
|
98
|
-
expect(fx.emitted).toEqual([
|
|
99
|
-
expect.objectContaining({ kind: 'silence_poke_fired', level: 'soft', subagent_wait: false }),
|
|
100
|
-
])
|
|
101
|
-
const text = consumeArmedPoke()
|
|
102
|
-
expect(text).toContain('[silence-poke]')
|
|
103
|
-
expect(text).toContain('75s')
|
|
104
|
-
})
|
|
105
|
-
|
|
106
|
-
it('firm poke fires at 180s after soft', () => {
|
|
107
|
-
const fx = setupDeps()
|
|
108
|
-
startTurn('chat:0', 0)
|
|
109
|
-
__tickForTests(75_000)
|
|
110
|
-
consumeArmedPoke() // drain the soft
|
|
111
|
-
__tickForTests(180_000)
|
|
112
|
-
expect(fx.emitted.map((e) => e.kind)).toEqual([
|
|
113
|
-
'silence_poke_fired',
|
|
114
|
-
'silence_poke_fired',
|
|
115
|
-
])
|
|
116
|
-
expect(fx.emitted[1]).toMatchObject({ level: 'firm' })
|
|
117
|
-
const firm = consumeArmedPoke()
|
|
118
|
-
expect(firm).toContain('3 minutes silent')
|
|
119
86
|
})
|
|
120
87
|
|
|
121
|
-
it('
|
|
88
|
+
it('fires at 300s with kind=working when no thinking signal', () => {
|
|
122
89
|
const fx = setupDeps()
|
|
123
90
|
startTurn('chatX:42', 0)
|
|
124
|
-
__tickForTests(75_000)
|
|
125
|
-
__tickForTests(180_000)
|
|
126
91
|
__tickForTests(300_000)
|
|
127
92
|
expect(fx.fallbacks).toEqual([
|
|
128
93
|
expect.objectContaining({ chatId: 'chatX', threadId: 42, fallbackKind: 'working' }),
|
|
@@ -130,23 +95,19 @@ describe('silence-poke — escalation ladder', () => {
|
|
|
130
95
|
expect(fx.emitted.at(-1)).toMatchObject({ kind: 'silence_fallback_sent', fallback_kind: 'working' })
|
|
131
96
|
})
|
|
132
97
|
|
|
133
|
-
it('
|
|
98
|
+
it('fires with kind=thinking if a thinking event landed within 30s', () => {
|
|
134
99
|
const fx = setupDeps()
|
|
135
100
|
startTurn('c:0', 0)
|
|
136
101
|
noteThinking('c:0', 280_000)
|
|
137
|
-
__tickForTests(75_000)
|
|
138
|
-
__tickForTests(180_000)
|
|
139
102
|
__tickForTests(300_000)
|
|
140
103
|
expect(fx.fallbacks).toEqual([
|
|
141
104
|
expect.objectContaining({ fallbackKind: 'thinking' }),
|
|
142
105
|
])
|
|
143
106
|
})
|
|
144
107
|
|
|
145
|
-
it('
|
|
108
|
+
it('fires at most once per turn', () => {
|
|
146
109
|
const fx = setupDeps()
|
|
147
110
|
startTurn('c:0', 0)
|
|
148
|
-
__tickForTests(75_000)
|
|
149
|
-
__tickForTests(180_000)
|
|
150
111
|
__tickForTests(300_000)
|
|
151
112
|
__tickForTests(450_000) // continued silence
|
|
152
113
|
__tickForTests(600_000)
|
|
@@ -154,245 +115,17 @@ describe('silence-poke — escalation ladder', () => {
|
|
|
154
115
|
})
|
|
155
116
|
})
|
|
156
117
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
// 'ack' poke if NOTHING has been sent within `thresholds.ack` of turn
|
|
160
|
-
// start. It is a one-shot nudge (the model still authors every word),
|
|
161
|
-
// deliberately OUTSIDE the soft/firm/fallback `pokesFired` ladder: if
|
|
162
|
-
// the model never acks, the ladder still escalates on its own schedule.
|
|
163
|
-
// See `reference/conversational-pacing.md` and the "Open with an
|
|
164
|
-
// acknowledgement" bullet in `profiles/_shared/telegram-style.md.hbs`.
|
|
165
|
-
//
|
|
166
|
-
// NB: `setupDeps` disables the ack budget by default (ack = MAX_SAFE);
|
|
167
|
-
// every test here opts back in with a real `ack` threshold.
|
|
168
|
-
describe('silence-poke — ack budget (PR1 human-feel UX)', () => {
|
|
169
|
-
it('arms an ack poke at the ack threshold when nothing has been sent', () => {
|
|
170
|
-
const fx = setupDeps({ thresholds: { ack: 10_000 } })
|
|
171
|
-
startTurn('chat:0', 0)
|
|
172
|
-
|
|
173
|
-
__tickForTests(9_000) // before the ack budget
|
|
174
|
-
expect(consumeArmedPoke()).toBeNull()
|
|
175
|
-
expect(fx.emitted).toHaveLength(0)
|
|
176
|
-
|
|
177
|
-
__tickForTests(10_000) // at the ack budget
|
|
178
|
-
expect(fx.emitted).toEqual([
|
|
179
|
-
expect.objectContaining({ kind: 'silence_poke_fired', level: 'ack' }),
|
|
180
|
-
])
|
|
181
|
-
const text = consumeArmedPoke()
|
|
182
|
-
expect(text).toContain('[silence-poke]')
|
|
183
|
-
expect(text).toContain('reply')
|
|
184
|
-
})
|
|
185
|
-
|
|
186
|
-
it('does NOT arm an ack poke if an outbound landed before the budget', () => {
|
|
187
|
-
const fx = setupDeps({ thresholds: { ack: 10_000 } })
|
|
188
|
-
startTurn('chat:0', 0)
|
|
189
|
-
noteOutbound('chat:0', 3_000) // model acked fast — inside the budget
|
|
190
|
-
__tickForTests(10_000)
|
|
191
|
-
__tickForTests(20_000)
|
|
192
|
-
expect(consumeArmedPoke()).toBeNull()
|
|
193
|
-
expect(
|
|
194
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'ack'),
|
|
195
|
-
).toHaveLength(0)
|
|
196
|
-
})
|
|
197
|
-
|
|
198
|
-
it('is one-shot — never re-arms even if the model goes quiet again', () => {
|
|
199
|
-
const fx = setupDeps({ thresholds: { ack: 10_000 } })
|
|
200
|
-
startTurn('chat:0', 0)
|
|
201
|
-
__tickForTests(10_000) // ack fires
|
|
202
|
-
consumeArmedPoke() // drain it
|
|
203
|
-
noteOutbound('chat:0', 12_000) // model finally acks
|
|
204
|
-
// The model goes quiet again. The ack poke is specifically about the
|
|
205
|
-
// FIRST outbound — it must not fire twice. A later silence is the
|
|
206
|
-
// soft poke's job, not the ack budget's.
|
|
207
|
-
__tickForTests(40_000)
|
|
208
|
-
expect(
|
|
209
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'ack'),
|
|
210
|
-
).toHaveLength(1)
|
|
211
|
-
})
|
|
212
|
-
|
|
213
|
-
it('ackPokeFired resets across turns even when endTurn was skipped (CC-5 invariant)', () => {
|
|
214
|
-
// Mirrors the subagentDispatchActive CC-5 guard: `ackPokeFired` is a
|
|
215
|
-
// turn-scoped one-shot flag, and the only thing that keeps it from
|
|
216
|
-
// leaking into the next turn (when an abnormal abort skips endTurn)
|
|
217
|
-
// is startTurn's unconditional state overwrite. Pin that here so a
|
|
218
|
-
// future read-modify-write refactor of startTurn fails loud.
|
|
219
|
-
setupDeps({ thresholds: { ack: 10_000 } })
|
|
220
|
-
startTurn('k', 0)
|
|
221
|
-
__tickForTests(10_000) // ack fires
|
|
222
|
-
expect(__getStateForTests('k')?.ackPokeFired).toBe(true)
|
|
223
|
-
// Turn 2 in the same key, no endTurn — startTurn MUST clear the flag.
|
|
224
|
-
startTurn('k', 1_000_000)
|
|
225
|
-
expect(__getStateForTests('k')?.ackPokeFired).toBe(false)
|
|
226
|
-
})
|
|
227
|
-
|
|
228
|
-
it('does not advance the ladder — soft still requires a full 75s of silence', () => {
|
|
229
|
-
// The ack poke is deliberately outside `pokesFired`. After it fires,
|
|
230
|
-
// a soft poke must still wait the normal 75s.
|
|
231
|
-
const fx = setupDeps({ thresholds: { ack: 10_000 } })
|
|
232
|
-
startTurn('chat:0', 0)
|
|
233
|
-
__tickForTests(10_000) // ack
|
|
234
|
-
consumeArmedPoke()
|
|
235
|
-
__tickForTests(70_000) // 70s total — under the 75s soft threshold
|
|
236
|
-
expect(
|
|
237
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'soft'),
|
|
238
|
-
).toHaveLength(0)
|
|
239
|
-
__tickForTests(75_000)
|
|
240
|
-
expect(
|
|
241
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === 'soft'),
|
|
242
|
-
).toHaveLength(1)
|
|
243
|
-
})
|
|
244
|
-
|
|
245
|
-
it('still escalates ack -> soft -> firm -> fallback on a turn that never acks', () => {
|
|
246
|
-
const fx = setupDeps({ thresholds: { ack: 10_000 } })
|
|
247
|
-
startTurn('chat:0', 0)
|
|
248
|
-
__tickForTests(10_000) // ack
|
|
249
|
-
consumeArmedPoke()
|
|
250
|
-
__tickForTests(75_000) // soft
|
|
251
|
-
consumeArmedPoke()
|
|
252
|
-
__tickForTests(180_000) // firm
|
|
253
|
-
consumeArmedPoke()
|
|
254
|
-
__tickForTests(300_000) // fallback
|
|
255
|
-
const trail = fx.emitted.map((e) =>
|
|
256
|
-
e.kind === 'silence_poke_fired'
|
|
257
|
-
? `poke:${e.level}`
|
|
258
|
-
: e.kind === 'silence_fallback_sent'
|
|
259
|
-
? `fallback:${e.fallback_kind}`
|
|
260
|
-
: e.kind,
|
|
261
|
-
)
|
|
262
|
-
expect(trail).toEqual([
|
|
263
|
-
'poke:ack',
|
|
264
|
-
'poke:soft',
|
|
265
|
-
'poke:firm',
|
|
266
|
-
'fallback:working',
|
|
267
|
-
])
|
|
268
|
-
})
|
|
269
|
-
|
|
270
|
-
it('formatPokeText("ack") nudges for a human acknowledgement via reply', () => {
|
|
271
|
-
const text = formatPokeText('ack')
|
|
272
|
-
expect(text).toContain('[silence-poke]')
|
|
273
|
-
expect(text.toLowerCase()).toContain('acknowledg')
|
|
274
|
-
expect(text).toContain('reply')
|
|
275
|
-
})
|
|
276
|
-
})
|
|
277
|
-
|
|
278
|
-
describe('silence-poke — outbound resets clock + success measurement', () => {
|
|
279
|
-
it('noteOutbound resets the silence clock', () => {
|
|
280
|
-
setupDeps()
|
|
281
|
-
startTurn('k', 0)
|
|
282
|
-
noteOutbound('k', 50_000)
|
|
283
|
-
__tickForTests(120_000) // 70s after outbound — under 75s soft threshold
|
|
284
|
-
expect(consumeArmedPoke()).toBeNull()
|
|
285
|
-
})
|
|
286
|
-
|
|
287
|
-
it('emits silence_poke_succeeded when outbound lands within success window after a poke', () => {
|
|
118
|
+
describe('silence-poke — outbound resets the silence clock', () => {
|
|
119
|
+
it('noteOutbound pushes the fallback measurement to the last outbound', () => {
|
|
288
120
|
const fx = setupDeps()
|
|
289
121
|
startTurn('k', 0)
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
expect(fx.emitted.map((e) => e.kind)).toContain('silence_poke_succeeded')
|
|
293
|
-
const success = fx.emitted.find((e) => e.kind === 'silence_poke_succeeded')!
|
|
294
|
-
expect(success).toMatchObject({ level: 'soft', latency_ms: 5_000 })
|
|
295
|
-
})
|
|
296
|
-
|
|
297
|
-
it('does NOT emit silence_poke_succeeded if outbound is later than the success window', () => {
|
|
298
|
-
const fx = setupDeps()
|
|
299
|
-
startTurn('k', 0)
|
|
300
|
-
__tickForTests(75_000)
|
|
301
|
-
noteOutbound('k', 95_000) // 20s later — outside 15s window
|
|
302
|
-
expect(fx.emitted.filter((e) => e.kind === 'silence_poke_succeeded')).toHaveLength(0)
|
|
303
|
-
})
|
|
304
|
-
|
|
305
|
-
it('outbound resets pokesFired so the next 75s silence can re-arm', () => {
|
|
306
|
-
const fx = setupDeps()
|
|
307
|
-
startTurn('k', 0)
|
|
308
|
-
__tickForTests(75_000) // soft fires
|
|
309
|
-
noteOutbound('k', 100_000) // reset
|
|
310
|
-
__tickForTests(180_000) // 80s since outbound — under threshold
|
|
311
|
-
__tickForTests(180_000 + 50_000) // would be 130s if not reset; still no fire because clock zero = 100_000, so silence = 130s
|
|
312
|
-
// Actually 230 - 100 = 130s past outbound, > 75s soft threshold:
|
|
313
|
-
expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(2)
|
|
314
|
-
expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired').at(-1)).toMatchObject({ level: 'soft' })
|
|
315
|
-
})
|
|
316
|
-
})
|
|
317
|
-
|
|
318
|
-
describe('silence-poke — subagent dispatch extension', () => {
|
|
319
|
-
it('extends soft threshold to 300s when noteSubagentDispatch was called', () => {
|
|
320
|
-
const fx = setupDeps()
|
|
321
|
-
startTurn('k', 0)
|
|
322
|
-
noteSubagentDispatch('k')
|
|
323
|
-
__tickForTests(120_000) // past 75s but under 300s subagent threshold
|
|
324
|
-
expect(fx.emitted).toHaveLength(0)
|
|
122
|
+
noteOutbound('k', 250_000)
|
|
123
|
+
// 300s from turn start, but only 50s since the outbound — no fire.
|
|
325
124
|
__tickForTests(300_000)
|
|
326
|
-
expect(fx.
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
it('subagent flag PERSISTS through narrating outbound (PR4 fix)', () => {
|
|
331
|
-
// Reviewer note from PR2 #1125 — the parent's "spinning up @reviewer"
|
|
332
|
-
// narration is the outbound that opens the wait. Clearing the
|
|
333
|
-
// subagent flag at that moment would defeat the extended-threshold
|
|
334
|
-
// guarantee for the wait that follows. The flag must persist until
|
|
335
|
-
// endTurn().
|
|
336
|
-
const fx = setupDeps()
|
|
337
|
-
startTurn('k', 0)
|
|
338
|
-
noteSubagentDispatch('k')
|
|
339
|
-
noteOutbound('k', 60_000) // parent narrates "spinning up @reviewer"
|
|
340
|
-
// Subagent wait continues. With the flag persistent, soft threshold
|
|
341
|
-
// is still 300s, so a 90s gap should NOT fire.
|
|
342
|
-
__tickForTests(60_000 + 90_000)
|
|
343
|
-
expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(0)
|
|
344
|
-
// At 300s past the outbound, the soft poke fires (subagent wait
|
|
345
|
-
// is genuinely long).
|
|
346
|
-
__tickForTests(60_000 + 300_000)
|
|
347
|
-
expect(fx.emitted.filter((e) => e.kind === 'silence_poke_fired')).toHaveLength(1)
|
|
348
|
-
expect(fx.emitted[0]).toMatchObject({ level: 'soft', subagent_wait: true })
|
|
349
|
-
})
|
|
350
|
-
|
|
351
|
-
it('subagent flag clears on endTurn', () => {
|
|
352
|
-
setupDeps()
|
|
353
|
-
startTurn('k', 0)
|
|
354
|
-
noteSubagentDispatch('k')
|
|
355
|
-
// Take snapshot
|
|
356
|
-
const before = __getStateForTests('k')
|
|
357
|
-
expect(before?.subagentDispatchActive).toBe(true)
|
|
358
|
-
endTurn('k')
|
|
359
|
-
expect(__getStateForTests('k')).toBeUndefined()
|
|
360
|
-
})
|
|
361
|
-
|
|
362
|
-
// CC-5 defensive invariant (`docs/status-ask-cause-classes.md`):
|
|
363
|
-
// the original catalog claim was that `subagentDispatchActive` can
|
|
364
|
-
// leak across turns if `endTurn` is skipped (turn dies abnormally,
|
|
365
|
-
// gateway crashes between turn_end signal and cleanup). Investigation
|
|
366
|
-
// shows the claim doesn't hold — `startTurn` calls `state.set(key, ...)`
|
|
367
|
-
// unconditionally with `subagentDispatchActive: false`, so the next
|
|
368
|
-
// turn's startTurn wipes any stale flag.
|
|
369
|
-
//
|
|
370
|
-
// We're pinning that invariant here as a regression guard. If a future
|
|
371
|
-
// refactor changes `startTurn` to a read-modify-write (merge instead
|
|
372
|
-
// of overwrite), this test breaks immediately. Keeps the catalog's
|
|
373
|
-
// worry productive: even though it's not currently a bug, the
|
|
374
|
-
// invariant that makes it not-a-bug is now load-bearing.
|
|
375
|
-
it('startTurn overwrites stale subagentDispatchActive when endTurn was skipped (CC-5 invariant)', () => {
|
|
376
|
-
const fx = setupDeps()
|
|
377
|
-
// Turn 1: dispatch a subagent, then SKIP endTurn (simulating an
|
|
378
|
-
// abnormal abort path — context-exhaustion, gateway crash mid-turn,
|
|
379
|
-
// etc).
|
|
380
|
-
startTurn('k', 0)
|
|
381
|
-
noteSubagentDispatch('k')
|
|
382
|
-
expect(__getStateForTests('k')?.subagentDispatchActive).toBe(true)
|
|
383
|
-
|
|
384
|
-
// Turn 2 in the same key: startTurn MUST clear the flag.
|
|
385
|
-
startTurn('k', 1_000_000)
|
|
386
|
-
expect(__getStateForTests('k')?.subagentDispatchActive).toBe(false)
|
|
387
|
-
|
|
388
|
-
// Verify the soft poke fires at the normal 75s threshold, not at
|
|
389
|
-
// the extended 300s subagentSoft threshold. If the flag had leaked,
|
|
390
|
-
// ticking at 75s after the new turn start would find subagentSoft
|
|
391
|
-
// active and skip the fire.
|
|
392
|
-
__tickForTests(1_000_000 + 75_000)
|
|
393
|
-
const fired = fx.emitted.filter((e) => e.kind === 'silence_poke_fired')
|
|
394
|
-
expect(fired).toHaveLength(1)
|
|
395
|
-
expect(fired[0]).toMatchObject({ level: 'soft', subagent_wait: false })
|
|
125
|
+
expect(fx.fallbacks).toHaveLength(0)
|
|
126
|
+
// 300s after the outbound — now it fires.
|
|
127
|
+
__tickForTests(550_000)
|
|
128
|
+
expect(fx.fallbacks).toHaveLength(1)
|
|
396
129
|
})
|
|
397
130
|
})
|
|
398
131
|
|
|
@@ -406,24 +139,16 @@ describe('silence-poke — subagent dispatch extension', () => {
|
|
|
406
139
|
// contradicts the gateway's earlier "⚠️ Context window full" / etc.
|
|
407
140
|
//
|
|
408
141
|
// Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`).
|
|
409
|
-
// The fix lives in the gateway (context-exhaust path adds the
|
|
410
|
-
// endTurn call); these tests pin the invariant at the silence-poke
|
|
411
|
-
// level so the contract is verifiable in isolation of the gateway.
|
|
412
142
|
describe('silence-poke — abnormal turn-end invariants (CC-5 follow-up)', () => {
|
|
413
143
|
it('endTurn before the 300s fallback threshold prevents the fallback from firing', () => {
|
|
414
144
|
const fx = setupDeps()
|
|
415
145
|
startTurn('k', 0)
|
|
416
|
-
//
|
|
417
|
-
// recover.
|
|
418
|
-
__tickForTests(75_000)
|
|
419
|
-
__tickForTests(180_000)
|
|
146
|
+
__tickForTests(180_000) // turn alive; model could still recover
|
|
420
147
|
// Gateway aborts the turn at t=250s (context exhaust, wedge,
|
|
421
|
-
// crash teardown — any abnormal bail)
|
|
422
|
-
// gets called BEFORE the 300s threshold.
|
|
148
|
+
// crash teardown — any abnormal bail) BEFORE the 300s threshold.
|
|
423
149
|
endTurn('k')
|
|
424
|
-
// Five minutes total elapse
|
|
425
|
-
//
|
|
426
|
-
// would fire here. The contract is: it MUST NOT.
|
|
150
|
+
// Five minutes total elapse. If endTurn left the state in the Map,
|
|
151
|
+
// the framework fallback would fire here. The contract: it MUST NOT.
|
|
427
152
|
__tickForTests(300_000)
|
|
428
153
|
expect(fx.fallbacks).toHaveLength(0)
|
|
429
154
|
expect(
|
|
@@ -431,47 +156,22 @@ describe('silence-poke — abnormal turn-end invariants (CC-5 follow-up)', () =>
|
|
|
431
156
|
).toHaveLength(0)
|
|
432
157
|
})
|
|
433
158
|
|
|
434
|
-
it('endTurn after a soft poke fired does not later emit a stale fallback', () => {
|
|
435
|
-
const fx = setupDeps()
|
|
436
|
-
startTurn('k', 0)
|
|
437
|
-
__tickForTests(75_000) // soft fires
|
|
438
|
-
expect(
|
|
439
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired'),
|
|
440
|
-
).toHaveLength(1)
|
|
441
|
-
// Turn aborts well before firm/fallback thresholds.
|
|
442
|
-
endTurn('k')
|
|
443
|
-
__tickForTests(180_000)
|
|
444
|
-
__tickForTests(300_000)
|
|
445
|
-
// No firm, no fallback after the turn-abort.
|
|
446
|
-
expect(
|
|
447
|
-
fx.emitted.filter((e) => e.kind === 'silence_poke_fired'),
|
|
448
|
-
).toHaveLength(1) // unchanged: only the original soft
|
|
449
|
-
expect(fx.fallbacks).toHaveLength(0)
|
|
450
|
-
})
|
|
451
|
-
|
|
452
159
|
// #1289: the flush-backstop turn-end branch in the gateway (the path
|
|
453
160
|
// taken when the agent emits assistant text but never calls the reply
|
|
454
161
|
// tool) was retrofitted in #1067 to null `currentTurn` early but never
|
|
455
|
-
// had `silencePoke.endTurn` added — leaving
|
|
162
|
+
// had `silencePoke.endTurn` added — leaving state populated so the
|
|
456
163
|
// 300s framework fallback fired after the gateway already flushed the
|
|
457
|
-
// captured prose and considered the turn over. Pin the contract
|
|
458
|
-
//
|
|
459
|
-
//
|
|
460
|
-
//
|
|
164
|
+
// captured prose and considered the turn over. Pin the contract: a
|
|
165
|
+
// turn that records an outbound (the flushed message) and then calls
|
|
166
|
+
// endTurn must not later fire a fallback even if 300s elapses from the
|
|
167
|
+
// original turn start.
|
|
461
168
|
it('#1289: flush-backstop turn-end (outbound + endTurn) suppresses the 300s fallback', () => {
|
|
462
169
|
const fx = setupDeps()
|
|
463
170
|
startTurn('k', 0)
|
|
464
|
-
// Some time passes while the agent generates prose without calling
|
|
465
|
-
// the reply tool. No soft/firm armed yet.
|
|
466
171
|
__tickForTests(60_000)
|
|
467
|
-
// Gateway turn-flush fires: captured text is sent as an outbound,
|
|
468
|
-
// then the flush branch nulls currentTurn AND (post-fix) calls
|
|
469
|
-
// signalTracker.clear + silencePoke.endTurn.
|
|
470
172
|
noteOutbound('k', 60_000)
|
|
471
173
|
endTurn('k')
|
|
472
|
-
|
|
473
|
-
// fallback fired here. Post-fix: the state is drained, no fallback.
|
|
474
|
-
__tickForTests(240_000)
|
|
174
|
+
__tickForTests(360_000)
|
|
475
175
|
expect(fx.fallbacks).toHaveLength(0)
|
|
476
176
|
expect(
|
|
477
177
|
fx.emitted.filter((e) => e.kind === 'silence_fallback_sent'),
|
|
@@ -493,8 +193,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
493
193
|
const fx = setupDeps()
|
|
494
194
|
startTurn('k', 0)
|
|
495
195
|
noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
|
|
496
|
-
__tickForTests(75_000)
|
|
497
|
-
__tickForTests(180_000)
|
|
498
196
|
__tickForTests(305_000)
|
|
499
197
|
expect(fx.fallbacks).toHaveLength(1)
|
|
500
198
|
const ctx = fx.fallbacks[0]!
|
|
@@ -548,8 +246,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
548
246
|
})
|
|
549
247
|
|
|
550
248
|
it('raw mcp__ tool name with NO label falls back to the bare name (no leak-but-no-better-option)', () => {
|
|
551
|
-
// If the label table doesn't recognise an MCP tool, we have nothing
|
|
552
|
-
// better to show than the raw name. Better honest-ugly than silent.
|
|
553
249
|
const text = formatFrameworkFallbackText('working', 305_000, [
|
|
554
250
|
{ name: 'mcp__some-third-party__do_thing', label: null, durationMs: 305_000 },
|
|
555
251
|
])
|
|
@@ -559,9 +255,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
559
255
|
})
|
|
560
256
|
|
|
561
257
|
it('built-in tool (Grep) with a label keeps the prior "running Name label" shape — name is already human-readable', () => {
|
|
562
|
-
// Regression guard: don't accidentally drop the built-in tool name
|
|
563
|
-
// when generalising the MCP rule. "Grep" is human-readable; the
|
|
564
|
-
// label ("foo") is supplementary detail like the search pattern.
|
|
565
258
|
const text = formatFrameworkFallbackText('working', 305_000, [
|
|
566
259
|
{ name: 'Grep', label: 'foo', durationMs: 305_000 },
|
|
567
260
|
])
|
|
@@ -577,7 +270,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
577
270
|
expect(
|
|
578
271
|
formatFrameworkFallbackText('thinking', 305_000, []),
|
|
579
272
|
).toBe('still thinking… (no update from agent in 5 min)')
|
|
580
|
-
// No third arg → same as empty array.
|
|
581
273
|
expect(
|
|
582
274
|
formatFrameworkFallbackText('working', 305_000),
|
|
583
275
|
).toBe('still working… (no update from agent in 5 min)')
|
|
@@ -596,8 +288,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
596
288
|
startTurn('k', 0)
|
|
597
289
|
noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
|
|
598
290
|
noteToolEnd('k', 'T1', 200_000)
|
|
599
|
-
__tickForTests(75_000)
|
|
600
|
-
__tickForTests(180_000)
|
|
601
291
|
__tickForTests(305_000)
|
|
602
292
|
expect(fx.fallbacks).toHaveLength(1)
|
|
603
293
|
expect(fx.fallbacks[0]!.inFlightTools).toHaveLength(0)
|
|
@@ -608,8 +298,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
608
298
|
startTurn('k', 0)
|
|
609
299
|
noteToolStart('k', 'T1', 'Grep', null, 30_000)
|
|
610
300
|
noteToolLabel('k', 'T1', '"refined-from-sidecar"')
|
|
611
|
-
__tickForTests(75_000)
|
|
612
|
-
__tickForTests(180_000)
|
|
613
301
|
__tickForTests(305_000)
|
|
614
302
|
expect(fx.fallbacks[0]!.inFlightTools[0]!.label).toBe('"refined-from-sidecar"')
|
|
615
303
|
})
|
|
@@ -620,7 +308,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
620
308
|
noteToolStart('k', 'T1', 'Grep', 'foo', 30_000)
|
|
621
309
|
expect(__getStateForTests('k')!.inFlightTools.size).toBe(1)
|
|
622
310
|
endTurn('k')
|
|
623
|
-
// A fresh turn under the same key has an empty map.
|
|
624
311
|
startTurn('k', 1_000_000)
|
|
625
312
|
expect(__getStateForTests('k')!.inFlightTools.size).toBe(0)
|
|
626
313
|
})
|
|
@@ -632,8 +319,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
632
319
|
noteToolStart('k', 'T-late', 'Read', 'recent.ts', 250_000)
|
|
633
320
|
noteToolStart('k', 'T-early', 'Grep', '"oldest"', 20_000)
|
|
634
321
|
noteToolStart('k', 'T-mid', 'Bash', null, 100_000)
|
|
635
|
-
__tickForTests(75_000)
|
|
636
|
-
__tickForTests(180_000)
|
|
637
322
|
__tickForTests(305_000)
|
|
638
323
|
const snap = fx.fallbacks[0]!.inFlightTools
|
|
639
324
|
expect(snap.map(t => t.name)).toEqual(['Grep', 'Bash', 'Read'])
|
|
@@ -642,7 +327,7 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
642
327
|
it('tool churn does NOT reset the silence clock (header invariant preserved)', () => {
|
|
643
328
|
// The whole point of #1292 (b) over (a) is that we enrich the
|
|
644
329
|
// fallback TEXT, never the timing. Tool activity must not delay
|
|
645
|
-
// or suppress the
|
|
330
|
+
// or suppress the 300s fallback.
|
|
646
331
|
const fx = setupDeps()
|
|
647
332
|
startTurn('k', 0)
|
|
648
333
|
// A constant stream of tool churn through the entire 5min window —
|
|
@@ -651,35 +336,17 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
651
336
|
noteToolStart('k', `T-${t}`, 'Grep', 'foo', t)
|
|
652
337
|
noteToolEnd('k', `T-${t}`, t + 500)
|
|
653
338
|
}
|
|
654
|
-
__tickForTests(
|
|
655
|
-
__tickForTests(180_000) // firm
|
|
656
|
-
__tickForTests(305_000) // fallback
|
|
657
|
-
expect(
|
|
658
|
-
fx.emitted.filter(e => e.kind === 'silence_poke_fired'),
|
|
659
|
-
).toHaveLength(2)
|
|
339
|
+
__tickForTests(305_000)
|
|
660
340
|
expect(fx.fallbacks).toHaveLength(1)
|
|
661
341
|
})
|
|
662
342
|
|
|
663
|
-
it('Task tool
|
|
664
|
-
// Two flags are independent: the soft-threshold extension still
|
|
665
|
-
// works for sub-agent waits (existing behaviour), AND the fallback
|
|
666
|
-
// message names the Task tool as the actual observable.
|
|
343
|
+
it('Task tool populates inFlightTools so the fallback names it as the observable', () => {
|
|
667
344
|
const fx = setupDeps()
|
|
668
345
|
startTurn('k', 0)
|
|
669
|
-
// Gateway calls
|
|
670
|
-
// gateway.ts onSessionEvent).
|
|
671
|
-
noteSubagentDispatch('k')
|
|
346
|
+
// Gateway calls noteToolStart for a Task tool_use (mirrors the
|
|
347
|
+
// wiring at gateway.ts onSessionEvent).
|
|
672
348
|
noteToolStart('k', 'T1', 'Task', 'spinning up @researcher', 10_000)
|
|
673
|
-
|
|
674
|
-
// fires at 75s and no firm fires at 180s (firm requires pokesFired===1,
|
|
675
|
-
// i.e. soft must fire first). Once we cross the 300s subagent-soft,
|
|
676
|
-
// soft fires; each tick fires one level via the `continue` in tick(),
|
|
677
|
-
// so we need three ticks to walk soft → firm → fallback.
|
|
678
|
-
__tickForTests(75_000) // suppressed by subagent
|
|
679
|
-
__tickForTests(180_000) // still suppressed
|
|
680
|
-
__tickForTests(305_000) // soft fires (subagent soft = 300s)
|
|
681
|
-
__tickForTests(305_001) // firm fires
|
|
682
|
-
__tickForTests(305_002) // fallback fires
|
|
349
|
+
__tickForTests(305_000)
|
|
683
350
|
expect(fx.fallbacks).toHaveLength(1)
|
|
684
351
|
const snap = fx.fallbacks[0]!.inFlightTools
|
|
685
352
|
expect(snap[0]!.name).toBe('Task')
|
|
@@ -688,7 +355,6 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
688
355
|
|
|
689
356
|
it('noteToolStart on an unknown key is a no-op (no crash, no state)', () => {
|
|
690
357
|
setupDeps()
|
|
691
|
-
// No startTurn first — silence-poke ignores the call.
|
|
692
358
|
noteToolStart('k-never-started', 'T1', 'Grep', 'foo', 30_000)
|
|
693
359
|
expect(__getStateForTests('k-never-started')).toBeUndefined()
|
|
694
360
|
})
|
|
@@ -714,38 +380,11 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
714
380
|
const text = formatFrameworkFallbackText('working', 305_000, [
|
|
715
381
|
{ name: 'Grep', label: longLabel, durationMs: 305_000 },
|
|
716
382
|
])
|
|
717
|
-
// 60-char cap (with trailing ellipsis) — verify clipping without
|
|
718
|
-
// pinning exact bytes.
|
|
719
383
|
expect(text.length).toBeLessThan(120)
|
|
720
384
|
expect(text).toContain('…')
|
|
721
385
|
})
|
|
722
386
|
})
|
|
723
387
|
|
|
724
|
-
describe('silence-poke — consumeArmedPoke draining', () => {
|
|
725
|
-
it('drains the armed flag so the next call returns null', () => {
|
|
726
|
-
setupDeps()
|
|
727
|
-
startTurn('k', 0)
|
|
728
|
-
__tickForTests(75_000)
|
|
729
|
-
expect(consumeArmedPoke()).not.toBeNull()
|
|
730
|
-
expect(consumeArmedPoke()).toBeNull()
|
|
731
|
-
})
|
|
732
|
-
|
|
733
|
-
it('returns null when nothing is armed', () => {
|
|
734
|
-
setupDeps()
|
|
735
|
-
startTurn('k', 0)
|
|
736
|
-
expect(consumeArmedPoke()).toBeNull()
|
|
737
|
-
})
|
|
738
|
-
|
|
739
|
-
it('returns the matching level text', () => {
|
|
740
|
-
setupDeps()
|
|
741
|
-
startTurn('k', 0)
|
|
742
|
-
__tickForTests(75_000)
|
|
743
|
-
expect(consumeArmedPoke()).toContain('75s')
|
|
744
|
-
__tickForTests(180_000)
|
|
745
|
-
expect(consumeArmedPoke()).toContain('3 minutes')
|
|
746
|
-
})
|
|
747
|
-
})
|
|
748
|
-
|
|
749
388
|
describe('silence-poke — endTurn cleanup', () => {
|
|
750
389
|
it('endTurn drops state', () => {
|
|
751
390
|
setupDeps()
|
|
@@ -766,104 +405,50 @@ describe('silence-poke — independence across turns', () => {
|
|
|
766
405
|
const fx = setupDeps()
|
|
767
406
|
startTurn('a:0', 0)
|
|
768
407
|
startTurn('b:0', 0)
|
|
769
|
-
noteOutbound('a:0',
|
|
770
|
-
__tickForTests(
|
|
771
|
-
// a's clock was reset to
|
|
772
|
-
// b's clock is still at 0
|
|
773
|
-
expect(fx.
|
|
774
|
-
expect(fx.
|
|
408
|
+
noteOutbound('a:0', 250_000)
|
|
409
|
+
__tickForTests(300_000)
|
|
410
|
+
// a's clock was reset to 250_000 (silence=50s) — no fire.
|
|
411
|
+
// b's clock is still at 0 (silence=300s) — fallback fires.
|
|
412
|
+
expect(fx.fallbacks).toHaveLength(1)
|
|
413
|
+
expect(fx.fallbacks[0]).toMatchObject({ key: 'b:0' })
|
|
775
414
|
})
|
|
776
415
|
})
|
|
777
416
|
|
|
778
417
|
describe('silence-poke — fallback handler errors do not break timer', () => {
|
|
779
418
|
it('continues to function if onFrameworkFallback throws', () => {
|
|
780
|
-
const fx: TestFixtures = { emitted: [], fallbacks: []
|
|
419
|
+
const fx: TestFixtures = { emitted: [], fallbacks: [] }
|
|
781
420
|
__setDepsForTests({
|
|
782
421
|
emitMetric: (e) => fx.emitted.push(e),
|
|
783
422
|
onFrameworkFallback: () => { throw new Error('oh no') },
|
|
784
|
-
|
|
785
|
-
// ack + awareness-ping out of the way — this test exercises the
|
|
786
|
-
// soft/firm/fallback ladder under a throwing fallback handler.
|
|
787
|
-
thresholdsMs: {
|
|
788
|
-
...DEFAULT_THRESHOLDS,
|
|
789
|
-
ack: Number.MAX_SAFE_INTEGER,
|
|
790
|
-
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
791
|
-
},
|
|
423
|
+
thresholdsMs: { ...DEFAULT_THRESHOLDS },
|
|
792
424
|
})
|
|
793
425
|
startTurn('k', 0)
|
|
794
426
|
expect(() => {
|
|
795
|
-
__tickForTests(75_000)
|
|
796
|
-
__tickForTests(180_000)
|
|
797
427
|
__tickForTests(300_000)
|
|
798
428
|
}).not.toThrow()
|
|
799
|
-
// Telemetry still emitted
|
|
800
429
|
expect(fx.emitted.some((e) => e.kind === 'silence_fallback_sent')).toBe(true)
|
|
801
430
|
})
|
|
802
431
|
|
|
803
432
|
it('continues to function if onFrameworkFallback returns a rejected promise', async () => {
|
|
804
|
-
const fx: TestFixtures = { emitted: [], fallbacks: []
|
|
433
|
+
const fx: TestFixtures = { emitted: [], fallbacks: [] }
|
|
805
434
|
__setDepsForTests({
|
|
806
435
|
emitMetric: (e) => fx.emitted.push(e),
|
|
807
436
|
onFrameworkFallback: () => Promise.reject(new Error('async fail')),
|
|
808
|
-
|
|
809
|
-
// ack + awareness-ping out of the way — see the throwing-handler test above.
|
|
810
|
-
thresholdsMs: {
|
|
811
|
-
...DEFAULT_THRESHOLDS,
|
|
812
|
-
ack: Number.MAX_SAFE_INTEGER,
|
|
813
|
-
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
814
|
-
},
|
|
437
|
+
thresholdsMs: { ...DEFAULT_THRESHOLDS },
|
|
815
438
|
})
|
|
816
439
|
startTurn('k', 0)
|
|
817
|
-
__tickForTests(75_000)
|
|
818
|
-
__tickForTests(180_000)
|
|
819
440
|
__tickForTests(300_000)
|
|
820
|
-
// Allow microtasks for the rejection-catch to fire
|
|
821
441
|
await new Promise((r) => setTimeout(r, 0))
|
|
822
442
|
expect(fx.emitted.some((e) => e.kind === 'silence_fallback_sent')).toBe(true)
|
|
823
443
|
})
|
|
824
444
|
})
|
|
825
445
|
|
|
826
|
-
describe('silence-poke — system reminder text', () => {
|
|
827
|
-
it('soft poke text references the 75s threshold and contains the system-reminder marker', () => {
|
|
828
|
-
setupDeps()
|
|
829
|
-
startTurn('k', 0)
|
|
830
|
-
__tickForTests(75_000)
|
|
831
|
-
const text = consumeArmedPoke()
|
|
832
|
-
expect(text).toContain('[silence-poke]')
|
|
833
|
-
expect(text).toContain('75s')
|
|
834
|
-
expect(text).toContain('about to finish')
|
|
835
|
-
})
|
|
836
|
-
|
|
837
|
-
it('firm poke text references the 3-minute threshold', () => {
|
|
838
|
-
setupDeps()
|
|
839
|
-
startTurn('k', 0)
|
|
840
|
-
__tickForTests(75_000)
|
|
841
|
-
consumeArmedPoke()
|
|
842
|
-
__tickForTests(180_000)
|
|
843
|
-
const text = consumeArmedPoke()
|
|
844
|
-
expect(text).toContain('3 minutes')
|
|
845
|
-
expect(text).toContain('stuck')
|
|
846
|
-
})
|
|
847
|
-
})
|
|
848
|
-
|
|
849
446
|
// CC-4 from `docs/status-ask-cause-classes.md`: wording is load-bearing
|
|
850
|
-
// (`reference/conversational-pacing.md` §
|
|
851
|
-
//
|
|
852
|
-
//
|
|
853
|
-
//
|
|
447
|
+
// (`reference/conversational-pacing.md` § Safety net). Snapshot the exact
|
|
448
|
+
// strings here so a refactor that drops a key phrase fails loud at test
|
|
449
|
+
// time. If you genuinely need to change the wording, update the snapshot
|
|
450
|
+
// AND the design doc together.
|
|
854
451
|
describe('silence-poke — wording snapshots (CC-4)', () => {
|
|
855
|
-
it('soft poke text is unchanged', () => {
|
|
856
|
-
expect(formatPokeText('soft')).toMatchInlineSnapshot(
|
|
857
|
-
`"[silence-poke] You've been silent to the user for 75s. If you're still working on this, send one short conversational reply — e.g. "still going, working through X" — so they know you're alive. Keep it brief; don't restate the task. If you're about to finish within the next few seconds, skip the update."`,
|
|
858
|
-
)
|
|
859
|
-
})
|
|
860
|
-
|
|
861
|
-
it('firm poke text is unchanged', () => {
|
|
862
|
-
expect(formatPokeText('firm')).toMatchInlineSnapshot(
|
|
863
|
-
`"[silence-poke] 3 minutes silent. Please send an update now — what you're working on, or whether you're stuck. If something is taking unusually long (slow tool, network, waiting on a sub-agent), say so explicitly."`,
|
|
864
|
-
)
|
|
865
|
-
})
|
|
866
|
-
|
|
867
452
|
it('framework fallback — working at 300s', () => {
|
|
868
453
|
expect(formatFrameworkFallbackText('working', 300_000)).toMatchInlineSnapshot(
|
|
869
454
|
`"still working… (no update from agent in 5 min)"`,
|
|
@@ -877,9 +462,6 @@ describe('silence-poke — wording snapshots (CC-4)', () => {
|
|
|
877
462
|
})
|
|
878
463
|
|
|
879
464
|
it('framework fallback — minutes derived from silenceMs, not hard-coded', () => {
|
|
880
|
-
// The "N min" suffix MUST track ctx.silenceMs so the wording stays
|
|
881
|
-
// honest if the 300s threshold is tuned. If a refactor accidentally
|
|
882
|
-
// hard-codes "5 min", these cases break.
|
|
883
465
|
expect(formatFrameworkFallbackText('working', 360_000)).toBe(
|
|
884
466
|
'still working… (no update from agent in 6 min)',
|
|
885
467
|
)
|
|
@@ -889,8 +471,6 @@ describe('silence-poke — wording snapshots (CC-4)', () => {
|
|
|
889
471
|
})
|
|
890
472
|
|
|
891
473
|
it('framework fallback — minutes floor at 1 even when silenceMs is small', () => {
|
|
892
|
-
// Defensive: a future caller might invoke with sub-minute silenceMs.
|
|
893
|
-
// Rendering "0 min" reads as nonsense; floor at 1.
|
|
894
474
|
expect(formatFrameworkFallbackText('working', 30_000)).toBe(
|
|
895
475
|
'still working… (no update from agent in 1 min)',
|
|
896
476
|
)
|
|
@@ -907,103 +487,8 @@ describe('silence-poke — performance', () => {
|
|
|
907
487
|
startTurn(`chat${i}:0`, 0)
|
|
908
488
|
}
|
|
909
489
|
const start = performance.now()
|
|
910
|
-
__tickForTests(75_000)
|
|
490
|
+
__tickForTests(75_000) // under fallback threshold — pure iteration cost
|
|
911
491
|
const elapsed = performance.now() - start
|
|
912
|
-
// 1000 turns should tick in well under 50ms — guards against an
|
|
913
|
-
// accidentally-quadratic implementation.
|
|
914
492
|
expect(elapsed).toBeLessThan(50)
|
|
915
493
|
})
|
|
916
494
|
})
|
|
917
|
-
|
|
918
|
-
describe('silence-poke — awareness ping (early framework-owned user-visible status)', () => {
|
|
919
|
-
it('fires once at 60s when no outbound has happened', () => {
|
|
920
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
921
|
-
startTurn('k', 0)
|
|
922
|
-
__tickForTests(59_000)
|
|
923
|
-
expect(fx.awarenessPings.length).toBe(0)
|
|
924
|
-
__tickForTests(60_000)
|
|
925
|
-
expect(fx.awarenessPings.length).toBe(1)
|
|
926
|
-
expect(fx.awarenessPings[0]!.silenceMs).toBeGreaterThanOrEqual(60_000)
|
|
927
|
-
expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
|
|
928
|
-
})
|
|
929
|
-
|
|
930
|
-
it('is one-shot per turn — does not re-fire as silence continues', () => {
|
|
931
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
932
|
-
startTurn('k', 0)
|
|
933
|
-
__tickForTests(60_000)
|
|
934
|
-
__tickForTests(120_000)
|
|
935
|
-
__tickForTests(180_000)
|
|
936
|
-
expect(fx.awarenessPings.length).toBe(1)
|
|
937
|
-
})
|
|
938
|
-
|
|
939
|
-
it('is suppressed by an early outbound', () => {
|
|
940
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
941
|
-
startTurn('k', 0)
|
|
942
|
-
noteOutbound('k', 30_000)
|
|
943
|
-
__tickForTests(90_000)
|
|
944
|
-
expect(fx.awarenessPings.length).toBe(0)
|
|
945
|
-
})
|
|
946
|
-
|
|
947
|
-
it('is suppressed when subagentDispatchActive is true', () => {
|
|
948
|
-
// Sub-agent dispatch already widens soft to 300s; the awareness-ping
|
|
949
|
-
// should also defer so we don't pre-empt the sub-agent's natural
|
|
950
|
-
// progress signal.
|
|
951
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
952
|
-
startTurn('k', 0)
|
|
953
|
-
noteSubagentDispatch('k')
|
|
954
|
-
__tickForTests(120_000)
|
|
955
|
-
expect(fx.awarenessPings.length).toBe(0)
|
|
956
|
-
})
|
|
957
|
-
|
|
958
|
-
it('does NOT advance the soft/firm/fallback ladder', () => {
|
|
959
|
-
// Awareness ping is a sibling signal; soft/firm/fallback continue
|
|
960
|
-
// to escalate on their own schedule (and the model-targeted ack-poke
|
|
961
|
-
// similarly remains independent).
|
|
962
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
963
|
-
startTurn('k', 0)
|
|
964
|
-
__tickForTests(60_000) // awareness fires
|
|
965
|
-
__tickForTests(75_000) // soft fires
|
|
966
|
-
__tickForTests(180_000) // firm fires
|
|
967
|
-
__tickForTests(300_000) // fallback fires
|
|
968
|
-
expect(fx.awarenessPings.length).toBe(1)
|
|
969
|
-
expect(fx.fallbacks.length).toBe(1)
|
|
970
|
-
expect(fx.emitted.filter(e => e.kind === 'silence_poke_fired').map(e => (e as { level: string }).level))
|
|
971
|
-
.toEqual(['soft', 'firm'])
|
|
972
|
-
expect(fx.emitted.some(e => e.kind === 'silence_fallback_sent')).toBe(true)
|
|
973
|
-
})
|
|
974
|
-
|
|
975
|
-
it('carries fallbackKind=thinking when a recent thinking event landed', () => {
|
|
976
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
977
|
-
startTurn('k', 0)
|
|
978
|
-
noteThinking('k', 45_000)
|
|
979
|
-
__tickForTests(60_000)
|
|
980
|
-
expect(fx.awarenessPings.length).toBe(1)
|
|
981
|
-
expect(fx.awarenessPings[0]!.fallbackKind).toBe('thinking')
|
|
982
|
-
})
|
|
983
|
-
|
|
984
|
-
it('does not fire if turn ends before the threshold', () => {
|
|
985
|
-
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
986
|
-
startTurn('k', 0)
|
|
987
|
-
endTurn('k')
|
|
988
|
-
__tickForTests(120_000)
|
|
989
|
-
expect(fx.awarenessPings.length).toBe(0)
|
|
990
|
-
})
|
|
991
|
-
|
|
992
|
-
it('handler errors do not break the timer', () => {
|
|
993
|
-
const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
|
|
994
|
-
__setDepsForTests({
|
|
995
|
-
emitMetric: (e) => fx.emitted.push(e),
|
|
996
|
-
onFrameworkFallback: () => {},
|
|
997
|
-
onAwarenessPing: () => { throw new Error('awareness handler boom') },
|
|
998
|
-
thresholdsMs: {
|
|
999
|
-
...DEFAULT_THRESHOLDS,
|
|
1000
|
-
ack: Number.MAX_SAFE_INTEGER,
|
|
1001
|
-
awarenessPing: 60_000,
|
|
1002
|
-
},
|
|
1003
|
-
})
|
|
1004
|
-
startTurn('k', 0)
|
|
1005
|
-
expect(() => __tickForTests(60_000)).not.toThrow()
|
|
1006
|
-
// Telemetry still emitted
|
|
1007
|
-
expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
|
|
1008
|
-
})
|
|
1009
|
-
})
|