switchroom 0.12.21 → 0.12.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +23 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +7 -6
- package/telegram-plugin/gateway/gateway.ts +40 -1
- package/telegram-plugin/gateway/inbound-delivery-machine.ts +435 -0
- package/telegram-plugin/tests/inbound-delivery-machine.test.ts +475 -0
- package/telegram-plugin/uat/scenarios/jtbd-always-on-after-restart-dm.test.ts +157 -0
- package/telegram-plugin/uat/scenarios/jtbd-fast-trivial-dm.test.ts +127 -0
- package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +239 -0
- package/telegram-plugin/uat/scenarios/jtbd-wake-audit-content-dm.test.ts +145 -0
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property tests for `inbound-delivery-machine.ts`.
|
|
3
|
+
*
|
|
4
|
+
* Per RFC `docs/rfcs/inbound-delivery-state-machine.md`: 5 invariants
|
|
5
|
+
* validated over arbitrary event schedules. A counterexample is the
|
|
6
|
+
* minimal evidence that the machine has a bug. The wedge-cluster
|
|
7
|
+
* bugs (v0.12.22 boot-wedge, overlapping-turn silence, #1564 sibling
|
|
8
|
+
* keys) become FAILING property tests if reintroduced.
|
|
9
|
+
*
|
|
10
|
+
* Schedules are generated by a seeded PRNG so failures are
|
|
11
|
+
* reproducible. We don't use fast-check (not a dependency) — the
|
|
12
|
+
* randomness is sufficient for the invariant shapes we're checking,
|
|
13
|
+
* and pure random + shrink-on-failure is documented in the RFC as
|
|
14
|
+
* acceptable for v1.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, expect, it } from 'vitest'
|
|
18
|
+
import {
|
|
19
|
+
type ChatKey,
|
|
20
|
+
type Effect,
|
|
21
|
+
type Event,
|
|
22
|
+
type InboundMessage,
|
|
23
|
+
type PermissionVerdict,
|
|
24
|
+
type State,
|
|
25
|
+
initialState,
|
|
26
|
+
OUTBOUND_RECENT_MS,
|
|
27
|
+
transition,
|
|
28
|
+
TURN_TTL_MS,
|
|
29
|
+
__chatIdOfKeyForTests,
|
|
30
|
+
} from '../gateway/inbound-delivery-machine'
|
|
31
|
+
|
|
32
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
33
|
+
// Seeded PRNG (deterministic for reproducible failures)
|
|
34
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
/**
 * Builds a seeded pseudo-random generator so a failing schedule can be
 * replayed from its seed alone.
 *
 * @param seed Starting state; coerced to an unsigned 32-bit integer.
 * @returns A function that yields the next value in [0, 1) on every call
 *   (an unsigned 32-bit integer divided by 2^32).
 */
function mulberry32(seed: number): () => number {
  const INCREMENT = 0x6d2b79f5
  const UINT32_RANGE = 4294967296
  let counter = seed >>> 0
  return function next(): number {
    // Step the 32-bit counter, wrapping on overflow.
    counter = (counter + INCREMENT) >>> 0
    // Two rounds of xor-shift / multiply mixing over the counter.
    const firstMix = Math.imul(counter ^ (counter >>> 15), counter | 1)
    const secondMix = firstMix ^ (firstMix + Math.imul(firstMix ^ (firstMix >>> 7), firstMix | 61))
    // Force the result to unsigned before scaling into [0, 1).
    return ((secondMix ^ (secondMix >>> 14)) >>> 0) / UINT32_RANGE
  }
}
|
|
46
|
+
|
|
47
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
48
|
+
// Event generators
|
|
49
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
const CHATS = ['c1', 'c2', 'c3'] as const
|
|
52
|
+
const THREADS = ['_', '1', '2', '0'] as const // includes '_' AND '0' to flag any latent sibling-key handling
|
|
53
|
+
|
|
54
|
+
/**
 * Joins a chat id and a thread id into the `chat:thread` key format used
 * throughout these tests.
 *
 * @param chat Chat identifier (left of the colon).
 * @param thread Thread identifier (right of the colon).
 * @returns The combined key, branded as a `ChatKey`.
 */
function makeKey(chat: string, thread: string): ChatKey {
  const joined = [chat, thread].join(':')
  return joined as ChatKey
}
|
|
57
|
+
|
|
58
|
+
/**
 * Mutable bookkeeping shared by the event generators while one schedule is
 * being produced.
 */
interface ScheduleContext {
  /** Random source; the generators scale its result to pick indices and branches. */
  readonly rand: () => number
  /** Simulated clock value stamped onto generated events; moved forward by `advanceTime`. */
  now: number
  /** Most recently issued message id; `nextMsg` increments it before use. */
  msgIdCounter: number
  /** Most recently issued permission-request number; `nextVerdict` increments it before use. */
  permIdCounter: number
}
|
|
64
|
+
|
|
65
|
+
/**
 * Draws a random chat/thread pair from the fixed `CHATS` and `THREADS`
 * pools.
 *
 * Consumes exactly two values from `ctx.rand`: first for the chat, then for
 * the thread. That order must stay fixed so seeded schedules replay
 * identically.
 */
function randomKey(ctx: ScheduleContext): ChatKey {
  const chatIndex = Math.floor(ctx.rand() * CHATS.length)
  const threadIndex = Math.floor(ctx.rand() * THREADS.length)
  return makeKey(CHATS[chatIndex], THREADS[threadIndex])
}
|
|
70
|
+
|
|
71
|
+
/**
 * Produces the next inbound message for a schedule.
 *
 * Bumps `ctx.msgIdCounter` first, so ids start at the counter's initial
 * value plus one and never repeat within one context.
 *
 * @param ctx Schedule bookkeeping; its `msgIdCounter` is mutated.
 * @param isSteering Copied onto the message as-is (defaults to `false`).
 * @returns A message whose payload echoes the new id.
 */
function nextMsg(ctx: ScheduleContext, isSteering = false): InboundMessage {
  ctx.msgIdCounter += 1
  const msgId = ctx.msgIdCounter
  const payload = { id: msgId }
  return { msgId, isSteering, payload }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Produces the next permission verdict for a schedule.
 *
 * Bumps `ctx.permIdCounter` first and derives a `req-<n>` request id from
 * it. Every generated verdict has behavior `'allow'`.
 *
 * @param ctx Schedule bookkeeping; its `permIdCounter` is mutated.
 * @returns A verdict whose payload echoes the request id.
 */
function nextVerdict(ctx: ScheduleContext): PermissionVerdict {
  ctx.permIdCounter += 1
  const requestId = `req-${ctx.permIdCounter}`
  const payload = { requestId }
  return { requestId, behavior: 'allow', payload }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Moves the simulated clock forward by a random whole number of
 * milliseconds: `ctx.rand()` scaled by `range` and floored.
 *
 * @param ctx Schedule bookkeeping; `now` is mutated and one random value is consumed.
 * @param range Scale applied to the random draw before flooring.
 */
function advanceTime(ctx: ScheduleContext, range: number): void {
  const step = Math.floor(ctx.rand() * range)
  ctx.now = ctx.now + step
}
|
|
84
|
+
|
|
85
|
+
type EventKind = Event['kind']
|
|
86
|
+
const EVENT_KINDS: readonly EventKind[] = [
|
|
87
|
+
'bridgeUp',
|
|
88
|
+
'bridgeDown',
|
|
89
|
+
'inbound',
|
|
90
|
+
'turnStart',
|
|
91
|
+
'turnEnd',
|
|
92
|
+
'modelOutbound',
|
|
93
|
+
'permVerdict',
|
|
94
|
+
'tick',
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
/**
 * Cumulative probability ceilings for `generateEvent`, scanned in order.
 * The first entry whose ceiling exceeds the roll wins; a roll at or above
 * the last ceiling yields `'bridgeDown'`.
 *
 * Resulting weights: inbound 30%, tick 15%, turnEnd 10%, modelOutbound
 * 10%, turnStart 10%, permVerdict 7%, bridgeUp 9%, bridgeDown 9%.
 */
const EVENT_KIND_CEILINGS: ReadonlyArray<readonly [number, EventKind]> = [
  [0.3, 'inbound'],
  [0.45, 'tick'],
  [0.55, 'turnEnd'],
  [0.65, 'modelOutbound'],
  [0.75, 'turnStart'],
  [0.82, 'permVerdict'],
  [0.91, 'bridgeUp'],
]

/**
 * Generates one random event for a schedule.
 *
 * Inbound, tick and turnEnd are weighted above the rarer operations so
 * schedules mostly exercise the steady-state hot path. The order in which
 * `ctx.rand` is consumed is part of the contract: changing it changes every
 * seeded schedule.
 *
 * @param ctx Schedule bookkeeping; counters and clock may be mutated.
 * @param prior Machine state before this event; only read, to bias
 *   `turnEnd` toward keys that currently have a turn open.
 * @returns The generated event, stamped with the simulated clock.
 */
function generateEvent(ctx: ScheduleContext, prior: State): Event {
  const roll = ctx.rand()
  const match = EVENT_KIND_CEILINGS.find(([ceiling]) => roll < ceiling)
  const kind: EventKind = match === undefined ? 'bridgeDown' : match[1]

  switch (kind) {
    case 'bridgeUp':
      return { kind: 'bridgeUp', at: ctx.now }
    case 'bridgeDown':
      return { kind: 'bridgeDown', at: ctx.now }
    case 'inbound': {
      // Draw order: steering flag, then key (two draws), then the message id.
      const steering = ctx.rand() < 0.1
      const key = randomKey(ctx)
      const msg = nextMsg(ctx, steering)
      return { kind: 'inbound', key, msg, at: ctx.now }
    }
    case 'turnStart': {
      const key = randomKey(ctx)
      return { kind: 'turnStart', key, at: ctx.now }
    }
    case 'turnEnd': {
      // Bias toward a key with an open turn so turnEnd is meaningful most
      // of the time; otherwise fall back to any random key.
      const liveKeys = Array.from(prior.perKey)
        .filter((entry) => entry[1].turnStartedAt != null)
        .map((entry) => entry[0])
      // Short-circuit matters: no draw is consumed when nothing is active.
      const useLiveKey = liveKeys.length > 0 && ctx.rand() < 0.8
      const key = useLiveKey
        ? liveKeys[Math.floor(ctx.rand() * liveKeys.length)]
        : randomKey(ctx)
      const outboundEmitted = ctx.rand() < 0.85
      return { kind: 'turnEnd', key, at: ctx.now, outboundEmitted }
    }
    case 'modelOutbound': {
      const key = randomKey(ctx)
      return { kind: 'modelOutbound', key, at: ctx.now }
    }
    case 'permVerdict': {
      const verdict = nextVerdict(ctx)
      return { kind: 'permVerdict', verdict, at: ctx.now }
    }
    case 'tick': {
      // Tick advances time meaningfully: usually a small step, sometimes a
      // big one that can cross TURN_TTL_MS so the fallback path is exercised.
      const range = ctx.rand() < 0.1 ? TURN_TTL_MS * 2 : 5_000
      advanceTime(ctx, range)
      return { kind: 'tick', now: ctx.now }
    }
  }
}
|
|
145
|
+
|
|
146
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
147
|
+
// Property: simulate a schedule and assert invariants
|
|
148
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
/**
 * One step of a simulated schedule: the event that was fed to the machine
 * together with everything needed to check invariants around it.
 */
interface TraceEntry {
  /** The event passed to `transition` at this step. */
  readonly event: Event
  /** Machine state immediately before the event was applied. */
  readonly stateBefore: State
  /** Machine state returned by `transition` for this event. */
  readonly stateAfter: State
  /** Effects returned by `transition` for this event. */
  readonly effects: readonly Effect[]
}
|
|
156
|
+
|
|
157
|
+
/**
 * Simulates one random schedule against the state machine.
 *
 * Starts from `initialState()`, generates `eventCount` events from a PRNG
 * seeded with `seed`, and feeds each to `transition`, recording the
 * before/after state and effects of every step.
 *
 * @param seed PRNG seed; the same seed always yields the same trace.
 * @param eventCount Number of events to generate.
 * @returns The recorded steps, in execution order.
 */
function runSchedule(seed: number, eventCount: number): TraceEntry[] {
  const ctx: ScheduleContext = {
    rand: mulberry32(seed),
    now: 0,
    msgIdCounter: 0,
    permIdCounter: 0,
  }
  const trace: TraceEntry[] = []
  let current = initialState()
  while (trace.length < eventCount) {
    const event = generateEvent(ctx, current)
    const outcome = transition(current, event)
    trace.push({
      event,
      stateBefore: current,
      stateAfter: outcome.state,
      effects: outcome.effects,
    })
    current = outcome.state
    // Nudge the clock between events so timestamps don't cluster at zero.
    advanceTime(ctx, 1_000)
  }
  return trace
}
|
|
177
|
+
|
|
178
|
+
/**
 * Renders a trace as a human-readable counterexample for a failure message.
 *
 * Output: a `seed=…, len=…` header, a blank line, then one line per step
 * with its index, event kind and JSON-serialized event. Steps that produced
 * effects get an extra line listing the effect kinds.
 *
 * @param seed Seed that generated the trace, so the failure can be replayed.
 * @param trace Steps to render (callers pass the prefix up to the failing step).
 * @returns The report as a newline-joined string.
 */
function formatCounterexample(seed: number, trace: TraceEntry[]): string {
  const header = [`seed=${seed}, len=${trace.length}`, '']
  const body = trace.flatMap((step, index) => {
    const eventLine = `#${index} ${step.event.kind} ${JSON.stringify(step.event)}`
    if (step.effects.length === 0) return [eventLine]
    const effectKinds = step.effects.map((effect) => effect.kind).join(', ')
    return [eventLine, ` → ${effectKinds}`]
  })
  return [...header, ...body].join('\n')
}
|
|
189
|
+
|
|
190
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
191
|
+
// Invariant #1 — Every inbound is delivered XOR persisted
|
|
192
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
193
|
+
|
|
194
|
+
describe('Invariant #1 — every inbound is delivered XOR persisted', () => {
  it('holds across 1000 random schedules of 50 events', () => {
    for (let seed = 1; seed <= 1000; seed++) {
      const trace = runSchedule(seed, 50)

      // Throws with the given headline followed by the trace prefix that
      // ends at the offending step.
      const fail = (step: number, headline: string): never => {
        throw new Error(headline + formatCounterexample(seed, trace.slice(0, step + 1)))
      }

      trace.forEach((entry, step) => {
        const { event, effects } = entry
        if (event.kind !== 'inbound') return
        const msgId = event.msg.msgId

        // Which of the three message-carrying effects mention this message?
        const wasDelivered = effects.some(
          (effect) => effect.kind === 'deliverToBridge' && effect.msg.msgId === msgId,
        )
        const wasBuffered = effects.some(
          (effect) => effect.kind === 'bufferInbound' && effect.msg.msgId === msgId,
        )
        const wasPersisted = effects.some(
          (effect) => effect.kind === 'persistInbound' && effect.msg.msgId === msgId,
        )

        // Contract: delivered XOR (buffered AND persisted). Never delivered
        // together with buffering/persisting, and never neither.
        if (wasDelivered && (wasBuffered || wasPersisted)) {
          fail(step, `Invariant #1 violated at event #${step}: msg ${msgId} both delivered and buffered.\n`)
        }
        if (!wasDelivered && !wasBuffered) {
          fail(step, `Invariant #1 violated at event #${step}: msg ${msgId} neither delivered nor buffered.\n`)
        }
        if (wasBuffered && !wasPersisted) {
          fail(step, `Invariant #1 violated at event #${step}: msg ${msgId} buffered but not persisted.\n`)
        }
      })
    }
    // Reaching this point means no schedule threw; keep one explicit assertion.
    expect(true).toBe(true)
  })
})
|
|
236
|
+
|
|
237
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
238
|
+
// Invariant #2 — setTurnStarted has a matching clearTurnStarted
|
|
239
|
+
// before the next end-of-life event (turnEnd, bridgeDown for active,
|
|
240
|
+
// or tick past TURN_TTL).
|
|
241
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
242
|
+
|
|
243
|
+
describe('Invariant #2 — turnStarted is matched by clearTurnStarted before EOL', () => {
  it('holds across 1000 random schedules of 100 events', () => {
    for (let seed = 1; seed <= 1000; seed++) {
      const trace = runSchedule(seed, 100)
      // The full statement is "every started turn is cleared before its
      // end-of-life event" (turnEnd for that key, a tick crossing TURN_TTL,
      // or bridgeDown, where the bridge_dead state takes over and no
      // explicit clear is required). The simpler equivalent checked here:
      // after a tick, no key may still hold a turnStartedAt that is stale
      // beyond the TTL. Only ticks carry a "current time", so a stale entry
      // legitimately survives non-tick events until a tick processes it.
      trace.forEach((entry, step) => {
        const { event, stateAfter } = entry
        if (event.kind !== 'tick') return
        const now = event.now

        for (const [key, slot] of stateAfter.perKey) {
          const startedAt = slot.turnStartedAt
          if (startedAt == null) continue
          const age = now - startedAt
          if (!(age > TURN_TTL_MS)) continue

          // A recent outbound suppresses the fallback (invariant #5), in
          // which case the entry is allowed to stay. Anything else is a
          // violation.
          const lastOutboundAt = slot.lastOutboundAt
          const notSuppressed =
            lastOutboundAt == null || now - lastOutboundAt >= OUTBOUND_RECENT_MS
          if (notSuppressed) {
            throw new Error(
              `Invariant #2 violated at event #${step}: key ${key} has stale turnStartedAt ` +
                `(age=${age}ms > ${TURN_TTL_MS}ms) after a tick.\n` +
                formatCounterexample(seed, trace.slice(0, step + 1)),
            )
          }
        }
      })
    }
    // Reaching this point means no schedule threw; keep one explicit assertion.
    expect(true).toBe(true)
  })
})
|
|
287
|
+
|
|
288
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
289
|
+
// Invariant #3 — per-chat sibling-key cleanup on turnEnd
|
|
290
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
291
|
+
|
|
292
|
+
describe('Invariant #3 — turnEnd sweeps sibling keys for the same chatId', () => {
  it('holds across 1000 random schedules of 100 events', () => {
    for (let seed = 1; seed <= 1000; seed++) {
      const trace = runSchedule(seed, 100)
      trace.forEach((entry, step) => {
        const { event, stateAfter } = entry
        if (event.kind !== 'turnEnd') return
        const endedKey = event.key
        const chatId = __chatIdOfKeyForTests(endedKey)

        // After turnEnd, no key belonging to this chatId may still carry a
        // turnStartedAt. Look for the first one that does.
        const leftover = Array.from(stateAfter.perKey).find(
          ([key, slot]) => __chatIdOfKeyForTests(key) === chatId && slot.turnStartedAt != null,
        )
        if (leftover === undefined) return

        const [siblingKey, sibling] = leftover
        throw new Error(
          `Invariant #3 violated at event #${step}: turnEnd for ${endedKey} ` +
            `left sibling key ${siblingKey} with turnStartedAt=${sibling.turnStartedAt}.\n` +
            formatCounterexample(seed, trace.slice(0, step + 1)),
        )
      })
    }
    // Reaching this point means no schedule threw; keep one explicit assertion.
    expect(true).toBe(true)
  })
})
|
|
317
|
+
|
|
318
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
319
|
+
// Invariant #4 — permVerdict delivered iff bridge alive
|
|
320
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
321
|
+
|
|
322
|
+
describe('Invariant #4 — permVerdict delivered iff bridge alive', () => {
  it('holds across 1000 random schedules of 50 events', () => {
    for (let seed = 1; seed <= 1000; seed++) {
      const trace = runSchedule(seed, 50)
      trace.forEach((entry, step) => {
        if (entry.event.kind !== 'permVerdict') return

        // "Alive" is judged on the state the verdict arrived in.
        const bridgeDead = entry.stateBefore.global.kind === 'bridge_dead'
        const effectKinds = entry.effects.map((effect) => effect.kind)
        const delivered = effectKinds.includes('deliverPermVerdict')
        const persisted = effectKinds.includes('persistPermVerdict')

        // Checked in order; the first failing rule is reported.
        const rules: ReadonlyArray<readonly [boolean, string]> = [
          [
            !bridgeDead && !delivered,
            `Invariant #4 violated at #${step}: bridge alive but permVerdict not delivered.\n`,
          ],
          [
            bridgeDead && !persisted,
            `Invariant #4 violated at #${step}: bridge dead but permVerdict not persisted.\n`,
          ],
          [
            delivered && persisted,
            `Invariant #4 violated at #${step}: permVerdict both delivered and persisted.\n`,
          ],
        ]
        const broken = rules.find(([violated]) => violated)
        if (broken !== undefined) {
          throw new Error(broken[1] + formatCounterexample(seed, trace.slice(0, step + 1)))
        }
      })
    }
    // Reaching this point means no schedule threw; keep one explicit assertion.
    expect(true).toBe(true)
  })
})
|
|
355
|
+
|
|
356
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
357
|
+
// Invariant #5 — spurious-fallback suppression (the 2026-05-20
|
|
358
|
+
// overlapping-turn silence bug becomes unrepresentable)
|
|
359
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
360
|
+
|
|
361
|
+
describe('Invariant #5 — fallback poke suppressed if model recently broke silence', () => {
  it('holds across 1000 random schedules of 100 events', () => {
    for (let seed = 1; seed <= 1000; seed++) {
      const trace = runSchedule(seed, 100)
      trace.forEach((entry, step) => {
        const { event, effects, stateBefore } = entry
        if (event.kind !== 'tick') return
        const tickNow = event.now

        for (const effect of effects) {
          if (effect.kind !== 'firePoke' || effect.level !== 'fallback') continue

          // Judge recency against the key's lastOutboundAt as it stood
          // before the tick. Keys with no record or no outbound yet cannot
          // violate the invariant.
          const lastOutboundAt = stateBefore.perKey.get(effect.key)?.lastOutboundAt
          if (lastOutboundAt == null) continue

          const sinceOutbound = tickNow - lastOutboundAt
          if (sinceOutbound < OUTBOUND_RECENT_MS) {
            throw new Error(
              `Invariant #5 violated at #${step}: fallback fired for ${effect.key} ` +
                `but model produced outbound only ${sinceOutbound}ms ago ` +
                `(threshold ${OUTBOUND_RECENT_MS}ms).\n` +
                formatCounterexample(seed, trace.slice(0, step + 1)),
            )
          }
        }
      })
    }
    // Reaching this point means no schedule threw; keep one explicit assertion.
    expect(true).toBe(true)
  })
})
|
|
390
|
+
|
|
391
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
392
|
+
// Targeted regression: the v0.12.22 boot-wedge case
|
|
393
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
394
|
+
|
|
395
|
+
describe('Targeted regression — v0.12.22 boot-wedge', () => {
  it('first inbound on a fresh bridge delivers, not buffers', () => {
    // Bring the bridge up from the initial state, with no turn in flight.
    const freshBridge = transition(initialState(), { kind: 'bridgeUp', at: 0 }).state

    const firstInbound: Event = {
      kind: 'inbound',
      key: 'c1:_' as ChatKey,
      msg: { msgId: 1, isSteering: false, payload: {} },
      at: 100,
    }
    const outcome = transition(freshBridge, firstInbound)
    const effectKinds = outcome.effects.map((effect) => effect.kind)

    // The very first message must go straight to the bridge and open a
    // turn, not sit in the buffer.
    expect(effectKinds.includes('deliverToBridge')).toBe(true)
    expect(effectKinds.includes('bufferInbound')).toBe(false)
    expect(outcome.state.global.kind).toBe('bridge_alive_in_turn')
  })
})
|
|
412
|
+
|
|
413
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
414
|
+
// Targeted regression: the 2026-05-20 overlapping-turn silence case
|
|
415
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
416
|
+
|
|
417
|
+
describe('Targeted regression — overlapping-turn silence (the post-v0.12.22 case)', () => {
  it("suppresses fallback when model produced outbound in prior overlapping turn", () => {
    // Replays events in order from the initial state and returns the final state.
    const replay = (events: readonly Event[]): State =>
      events.reduce((state, event) => transition(state, event).state, initialState())

    // Builds a non-steering inbound with an empty payload.
    const inboundAt = (key: string, msgId: number, at: number): Event => ({
      kind: 'inbound',
      key: key as ChatKey,
      msg: { msgId, isSteering: false, payload: {} },
      at,
    })

    // True when ticking `state` at `now` emits a fallback-level poke.
    const firesFallback = (state: State, now: number): boolean =>
      transition(state, { kind: 'tick', now }).effects.some(
        (effect) => effect.kind === 'firePoke' && effect.level === 'fallback',
      )

    // Case 1: the outbound is too old to suppress.
    // Turn A starts at t=1000 and ends with an outbound at t=5000; turn B
    // starts right after, at t=6000. At the t=310_000 tick turn B is 304s
    // old (past TURN_TTL_MS=300_000). The outbound at t=5000 was recent
    // relative to turn B's start, but it is 305s before NOW. Invariant #5
    // suppresses the fallback only when the last outbound is within
    // OUTBOUND_RECENT_MS (60s) of now, so the fallback SHOULD fire here.
    const staleOutbound = replay([
      { kind: 'bridgeUp', at: 0 },
      inboundAt('c1:_', 1, 1000),
      { kind: 'turnEnd', key: 'c1:_' as ChatKey, at: 5000, outboundEmitted: true },
      inboundAt('c1:_', 2, 6000),
    ])
    expect(firesFallback(staleOutbound, 310_000)).toBe(true)

    // Case 2: fresh machine, outbound at t=305_000, tick at t=310_000.
    // The turn is 309s old but the model spoke only 5s ago, so the
    // fallback must be suppressed.
    const recentOutbound = replay([
      { kind: 'bridgeUp', at: 0 },
      inboundAt('c2:_', 3, 1000),
      { kind: 'modelOutbound', key: 'c2:_' as ChatKey, at: 305_000 },
    ])
    expect(firesFallback(recentOutbound, 310_000)).toBe(false)
  })
})
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD scenario — always-on vision: first message after restart replies
|
|
3
|
+
* quickly, NOT 5 minutes later via silence-poke fallback.
|
|
4
|
+
*
|
|
5
|
+
* Vision (`reference/vision.md`, see [[project_vision_reanchor_human_assistants]]):
|
|
6
|
+
* agents are always-on specialist exec-assistants. A 5-min blank
|
|
7
|
+
* window on the first message after restart is what BROKEN feels
|
|
8
|
+
* like to a user trying to use their assistant.
|
|
9
|
+
*
|
|
10
|
+
* ## The wedge this guards against
|
|
11
|
+
*
|
|
12
|
+
* Pre-v0.12.22, every agent restart produced ~5 min blank on the
|
|
13
|
+
* first user message in each thread:
|
|
14
|
+
*
|
|
15
|
+
* 1. User sends msg → handleInbound runs the fresh-turn branch
|
|
16
|
+
* → activeTurnStartedAt.set(key, now) at gateway.ts:7357
|
|
17
|
+
* 2. The #1556 delivery gate further down (~7551) checks
|
|
18
|
+
* activeTurnStartedAt.size > 0 to decide if a turn is "already
|
|
19
|
+
* in flight" — sees the entry it just wrote → buffer-until-idle
|
|
20
|
+
* 3. Inbound stuck in pendingInboundBuffer. Bridge never sees it.
|
|
21
|
+
* Claude never replies. activeTurnStartedAt[key] stays set.
|
|
22
|
+
* 4. ~300s later silence-poke framework-fallback fires, drains
|
|
23
|
+
* the buffer, the reply finally lands — five minutes late.
|
|
24
|
+
*
|
|
25
|
+
* Documented in `feedback_5min_restart_wedge_violates_vision.md`.
|
|
26
|
+
* Fix is a one-line snapshot of the live size at receipt-time before
|
|
27
|
+
* the fresh-turn branch mutates the Map.
|
|
28
|
+
*
|
|
29
|
+
* ## What this UAT asserts
|
|
30
|
+
*
|
|
31
|
+
* After a deliberate restart, the FIRST message in a fresh thread
|
|
32
|
+
* gets a reply within a budget that excludes the silence-poke
|
|
33
|
+
* fallback floor (300s). Concretely we assert < 120s, which is
|
|
34
|
+
* generous for slow LLM replies but well below the wedge symptom.
|
|
35
|
+
*
|
|
36
|
+
* The test also makes a stricter observation log so a future
|
|
37
|
+
* regression that lands BETWEEN "wedge fixed" (~LLM latency) and
|
|
38
|
+
* "wedge present" (~5 min) is visible — e.g. some slow startup
|
|
39
|
+
* path that takes 60-90s would pass the contract but bear
|
|
40
|
+
* investigation.
|
|
41
|
+
*
|
|
42
|
+
* ## Why this scenario specifically
|
|
43
|
+
*
|
|
44
|
+
* - `smoke-dm-reply.test.ts` covers steady-state inbound→reply but
|
|
45
|
+
* does NOT restart the agent first, so the wedge surfaces as a
|
|
46
|
+
* "first message is slow" pattern that the smoke test would
|
|
47
|
+
* silently absorb on warm runs.
|
|
48
|
+
* - `silent-end-recovery-dm.test.ts` covers mid-turn silent-end →
|
|
49
|
+
* no-reply, but at 6 min budget — too generous to catch the
|
|
50
|
+
* 5-min wedge as a regression.
|
|
51
|
+
* - This is the FIRST UAT to explicitly tie the assertion to the
|
|
52
|
+
* "always-on" vision and measure first-after-restart TTFO.
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
56
|
+
import { execSync } from "node:child_process";
|
|
57
|
+
import { spinUp } from "../harness.js";
|
|
58
|
+
|
|
59
|
+
const AGENT = "test-harness";
|
|
60
|
+
|
|
61
|
+
// Budget for the marker-safe restart itself (per
|
|
62
|
+
// feedback_agent_restart_needs_sudo_when_running.md, restart blocks
|
|
63
|
+
// ~30s as the gateway's bridge reattaches).
|
|
64
|
+
const RESTART_BUDGET_MS = 90_000;
|
|
65
|
+
|
|
66
|
+
// Hard contract: first-after-restart reply must land in under 2 min.
|
|
67
|
+
// This is generous for slow LLM replies but well below the 5-min
|
|
68
|
+
// silence-poke fallback floor — a regression of the #1556 wedge
|
|
69
|
+
// would trip on TTFO ≥ 300s and fail the test.
|
|
70
|
+
const HARD_REPLY_BUDGET_MS = 120_000;
|
|
71
|
+
|
|
72
|
+
// Vision-aligned target: real expectation is well under 30s on a
|
|
73
|
+
// healthy fleet. A pass between 30-120s is yellow — covered by the
|
|
74
|
+
// contract but worth logging for forensic visibility.
|
|
75
|
+
const VISION_REPLY_BUDGET_MS = 30_000;
|
|
76
|
+
|
|
77
|
+
/**
 * Probes whether passwordless sudo is usable from this process.
 *
 * Runs `sudo -n true` with a 2-second timeout and discards its output.
 *
 * @returns `true` when the probe exits successfully, `false` when it throws
 *   for any reason (non-zero exit, timeout, sudo missing).
 */
function canShellSudo(): boolean {
  let available = true;
  try {
    execSync("sudo -n true", { stdio: "ignore", timeout: 2_000 });
  } catch {
    available = false;
  }
  return available;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Restarts the named agent through the `switchroom` CLI under sudo, blocking
 * until the command exits or `RESTART_BUDGET_MS` elapses.
 *
 * Marker-safe restart per feedback_compose_rollout.md and
 * feedback_agent_restart_needs_sudo_when_running.md: the apply step
 * self-elevates internally, but restart needs the sudo wrapper. Apply is
 * not called here because the agent scaffolds are already current and only
 * the in-memory state needs to reset.
 *
 * @param name Agent name, interpolated into the shell command as-is;
 *   callers must pass a shell-safe identifier.
 * @throws Whatever `execSync` throws on non-zero exit or timeout.
 */
function restartAgent(name: string): void {
  // PATH and HOME are forwarded through `env` so the command runs with the
  // caller's values under sudo. Both expansions are double-quoted: unquoted,
  // a value containing whitespace would be field-split by the shell and
  // `env` would treat the tail as the command to run.
  execSync(
    `sudo -n env PATH="$PATH" HOME="$HOME" switchroom agent restart ${name} --force`,
    { stdio: ["ignore", "pipe", "pipe"], timeout: RESTART_BUDGET_MS },
  );
}
|
|
97
|
+
|
|
98
|
+
// This scenario requires NOPASSWD sudo + the switchroom CLI on PATH on
|
|
99
|
+
// the harness host. Skip on CI runners that don't expose those.
|
|
100
|
+
const sudoOk = canShellSudo();
|
|
101
|
+
|
|
102
|
+
// Run the suite only where the passwordless-sudo probe succeeded;
// otherwise register it as skipped.
const describeIfSudo = sudoOk ? describe : describe.skip;

describeIfSudo("uat: always-on after restart", () => {
  beforeAll(async () => {
    restartAgent(AGENT);
    // Brief settle so the bridge sidecar finishes its reattach before the
    // first inbound is sent. The bridge-register log line is the earliest
    // the agent can accept inbound.
    await new Promise((resolve) => setTimeout(resolve, 5_000));
  }, RESTART_BUDGET_MS + 10_000);

  it(
    "first message after fresh restart → reply within 2 min (NOT the 5-min wedge)",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        const sentAt = Date.now();
        await sc.sendDM("ping — JTBD always-on UAT");

        // Any non-whitespace bot message counts as the first reply.
        const reply = await sc.expectMessage(/\S/, {
          from: "bot",
          timeout: HARD_REPLY_BUDGET_MS,
        });
        const ttfo = Date.now() - sentAt;

        expect(reply.text.length).toBeGreaterThan(0);

        // HARD CONTRACT: the wedge symptom is 300s+ TTFO. Anything at or
        // above HARD_REPLY_BUDGET_MS (120s) flags a regression of the
        // #1556 self-blocking gate. The explicit throw carries a
        // diagnosis that the bare expect below would not.
        const breachedHardBudget = ttfo >= HARD_REPLY_BUDGET_MS;
        if (breachedHardBudget) {
          throw new Error(
            `[always-on] first-post-restart reply took ${ttfo}ms — ` +
              `matches the #1556 wedge symptom (5-min silence-poke fallback). ` +
              `Vision broken; see feedback_5min_restart_wedge_violates_vision.md`,
          );
        }
        expect(ttfo).toBeLessThan(HARD_REPLY_BUDGET_MS);

        // Yellow band: inside the contract but slower than the vision
        // target. Logged, not failed; worth investigating if it recurs.
        const slowerThanVision = ttfo >= VISION_REPLY_BUDGET_MS;
        if (slowerThanVision) {
          console.warn(
            `[always-on] first-post-restart TTFO=${ttfo}ms — passed ` +
              `contract (${HARD_REPLY_BUDGET_MS}ms) but slower than the ` +
              `vision target (${VISION_REPLY_BUDGET_MS}ms). Forensic flag.`,
          );
        }
      } finally {
        await sc.tearDown();
      }
    },
    HARD_REPLY_BUDGET_MS + 10_000,
  );
});
|