switchroom 0.13.33 → 0.13.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/timezone-hook.sh +1 -1
- package/dist/agent-scheduler/index.js +8 -1
- package/dist/auth-broker/index.js +8 -1
- package/dist/cli/switchroom.js +176 -26
- package/dist/host-control/main.js +5222 -203
- package/dist/vault/approvals/kernel-server.js +9 -2
- package/dist/vault/broker/server.js +9 -2
- package/package.json +1 -1
- package/profiles/default/CLAUDE.md.hbs +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +234 -31
- package/telegram-plugin/docs/waiting-ux-spec.md +40 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +188 -1
- package/telegram-plugin/gateway/config-approval-handler.ts +170 -15
- package/telegram-plugin/gateway/diff-preview-card.test.ts +2 -2
- package/telegram-plugin/gateway/diff-preview-card.ts +2 -2
- package/telegram-plugin/gateway/drive-write-approval.test.ts +70 -0
- package/telegram-plugin/gateway/drive-write-approval.ts +51 -2
- package/telegram-plugin/gateway/error-envelope-card.ts +64 -0
- package/telegram-plugin/gateway/gateway.ts +112 -15
- package/telegram-plugin/gateway/ipc-protocol.ts +10 -1
- package/telegram-plugin/gateway/oversize-card-body.test.ts +108 -0
- package/telegram-plugin/gateway/oversize-card-body.ts +114 -0
- package/telegram-plugin/gateway/unhandled-rejection-policy.ts +46 -1
- package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +118 -41
- package/telegram-plugin/hooks/silent-end-scan.mjs +190 -0
- package/telegram-plugin/pending-work-progress.ts +37 -1
- package/telegram-plugin/tests/boot-clears-clean-shutdown-marker.test.ts +75 -0
- package/telegram-plugin/tests/error-envelope-unlock-card.test.ts +79 -0
- package/telegram-plugin/tests/pending-work-progress.test.ts +134 -0
- package/telegram-plugin/tests/silent-end-integration.test.ts +268 -0
- package/telegram-plugin/tests/silent-end-interrupt-stop-integration.test.ts +242 -0
- package/telegram-plugin/tests/silent-end-interrupt-stop-scan.test.ts +314 -0
- package/telegram-plugin/tests/silent-end.test.ts +227 -38
- package/telegram-plugin/tests/unhandled-rejection-policy.test.ts +51 -6
|
@@ -407,4 +407,138 @@ describe('pending-work-progress', () => {
|
|
|
407
407
|
expect(cap.edits.filter((e) => e.messageId === 10)).toHaveLength(1)
|
|
408
408
|
expect(cap.edits.filter((e) => e.messageId === 20)).toHaveLength(2)
|
|
409
409
|
})
|
|
410
|
+
|
|
411
|
+
// ─── #1760 regression tests ───────────────────────────────────────────
|
|
412
|
+
//
|
|
413
|
+
// The "— still working (Nm)" ticker can get stuck forever editing an
|
|
414
|
+
// old outbound message if the gateway misses the SDK `turn_end` event.
|
|
415
|
+
// Two layers of defence:
|
|
416
|
+
//
|
|
417
|
+
// 1. PRIMARY: the gateway tears down on every `reply: finalized`
|
|
418
|
+
// chokepoint via `clearPending(key, 'reply_finalize')` BEFORE
|
|
419
|
+
// `noteOutbound` on the next turn's first reply. Verified here by
|
|
420
|
+
// simulating a missed-turn_end scenario: the prior turn's ticker
|
|
421
|
+
// is activated, then the gateway processes a fresh reply on a NEW
|
|
422
|
+
// turn without ever calling `noteTurnEnd` for the prior one. The
|
|
423
|
+
// explicit `clearPending('reply_finalize')` call must wipe the
|
|
424
|
+
// stale ambient.
|
|
425
|
+
//
|
|
426
|
+
// 2. DEFENSE-IN-DEPTH: at tick time, if `isActiveTurnNewerThan`
|
|
427
|
+
// returns true (gateway reports a newer turn is active for this
|
|
428
|
+
// chat), the ticker self-terminates instead of editing. Bug
|
|
429
|
+
// becomes "at most one stale tick" rather than "stuck forever."
|
|
430
|
+
|
|
431
|
+
it('#1760 primary: reply_finalize teardown wipes a stale activated ticker', async () => {
|
|
432
|
+
const cap = setup()
|
|
433
|
+
|
|
434
|
+
// Turn 1: dispatch async work, capture an anchor, end the turn.
|
|
435
|
+
// Ticker activates and fires one edit at +60s.
|
|
436
|
+
startTurn(KEY)
|
|
437
|
+
noteAsyncDispatch(KEY)
|
|
438
|
+
noteOutbound(KEY, { messageId: 100, text: 'kicking off worker' })
|
|
439
|
+
noteTurnEnd(KEY)
|
|
440
|
+
cap.now = EDIT_INTERVAL_MS
|
|
441
|
+
__tickForTests(cap.now)
|
|
442
|
+
await flush()
|
|
443
|
+
expect(cap.edits).toHaveLength(1)
|
|
444
|
+
expect(cap.edits[0]?.messageId).toBe(100)
|
|
445
|
+
|
|
446
|
+
// Turn 2 begins WITHOUT a prior `noteTurnEnd` clear (simulating the
|
|
447
|
+
// #1760 missed-turn_end SDK-event-drop). The gateway's reply-
|
|
448
|
+
// finalize chokepoint MUST call clearPending('reply_finalize')
|
|
449
|
+
// BEFORE noteOutbound on the new anchor. After that, the prior
|
|
450
|
+
// ticker is gone and further ticks (even before any new
|
|
451
|
+
// noteTurnEnd) edit nothing.
|
|
452
|
+
clearPending(KEY, 'reply_finalize')
|
|
453
|
+
noteOutbound(KEY, { messageId: 200, text: 'turn 2 reply' })
|
|
454
|
+
|
|
455
|
+
cap.now = EDIT_INTERVAL_MS * 5
|
|
456
|
+
__tickForTests(cap.now)
|
|
457
|
+
await flush()
|
|
458
|
+
// No additional edits — the stale ticker is dead. Note that the new
|
|
459
|
+
// turn's ticker has not been activated yet (noteTurnEnd not called),
|
|
460
|
+
// so nothing should fire here either.
|
|
461
|
+
expect(cap.edits).toHaveLength(1)
|
|
462
|
+
|
|
463
|
+
// The 'reply_finalize' clear must surface as a metric so operators
|
|
464
|
+
// can observe the backstop firing in production.
|
|
465
|
+
const reasons = cap.metrics
|
|
466
|
+
.filter((m): m is Extract<PendingProgressMetric, { kind: 'pending_progress_cleared' }> =>
|
|
467
|
+
m.kind === 'pending_progress_cleared')
|
|
468
|
+
.map((m) => m.reason)
|
|
469
|
+
expect(reasons).toContain('reply_finalize')
|
|
470
|
+
})
|
|
471
|
+
|
|
472
|
+
it('#1760 defense-in-depth: ticker self-terminates when isActiveTurnNewerThan is true', async () => {
|
|
473
|
+
const cap: Capture = { edits: [], metrics: [], now: 0 }
|
|
474
|
+
__resetAllForTests()
|
|
475
|
+
// The activatedAt epoch captured by the ticker:
|
|
476
|
+
const TURN_1_ACTIVATED_AT = 1_000
|
|
477
|
+
// A NEWER turn starts later, simulating turn-2 racing past the
|
|
478
|
+
// missed teardown:
|
|
479
|
+
const TURN_2_STARTED_AT = TURN_1_ACTIVATED_AT + 30_000
|
|
480
|
+
|
|
481
|
+
__setDepsForTests({
|
|
482
|
+
editMessage: async (ctx) => {
|
|
483
|
+
cap.edits.push(ctx)
|
|
484
|
+
},
|
|
485
|
+
emitMetric: (e) => {
|
|
486
|
+
cap.metrics.push(e)
|
|
487
|
+
},
|
|
488
|
+
nowMs: () => cap.now,
|
|
489
|
+
// Reports a newer active turn for any ticker activated before TURN_2_STARTED_AT (always the case in this test).
|
|
490
|
+
isActiveTurnNewerThan: (_key, activatedAt) =>
|
|
491
|
+
TURN_2_STARTED_AT > activatedAt,
|
|
492
|
+
})
|
|
493
|
+
|
|
494
|
+
// Bootstrap a "prior turn" ticker at TURN_1_ACTIVATED_AT.
|
|
495
|
+
cap.now = TURN_1_ACTIVATED_AT
|
|
496
|
+
startTurn(KEY)
|
|
497
|
+
noteAsyncDispatch(KEY)
|
|
498
|
+
noteOutbound(KEY, { messageId: 100, text: 'kicking off worker' })
|
|
499
|
+
noteTurnEnd(KEY)
|
|
500
|
+
expect(__getStateForTests(KEY)?.activatedAt).toBe(TURN_1_ACTIVATED_AT)
|
|
501
|
+
|
|
502
|
+
// Advance past EDIT_INTERVAL_MS so the tick would otherwise fire.
|
|
503
|
+
cap.now = TURN_1_ACTIVATED_AT + EDIT_INTERVAL_MS + 1_000
|
|
504
|
+
__tickForTests(cap.now)
|
|
505
|
+
await flush()
|
|
506
|
+
|
|
507
|
+
// No edit fired — the predicate detected a newer active turn and
|
|
508
|
+
// dropped the ticker.
|
|
509
|
+
expect(cap.edits).toHaveLength(0)
|
|
510
|
+
expect(__getStateForTests(KEY)).toBeUndefined()
|
|
511
|
+
|
|
512
|
+
const cleared = cap.metrics.find(
|
|
513
|
+
(m): m is Extract<PendingProgressMetric, { kind: 'pending_progress_cleared' }> =>
|
|
514
|
+
m.kind === 'pending_progress_cleared',
|
|
515
|
+
)
|
|
516
|
+
expect(cleared?.reason).toBe('stale_turn')
|
|
517
|
+
})
|
|
518
|
+
|
|
519
|
+
it('#1760 defense-in-depth: predicate returning false leaves ticker alone', async () => {
|
|
520
|
+
const cap: Capture = { edits: [], metrics: [], now: 0 }
|
|
521
|
+
__resetAllForTests()
|
|
522
|
+
__setDepsForTests({
|
|
523
|
+
editMessage: async (ctx) => {
|
|
524
|
+
cap.edits.push(ctx)
|
|
525
|
+
},
|
|
526
|
+
emitMetric: (e) => {
|
|
527
|
+
cap.metrics.push(e)
|
|
528
|
+
},
|
|
529
|
+
nowMs: () => cap.now,
|
|
530
|
+
// No newer turn — the legitimate cross-turn ambient case.
|
|
531
|
+
isActiveTurnNewerThan: () => false,
|
|
532
|
+
})
|
|
533
|
+
|
|
534
|
+
startTurn(KEY)
|
|
535
|
+
noteAsyncDispatch(KEY)
|
|
536
|
+
noteOutbound(KEY, { messageId: 100, text: 'kicking off worker' })
|
|
537
|
+
noteTurnEnd(KEY)
|
|
538
|
+
|
|
539
|
+
cap.now = EDIT_INTERVAL_MS
|
|
540
|
+
__tickForTests(cap.now)
|
|
541
|
+
await flush()
|
|
542
|
+
expect(cap.edits).toHaveLength(1)
|
|
543
|
+
})
|
|
410
544
|
})
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* silent-end-integration.test.ts — #1744 follow-up.
|
|
3
|
+
*
|
|
4
|
+
* The existing silent-end.test.ts (#1741 block, L301-409) exercises the
|
|
5
|
+
* gate as a pure unit via `simulateReplyAtGateway`, which mirrors the
|
|
6
|
+
* `isFinalAnswerReply ? clearSilentEndState(...)` contract used at the
|
|
7
|
+
* `executeReply` send-site (gateway.ts:4609). That's a fine unit test
|
|
8
|
+
* for the `executeReply` path, but it does NOT model the `stream_reply`
|
|
9
|
+
* state machine where:
|
|
10
|
+
*
|
|
11
|
+
* - The FIRST stream emit is gated by `!activeDraftStreams.has(sKey)`
|
|
12
|
+
* (gateway.ts:5178) — only the first emit per stream considers
|
|
13
|
+
* clearing the silent-end state.
|
|
14
|
+
* - LATER emits in the same stream (subsequent calls that edit the
|
|
15
|
+
* same Telegram message) do NOT re-enter that first-emit branch.
|
|
16
|
+
*
|
|
17
|
+
* Pre-fix, a stream whose first emit was ack-shaped (short, silent, no
|
|
18
|
+
* done) and whose LATER emit carried `done=true` or substantive text
|
|
19
|
+
* would skip the clear at the first-emit gate and never re-attempt it,
|
|
20
|
+
* leaving the silent-end state file behind even though the model HAS
|
|
21
|
+
* now delivered its final answer. The Stop hook would then see a stale
|
|
22
|
+
* state file and fire a spurious re-prompt on the next turn end.
|
|
23
|
+
*
|
|
24
|
+
* The fix in gateway.ts:5343 adds a second clear at the
|
|
25
|
+
* `finalAnswerDelivered = true` site (which fires on EVERY emit that
|
|
26
|
+
* qualifies as a final answer, not just the first). This test walks
|
|
27
|
+
* the full ack-then-final stream sequence and asserts the state file's
|
|
28
|
+
* lifecycle matches the contract.
|
|
29
|
+
*
|
|
30
|
+
* KNOWN GAP — true end-to-end coverage of `executeStreamReply` would
|
|
31
|
+
* require importing gateway.ts (a multi-thousand-line module with
|
|
32
|
+
* heavy startup-time side effects: bot creation, IPC server bind, MCP
|
|
33
|
+
* registration, etc.). Neither `executeReply` nor `executeStreamReply`
|
|
34
|
+
* is exported. Instead, this test reproduces the EXACT call sequence
|
|
35
|
+
* the gateway makes at each send-site — same predicate
|
|
36
|
+
* (`isFinalAnswerReply`), same state-file API (`writeSilentEndState` /
|
|
37
|
+
* `clearSilentEndState`), same first-emit gating logic — by walking a
|
|
38
|
+
* tracked `activeDraftStreams` set to model the first-vs-later-emit
|
|
39
|
+
* branching that's the load-bearing detail of the bug. A future
|
|
40
|
+
* refactor that drops the second clear at L5343 would fail the
|
|
41
|
+
* `ack first emit then final later emit` test below, because the
|
|
42
|
+
* state file would never get cleared on the path that no longer
|
|
43
|
+
* passes through the first-emit branch.
|
|
44
|
+
*
|
|
45
|
+
* If the gateway is ever refactored so `executeReply`/`executeStreamReply`
|
|
46
|
+
* become testable in isolation (or get extracted into a thin
|
|
47
|
+
* dispatch shim around a pure handler), prefer wiring those real
|
|
48
|
+
* functions over this faithful-shape reproduction.
|
|
49
|
+
*/
|
|
50
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
51
|
+
import { mkdtempSync, rmSync } from 'node:fs'
|
|
52
|
+
import { tmpdir } from 'node:os'
|
|
53
|
+
import { join } from 'node:path'
|
|
54
|
+
import {
|
|
55
|
+
writeSilentEndState,
|
|
56
|
+
clearSilentEndState,
|
|
57
|
+
readSilentEndState,
|
|
58
|
+
} from '../silent-end.js'
|
|
59
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
60
|
+
|
|
61
|
+
// Per-test scratch directory, exposed through TELEGRAM_STATE_DIR.
// NOTE(review): presumably the silent-end state helpers resolve their
// state file against this env var — confirm in ../silent-end.js.
let stateDir: string
// Value of TELEGRAM_STATE_DIR at module load, restored after every test.
const ORIG_ENV = process.env.TELEGRAM_STATE_DIR

beforeEach(() => {
  const scratch = mkdtempSync(join(tmpdir(), 'silent-end-integration-test-'))
  stateDir = scratch
  process.env.TELEGRAM_STATE_DIR = scratch
})

afterEach(() => {
  rmSync(stateDir, { recursive: true, force: true })
  // Put the environment back exactly as it was: unset if it was unset,
  // otherwise the original value.
  if (ORIG_ENV == null) {
    delete process.env.TELEGRAM_STATE_DIR
    return
  }
  process.env.TELEGRAM_STATE_DIR = ORIG_ENV
})
|
|
74
|
+
|
|
75
|
+
/**
 * Mirrors the clear site of the gateway's `executeReply`
 * (gateway.ts:4599-4611). Every `reply` tool call goes through this
 * path; there is no first-emit gate because each `reply` yields a
 * brand-new outbound message.
 *
 * @param reply   Outbound payload, classified by `isFinalAnswerReply`.
 * @param turnKey Turn key handed to `clearSilentEndState` when the
 *                reply counts as the final answer.
 * @returns Whether this reply was classified as the final answer.
 */
function simulateExecuteReply(
  reply: { text: string; disableNotification: boolean },
  turnKey: string,
): { finalAnswerDelivered: boolean } {
  const finalAnswerDelivered = isFinalAnswerReply(reply)
  if (!finalAnswerDelivered) {
    // Interim / ack-shaped reply: leave the silent-end state untouched.
    return { finalAnswerDelivered }
  }
  clearSilentEndState(turnKey)
  return { finalAnswerDelivered }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Mirrors the two silent-end clear sites of the gateway's
 * `executeStreamReply` (gateway.ts:5172-5344):
 *
 *   1. First-emit gate (L5178-5195) — entered only while `turnKey` is
 *      not yet in `activeDraftStreams`, i.e. once per stream. It clears
 *      only when that first emit is final-answer-shaped, and always
 *      registers the stream as active afterwards.
 *   2. Final-answer site (L5335-5358, #1744 follow-up) — evaluated on
 *      EVERY emit. Any emit classified as the final answer flips
 *      `finalAnswerDelivered` and clears, including a later emit of a
 *      stream whose first emit was only an ack. Without this site the
 *      ack-first-then-final-later sequence leaks the state file.
 *
 * The gateway carries `activeDraftStreams` across calls within a turn;
 * here the caller threads it (and the sticky `finalAnswerDelivered`
 * flag) through `state`, which this function mutates in place.
 *
 * @param emit    One stream emit, classified by `isFinalAnswerReply`.
 * @param turnKey Key used both for the active-stream set and the clear.
 * @param state   Mutable per-turn state shared between emits.
 * @returns The (possibly updated) `finalAnswerDelivered` flag.
 */
function simulateExecuteStreamReply(
  emit: { text: string; disableNotification: boolean; done?: boolean },
  turnKey: string,
  state: { activeDraftStreams: Set<string>; finalAnswerDelivered: boolean },
): { finalAnswerDelivered: boolean } {
  const { activeDraftStreams } = state

  // Site 1 — runs only for the first emit of this stream.
  if (!activeDraftStreams.has(turnKey)) {
    if (isFinalAnswerReply(emit)) clearSilentEndState(turnKey)
    // From now on, emits for this key count as "later" emits.
    activeDraftStreams.add(turnKey)
  }

  // (The real gateway performs its draft / send-message work here.)

  // Site 2 — final-answer site (#1744 follow-up at gateway.ts:5343).
  // Deliberately re-evaluated on every emit; a repeated clear is expected.
  if (isFinalAnswerReply(emit)) {
    state.finalAnswerDelivered = true
    clearSilentEndState(turnKey)
  }

  return { finalAnswerDelivered: state.finalAnswerDelivered }
}
|
|
130
|
+
|
|
131
|
+
describe('#1744 — silent-end state-file lifecycle through real call paths', () => {
|
|
132
|
+
it('executeReply: ack reply does not clear, then final-answer reply clears', () => {
|
|
133
|
+
// Turn-end writer fires from a prior turn that ended undelivered,
|
|
134
|
+
// OR the framework re-prompted and the state still persists.
|
|
135
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
136
|
+
|
|
137
|
+
// Interim ack via `reply` — must NOT clear.
|
|
138
|
+
const r1 = simulateExecuteReply({ text: 'On it', disableNotification: true }, 'c:_')
|
|
139
|
+
expect(r1.finalAnswerDelivered).toBe(false)
|
|
140
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
141
|
+
|
|
142
|
+
// Final answer via `reply` (pings) — clears.
|
|
143
|
+
const r2 = simulateExecuteReply(
|
|
144
|
+
{ text: "Here's the result.", disableNotification: false },
|
|
145
|
+
'c:_',
|
|
146
|
+
)
|
|
147
|
+
expect(r2.finalAnswerDelivered).toBe(true)
|
|
148
|
+
expect(readSilentEndState()).toBeNull()
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
it('executeStreamReply: stream that opens with a final-answer first emit clears at first-emit site', () => {
|
|
152
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
153
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
154
|
+
|
|
155
|
+
// First emit is substantive (>=200 chars) — qualifies as final.
|
|
156
|
+
simulateExecuteStreamReply(
|
|
157
|
+
{ text: 'x'.repeat(250), disableNotification: true },
|
|
158
|
+
'c:_',
|
|
159
|
+
state,
|
|
160
|
+
)
|
|
161
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
162
|
+
expect(readSilentEndState()).toBeNull()
|
|
163
|
+
})
|
|
164
|
+
|
|
165
|
+
it('executeStreamReply ack-then-final edge case: first emit is an ack (no clear at first-emit gate), later emit is the final answer (must clear at the new L5343 site)', () => {
|
|
166
|
+
// This is the regression the #1744 follow-up fixes. Pre-fix, the
|
|
167
|
+
// first-emit gate would skip the clear (ack-shaped first emit),
|
|
168
|
+
// and the later final-answer emit had NO second clear site —
|
|
169
|
+
// state file leaked.
|
|
170
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
171
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
172
|
+
|
|
173
|
+
// First emit — ack-shaped. First-emit gate fires but the
|
|
174
|
+
// isFinalAnswerReply predicate returns false → no clear here.
|
|
175
|
+
simulateExecuteStreamReply(
|
|
176
|
+
{ text: 'thinking...', disableNotification: true, done: false },
|
|
177
|
+
'c:_',
|
|
178
|
+
state,
|
|
179
|
+
)
|
|
180
|
+
expect(state.finalAnswerDelivered).toBe(false)
|
|
181
|
+
// CONTRACT: state file MUST still be present after the ack first emit.
|
|
182
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
183
|
+
|
|
184
|
+
// Second emit — same stream (activeDraftStreams already has the key),
|
|
185
|
+
// so the first-emit branch is skipped. This emit carries done=true
|
|
186
|
+
// (the real final-answer signal). Without the L5343 clear, the
|
|
187
|
+
// state file would persist and the Stop hook would fire a spurious
|
|
188
|
+
// re-prompt on the next turn.
|
|
189
|
+
simulateExecuteStreamReply(
|
|
190
|
+
{ text: 'done', disableNotification: true, done: true },
|
|
191
|
+
'c:_',
|
|
192
|
+
state,
|
|
193
|
+
)
|
|
194
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
195
|
+
// CONTRACT: state file MUST be cleared after the final-answer
|
|
196
|
+
// emit, even though the first-emit branch was skipped.
|
|
197
|
+
expect(readSilentEndState()).toBeNull()
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
it('executeStreamReply: late emit that flips disable_notification=false also clears', () => {
|
|
201
|
+
// Variant of the edge case — final-answer signal is the pacing
|
|
202
|
+
// contract flag rather than done=true.
|
|
203
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
204
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
205
|
+
|
|
206
|
+
simulateExecuteStreamReply(
|
|
207
|
+
{ text: 'one sec', disableNotification: true, done: false },
|
|
208
|
+
'c:_',
|
|
209
|
+
state,
|
|
210
|
+
)
|
|
211
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
212
|
+
|
|
213
|
+
// Later emit drops the disable_notification flag — the pacing
|
|
214
|
+
// contract's "final answer" signal — but not done yet.
|
|
215
|
+
simulateExecuteStreamReply(
|
|
216
|
+
{ text: "Here's what I found.", disableNotification: false, done: false },
|
|
217
|
+
'c:_',
|
|
218
|
+
state,
|
|
219
|
+
)
|
|
220
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
221
|
+
expect(readSilentEndState()).toBeNull()
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
it('idempotency: clearSilentEndState is safe to call when the file is already gone', () => {
|
|
225
|
+
// The L5343 clear is gated only on isFinalAnswerReply — it
|
|
226
|
+
// fires even when the first-emit gate already cleared. The
|
|
227
|
+
// clear must be a no-op in that case so it can't accidentally
|
|
228
|
+
// unlink a fresh state file written for a DIFFERENT later turn.
|
|
229
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
230
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
231
|
+
|
|
232
|
+
// First emit is final — clears via the first-emit site.
|
|
233
|
+
simulateExecuteStreamReply(
|
|
234
|
+
{ text: 'answer', disableNotification: false },
|
|
235
|
+
'c:_',
|
|
236
|
+
state,
|
|
237
|
+
)
|
|
238
|
+
expect(readSilentEndState()).toBeNull()
|
|
239
|
+
|
|
240
|
+
// A second final-shaped emit on the same stream re-enters the
|
|
241
|
+
// L5343 clear. State is already gone — must be a no-op.
|
|
242
|
+
expect(() => {
|
|
243
|
+
simulateExecuteStreamReply(
|
|
244
|
+
{ text: 'addendum', disableNotification: false },
|
|
245
|
+
'c:_',
|
|
246
|
+
state,
|
|
247
|
+
)
|
|
248
|
+
}).not.toThrow()
|
|
249
|
+
expect(readSilentEndState()).toBeNull()
|
|
250
|
+
})
|
|
251
|
+
|
|
252
|
+
it('cross-turn safety: clearSilentEndState on turnKey A does NOT clear state for turnKey B', () => {
|
|
253
|
+
// The clear is keyed on turnKey via the writer's stored value —
|
|
254
|
+
// a clear call for a DIFFERENT turn must not unlink a state file
|
|
255
|
+
// written for the in-flight turn. This guards against the L5343
|
|
256
|
+
// clear accidentally racing a turn-end writer for a newer turn.
|
|
257
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:turn-B' })
|
|
258
|
+
// A stale clear for turn-A — silent-end.ts only unlinks when the
|
|
259
|
+
// stored turnKey matches.
|
|
260
|
+
clearSilentEndState('c:turn-A')
|
|
261
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
262
|
+
expect(readSilentEndState()!.turnKey).toBe('c:turn-B')
|
|
263
|
+
|
|
264
|
+
// The matching clear works.
|
|
265
|
+
clearSilentEndState('c:turn-B')
|
|
266
|
+
expect(readSilentEndState()).toBeNull()
|
|
267
|
+
})
|
|
268
|
+
})
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test for the silent-end Stop hook .mjs.
|
|
3
|
+
*
|
|
4
|
+
* Spawns the real script as a subprocess with a synthetic transcript
|
|
5
|
+
* on disk and the Stop event JSON on stdin. Pins the contract that
|
|
6
|
+
* matters at the hook boundary: stdout JSON shape, exit code,
|
|
7
|
+
* retry-count side-effect on the state file.
|
|
8
|
+
*
|
|
9
|
+
* Complements `silent-end-interrupt-stop-scan.test.ts` (which pins
|
|
10
|
+
* the pure helper in isolation). This test guards against
|
|
11
|
+
* regressions in:
|
|
12
|
+
* - stdin parsing
|
|
13
|
+
* - transcript_path file IO + fail-open on read errors
|
|
14
|
+
* - state-file retry-count increment
|
|
15
|
+
* - retry-budget exhaustion → allow
|
|
16
|
+
* - block-decision stdout shape
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
20
|
+
import { spawnSync } from 'node:child_process'
|
|
21
|
+
import {
|
|
22
|
+
mkdtempSync,
|
|
23
|
+
mkdirSync,
|
|
24
|
+
writeFileSync,
|
|
25
|
+
readFileSync,
|
|
26
|
+
existsSync,
|
|
27
|
+
rmSync,
|
|
28
|
+
} from 'node:fs'
|
|
29
|
+
import { tmpdir } from 'node:os'
|
|
30
|
+
import { join, resolve } from 'node:path'
|
|
31
|
+
|
|
32
|
+
const HOOK_PATH = resolve(
|
|
33
|
+
__dirname,
|
|
34
|
+
'..',
|
|
35
|
+
'hooks',
|
|
36
|
+
'silent-end-interrupt-stop.mjs',
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
/** Outcome of one hook subprocess run, as captured by `spawnSync`. */
interface RunResult {
  /** Exit code, or `null` when the process was terminated by a signal. */
  status: number | null
  /** Everything the hook wrote to stdout (utf8). */
  stdout: string
  /** Everything the hook wrote to stderr (utf8). */
  stderr: string
}

/**
 * Runs the Stop hook script (`HOOK_PATH`) under `node`, feeding the
 * JSON-serialised Stop event on stdin and pointing TELEGRAM_STATE_DIR
 * at the test's private state directory.
 *
 * @param input.event    Stop event object; serialised with JSON.stringify.
 * @param input.stateDir Directory exported to the child as TELEGRAM_STATE_DIR.
 * @returns Exit status plus captured stdout / stderr.
 * @throws The `spawnSync` error when the child could not be launched or
 *         exceeded the 5s timeout.
 */
function runHook(input: { event: object; stateDir: string }): RunResult {
  const r = spawnSync('node', [HOOK_PATH], {
    input: JSON.stringify(input.event),
    encoding: 'utf8',
    timeout: 5000,
    env: { ...process.env, TELEGRAM_STATE_DIR: input.stateDir },
  })
  // spawnSync does not throw on launch failure or timeout; it reports
  // them via `error`. Rethrow so the test fails with the real cause
  // instead of a misleading assertion on `status` / `stdout`.
  if (r.error) throw r.error
  return { status: r.status, stdout: r.stdout, stderr: r.stderr }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Writes `lines` to `<dir>/transcript.jsonl`, one JSON document per
 * line, separated by `\n` with no trailing newline.
 *
 * @param dir   Existing directory to write into.
 * @param lines Transcript entries; each is serialised with JSON.stringify.
 * @returns Path of the transcript file that was written.
 */
function writeTranscript(dir: string, lines: object[]): string {
  const transcriptPath = join(dir, 'transcript.jsonl')
  const serialized: string[] = []
  for (const entry of lines) {
    serialized.push(JSON.stringify(entry))
  }
  writeFileSync(transcriptPath, serialized.join('\n'), 'utf8')
  return transcriptPath
}
|
|
60
|
+
|
|
61
|
+
const ENQUEUE = {
|
|
62
|
+
type: 'queue-operation',
|
|
63
|
+
operation: 'enqueue',
|
|
64
|
+
content: '<channel source="switchroom-telegram" chat_id="111" message_id="42">hi</channel>',
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
 * Builds a synthetic `assistant` transcript entry whose content is a
 * single `mcp__switchroom-telegram__reply` tool_use block.
 *
 * @param text Reply text placed in the tool input.
 * @param opts Optional tool-input flags, spread into the input after
 *             `text`; omitted keys are simply absent from the result.
 * @returns The transcript entry object.
 */
function reply(text: string, opts: { disable_notification?: boolean; done?: boolean } = {}) {
  const toolUse = {
    type: 'tool_use',
    name: 'mcp__switchroom-telegram__reply',
    input: { text, ...opts },
  }
  return {
    type: 'assistant',
    message: { content: [toolUse] },
  }
}
|
|
81
|
+
|
|
82
|
+
describe('silent-end-interrupt-stop.mjs — integration', () => {
|
|
83
|
+
let tmp: string
|
|
84
|
+
let stateDir: string
|
|
85
|
+
|
|
86
|
+
beforeEach(() => {
|
|
87
|
+
tmp = mkdtempSync(join(tmpdir(), 'silent-end-hook-'))
|
|
88
|
+
stateDir = join(tmp, 'state')
|
|
89
|
+
mkdirSync(stateDir, { recursive: true })
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
afterEach(() => {
|
|
93
|
+
try { rmSync(tmp, { recursive: true, force: true }) } catch { /* ignore */ }
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
it('allows stop when transcript shows a notification-bearing reply', () => {
|
|
97
|
+
const transcript = writeTranscript(tmp, [
|
|
98
|
+
ENQUEUE,
|
|
99
|
+
reply('ok', { disable_notification: false }),
|
|
100
|
+
])
|
|
101
|
+
const r = runHook({
|
|
102
|
+
event: { session_id: 's1', transcript_path: transcript },
|
|
103
|
+
stateDir,
|
|
104
|
+
})
|
|
105
|
+
expect(r.status).toBe(0)
|
|
106
|
+
expect(r.stdout.trim()).toBe('')
|
|
107
|
+
expect(existsSync(join(stateDir, 'silent-end-pending.json'))).toBe(false)
|
|
108
|
+
})
|
|
109
|
+
|
|
110
|
+
it("blocks + writes retryCount=1 when transcript shows ack-only (Ken's repro)", () => {
|
|
111
|
+
const transcript = writeTranscript(tmp, [
|
|
112
|
+
ENQUEUE,
|
|
113
|
+
reply('on it — checking now', { disable_notification: true }),
|
|
114
|
+
{ type: 'assistant', message: { content: [{ type: 'tool_use', name: 'Bash', input: {} }] } },
|
|
115
|
+
// 2237-char answer as plain text, no reply tool
|
|
116
|
+
{
|
|
117
|
+
type: 'assistant',
|
|
118
|
+
message: { content: [{ type: 'text', text: 'A'.repeat(2237) }] },
|
|
119
|
+
},
|
|
120
|
+
])
|
|
121
|
+
const r = runHook({
|
|
122
|
+
event: { session_id: 's1', transcript_path: transcript },
|
|
123
|
+
stateDir,
|
|
124
|
+
})
|
|
125
|
+
expect(r.status).toBe(0)
|
|
126
|
+
const out = JSON.parse(r.stdout)
|
|
127
|
+
expect(out.decision).toBe('block')
|
|
128
|
+
expect(out.reason).toMatch(/Send your final answer/)
|
|
129
|
+
expect(out.reason).toMatch(/NO_REPLY/)
|
|
130
|
+
// Retry-count file was written.
|
|
131
|
+
const statePath = join(stateDir, 'silent-end-pending.json')
|
|
132
|
+
expect(existsSync(statePath)).toBe(true)
|
|
133
|
+
const state = JSON.parse(readFileSync(statePath, 'utf8'))
|
|
134
|
+
expect(state.retryCount).toBe(1)
|
|
135
|
+
// Reviewer-flagged regression: the hook's state-file write MUST
|
|
136
|
+
// include turnKey + chatId derived from the enqueue envelope. Without
|
|
137
|
+
// these, the gateway's later `recordSilentTurnEnd` write (~175ms after
|
|
138
|
+
// the hook) sees a turnKey mismatch and resets retryCount to 0,
|
|
139
|
+
// doubling the effective re-prompt budget. The shape here must match
|
|
140
|
+
// `chatKey(chatId, threadId)` at telegram-plugin/gateway/chat-key.ts:46.
|
|
141
|
+
expect(state.chatId).toBe('111')
|
|
142
|
+
expect(state.turnKey).toBe('111:_')
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
it('preserves retryCount across the hook→gateway write order (reviewer regression)', () => {
|
|
146
|
+
// Pins the hook-side precondition for what happens once the gateway runs its own
|
|
147
|
+
// `writeSilentEndState` ~175ms after the hook: it reads the hook's
|
|
148
|
+
// file, sees matching turnKey, preserves retryCount. Then the next
|
|
149
|
+
// `recordSilentTurnEnd` call sees retryCount=1 >= MAX_RETRIES=1 and
|
|
150
|
+
// returns exhausted — the design budget. Without matching turnKey
|
|
151
|
+
// this branch never fires on time and the budget doubles.
|
|
152
|
+
const transcript = writeTranscript(tmp, [
|
|
153
|
+
ENQUEUE,
|
|
154
|
+
reply('on it', { disable_notification: true }),
|
|
155
|
+
])
|
|
156
|
+
const r = runHook({
|
|
157
|
+
event: { session_id: 's1', transcript_path: transcript },
|
|
158
|
+
stateDir,
|
|
159
|
+
})
|
|
160
|
+
expect(r.status).toBe(0)
|
|
161
|
+
const statePath = join(stateDir, 'silent-end-pending.json')
|
|
162
|
+
const state = JSON.parse(readFileSync(statePath, 'utf8'))
|
|
163
|
+
expect(state.turnKey).toBe('111:_')
|
|
164
|
+
expect(state.retryCount).toBe(1)
|
|
165
|
+
})
|
|
166
|
+
|
|
167
|
+
it('allows stop when retry budget already exhausted (retryCount >= MAX_RETRIES)', () => {
|
|
168
|
+
const transcript = writeTranscript(tmp, [
|
|
169
|
+
ENQUEUE,
|
|
170
|
+
// Still no final reply, BUT retry already spent — gateway will
|
|
171
|
+
// post the user-facing fallback so the user isn't left silent.
|
|
172
|
+
reply('ack', { disable_notification: true }),
|
|
173
|
+
])
|
|
174
|
+
const statePath = join(stateDir, 'silent-end-pending.json')
|
|
175
|
+
writeFileSync(statePath, JSON.stringify({ retryCount: 1, chatId: '111' }), 'utf8')
|
|
176
|
+
|
|
177
|
+
const r = runHook({
|
|
178
|
+
event: { session_id: 's1', transcript_path: transcript },
|
|
179
|
+
stateDir,
|
|
180
|
+
})
|
|
181
|
+
expect(r.status).toBe(0)
|
|
182
|
+
expect(r.stdout.trim()).toBe('')
|
|
183
|
+
expect(r.stderr).toMatch(/retry exhausted/)
|
|
184
|
+
// retryCount was not incremented.
|
|
185
|
+
const state = JSON.parse(readFileSync(statePath, 'utf8'))
|
|
186
|
+
expect(state.retryCount).toBe(1)
|
|
187
|
+
})
|
|
188
|
+
|
|
189
|
+
it('NO_REPLY in transcript → allow stop, no state file written', () => {
|
|
190
|
+
const transcript = writeTranscript(tmp, [
|
|
191
|
+
ENQUEUE,
|
|
192
|
+
reply('NO_REPLY'),
|
|
193
|
+
])
|
|
194
|
+
const r = runHook({
|
|
195
|
+
event: { session_id: 's1', transcript_path: transcript },
|
|
196
|
+
stateDir,
|
|
197
|
+
})
|
|
198
|
+
expect(r.status).toBe(0)
|
|
199
|
+
expect(r.stdout.trim()).toBe('')
|
|
200
|
+
expect(existsSync(join(stateDir, 'silent-end-pending.json'))).toBe(false)
|
|
201
|
+
})
|
|
202
|
+
|
|
203
|
+
it('fail-open when transcript_path missing from event', () => {
|
|
204
|
+
const r = runHook({
|
|
205
|
+
event: { session_id: 's1' },
|
|
206
|
+
stateDir,
|
|
207
|
+
})
|
|
208
|
+
expect(r.status).toBe(0)
|
|
209
|
+
expect(r.stdout.trim()).toBe('')
|
|
210
|
+
})
|
|
211
|
+
|
|
212
|
+
it('fail-open when transcript_path does not exist on disk', () => {
|
|
213
|
+
const r = runHook({
|
|
214
|
+
event: { session_id: 's1', transcript_path: '/does/not/exist.jsonl' },
|
|
215
|
+
stateDir,
|
|
216
|
+
})
|
|
217
|
+
expect(r.status).toBe(0)
|
|
218
|
+
expect(r.stdout.trim()).toBe('')
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
it('fail-open on malformed stdin', () => {
|
|
222
|
+
const r = spawnSync('node', [HOOK_PATH], {
|
|
223
|
+
input: 'this is not JSON',
|
|
224
|
+
encoding: 'utf8',
|
|
225
|
+
timeout: 5000,
|
|
226
|
+
env: { ...process.env, TELEGRAM_STATE_DIR: stateDir },
|
|
227
|
+
})
|
|
228
|
+
expect(r.status).toBe(0)
|
|
229
|
+
expect(r.stdout.trim()).toBe('')
|
|
230
|
+
})
|
|
231
|
+
|
|
232
|
+
it('empty stdin → exit 0 immediately', () => {
|
|
233
|
+
const r = spawnSync('node', [HOOK_PATH], {
|
|
234
|
+
input: '',
|
|
235
|
+
encoding: 'utf8',
|
|
236
|
+
timeout: 5000,
|
|
237
|
+
env: { ...process.env, TELEGRAM_STATE_DIR: stateDir },
|
|
238
|
+
})
|
|
239
|
+
expect(r.status).toBe(0)
|
|
240
|
+
expect(r.stdout.trim()).toBe('')
|
|
241
|
+
})
|
|
242
|
+
})
|