switchroom 0.15.45 → 0.16.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +56 -15
- package/dist/auth-broker/index.js +383 -97
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +7 -4
- package/dist/cli/notion-write-pretool.mjs +35 -4
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/switchroom.js +2894 -841
- package/dist/host-control/main.js +2685 -207
- package/dist/vault/approvals/kernel-server.js +7453 -7413
- package/dist/vault/broker/server.js +11428 -11388
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +97 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +55 -12
- package/telegram-plugin/dist/gateway/gateway.js +2938 -977
- package/telegram-plugin/dist/server.js +55 -12
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1857 -292
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/model-command.ts +115 -4
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-command.test.ts +134 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -151,6 +151,124 @@ describe('detectModelUnavailable — reset-time extraction', () => {
|
|
|
151
151
|
})
|
|
152
152
|
})
|
|
153
153
|
|
|
154
|
+
// ─── SESSION-cap (time-only) reset parsing — auth-failover-stall Fix 2 ─────────
|
|
155
|
+
//
|
|
156
|
+
// A session cap surfaces as "resets <time>" with NO month/day. Pre-fix this
|
|
157
|
+
// was unparseable → resetAt undefined → the 429 inference path applied the +7d
|
|
158
|
+
// weekly floor, benching the account for a WEEK. The new branch resolves it to
|
|
159
|
+
// the NEXT occurrence of that wall-clock time (hours away), tz-aware.
|
|
160
|
+
describe('detectModelUnavailable — time-only session-cap reset (Fix 2)', () => {
|
|
161
|
+
const HOUR = 3600_000
|
|
162
|
+
const WEEK = 7 * 24 * HOUR
|
|
163
|
+
|
|
164
|
+
// Asserts that `d` is a Date strictly in the future and only "hours away".
//
// The next occurrence of a wall-clock time in a tz is normally ≤24h away, but
// a local day that contains a DST fall-back transition is 25h long, so the
// upper bound is 25h (+1 min of slack for test execution time). Crucially the
// result must never be the +7d weekly floor.
function expectHoursAway(d: Date | undefined): void {
  expect(d).toBeInstanceOf(Date)
  const deltaMs = (d as Date).getTime() - Date.now()
  // Must be in the future: a reset time in the past is not a "next occurrence".
  expect(deltaMs).toBeGreaterThan(0)
  // 25h rather than 24h so the check does not flake on a DST fall-back day.
  expect(deltaMs).toBeLessThanOrEqual(25 * HOUR + 60_000)
  // The whole point: never the weekly floor.
  expect(deltaMs).toBeLessThan(WEEK - HOUR)
}
|
|
174
|
+
|
|
175
|
+
// Asserts that `d`, rendered in time zone `tz`, reads exactly `hour:minute`
// on the wall clock — a sanity check that the parser resolved the requested
// time rather than an approximation. `minute` defaults to 0.
function expectWallClock(d: Date | undefined, tz: string, hour: number, minute = 0): void {
  expect(d).toBeInstanceOf(Date)

  const formatter = new Intl.DateTimeFormat('en-US', {
    timeZone: tz,
    hour: '2-digit',
    minute: '2-digit',
    hour12: false,
  })

  // Collect the non-literal parts (hour, minute) keyed by part type.
  const fields: Record<string, string> = {}
  for (const { type, value } of formatter.formatToParts(d as Date)) {
    if (type === 'literal') continue
    fields[type] = value
  }

  // `% 24` folds an hour rendered as "24" back to 0 — presumably a guard for
  // engines that print midnight that way with hour12: false (TODO confirm).
  expect(Number(fields.hour) % 24).toBe(hour)
  expect(Number(fields.minute)).toBe(minute)
}
|
|
190
|
+
|
|
191
|
+
it('parses "resets 5pm (Australia/Melbourne)" to the next 17:00 there, hours away (NOT +7d)', () => {
|
|
192
|
+
const d = detectModelUnavailable(
|
|
193
|
+
"You've hit your session limit · resets 5pm (Australia/Melbourne)",
|
|
194
|
+
)
|
|
195
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
196
|
+
expectHoursAway(d?.resetAt)
|
|
197
|
+
expectWallClock(d?.resetAt, 'Australia/Melbourne', 17, 0)
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
it('parses the "at"-prefixed form — "resets at 5pm (Australia/Melbourne)" (parity with wedge-watchdog parseWeeklyReset)', () => {
|
|
201
|
+
// wedge-watchdog's parseWeeklyReset time-only regex accepts an optional
|
|
202
|
+
// "(?:at\s+)?" token; this parser must accept the IDENTICAL grammar or the
|
|
203
|
+
// "at"-prefixed string falls through to the +7d weekly floor — the
|
|
204
|
+
// week-long-bench bug this PR exists to kill.
|
|
205
|
+
const d = detectModelUnavailable(
|
|
206
|
+
"You've hit your session limit · resets at 5pm (Australia/Melbourne)",
|
|
207
|
+
)
|
|
208
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
209
|
+
expectHoursAway(d?.resetAt)
|
|
210
|
+
expectWallClock(d?.resetAt, 'Australia/Melbourne', 17, 0)
|
|
211
|
+
})
|
|
212
|
+
|
|
213
|
+
it('parses am times — "resets 8:50am (Australia/Melbourne)"', () => {
|
|
214
|
+
const d = detectModelUnavailable("You've hit your limit · resets 8:50am (Australia/Melbourne)")
|
|
215
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
216
|
+
expectHoursAway(d?.resetAt)
|
|
217
|
+
expectWallClock(d?.resetAt, 'Australia/Melbourne', 8, 50)
|
|
218
|
+
})
|
|
219
|
+
|
|
220
|
+
it('parses a time WITHOUT minutes — "resets 9am (UTC)"', () => {
|
|
221
|
+
const d = detectModelUnavailable('hit your limit · resets 9am (UTC)')
|
|
222
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
223
|
+
expectHoursAway(d?.resetAt)
|
|
224
|
+
expectWallClock(d?.resetAt, 'UTC', 9, 0)
|
|
225
|
+
})
|
|
226
|
+
|
|
227
|
+
it('parses a time WITHOUT a tz label (best-effort UTC) — "resets 11pm"', () => {
|
|
228
|
+
const d = detectModelUnavailable('usage limit hit · resets 11pm')
|
|
229
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
230
|
+
expectHoursAway(d?.resetAt)
|
|
231
|
+
expectWallClock(d?.resetAt, 'UTC', 23, 0)
|
|
232
|
+
})
|
|
233
|
+
|
|
234
|
+
it('parses 24-hour clock times — "resets 17:00 (UTC)"', () => {
|
|
235
|
+
const d = detectModelUnavailable('hit your limit · resets 17:00 (UTC)')
|
|
236
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
237
|
+
expectHoursAway(d?.resetAt)
|
|
238
|
+
expectWallClock(d?.resetAt, 'UTC', 17, 0)
|
|
239
|
+
})
|
|
240
|
+
|
|
241
|
+
it('STILL parses a bare ISO-8601 reset (calendar-path regression guard)', () => {
|
|
242
|
+
const d = detectModelUnavailable('quota exhausted, retry at 2026-05-03T11:00:00Z')
|
|
243
|
+
expect(d?.resetAt?.toISOString()).toBe('2026-05-03T11:00:00.000Z')
|
|
244
|
+
})
|
|
245
|
+
|
|
246
|
+
it('a month/day "resets" string is NOT hijacked into the time-only branch', () => {
|
|
247
|
+
// The negative lookahead must reject "May"/"Jun" so a date-bearing string
|
|
248
|
+
// never resolves to "tomorrow at HH:MM". (The month/day+time calendar form
|
|
249
|
+
// itself does not currently resolve to a Date — that is pre-existing
|
|
250
|
+
// behaviour; the load-bearing guard is that the time-only branch leaves it
|
|
251
|
+
// alone rather than producing a WRONG hours-away time.)
|
|
252
|
+
const may = detectModelUnavailable("You're out of extra usage · resets May 3, 11am")
|
|
253
|
+
expect(may?.kind).toBe('quota_exhausted')
|
|
254
|
+
// If the time-only branch had wrongly fired, resetAt would be ≤24h away.
|
|
255
|
+
if (may?.resetAt) {
|
|
256
|
+
const deltaMs = may.resetAt.getTime() - Date.now()
|
|
257
|
+
// A genuine May-3 resolution is many days away (or in the past); never the
|
|
258
|
+
// bare next-11am-tomorrow the time-only branch would have produced.
|
|
259
|
+
expect(Math.abs(deltaMs)).toBeGreaterThan(2 * 24 * HOUR)
|
|
260
|
+
}
|
|
261
|
+
const jun = detectModelUnavailable(
|
|
262
|
+
"hit your limit · resets Jun 9, 5am (Australia/Melbourne)",
|
|
263
|
+
)
|
|
264
|
+
expect(jun?.kind).toBe('quota_exhausted')
|
|
265
|
+
if (jun?.resetAt) {
|
|
266
|
+
const deltaMs = jun.resetAt.getTime() - Date.now()
|
|
267
|
+
expect(Math.abs(deltaMs)).toBeGreaterThan(2 * 24 * HOUR)
|
|
268
|
+
}
|
|
269
|
+
})
|
|
270
|
+
})
|
|
271
|
+
|
|
154
272
|
// ─── formatModelUnavailableCard ──────────────────────────────────────────────
|
|
155
273
|
|
|
156
274
|
describe('formatModelUnavailableCard — actionable card', () => {
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
normalizeNarrative,
|
|
4
|
+
prefixSimilarity,
|
|
5
|
+
isDraftOfReply,
|
|
6
|
+
DRAFT_SUPPRESS_THRESHOLD,
|
|
7
|
+
REPLY_TOOLS,
|
|
8
|
+
} from '../narrative-dedup.js'
|
|
9
|
+
|
|
10
|
+
describe('narrative-dedup', () => {
|
|
11
|
+
it('pins the threshold so a silent retune breaks the test', () => {
|
|
12
|
+
expect(DRAFT_SUPPRESS_THRESHOLD).toBe(0.8)
|
|
13
|
+
})
|
|
14
|
+
|
|
15
|
+
it('REPLY_TOOLS holds exactly reply + stream_reply', () => {
|
|
16
|
+
expect(REPLY_TOOLS.has('reply')).toBe(true)
|
|
17
|
+
expect(REPLY_TOOLS.has('stream_reply')).toBe(true)
|
|
18
|
+
expect(REPLY_TOOLS.has('Bash')).toBe(false)
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
describe('normalizeNarrative', () => {
|
|
22
|
+
it('strips markdown emphasis/heading/quote marks, collapses whitespace, lowercases', () => {
|
|
23
|
+
expect(normalizeNarrative('**Bold** _italic_ `code`')).toBe('bold italic code')
|
|
24
|
+
expect(normalizeNarrative('> # Heading\n text')).toBe('heading text')
|
|
25
|
+
})
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
describe('prefixSimilarity', () => {
|
|
29
|
+
it('returns 1 for identical strings', () => {
|
|
30
|
+
expect(prefixSimilarity('hello there', 'hello there')).toBe(1)
|
|
31
|
+
})
|
|
32
|
+
|
|
33
|
+
it('returns 0 when either side is empty (no divide-by-zero)', () => {
|
|
34
|
+
expect(prefixSimilarity('', 'something')).toBe(0)
|
|
35
|
+
expect(prefixSimilarity('something', '')).toBe(0)
|
|
36
|
+
expect(prefixSimilarity('', '')).toBe(0)
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
it('ratio is over the SHORTER normalized string', () => {
|
|
40
|
+
// "abc" vs "abcdef": shared prefix 3 of shorter length 3 = 1.0
|
|
41
|
+
expect(prefixSimilarity('abc', 'abcdef')).toBe(1)
|
|
42
|
+
// "abx" vs "abcdef": shared prefix 2 of shorter length 3 ≈ 0.667
|
|
43
|
+
expect(prefixSimilarity('abx', 'abcdef')).toBeCloseTo(2 / 3, 5)
|
|
44
|
+
})
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
describe('isDraftOfReply', () => {
|
|
48
|
+
it('SUPPRESS: identical draft and reply', () => {
|
|
49
|
+
const t = 'The repo is at /home/user/code/switchroom.'
|
|
50
|
+
expect(isDraftOfReply(t, t)).toBe(true)
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
it('SUPPRESS: draft whose trailing sentence was trimmed before sending (~0.85 prefix)', () => {
|
|
54
|
+
const draft = 'The repo is at /home/user/code/switchroom. I will start now.'
|
|
55
|
+
const reply = 'The repo is at /home/user/code/switchroom.'
|
|
56
|
+
// reply is the shorter string and is a full prefix of the draft → 1.0
|
|
57
|
+
expect(prefixSimilarity(draft, reply)).toBe(1)
|
|
58
|
+
expect(isDraftOfReply(draft, reply)).toBe(true)
|
|
59
|
+
// And a near-match case: the reply drops the draft's last word, so the two strings differ only at the very end:
|
|
60
|
+
const draft2 = 'Found both repos and confirmed the remote is correct here.'
|
|
61
|
+
const reply2 = 'Found both repos and confirmed the remote is correct.'
|
|
62
|
+
expect(prefixSimilarity(draft2, reply2)).toBeGreaterThanOrEqual(0.85)
|
|
63
|
+
expect(isDraftOfReply(draft2, reply2)).toBe(true)
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
it('SHOW: post-action narration that merely precedes a different reply', () => {
|
|
67
|
+
// "Sent. Waiting on the build…" vs an unrelated reply payload — short
|
|
68
|
+
// string, near-zero shared prefix → below threshold → SHOW.
|
|
69
|
+
const narration = 'Sent. Waiting on the build…'
|
|
70
|
+
const reply = "Here's the result of the build: all green."
|
|
71
|
+
expect(prefixSimilarity(narration, reply)).toBeLessThan(DRAFT_SUPPRESS_THRESHOLD)
|
|
72
|
+
expect(isDraftOfReply(narration, reply)).toBe(false)
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
it('SHOW: empty reply text never suppresses (no divide-by-zero)', () => {
|
|
76
|
+
expect(isDraftOfReply('On it. Let me find the repo…', '')).toBe(false)
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
it('SUPPRESS: draft differs from reply only by markdown decoration', () => {
|
|
80
|
+
const draft = 'Here is the **plan**: do A then B.'
|
|
81
|
+
const reply = 'Here is the plan: do A then B.'
|
|
82
|
+
// After normalization the markdown stars vanish → identical → suppress.
|
|
83
|
+
expect(normalizeNarrative(draft)).toBe(normalizeNarrative(reply))
|
|
84
|
+
expect(isDraftOfReply(draft, reply)).toBe(true)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
it('NIT 2: the doubled-capturedText proxy mis-suppresses; the actual reply text does not', () => {
|
|
88
|
+
// The bug: flushPendingNarrativeAtTurnEnd used to compare a trailing
|
|
89
|
+
// narration against capturedText.join(''). When the model emits the same
|
|
90
|
+
// short string twice in a turn — e.g. "Done." as working narration and
|
|
91
|
+
// then "Done." as the reply — that proxy becomes the CONCATENATION
|
|
92
|
+
// "Done.Done.", whose prefix the trailing narration still matches above
|
|
93
|
+
// threshold → genuine trailing narration WRONGLY suppressed.
|
|
94
|
+
const trailing = 'Done.'
|
|
95
|
+
const doubledProxy = 'Done.' + 'Done.' // capturedText.join('') of two "Done." blocks
|
|
96
|
+
const actualReply = 'Done.'
|
|
97
|
+
|
|
98
|
+
// Old (broken) comparison: trailing vs the doubled proxy → wrongly suppresses.
|
|
99
|
+
expect(isDraftOfReply(trailing, doubledProxy)).toBe(true)
|
|
100
|
+
|
|
101
|
+
// New comparison: trailing vs the ACTUAL reply text. Here the reply text
|
|
102
|
+
// really IS "Done.", so a trailing "Done." is a genuine duplicate and is
|
|
103
|
+
// correctly suppressed — the fix preserves the common-case suppression.
|
|
104
|
+
expect(isDraftOfReply(trailing, actualReply)).toBe(true)
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('NIT 2: genuine trailing narration is preserved when the reply text differs', () => {
|
|
108
|
+
// The case the proxy hurt most: the turn's reply is a SHORT distinct
|
|
109
|
+
// string and the trailing narration is genuine liveness. Comparing
|
|
110
|
+
// against the actual reply text (not a concatenation that happens to
|
|
111
|
+
// share a prefix) keeps the trailing narration SHOWN.
|
|
112
|
+
const trailingNarration = 'Done — all green, pushing now.'
|
|
113
|
+
const actualReply = 'Here is the summary you asked for: 3 files changed.'
|
|
114
|
+
// Below threshold against the real reply → SHOW (not suppressed).
|
|
115
|
+
expect(isDraftOfReply(trailingNarration, actualReply)).toBe(false)
|
|
116
|
+
})
|
|
117
|
+
})
|
|
118
|
+
})
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the activity-feed-teardown fix (orphaned-reply backstop).
|
|
3
|
+
*
|
|
4
|
+
* Root cause: the orphaned-reply backstop fired a synthetic turn_end
|
|
5
|
+
* (`durationMs: -1`) after 30 s of silence, even mid-tool-call. That nulled
|
|
6
|
+
* `currentTurn` and dropped every subsequent `tool_label`, darkening the live
|
|
7
|
+
* activity feed for the rest of the turn.
|
|
8
|
+
*
|
|
9
|
+
* Fix: three layers described in the PR.
|
|
10
|
+
* PRIMARY — fuse fires mid-tool → re-arm instead (bounded by ORPHANED_REPLY_MAX_REARMS).
|
|
11
|
+
* SECONDARY — tool_label re-arms the fuse so active label streams keep it fresh.
|
|
12
|
+
* DEFENSIVE — turn_end entry rejects the synthetic event if tools are in flight.
|
|
13
|
+
*
|
|
14
|
+
* These tests cover the pure / unit-testable surfaces:
|
|
15
|
+
* - shouldArmOrphanedReplyTimeout (existing, now with midToolCall param)
|
|
16
|
+
* - ORPHANED_REPLY_MAX_REARMS constant math
|
|
17
|
+
* - The re-arm guard logic (pure decision extracted from the closure)
|
|
18
|
+
* - The defensive turn_end discriminator (durationMs === -1 + in-flight check)
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from 'vitest'
|
|
22
|
+
import {
|
|
23
|
+
shouldArmOrphanedReplyTimeout,
|
|
24
|
+
ORPHANED_REPLY_TIMEOUT_MS,
|
|
25
|
+
ORPHANED_REPLY_MAX_REARMS,
|
|
26
|
+
} from '../context-exhaustion.js'
|
|
27
|
+
import { ToolFlightTracker } from '../gateway/interrupt-defer.js'
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Helpers — pure decision functions mirroring the gateway closure logic.
|
|
31
|
+
// These extract the discriminable parts of the fix so they are unit-testable
|
|
32
|
+
// without instantiating the full gateway.
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
/**
 * Test-local mirror of the PRIMARY fix decision made inside the backstop's
 * setTimeout callback.
 *
 * @param opts.midToolCall whether a tool call is currently in flight
 * @param opts.rearmCount  how many times the fuse has already been re-armed
 * @param opts.maxRearms   the re-arm cap
 * @returns true when the backstop should re-arm (a tool is in flight and the
 *          cap has not been reached); false when it should fire turn_end.
 */
function shouldRearmInsteadOfFire(opts: {
  midToolCall: boolean
  rearmCount: number
  maxRearms: number
}): boolean {
  const { midToolCall, rearmCount, maxRearms } = opts
  // Nothing in flight: there is no reason to keep the fuse alive.
  if (!midToolCall) return false
  // In flight: re-arm only while strictly under the cap.
  return rearmCount < maxRearms
}
|
|
46
|
+
|
|
47
|
+
/**
 * Test-local mirror of the DEFENSIVE fix applied at turn_end entry.
 *
 * @param opts.durationMs  the turn_end duration; exactly -1 marks a synthetic event
 * @param opts.midToolCall whether a tool call is currently in flight
 * @returns true only when the event is synthetic (durationMs === -1) AND a
 *          tool is in flight, i.e. the turn_end should be suppressed.
 */
function shouldSuppressSyntheticTurnEnd(opts: {
  durationMs: number
  midToolCall: boolean
}): boolean {
  // Any value other than exactly -1 is treated as an authoritative turn_end.
  if (opts.durationMs !== -1) return false
  // Synthetic event: suppress it only while a tool is still running.
  return opts.midToolCall
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Tests: ORPHANED_REPLY_MAX_REARMS constant
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
describe('ORPHANED_REPLY_MAX_REARMS', () => {
|
|
63
|
+
it('is 20 (20 × 30 s = 10 min cap)', () => {
|
|
64
|
+
expect(ORPHANED_REPLY_MAX_REARMS).toBe(20)
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
it('combined with ORPHANED_REPLY_TIMEOUT_MS covers at least 10 min of tool activity', () => {
|
|
68
|
+
const coverageMs = ORPHANED_REPLY_MAX_REARMS * ORPHANED_REPLY_TIMEOUT_MS
|
|
69
|
+
// 20 × 30 000 ms = 600 000 ms = 10 min
|
|
70
|
+
expect(coverageMs).toBeGreaterThanOrEqual(10 * 60 * 1000)
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
it('fuse duration is still 30 s', () => {
|
|
74
|
+
expect(ORPHANED_REPLY_TIMEOUT_MS).toBe(30_000)
|
|
75
|
+
})
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Tests: PRIMARY fix — re-arm guard
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
describe('PRIMARY fix: re-arm guard (shouldRearmInsteadOfFire)', () => {
|
|
83
|
+
it('re-arms when a tool is in flight and rearm count is under the cap', () => {
|
|
84
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 0, maxRearms: 20 })).toBe(true)
|
|
85
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 19, maxRearms: 20 })).toBe(true)
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
it('fires once rearm count reaches the cap, even mid-tool-call', () => {
|
|
89
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 20, maxRearms: 20 })).toBe(false)
|
|
90
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 21, maxRearms: 20 })).toBe(false)
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
it('fires immediately when no tool is in flight, regardless of rearm count', () => {
|
|
94
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: false, rearmCount: 0, maxRearms: 20 })).toBe(false)
|
|
95
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: false, rearmCount: 5, maxRearms: 20 })).toBe(false)
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
it('rearm count transitions: 0 → cap-1 → cap fires', () => {
|
|
99
|
+
const max = ORPHANED_REPLY_MAX_REARMS
|
|
100
|
+
for (let i = 0; i < max; i++) {
|
|
101
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: i, maxRearms: max })).toBe(true)
|
|
102
|
+
}
|
|
103
|
+
// At exactly the cap: fire
|
|
104
|
+
expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: max, maxRearms: max })).toBe(false)
|
|
105
|
+
})
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// Tests: DEFENSIVE fix — synthetic turn_end suppressor
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
describe('DEFENSIVE fix: synthetic turn_end suppressor', () => {
|
|
113
|
+
it('suppresses a synthetic turn_end (durationMs === -1) when tools are in flight', () => {
|
|
114
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: -1, midToolCall: true })).toBe(true)
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
it('does NOT suppress a synthetic turn_end when no tools are in flight', () => {
|
|
118
|
+
// No tools → the backstop should fire normally (turn is genuinely orphaned)
|
|
119
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: -1, midToolCall: false })).toBe(false)
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
it('does NOT suppress an authoritative turn_end (durationMs >= 0)', () => {
|
|
123
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: 0, midToolCall: true })).toBe(false)
|
|
124
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: 1, midToolCall: true })).toBe(false)
|
|
125
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: 12345, midToolCall: true })).toBe(false)
|
|
126
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: 0, midToolCall: false })).toBe(false)
|
|
127
|
+
})
|
|
128
|
+
|
|
129
|
+
it('only durationMs === -1 is the synthetic discriminator', () => {
|
|
130
|
+
// Values near -1 must not accidentally trigger suppression
|
|
131
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: -2, midToolCall: true })).toBe(false)
|
|
132
|
+
expect(shouldSuppressSyntheticTurnEnd({ durationMs: -0.5, midToolCall: true })).toBe(false)
|
|
133
|
+
})
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
// ---------------------------------------------------------------------------
|
|
137
|
+
// Tests: ToolFlightTracker integration with the guard logic
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
describe('ToolFlightTracker + guard integration', () => {
  // Creates a fresh tracker and feeds it one `tool_use` event per id, in order.
  const trackerWithOpenTools = (...toolUseIds) => {
    const flight = new ToolFlightTracker()
    for (const toolUseId of toolUseIds) {
      flight.onEvent({ kind: 'tool_use', toolUseId })
    }
    return flight
  }

  // Feeds the tracker one `tool_result` event per id, in order.
  const completeTools = (flight, ...toolUseIds) => {
    for (const toolUseId of toolUseIds) {
      flight.onEvent({ kind: 'tool_result', toolUseId })
    }
  }

  // Evaluates the re-arm guard against the tracker's current in-flight state.
  const rearmDecision = (flight, rearmCount) =>
    shouldRearmInsteadOfFire({
      midToolCall: flight.isMidToolCall(),
      rearmCount,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })

  // Evaluates the synthetic turn_end guard (durationMs fixed at -1) against
  // the tracker's current in-flight state.
  const suppressDecision = (flight) =>
    shouldSuppressSyntheticTurnEnd({
      durationMs: -1,
      midToolCall: flight.isMidToolCall(),
    })

  it('re-arm fires when a Bash tool is in flight', () => {
    const flight = trackerWithOpenTools('bash_1')

    expect(rearmDecision(flight, 0)).toBe(true)
  })

  it('fires normally after tool_result completes the tool', () => {
    const flight = trackerWithOpenTools('bash_1')
    completeTools(flight, 'bash_1')

    expect(rearmDecision(flight, 0)).toBe(false)
  })

  it('defensive guard suppresses synthetic turn_end mid-Bash', () => {
    const flight = trackerWithOpenTools('bash_2')

    expect(suppressDecision(flight)).toBe(true)
  })

  it('defensive guard allows synthetic turn_end after all tools complete', () => {
    const flight = trackerWithOpenTools('bash_2')
    completeTools(flight, 'bash_2')

    expect(suppressDecision(flight)).toBe(false)
  })

  it('parallel tools: re-arm persists while ANY tool is in flight', () => {
    const flight = trackerWithOpenTools('read_1', 'read_2', 'edit_1')

    // All three tools are still open, so the guard should keep re-arming.
    expect(rearmDecision(flight, 0)).toBe(true)

    // Close two of the three.
    completeTools(flight, 'read_1', 'read_2')

    // edit_1 remains open, so the guard should still re-arm.
    expect(rearmDecision(flight, 1)).toBe(true)

    // Close the last one.
    completeTools(flight, 'edit_1')

    expect(rearmDecision(flight, 2)).toBe(false)
  })

  it('cap fires even mid-tool after 20 re-arms (wedged tool surfaces)', () => {
    const flight = trackerWithOpenTools('hung_bash')

    // Every re-arm count below the cap is expected to re-arm.
    let attempt = 0
    while (attempt < ORPHANED_REPLY_MAX_REARMS) {
      expect(rearmDecision(flight, attempt)).toBe(true)
      attempt += 1
    }

    // At the cap the guard is expected to fire even though a tool is in flight.
    expect(rearmDecision(flight, ORPHANED_REPLY_MAX_REARMS)).toBe(false)
  })
})
|
|
240
|
+
|
|
241
|
+
// ---------------------------------------------------------------------------
|
|
242
|
+
// Tests: shouldArmOrphanedReplyTimeout (existing surface, unchanged)
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
describe('shouldArmOrphanedReplyTimeout (existing — unchanged by this fix)', () => {
  // One row per test: the title, the argument object handed to
  // shouldArmOrphanedReplyTimeout, and the boolean it is expected to return.
  const scenarios = [
    {
      title: 'arms when conditions are met',
      input: { currentSessionChatId: '123', capturedTextCount: 1, replyCalled: false },
      expected: true,
    },
    {
      title: 'does not arm after reply has been called',
      input: { currentSessionChatId: '123', capturedTextCount: 5, replyCalled: true },
      expected: false,
    },
    {
      title: 'does not arm when no chat is active',
      input: { currentSessionChatId: null, capturedTextCount: 1, replyCalled: false },
      expected: false,
    },
    {
      title: 'does not arm when no text captured yet',
      input: { currentSessionChatId: '123', capturedTextCount: 0, replyCalled: false },
      expected: false,
    },
  ]

  // Register the tests in table order so the reported sequence is unchanged.
  for (const { title, input, expected } of scenarios) {
    it(title, () => {
      const armed = shouldArmOrphanedReplyTimeout(input)
      expect(armed).toBe(expected)
    })
  }
})
|