switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/agent-scheduler/index.js +56 -15
  2. package/dist/auth-broker/index.js +383 -97
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +7 -4
  5. package/dist/cli/notion-write-pretool.mjs +35 -4
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/switchroom.js +2894 -841
  9. package/dist/host-control/main.js +2685 -207
  10. package/dist/vault/approvals/kernel-server.js +7453 -7413
  11. package/dist/vault/broker/server.js +11428 -11388
  12. package/examples/minimal.yaml +1 -0
  13. package/examples/switchroom.yaml +1 -0
  14. package/package.json +3 -3
  15. package/profiles/_base/start.sh.hbs +97 -1
  16. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  17. package/profiles/default/CLAUDE.md.hbs +0 -19
  18. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  19. package/telegram-plugin/answer-stream-flag.ts +12 -49
  20. package/telegram-plugin/answer-stream.ts +5 -150
  21. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  22. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  23. package/telegram-plugin/context-exhaustion.ts +12 -0
  24. package/telegram-plugin/demo-mask.ts +154 -0
  25. package/telegram-plugin/dist/bridge/bridge.js +55 -12
  26. package/telegram-plugin/dist/gateway/gateway.js +2938 -977
  27. package/telegram-plugin/dist/server.js +55 -12
  28. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  29. package/telegram-plugin/draft-stream.ts +47 -410
  30. package/telegram-plugin/final-answer-detect.ts +17 -12
  31. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  32. package/telegram-plugin/format.ts +56 -19
  33. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  34. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  35. package/telegram-plugin/gateway/auth-command.ts +70 -14
  36. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  37. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  38. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  39. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  40. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  41. package/telegram-plugin/gateway/effort-command.ts +8 -3
  42. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  43. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  44. package/telegram-plugin/gateway/gateway.ts +1857 -292
  45. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  46. package/telegram-plugin/gateway/model-command.ts +115 -4
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-command.test.ts +134 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -151,6 +151,124 @@ describe('detectModelUnavailable — reset-time extraction', () => {
151
151
  })
152
152
  })
153
153
 
154
+ // ─── SESSION-cap (time-only) reset parsing — auth-failover-stall Fix 2 ─────────
155
+ //
156
+ // A session cap surfaces as "resets <time>" with NO month/day. Pre-fix this
157
+ // was unparseable → resetAt undefined → the 429 inference path applied the +7d
158
+ // weekly floor, benching the account for a WEEK. The new branch resolves it to
159
+ // the NEXT occurrence of that wall-clock time (hours away), tz-aware.
160
+ describe('detectModelUnavailable — time-only session-cap reset (Fix 2)', () => {
161
+ const HOUR = 3600_000
162
+ const WEEK = 7 * 24 * HOUR
163
+
164
+ // Asserts `d` is a future Date at most ~one local day away. The next occurrence of a
165
+ // wall-clock time is ≤24h away except across a DST fall-back (25h) — and crucially NOT the +7d weekly floor.
166
+ function expectHoursAway(d: Date | undefined): void {
167
+ expect(d).toBeInstanceOf(Date)
168
+ const deltaMs = (d as Date).getTime() - Date.now()
169
+ expect(deltaMs).toBeGreaterThan(0)
170
+ expect(deltaMs).toBeLessThanOrEqual(25 * HOUR + 60_000)
171
+ // The whole point: never the weekly floor.
172
+ expect(deltaMs).toBeLessThan(WEEK - HOUR)
173
+ }
174
+
175
+ // Asserts that `d`, rendered in time zone `tz`, lands on wall-clock `hour:minute`
176
+ // (`minute` defaults to 0) — sanity that we resolved the exact time, not a fudge.
177
+ function expectWallClock(d: Date | undefined, tz: string, hour: number, minute = 0): void {
178
+ expect(d).toBeInstanceOf(Date)
179
+ const parts = Object.fromEntries( // part type → formatted string, literals dropped
180
+ new Intl.DateTimeFormat('en-US', {
181
+ timeZone: tz, hour: '2-digit', minute: '2-digit', hour12: false,
182
+ })
183
+ .formatToParts(d as Date)
184
+ .filter((p) => p.type !== 'literal')
185
+ .map((p) => [p.type, p.value]),
186
+ )
187
+ expect(Number(parts.hour) % 24).toBe(hour) // % 24 folds an hour of "24" to 0 — presumably for runtimes that print midnight as 24 with hour12: false; TODO confirm
188
+ expect(Number(parts.minute)).toBe(minute)
189
+ }
190
+
191
+ it('parses "resets 5pm (Australia/Melbourne)" to the next 17:00 there, hours away (NOT +7d)', () => {
192
+ const d = detectModelUnavailable(
193
+ "You've hit your session limit · resets 5pm (Australia/Melbourne)",
194
+ )
195
+ expect(d?.kind).toBe('quota_exhausted')
196
+ expectHoursAway(d?.resetAt)
197
+ expectWallClock(d?.resetAt, 'Australia/Melbourne', 17, 0)
198
+ })
199
+
200
+ it('parses the "at"-prefixed form — "resets at 5pm (Australia/Melbourne)" (parity with wedge-watchdog parseWeeklyReset)', () => {
201
+ // wedge-watchdog's parseWeeklyReset time-only regex accepts an optional
202
+ // "(?:at\s+)?" token; this parser must accept the IDENTICAL grammar or the
203
+ // "at"-prefixed string falls through to the +7d weekly floor — the
204
+ // week-long-bench bug this PR exists to kill.
205
+ const d = detectModelUnavailable(
206
+ "You've hit your session limit · resets at 5pm (Australia/Melbourne)",
207
+ )
208
+ expect(d?.kind).toBe('quota_exhausted')
209
+ expectHoursAway(d?.resetAt)
210
+ expectWallClock(d?.resetAt, 'Australia/Melbourne', 17, 0)
211
+ })
212
+
213
+ it('parses am times — "resets 8:50am (Australia/Melbourne)"', () => {
214
+ const d = detectModelUnavailable("You've hit your limit · resets 8:50am (Australia/Melbourne)")
215
+ expect(d?.kind).toBe('quota_exhausted')
216
+ expectHoursAway(d?.resetAt)
217
+ expectWallClock(d?.resetAt, 'Australia/Melbourne', 8, 50)
218
+ })
219
+
220
+ it('parses a time WITHOUT minutes — "resets 9am (UTC)"', () => {
221
+ const d = detectModelUnavailable('hit your limit · resets 9am (UTC)')
222
+ expect(d?.kind).toBe('quota_exhausted')
223
+ expectHoursAway(d?.resetAt)
224
+ expectWallClock(d?.resetAt, 'UTC', 9, 0)
225
+ })
226
+
227
+ it('parses a time WITHOUT a tz label (best-effort UTC) — "resets 11pm"', () => {
228
+ const d = detectModelUnavailable('usage limit hit · resets 11pm')
229
+ expect(d?.kind).toBe('quota_exhausted')
230
+ expectHoursAway(d?.resetAt)
231
+ expectWallClock(d?.resetAt, 'UTC', 23, 0)
232
+ })
233
+
234
+ it('parses 24-hour clock times — "resets 17:00 (UTC)"', () => {
235
+ const d = detectModelUnavailable('hit your limit · resets 17:00 (UTC)')
236
+ expect(d?.kind).toBe('quota_exhausted')
237
+ expectHoursAway(d?.resetAt)
238
+ expectWallClock(d?.resetAt, 'UTC', 17, 0)
239
+ })
240
+
241
+ it('STILL parses a bare ISO-8601 reset (calendar-path regression guard)', () => {
242
+ const d = detectModelUnavailable('quota exhausted, retry at 2026-05-03T11:00:00Z')
243
+ expect(d?.resetAt?.toISOString()).toBe('2026-05-03T11:00:00.000Z')
244
+ })
245
+
246
+ it('a month/day "resets" string is NOT hijacked into the time-only branch', () => {
247
+ // The negative lookahead must reject "May"/"Jun" so a date-bearing string
248
+ // never resolves to "tomorrow at HH:MM". (The month/day+time calendar form
249
+ // itself does not currently resolve to a Date — that is pre-existing
250
+ // behaviour; the load-bearing guard is that the time-only branch leaves it
251
+ // alone rather than producing a WRONG hours-away time.)
252
+ const may = detectModelUnavailable("You're out of extra usage · resets May 3, 11am")
253
+ expect(may?.kind).toBe('quota_exhausted')
254
+ // If the time-only branch had wrongly fired, resetAt would be ≤24h away.
255
+ if (may?.resetAt) {
256
+ const deltaMs = may.resetAt.getTime() - Date.now()
257
+ // A genuine May-3 resolution is many days away (or in the past); never the
258
+ // bare next-11am-tomorrow the time-only branch would have produced.
259
+ expect(Math.abs(deltaMs)).toBeGreaterThan(2 * 24 * HOUR)
260
+ }
261
+ const jun = detectModelUnavailable(
262
+ "hit your limit · resets Jun 9, 5am (Australia/Melbourne)",
263
+ )
264
+ expect(jun?.kind).toBe('quota_exhausted')
265
+ if (jun?.resetAt) {
266
+ const deltaMs = jun.resetAt.getTime() - Date.now()
267
+ expect(Math.abs(deltaMs)).toBeGreaterThan(2 * 24 * HOUR)
268
+ }
269
+ })
270
+ })
271
+
154
272
  // ─── formatModelUnavailableCard ──────────────────────────────────────────────
155
273
 
156
274
  describe('formatModelUnavailableCard — actionable card', () => {
@@ -0,0 +1,118 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import {
3
+ normalizeNarrative,
4
+ prefixSimilarity,
5
+ isDraftOfReply,
6
+ DRAFT_SUPPRESS_THRESHOLD,
7
+ REPLY_TOOLS,
8
+ } from '../narrative-dedup.js'
9
+
10
+ describe('narrative-dedup', () => {
11
+ it('pins the threshold so a silent retune breaks the test', () => {
12
+ expect(DRAFT_SUPPRESS_THRESHOLD).toBe(0.8)
13
+ })
14
+
15
+ it('REPLY_TOOLS holds exactly reply + stream_reply', () => {
16
+ expect(REPLY_TOOLS.has('reply')).toBe(true)
17
+ expect(REPLY_TOOLS.has('stream_reply')).toBe(true)
18
+ expect(REPLY_TOOLS.has('Bash')).toBe(false)
19
+ })
20
+
21
+ describe('normalizeNarrative', () => {
22
+ it('strips markdown emphasis/heading/quote marks, collapses whitespace, lowercases', () => {
23
+ expect(normalizeNarrative('**Bold** _italic_ `code`')).toBe('bold italic code')
24
+ expect(normalizeNarrative('> # Heading\n text')).toBe('heading text')
25
+ })
26
+ })
27
+
28
+ describe('prefixSimilarity', () => {
29
+ it('returns 1 for identical strings', () => {
30
+ expect(prefixSimilarity('hello there', 'hello there')).toBe(1)
31
+ })
32
+
33
+ it('returns 0 when either side is empty (no divide-by-zero)', () => {
34
+ expect(prefixSimilarity('', 'something')).toBe(0)
35
+ expect(prefixSimilarity('something', '')).toBe(0)
36
+ expect(prefixSimilarity('', '')).toBe(0)
37
+ })
38
+
39
+ it('ratio is over the SHORTER normalized string', () => {
40
+ // "abc" vs "abcdef": shared prefix 3 of shorter length 3 = 1.0
41
+ expect(prefixSimilarity('abc', 'abcdef')).toBe(1)
42
+ // "abx" vs "abcdef": shared prefix 2 of shorter length 3 ≈ 0.667
43
+ expect(prefixSimilarity('abx', 'abcdef')).toBeCloseTo(2 / 3, 5)
44
+ })
45
+ })
46
+
47
+ describe('isDraftOfReply', () => {
48
+ it('SUPPRESS: identical draft and reply', () => {
49
+ const t = 'The repo is at /home/user/code/switchroom.'
50
+ expect(isDraftOfReply(t, t)).toBe(true)
51
+ })
52
+
53
+ it('SUPPRESS: draft whose trailing sentence was trimmed before sending (~0.85 prefix)', () => {
54
+ const draft = 'The repo is at /home/user/code/switchroom. I will start now.'
55
+ const reply = 'The repo is at /home/user/code/switchroom.'
56
+ // reply is the shorter string and is a full prefix of the draft → 1.0
57
+ expect(prefixSimilarity(draft, reply)).toBe(1)
58
+ expect(isDraftOfReply(draft, reply)).toBe(true)
59
+ // And a near-prefix case (the draft carries one extra trailing word, so the reply is not an exact prefix):
60
+ const draft2 = 'Found both repos and confirmed the remote is correct here.'
61
+ const reply2 = 'Found both repos and confirmed the remote is correct.'
62
+ expect(prefixSimilarity(draft2, reply2)).toBeGreaterThanOrEqual(0.85)
63
+ expect(isDraftOfReply(draft2, reply2)).toBe(true)
64
+ })
65
+
66
+ it('SHOW: post-action narration that merely precedes a different reply', () => {
67
+ // "Sent. Waiting on the build…" vs an unrelated reply payload — short
68
+ // string, near-zero shared prefix → below threshold → SHOW.
69
+ const narration = 'Sent. Waiting on the build…'
70
+ const reply = "Here's the result of the build: all green."
71
+ expect(prefixSimilarity(narration, reply)).toBeLessThan(DRAFT_SUPPRESS_THRESHOLD)
72
+ expect(isDraftOfReply(narration, reply)).toBe(false)
73
+ })
74
+
75
+ it('SHOW: empty reply text never suppresses (no divide-by-zero)', () => {
76
+ expect(isDraftOfReply('On it. Let me find the repo…', '')).toBe(false)
77
+ })
78
+
79
+ it('SUPPRESS: draft differs from reply only by markdown decoration', () => {
80
+ const draft = 'Here is the **plan**: do A then B.'
81
+ const reply = 'Here is the plan: do A then B.'
82
+ // After normalization the markdown stars vanish → identical → suppress.
83
+ expect(normalizeNarrative(draft)).toBe(normalizeNarrative(reply))
84
+ expect(isDraftOfReply(draft, reply)).toBe(true)
85
+ })
86
+
87
+ it('NIT 2: the doubled-capturedText proxy mis-suppresses; the actual reply text does not', () => {
88
+ // The bug: flushPendingNarrativeAtTurnEnd used to compare a trailing
89
+ // narration against capturedText.join(''). When the model emits the same
90
+ // short string twice in a turn — e.g. "Done." as working narration and
91
+ // then "Done." as the reply — that proxy becomes the CONCATENATION
92
+ // "Done.Done.", whose prefix the trailing narration still matches above
93
+ // threshold → genuine trailing narration WRONGLY suppressed.
94
+ const trailing = 'Done.'
95
+ const doubledProxy = 'Done.' + 'Done.' // capturedText.join('') of two "Done." blocks
96
+ const actualReply = 'Done.'
97
+
98
+ // Old (broken) comparison: trailing vs the doubled proxy → wrongly suppresses.
99
+ expect(isDraftOfReply(trailing, doubledProxy)).toBe(true)
100
+
101
+ // New comparison: trailing vs the ACTUAL reply text. Here the reply text
102
+ // really IS "Done.", so a trailing "Done." is a genuine duplicate and is
103
+ // correctly suppressed — the fix preserves the common-case suppression.
104
+ expect(isDraftOfReply(trailing, actualReply)).toBe(true)
105
+ })
106
+
107
+ it('NIT 2: genuine trailing narration is preserved when the reply text differs', () => {
108
+ // The case the proxy hurt most: the turn's reply is a SHORT distinct
109
+ // string and the trailing narration is genuine liveness. Comparing
110
+ // against the actual reply text (not a concatenation that happens to
111
+ // share a prefix) keeps the trailing narration SHOWN.
112
+ const trailingNarration = 'Done — all green, pushing now.'
113
+ const actualReply = 'Here is the summary you asked for: 3 files changed.'
114
+ // Below threshold against the real reply → SHOW (not suppressed).
115
+ expect(isDraftOfReply(trailingNarration, actualReply)).toBe(false)
116
+ })
117
+ })
118
+ })
@@ -0,0 +1,285 @@
1
+ /**
2
+ * Unit tests for the activity-feed-teardown fix (orphaned-reply backstop).
3
+ *
4
+ * Root cause: the orphaned-reply backstop fired a synthetic turn_end
5
+ * (`durationMs: -1`) after 30 s of silence, even mid-tool-call. That nulled
6
+ * `currentTurn` and dropped every subsequent `tool_label`, darkening the live
7
+ * activity feed for the rest of the turn.
8
+ *
9
+ * Fix: three layers described in the PR.
10
+ * PRIMARY — fuse fires mid-tool → re-arm instead (bounded by ORPHANED_REPLY_MAX_REARMS).
11
+ * SECONDARY — tool_label re-arms the fuse so active label streams keep it fresh.
12
+ * DEFENSIVE — turn_end entry rejects the synthetic event if tools are in flight.
13
+ *
14
+ * These tests cover the pure / unit-testable surfaces:
15
+ * - shouldArmOrphanedReplyTimeout (existing, now with midToolCall param; the cases below omit it)
16
+ * - ORPHANED_REPLY_MAX_REARMS constant math
17
+ * - The re-arm guard logic (pure decision extracted from the closure)
18
+ * - The defensive turn_end discriminator (durationMs === -1 + in-flight check)
19
+ */
20
+
21
+ import { describe, it, expect } from 'vitest'
22
+ import {
23
+ shouldArmOrphanedReplyTimeout,
24
+ ORPHANED_REPLY_TIMEOUT_MS,
25
+ ORPHANED_REPLY_MAX_REARMS,
26
+ } from '../context-exhaustion.js'
27
+ import { ToolFlightTracker } from '../gateway/interrupt-defer.js'
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Helpers — pure decision functions mirroring the gateway closure logic.
31
+ // These extract the discriminable parts of the fix so they are unit-testable
32
+ // without instantiating the full gateway.
33
+ // ---------------------------------------------------------------------------
34
+
35
+ /**
36
+ * Mirrors the PRIMARY fix decision inside the setTimeout callback: returns true (re-arm the
37
+ * backstop) only while a tool is in flight AND rearmCount is below maxRearms; otherwise false (fire turn_end).
38
+ */
39
+ function shouldRearmInsteadOfFire(opts: {
40
+ midToolCall: boolean // the tests below feed this from ToolFlightTracker.isMidToolCall()
41
+ rearmCount: number // compared against maxRearms; the tests pass the number of re-arms so far
42
+ maxRearms: number // cap: re-arming stops once rearmCount reaches this value
43
+ }): boolean {
44
+ return opts.midToolCall && opts.rearmCount < opts.maxRearms
45
+ }
46
+
47
+ /**
48
+ * Mirrors the DEFENSIVE fix at turn_end entry: returns true (suppress) only for a
49
+ * synthetic turn_end (durationMs === -1) that arrives while a tool is in flight; any other duration is never suppressed.
50
+ */
51
+ function shouldSuppressSyntheticTurnEnd(opts: {
52
+ durationMs: number // exactly -1 marks the synthetic, backstop-generated turn_end
53
+ midToolCall: boolean // the tests below feed this from ToolFlightTracker.isMidToolCall()
54
+ }): boolean {
55
+ return opts.durationMs === -1 && opts.midToolCall
56
+ }
57
+
58
+ // ---------------------------------------------------------------------------
59
+ // Tests: ORPHANED_REPLY_MAX_REARMS constant
60
+ // ---------------------------------------------------------------------------
61
+
62
+ describe('ORPHANED_REPLY_MAX_REARMS', () => {
63
+ it('is 20 (20 × 30 s = 10 min cap)', () => {
64
+ expect(ORPHANED_REPLY_MAX_REARMS).toBe(20)
65
+ })
66
+
67
+ it('combined with ORPHANED_REPLY_TIMEOUT_MS covers at least 10 min of tool activity', () => {
68
+ const coverageMs = ORPHANED_REPLY_MAX_REARMS * ORPHANED_REPLY_TIMEOUT_MS
69
+ // 20 × 30 000 ms = 600 000 ms = 10 min
70
+ expect(coverageMs).toBeGreaterThanOrEqual(10 * 60 * 1000)
71
+ })
72
+
73
+ it('fuse duration is still 30 s', () => {
74
+ expect(ORPHANED_REPLY_TIMEOUT_MS).toBe(30_000)
75
+ })
76
+ })
77
+
78
+ // ---------------------------------------------------------------------------
79
+ // Tests: PRIMARY fix — re-arm guard
80
+ // ---------------------------------------------------------------------------
81
+
82
+ describe('PRIMARY fix: re-arm guard (shouldRearmInsteadOfFire)', () => {
83
+ it('re-arms when a tool is in flight and rearm count is under the cap', () => {
84
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 0, maxRearms: 20 })).toBe(true)
85
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 19, maxRearms: 20 })).toBe(true)
86
+ })
87
+
88
+ it('fires once rearm count reaches the cap, even mid-tool-call', () => {
89
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 20, maxRearms: 20 })).toBe(false)
90
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: 21, maxRearms: 20 })).toBe(false)
91
+ })
92
+
93
+ it('fires immediately when no tool is in flight, regardless of rearm count', () => {
94
+ expect(shouldRearmInsteadOfFire({ midToolCall: false, rearmCount: 0, maxRearms: 20 })).toBe(false)
95
+ expect(shouldRearmInsteadOfFire({ midToolCall: false, rearmCount: 5, maxRearms: 20 })).toBe(false)
96
+ })
97
+
98
+ it('rearm count transitions: 0 → cap-1 → cap fires', () => {
99
+ const max = ORPHANED_REPLY_MAX_REARMS
100
+ for (let i = 0; i < max; i++) {
101
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: i, maxRearms: max })).toBe(true)
102
+ }
103
+ // At exactly the cap: fire
104
+ expect(shouldRearmInsteadOfFire({ midToolCall: true, rearmCount: max, maxRearms: max })).toBe(false)
105
+ })
106
+ })
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // Tests: DEFENSIVE fix — synthetic turn_end suppressor
110
+ // ---------------------------------------------------------------------------
111
+
112
+ describe('DEFENSIVE fix: synthetic turn_end suppressor', () => {
113
+ it('suppresses a synthetic turn_end (durationMs === -1) when tools are in flight', () => {
114
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: -1, midToolCall: true })).toBe(true)
115
+ })
116
+
117
+ it('does NOT suppress a synthetic turn_end when no tools are in flight', () => {
118
+ // No tools → the backstop should fire normally (turn is genuinely orphaned)
119
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: -1, midToolCall: false })).toBe(false)
120
+ })
121
+
122
+ it('does NOT suppress an authoritative turn_end (durationMs >= 0)', () => {
123
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: 0, midToolCall: true })).toBe(false)
124
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: 1, midToolCall: true })).toBe(false)
125
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: 12345, midToolCall: true })).toBe(false)
126
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: 0, midToolCall: false })).toBe(false)
127
+ })
128
+
129
+ it('only durationMs === -1 is the synthetic discriminator', () => {
130
+ // Values near -1 must not accidentally trigger suppression
131
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: -2, midToolCall: true })).toBe(false)
132
+ expect(shouldSuppressSyntheticTurnEnd({ durationMs: -0.5, midToolCall: true })).toBe(false)
133
+ })
134
+ })
135
+
136
+ // ---------------------------------------------------------------------------
137
+ // Tests: ToolFlightTracker integration with the guard logic
138
+ // ---------------------------------------------------------------------------
139
+
140
+ describe('ToolFlightTracker + guard integration', () => {
141
+ it('re-arm fires when a Bash tool is in flight', () => {
142
+ const tracker = new ToolFlightTracker()
143
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash_1' })
144
+
145
+ expect(shouldRearmInsteadOfFire({
146
+ midToolCall: tracker.isMidToolCall(),
147
+ rearmCount: 0,
148
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
149
+ })).toBe(true)
150
+ })
151
+
152
+ it('fires normally after tool_result completes the tool', () => {
153
+ const tracker = new ToolFlightTracker()
154
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash_1' })
155
+ tracker.onEvent({ kind: 'tool_result', toolUseId: 'bash_1' })
156
+
157
+ expect(shouldRearmInsteadOfFire({
158
+ midToolCall: tracker.isMidToolCall(),
159
+ rearmCount: 0,
160
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
161
+ })).toBe(false)
162
+ })
163
+
164
+ it('defensive guard suppresses synthetic turn_end mid-Bash', () => {
165
+ const tracker = new ToolFlightTracker()
166
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash_2' })
167
+
168
+ expect(shouldSuppressSyntheticTurnEnd({
169
+ durationMs: -1,
170
+ midToolCall: tracker.isMidToolCall(),
171
+ })).toBe(true)
172
+ })
173
+
174
+ it('defensive guard allows synthetic turn_end after all tools complete', () => {
175
+ const tracker = new ToolFlightTracker()
176
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash_2' })
177
+ tracker.onEvent({ kind: 'tool_result', toolUseId: 'bash_2' })
178
+
179
+ expect(shouldSuppressSyntheticTurnEnd({
180
+ durationMs: -1,
181
+ midToolCall: tracker.isMidToolCall(),
182
+ })).toBe(false)
183
+ })
184
+
185
+ it('parallel tools: re-arm persists while ANY tool is in flight', () => {
186
+ const tracker = new ToolFlightTracker()
187
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'read_1' })
188
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'read_2' })
189
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'edit_1' })
190
+
191
+ // Still re-arming: 3 tools open
192
+ expect(shouldRearmInsteadOfFire({
193
+ midToolCall: tracker.isMidToolCall(),
194
+ rearmCount: 0,
195
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
196
+ })).toBe(true)
197
+
198
+ // Two complete
199
+ tracker.onEvent({ kind: 'tool_result', toolUseId: 'read_1' })
200
+ tracker.onEvent({ kind: 'tool_result', toolUseId: 'read_2' })
201
+
202
+ // Still re-arming: edit_1 open
203
+ expect(shouldRearmInsteadOfFire({
204
+ midToolCall: tracker.isMidToolCall(),
205
+ rearmCount: 1,
206
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
207
+ })).toBe(true)
208
+
209
+ // All complete
210
+ tracker.onEvent({ kind: 'tool_result', toolUseId: 'edit_1' })
211
+
212
+ expect(shouldRearmInsteadOfFire({
213
+ midToolCall: tracker.isMidToolCall(),
214
+ rearmCount: 2,
215
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
216
+ })).toBe(false)
217
+ })
218
+
219
+ it('cap fires even mid-tool after 20 re-arms (wedged tool surfaces)', () => {
220
+ const tracker = new ToolFlightTracker()
221
+ tracker.onEvent({ kind: 'tool_use', toolUseId: 'hung_bash' })
222
+
223
+ // First 20 re-arms proceed
224
+ for (let i = 0; i < ORPHANED_REPLY_MAX_REARMS; i++) {
225
+ expect(shouldRearmInsteadOfFire({
226
+ midToolCall: tracker.isMidToolCall(),
227
+ rearmCount: i,
228
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
229
+ })).toBe(true)
230
+ }
231
+
232
+ // 21st attempt: cap reached (rearmCount === ORPHANED_REPLY_MAX_REARMS) — fire despite in-flight
233
+ expect(shouldRearmInsteadOfFire({
234
+ midToolCall: tracker.isMidToolCall(),
235
+ rearmCount: ORPHANED_REPLY_MAX_REARMS,
236
+ maxRearms: ORPHANED_REPLY_MAX_REARMS,
237
+ })).toBe(false)
238
+ })
239
+ })
240
+
241
+ // ---------------------------------------------------------------------------
242
+ // Tests: shouldArmOrphanedReplyTimeout (existing surface, unchanged)
243
+ // ---------------------------------------------------------------------------
244
+
245
+ describe('shouldArmOrphanedReplyTimeout (existing — unchanged by this fix)', () => {
246
+ it('arms when conditions are met', () => {
247
+ expect(
248
+ shouldArmOrphanedReplyTimeout({
249
+ currentSessionChatId: '123',
250
+ capturedTextCount: 1,
251
+ replyCalled: false,
252
+ }),
253
+ ).toBe(true)
254
+ })
255
+
256
+ it('does not arm after reply has been called', () => {
257
+ expect(
258
+ shouldArmOrphanedReplyTimeout({
259
+ currentSessionChatId: '123',
260
+ capturedTextCount: 5,
261
+ replyCalled: true,
262
+ }),
263
+ ).toBe(false)
264
+ })
265
+
266
+ it('does not arm when no chat is active', () => {
267
+ expect(
268
+ shouldArmOrphanedReplyTimeout({
269
+ currentSessionChatId: null,
270
+ capturedTextCount: 1,
271
+ replyCalled: false,
272
+ }),
273
+ ).toBe(false)
274
+ })
275
+
276
+ it('does not arm when no text captured yet', () => {
277
+ expect(
278
+ shouldArmOrphanedReplyTimeout({
279
+ currentSessionChatId: '123',
280
+ capturedTextCount: 0,
281
+ replyCalled: false,
282
+ }),
283
+ ).toBe(false)
284
+ })
285
+ })