switchroom 0.15.44 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3249 -1241
  10. package/dist/cli/ui/index.html +1 -1
  11. package/dist/host-control/main.js +2833 -355
  12. package/dist/vault/approvals/kernel-server.js +7482 -7439
  13. package/dist/vault/broker/server.js +11315 -11272
  14. package/examples/minimal.yaml +1 -0
  15. package/examples/switchroom.yaml +1 -0
  16. package/package.json +3 -3
  17. package/profiles/_base/start.sh.hbs +88 -1
  18. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  19. package/profiles/default/CLAUDE.md.hbs +3 -22
  20. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  21. package/telegram-plugin/answer-stream-flag.ts +12 -49
  22. package/telegram-plugin/answer-stream.ts +5 -150
  23. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  24. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  25. package/telegram-plugin/context-exhaustion.ts +12 -0
  26. package/telegram-plugin/demo-mask.ts +154 -0
  27. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  28. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  29. package/telegram-plugin/dist/server.js +215 -172
  30. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  31. package/telegram-plugin/draft-stream.ts +47 -410
  32. package/telegram-plugin/final-answer-detect.ts +17 -12
  33. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  34. package/telegram-plugin/format.ts +56 -19
  35. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  36. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  37. package/telegram-plugin/gateway/auth-command.ts +70 -14
  38. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  39. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  40. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  41. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  42. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  43. package/telegram-plugin/gateway/effort-command.ts +8 -3
  44. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  45. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  46. package/telegram-plugin/gateway/gateway.ts +1837 -291
  47. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  48. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  49. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  50. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  51. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  52. package/telegram-plugin/history.ts +33 -11
  53. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  54. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  55. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  56. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  57. package/telegram-plugin/issues-card.ts +4 -0
  58. package/telegram-plugin/model-unavailable.ts +124 -0
  59. package/telegram-plugin/narrative-dedup.ts +69 -0
  60. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  61. package/telegram-plugin/package.json +3 -3
  62. package/telegram-plugin/pending-work-progress.ts +12 -0
  63. package/telegram-plugin/permission-rule.ts +32 -5
  64. package/telegram-plugin/permission-title.ts +152 -9
  65. package/telegram-plugin/quota-check.ts +13 -0
  66. package/telegram-plugin/quota-watch.ts +135 -7
  67. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  68. package/telegram-plugin/registry/turns-schema.ts +9 -0
  69. package/telegram-plugin/runtime-metrics.ts +13 -0
  70. package/telegram-plugin/session-tail.ts +96 -11
  71. package/telegram-plugin/silence-poke.ts +170 -24
  72. package/telegram-plugin/slot-banner-driver.ts +3 -0
  73. package/telegram-plugin/status-no-truncate.ts +44 -0
  74. package/telegram-plugin/status-reactions.ts +20 -3
  75. package/telegram-plugin/stream-controller.ts +4 -23
  76. package/telegram-plugin/stream-reply-handler.ts +6 -24
  77. package/telegram-plugin/streaming-metrics.ts +91 -0
  78. package/telegram-plugin/subagent-watcher.ts +212 -66
  79. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  80. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  81. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  82. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  83. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  84. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  85. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  86. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  87. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  88. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  89. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  90. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  91. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  92. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  93. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  94. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  95. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  96. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  97. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  98. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  99. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  100. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  101. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  102. package/telegram-plugin/tests/history.test.ts +60 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -0,0 +1,526 @@
1
+ /**
2
+ * feed-survival.test.ts — unit tests for the feed-survival primitive.
3
+ *
4
+ * Tests the three gaps identified in the audit:
5
+ *
6
+ * Gap 1 — detached background work (Bash run_in_background, Agent/Task)
7
+ * empties inFlight on near-instant tool_result, but the work is
8
+ * still running. Both orphaned-reply timer and silence-poke must
9
+ * not tear down the feed while hasPendingAsyncDispatch is true.
10
+ *
11
+ * Gap 2 — silence-poke defer was gated on SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1
12
+ * (default OFF). The new isLegitimatelyWorking callback makes the
13
+ * defer the DEFAULT when wired, without requiring the env var.
14
+ *
15
+ * Gap 3 — ask_user with TTL >10min hit ORPHANED_REPLY_MAX_REARMS and was
16
+ * force-closed mid-human-wait. Now exempt from the cap.
17
+ *
18
+ * Regression: a truly idle turn (no work) still backstops/tears down as before.
19
+ */
20
+
21
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
22
+ import {
23
+ startTurn,
24
+ noteOutbound,
25
+ noteToolStart,
26
+ noteToolEnd,
27
+ endTurn,
28
+ __tickForTests,
29
+ __setDepsForTests,
30
+ __getStateForTests,
31
+ __resetAllForTests,
32
+ DEFAULT_THRESHOLDS,
33
+ type SilencePokeMetric,
34
+ type FrameworkFallbackContext,
35
+ } from '../silence-poke.js'
36
+ import {
37
+ noteAsyncDispatch,
38
+ hasPendingAsyncDispatch,
39
+ noteOutbound as ppNoteOutbound,
40
+ noteTurnEnd as ppNoteTurnEnd,
41
+ startTurn as ppStartTurn,
42
+ clearPending,
43
+ __resetAllForTests as ppReset,
44
+ __setDepsForTests as ppSetDeps,
45
+ } from '../pending-work-progress.js'
46
+ import { ToolFlightTracker } from '../gateway/interrupt-defer.js'
47
+ import {
48
+ ORPHANED_REPLY_TIMEOUT_MS,
49
+ ORPHANED_REPLY_MAX_REARMS,
50
+ } from '../context-exhaustion.js'
51
+
52
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
53
+
54
/** Everything the silence-poke test doubles recorded during one test. */
interface SilenceFixtures {
  /** Values passed to the `emitMetric` dependency, in call order. */
  emitted: SilencePokeMetric[]
  /** Values passed to the `onFrameworkFallback` dependency, in call order. */
  fallbacks: FrameworkFallbackContext[]
}

/**
 * Installs recording test doubles into silence-poke via `__setDepsForTests`
 * and returns the arrays those doubles append to.
 *
 * @param opts.thresholds - overrides layered on top of `DEFAULT_THRESHOLDS`.
 * @param opts.isLegitimatelyWorking - forwarded as the dependency of the same
 *   name when supplied; when omitted the key is left out of the deps object.
 * @returns the `emitted` / `fallbacks` recorders.
 */
function setupSilenceDeps(opts?: {
  thresholds?: Partial<typeof DEFAULT_THRESHOLDS> & { fallbackHardCeiling?: number }
  isLegitimatelyWorking?: (key: string) => boolean
}): SilenceFixtures {
  const emitted: SilencePokeMetric[] = []
  const fallbacks: FrameworkFallbackContext[] = []

  // Only contribute the key when a callback was actually provided.
  const workingDep =
    opts?.isLegitimatelyWorking != null
      ? { isLegitimatelyWorking: opts.isLegitimatelyWorking }
      : {}

  __setDepsForTests({
    emitMetric: (metric) => emitted.push(metric),
    onFrameworkFallback: (context) => {
      fallbacks.push(context)
    },
    thresholdsMs: { ...DEFAULT_THRESHOLDS, ...(opts?.thresholds ?? {}) },
    ...workingDep,
  })

  return { emitted, fallbacks }
}
77
+
78
// Per-test isolation for both modules under test.
//
// The env var is cleared BEFORE each test as well as after it. One test in
// this file asserts that SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS='0'
// force-disables the defer, so a value inherited from the developer's shell
// or CI would otherwise leak into the first test that runs (e.g. under a
// `-t` filter) and flip its outcome.
beforeEach(() => {
  delete process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS
  __resetAllForTests()
  ppReset()
})

// Mirror of the setup hook: leave no module state or env override behind.
afterEach(() => {
  __resetAllForTests()
  ppReset()
  delete process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS
})
88
+
89
+ // ─── hasPendingAsyncDispatch ──────────────────────────────────────────────────
90
+
91
describe('hasPendingAsyncDispatch', () => {
  const KEY = 'chat:0'

  // Shared arrange step: open a pending-work turn and note one async dispatch.
  const openTurnWithDispatch = (): void => {
    ppStartTurn(KEY)
    noteAsyncDispatch(KEY)
  }

  beforeEach(() => {
    // The pending-work module gets a no-op message editor.
    ppSetDeps({ editMessage: async () => {} })
  })

  it('returns false before any dispatch is noted', () => {
    ppStartTurn(KEY)
    expect(hasPendingAsyncDispatch(KEY)).toBe(false)
  })

  it('returns true after noteAsyncDispatch (Bash run_in_background / Agent / Task)', () => {
    openTurnWithDispatch()
    expect(hasPendingAsyncDispatch(KEY)).toBe(true)
  })

  it('returns false after clearPending (inbound clears the flag)', () => {
    openTurnWithDispatch()
    clearPending(KEY, 'inbound')
    expect(hasPendingAsyncDispatch(KEY)).toBe(false)
  })

  it('returns false for an unknown key', () => {
    expect(hasPendingAsyncDispatch('never-started')).toBe(false)
  })

  it('returns true during active turn after dispatch, false after turn-end clears', () => {
    openTurnWithDispatch()
    expect(hasPendingAsyncDispatch(KEY)).toBe(true)
    // Pending with no outbound anchor: ending the turn is expected to drop the state.
    ppNoteTurnEnd(KEY)
    expect(hasPendingAsyncDispatch(KEY)).toBe(false)
  })

  it('cross-turn: dispatch + outbound anchor + turn-end → pending PERSISTS (the core feed-survival path)', () => {
    // Production scenario this guards: detached background work is
    // dispatched, a reply is posted (which captures an anchor), and the turn
    // ends while the work is still running. The expectation is that the
    // pending flag outlives turn-end so the teardown timers keep deferring.
    openTurnWithDispatch()
    ppNoteOutbound(KEY, { messageId: 4242, text: 'kicked off the build, polling…', parseMode: 'HTML' })
    ppNoteTurnEnd(KEY)
    expect(hasPendingAsyncDispatch(KEY)).toBe(true)
  })
})
143
+
144
+ // ─── Silence-poke: isLegitimatelyWorking callback defer ──────────────────────
145
+
146
describe('silence-poke — isLegitimatelyWorking callback (default-on defer)', () => {
  const KEY = 'chat:0'
  const FALLBACK_MS = 300_000
  const CEILING_MS = 900_000
  // Every case here runs with a 300s fallback and a 900s hard ceiling.
  const thresholds = { fallback: FALLBACK_MS, fallbackHardCeiling: CEILING_MS }

  it('defers the 300s fallback when the callback returns true', () => {
    let stillWorking = true
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => stillWorking,
    })
    startTurn(KEY, 0)

    // Threshold reached, but the callback reports work in progress.
    __tickForTests(FALLBACK_MS)
    expect(fallbacks).toHaveLength(0)

    // Later tick, callback still reporting work.
    __tickForTests(500_000)
    expect(fallbacks).toHaveLength(0)

    // Work finishes; silence is already past the threshold, so the next tick fires.
    stillWorking = false
    __tickForTests(500_001)
    expect(fallbacks).toHaveLength(1)
  })

  it('does NOT defer when the callback returns false (genuinely idle turn)', () => {
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => false,
    })
    startTurn(KEY, 0)
    __tickForTests(FALLBACK_MS)
    // Nothing is working, so the fallback fires right at the threshold.
    expect(fallbacks).toHaveLength(1)
  })

  it('fires at the hard ceiling even when callback keeps returning true (hung-signal)', () => {
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => true, // the signal never clears
    })
    startTurn(KEY, 0)

    __tickForTests(FALLBACK_MS)
    expect(fallbacks).toHaveLength(0) // deferred

    __tickForTests(CEILING_MS) // reaches the ceiling
    expect(fallbacks).toHaveLength(1) // the defer is bounded
  })

  it('wired callback returns false → fallback fires even with inFlightTools non-empty (callback supersedes legacy flag)', () => {
    // With the callback wired, its answer decides the outcome: a tool is
    // noted as started, the callback still says "not working", and the
    // fallback is expected to fire regardless.
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => false,
    })
    startTurn(KEY, 0)
    noteToolStart(KEY, 't1', 'Bash', 'audit', 10_000)
    __tickForTests(FALLBACK_MS)
    expect(fallbacks).toHaveLength(1)
  })

  it('SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=0 force-disables the defer even with callback wired', () => {
    process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS = '0'
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => true,
    })
    startTurn(KEY, 0)
    __tickForTests(FALLBACK_MS)
    // The kill switch is expected to win over the callback.
    expect(fallbacks).toHaveLength(1)
  })
})
214
+
215
+ // ─── Silence-poke: detached background work (Gap 1) ──────────────────────────
216
+
217
describe('silence-poke — detached Bash run_in_background keeps feed alive', () => {
  const KEY = 'chat:0'
  const thresholds = { fallback: 300_000, fallbackHardCeiling: 900_000 }
  // Liveness predicate under test: pending async dispatch counts as work.
  const pendingDispatchCountsAsWork = (key: string): boolean => hasPendingAsyncDispatch(key)

  beforeEach(() => {
    ppSetDeps({ editMessage: async () => {} })
  })

  it('defers the 300s fallback while hasPendingAsyncDispatch is true', () => {
    // Scenario: the model launches Bash with run_in_background, the handle
    // comes back instantly, and the background process keeps running. The
    // only remaining "still working" signal is the pending-dispatch flag.
    ppStartTurn(KEY)
    noteAsyncDispatch(KEY)

    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: pendingDispatchCountsAsWork,
    })
    startTurn(KEY, 0)

    __tickForTests(300_000) // no tool noted in flight, background work pending
    expect(fallbacks).toHaveLength(0)

    __tickForTests(500_000) // still pending
    expect(fallbacks).toHaveLength(0)

    // Pending flag cleared: the next tick is free to fire.
    clearPending(KEY, 'inbound')
    __tickForTests(500_001)
    expect(fallbacks).toHaveLength(1)
  })

  it('a truly idle turn (no background work, no tool in flight) still fires at 300s', () => {
    const { fallbacks } = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: pendingDispatchCountsAsWork,
    })
    startTurn(KEY, 0)
    __tickForTests(300_000)
    // Nothing was dispatched, so the backstop behaves as before.
    expect(fallbacks).toHaveLength(1)
  })
})
256
+
257
+ // ─── Orphaned-reply: ask_user exemption from ORPHANED_REPLY_MAX_REARMS ───────
258
+
259
/**
 * Pure decision helper for the orphaned-reply guard, written to mirror the
 * logic in gateway.ts so the ask_user exemption can be tested without
 * standing up the gateway.
 *
 * @param opts.isLegitimatelyWorking - some work is believed to be running.
 * @param opts.humanWaiting - an ask_user prompt is open.
 * @param opts.rearmCount - how many times the timer has already been re-armed.
 * @param opts.maxRearms - re-arm cap applied to non-human waits.
 * @returns `'rearm'` to keep the feed alive, `'fire'` to run the backstop.
 */
function shouldRearmOrphanedReply(opts: {
  isLegitimatelyWorking: boolean
  humanWaiting: boolean
  rearmCount: number
  maxRearms: number
}): 'rearm' | 'fire' {
  // An open ask_user is exempt from the cap: keep re-arming while it is open.
  if (opts.humanWaiting) return 'rearm'

  // No human wait and no work: nothing justifies keeping the feed open.
  if (!opts.isLegitimatelyWorking) return 'fire'

  // Other work honours the cap, so a hung tool eventually surfaces.
  return opts.rearmCount < opts.maxRearms ? 'rearm' : 'fire'
}
280
+
281
describe('orphaned-reply: ask_user exempt from ORPHANED_REPLY_MAX_REARMS', () => {
  const CAP = ORPHANED_REPLY_MAX_REARMS

  // Positional shorthand around the decision helper, always using the real cap.
  const decide = (
    isLegitimatelyWorking: boolean,
    humanWaiting: boolean,
    rearmCount: number,
  ): 'rearm' | 'fire' =>
    shouldRearmOrphanedReply({ isLegitimatelyWorking, humanWaiting, rearmCount, maxRearms: CAP })

  it('re-arms indefinitely while ask_user is open (humanWaiting=true)', () => {
    // Sweep from zero to well past the cap: an open human wait always re-arms.
    for (let count = 0; count <= CAP + 5; count += 1) {
      expect(decide(false, true, count)).toBe('rearm')
    }
  })

  it('fires once ask_user closes (humanWaiting=false, no other work)', () => {
    expect(decide(false, false, 0)).toBe('fire')
  })

  it('standard foreground tool: still bound by ORPHANED_REPLY_MAX_REARMS', () => {
    // One below the cap re-arms…
    expect(decide(true, false, CAP - 1)).toBe('rearm')
    // …and reaching the cap fires.
    expect(decide(true, false, CAP)).toBe('fire')
  })

  it('ask_user combined with other work: cap still bypassed (humanWaiting wins)', () => {
    expect(decide(true, true, CAP + 100)).toBe('rearm')
  })
})
329
+
330
+ // ─── Orphaned-reply: detached work (Gap 1) ───────────────────────────────────
331
+
332
describe('orphaned-reply: detached background work keeps feed alive past 30s', () => {
  it('re-arms while isLegitimatelyWorking=true (bg work in flight)', () => {
    // Background work running, nobody waiting on a human; only the re-arm
    // count varies. The last row is the cap itself, where the backstop fires.
    const bgWork = {
      isLegitimatelyWorking: true,
      humanWaiting: false,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    }
    const cases: Array<[number, 'rearm' | 'fire']> = [
      [0, 'rearm'],
      [ORPHANED_REPLY_MAX_REARMS - 1, 'rearm'],
      [ORPHANED_REPLY_MAX_REARMS, 'fire'],
    ]

    for (const [rearmCount, outcome] of cases) {
      expect(shouldRearmOrphanedReply({ ...bgWork, rearmCount })).toBe(outcome)
    }
  })

  it('fires immediately when no work at all (truly idle turn)', () => {
    const decision = shouldRearmOrphanedReply({
      isLegitimatelyWorking: false,
      humanWaiting: false,
      rearmCount: 0,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })
    expect(decision).toBe('fire')
  })
})
360
+
361
+ // ─── Synthetic turn_end suppressor with isLegitimatelyWorking ─────────────────
362
+
363
/**
 * Pure predicate written to mirror the defensive check in the gateway.ts
 * turn_end handler, keyed on isLegitimatelyWorking rather than only on
 * isMidToolCall().
 *
 * @param opts.durationMs - duration carried by the turn_end; exactly `-1` is
 *   treated as the synthetic marker.
 * @param opts.isLegitimatelyWorking - whether work is still believed to be running.
 * @returns `true` only for a synthetic turn_end that arrives while work is running.
 */
function shouldSuppressSyntheticTurnEnd(opts: {
  durationMs: number
  isLegitimatelyWorking: boolean
}): boolean {
  const { durationMs, isLegitimatelyWorking } = opts
  // Anything other than the exact -1 sentinel is never suppressed.
  if (durationMs !== -1) return false
  return isLegitimatelyWorking
}
373
+
374
describe('DEFENSIVE FIX: synthetic turn_end suppressor (extended predicate)', () => {
  const SYNTHETIC_DURATION = -1

  it('suppresses when durationMs=-1 and bg work is pending (detached Bash)', () => {
    // isLegitimatelyWorking stands in for "hasPendingAsyncDispatch returned true".
    const suppressed = shouldSuppressSyntheticTurnEnd({
      durationMs: SYNTHETIC_DURATION,
      isLegitimatelyWorking: true,
    })
    expect(suppressed).toBe(true)
  })

  it('suppresses when durationMs=-1 and ask_user is open (human-wait)', () => {
    // isLegitimatelyWorking stands in for "an ask_user entry exists for the chat".
    const suppressed = shouldSuppressSyntheticTurnEnd({
      durationMs: SYNTHETIC_DURATION,
      isLegitimatelyWorking: true,
    })
    expect(suppressed).toBe(true)
  })

  it('does NOT suppress when durationMs=-1 but no work (genuinely idle)', () => {
    const suppressed = shouldSuppressSyntheticTurnEnd({
      durationMs: SYNTHETIC_DURATION,
      isLegitimatelyWorking: false,
    })
    expect(suppressed).toBe(false)
  })

  it('does NOT suppress a REAL turn_end (durationMs >= 0)', () => {
    const realTurnEnds = [
      { durationMs: 0, isLegitimatelyWorking: true },
      { durationMs: 1, isLegitimatelyWorking: true },
      { durationMs: 12345, isLegitimatelyWorking: true },
      { durationMs: 0, isLegitimatelyWorking: false },
    ]
    for (const input of realTurnEnds) {
      expect(shouldSuppressSyntheticTurnEnd(input)).toBe(false)
    }
  })

  it('only durationMs === -1 is the synthetic discriminator (near-miss values)', () => {
    // Values close to, but not exactly, the sentinel.
    for (const durationMs of [-2, -0.5]) {
      expect(shouldSuppressSyntheticTurnEnd({ durationMs, isLegitimatelyWorking: true })).toBe(false)
    }
  })
})
408
+
409
+ // ─── Regression: idle turns still backstop as before ─────────────────────────
410
+
411
describe('REGRESSION: idle turn (no work) still backstops', () => {
  const KEY = 'chat:0'
  const FALLBACK_MS = 300_000

  it('silence-poke fires at 300s for a genuinely idle turn (callback wired)', () => {
    const { fallbacks } = setupSilenceDeps({
      thresholds: { fallback: FALLBACK_MS, fallbackHardCeiling: 900_000 },
      isLegitimatelyWorking: () => false, // callback wired, nothing working
    })
    startTurn(KEY, 0)
    __tickForTests(FALLBACK_MS)
    expect(fallbacks).toHaveLength(1)
  })

  it('silence-poke fires at 300s for an idle turn (no callback, no legacy defer)', () => {
    // Default thresholds and no isLegitimatelyWorking callback at all.
    const { fallbacks } = setupSilenceDeps()
    startTurn(KEY, 0)
    __tickForTests(FALLBACK_MS)
    expect(fallbacks).toHaveLength(1)
  })

  it('orphaned-reply fires for an idle turn (no work, no human wait)', () => {
    const decision = shouldRearmOrphanedReply({
      isLegitimatelyWorking: false,
      humanWaiting: false,
      rearmCount: 0,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })
    expect(decision).toBe('fire')
  })

  it('ToolFlightTracker empty → not legitimately working via isMidToolCall', () => {
    const freshTracker = new ToolFlightTracker()
    expect(freshTracker.isMidToolCall()).toBe(false)
  })

  it('ToolFlightTracker with tool in flight → legitimately working', () => {
    const flight = new ToolFlightTracker()
    const toolUseId = 'bg-bash'

    flight.onEvent({ kind: 'tool_use', toolUseId })
    expect(flight.isMidToolCall()).toBe(true)

    // The matching tool_result (an instant handle return for a
    // run_in_background call) empties the tracker again.
    flight.onEvent({ kind: 'tool_result', toolUseId })
    expect(flight.isMidToolCall()).toBe(false)
    // From here on, only hasPendingAsyncDispatch can signal that background
    // work is still running; this test does not assert on it.
  })
})
453
+
454
+ // ─── Combined: isLegitimatelyWorking integrates all three signals ─────────────
455
+
456
describe('isLegitimatelyWorking — all three signals', () => {
  const KEY = 'chat:0'

  // Test-local stand-in for the combined predicate: tool in flight, OR
  // pending async dispatch, OR an open ask_user (passed in as a plain flag).
  const combinedSignal = (tracker: ToolFlightTracker, askUserOpen: boolean): boolean =>
    tracker.isMidToolCall() || hasPendingAsyncDispatch(KEY) || askUserOpen

  beforeEach(() => {
    ppSetDeps({ editMessage: async () => {} })
  })

  it('true when only isMidToolCall (foreground tool)', () => {
    const tracker = new ToolFlightTracker()
    tracker.onEvent({ kind: 'tool_use', toolUseId: 't1' })
    expect(combinedSignal(tracker, false)).toBe(true)
  })

  it('true when only hasPendingAsyncDispatch (Bash run_in_background)', () => {
    const tracker = new ToolFlightTracker()
    // tool_use immediately followed by its tool_result.
    tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash1' })
    tracker.onEvent({ kind: 'tool_result', toolUseId: 'bash1' })
    ppStartTurn(KEY)
    noteAsyncDispatch(KEY)

    const working = combinedSignal(tracker, false)

    expect(tracker.isMidToolCall()).toBe(false) // tracker is empty again
    expect(hasPendingAsyncDispatch(KEY)).toBe(true) // background work pending
    expect(working).toBe(true)
  })

  it('true when only human-wait (ask_user open)', () => {
    const tracker = new ToolFlightTracker()
    // Defence-in-depth path: the tracker is empty, so the ask_user flag is
    // the only signal left. It is simulated here as a literal `true`.
    const askUserOpen = true
    expect(combinedSignal(tracker, askUserOpen)).toBe(true)
  })

  it('false when all three signals are clear (genuinely idle)', () => {
    const tracker = new ToolFlightTracker()
    expect(combinedSignal(tracker, false)).toBe(false)
  })
})
497
+
498
+ // ─── Silence-poke: hard ceiling is correctly applied ─────────────────────────
499
+
500
describe('silence-poke — hard ceiling bounds the defer', () => {
  const KEY = 'chat:0'
  const CEILING_MS = 600_000
  const thresholds = { fallback: 300_000, fallbackHardCeiling: CEILING_MS }
  // A "working" signal that never clears.
  const alwaysWorking = (): boolean => true

  it('fires at ceiling even when isLegitimatelyWorking stays true (leak-guard)', () => {
    const { fallbacks } = setupSilenceDeps({ thresholds, isLegitimatelyWorking: alwaysWorking })
    startTurn(KEY, 0)

    // Deferred at the threshold and just below the ceiling…
    for (const now of [300_000, 599_000]) {
      __tickForTests(now)
      expect(fallbacks).toHaveLength(0)
    }

    // …and fired exactly at the ceiling.
    __tickForTests(CEILING_MS)
    expect(fallbacks).toHaveLength(1)
  })

  it('fallbackFired is true after ceiling fire (no double-fire)', () => {
    const { fallbacks } = setupSilenceDeps({ thresholds, isLegitimatelyWorking: alwaysWorking })
    startTurn(KEY, 0)

    __tickForTests(CEILING_MS)
    __tickForTests(700_000) // one more tick after the ceiling

    expect(fallbacks).toHaveLength(1) // fired exactly once
    expect(__getStateForTests(KEY)!.fallbackFired).toBe(true)
  })
})