switchroom 0.15.44 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3249 -1241
  10. package/dist/cli/ui/index.html +1 -1
  11. package/dist/host-control/main.js +2833 -355
  12. package/dist/vault/approvals/kernel-server.js +7482 -7439
  13. package/dist/vault/broker/server.js +11315 -11272
  14. package/examples/minimal.yaml +1 -0
  15. package/examples/switchroom.yaml +1 -0
  16. package/package.json +3 -3
  17. package/profiles/_base/start.sh.hbs +88 -1
  18. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  19. package/profiles/default/CLAUDE.md.hbs +3 -22
  20. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  21. package/telegram-plugin/answer-stream-flag.ts +12 -49
  22. package/telegram-plugin/answer-stream.ts +5 -150
  23. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  24. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  25. package/telegram-plugin/context-exhaustion.ts +12 -0
  26. package/telegram-plugin/demo-mask.ts +154 -0
  27. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  28. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  29. package/telegram-plugin/dist/server.js +215 -172
  30. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  31. package/telegram-plugin/draft-stream.ts +47 -410
  32. package/telegram-plugin/final-answer-detect.ts +17 -12
  33. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  34. package/telegram-plugin/format.ts +56 -19
  35. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  36. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  37. package/telegram-plugin/gateway/auth-command.ts +70 -14
  38. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  39. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  40. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  41. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  42. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  43. package/telegram-plugin/gateway/effort-command.ts +8 -3
  44. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  45. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  46. package/telegram-plugin/gateway/gateway.ts +1837 -291
  47. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  48. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  49. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  50. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  51. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  52. package/telegram-plugin/history.ts +33 -11
  53. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  54. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  55. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  56. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  57. package/telegram-plugin/issues-card.ts +4 -0
  58. package/telegram-plugin/model-unavailable.ts +124 -0
  59. package/telegram-plugin/narrative-dedup.ts +69 -0
  60. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  61. package/telegram-plugin/package.json +3 -3
  62. package/telegram-plugin/pending-work-progress.ts +12 -0
  63. package/telegram-plugin/permission-rule.ts +32 -5
  64. package/telegram-plugin/permission-title.ts +152 -9
  65. package/telegram-plugin/quota-check.ts +13 -0
  66. package/telegram-plugin/quota-watch.ts +135 -7
  67. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  68. package/telegram-plugin/registry/turns-schema.ts +9 -0
  69. package/telegram-plugin/runtime-metrics.ts +13 -0
  70. package/telegram-plugin/session-tail.ts +96 -11
  71. package/telegram-plugin/silence-poke.ts +170 -24
  72. package/telegram-plugin/slot-banner-driver.ts +3 -0
  73. package/telegram-plugin/status-no-truncate.ts +44 -0
  74. package/telegram-plugin/status-reactions.ts +20 -3
  75. package/telegram-plugin/stream-controller.ts +4 -23
  76. package/telegram-plugin/stream-reply-handler.ts +6 -24
  77. package/telegram-plugin/streaming-metrics.ts +91 -0
  78. package/telegram-plugin/subagent-watcher.ts +212 -66
  79. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  80. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  81. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  82. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  83. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  84. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  85. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  86. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  87. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  88. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  89. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  90. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  91. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  92. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  93. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  94. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  95. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  96. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  97. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  98. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  99. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  100. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  101. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  102. package/telegram-plugin/tests/history.test.ts +60 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -0,0 +1,597 @@
1
+ /**
2
+ * Integration test: telegram-activity-visibility
3
+ *
4
+ * Supersedes PR #2587 (inert — read frozen `lastToolLabelAt`) and #2588
5
+ * (partial — only fixed Lever 5, left narrative clip at 120 chars).
6
+ *
7
+ * This test exercises the REAL code paths — not injected state:
8
+ *
9
+ * Fix 2 (post-answer background-agent liveness) — drives the REAL code paths:
10
+ * - The actual `startSubagentWatcher` with a real fs mock drives `onProgress`,
11
+ * which stamps `turn.subagentActivityAt` (the gateway's one-line stamp,
12
+ * gated exactly as gateway.ts gates it on a non-null currentTurn).
13
+ * - The REAL `mayOpenActivityCard` AND the REAL `evaluatePostAnswerLiveness`
14
+ * (the helper feedHeartbeatTick consults each tick) decide the verdict — not
15
+ * a re-implemented copy of the gate.
16
+ * - Concern 2 lifecycle: the gateway's `currentTurn` gating is modelled
17
+ * verbatim to prove the stamp + heartbeat EMIT in the post-answer/pre-teardown
18
+ * window but are INERT once `currentTurn` nulls at turn_end — and that the
19
+ * decoupled worker still surfaces (and is bounded) via the real,
20
+ * currentTurn-independent `workerActivityFeed`.
21
+ * - Concern 3: the staleness cap flips the verdict to 'stale' once the worker's
22
+ * last advance is older than the cap, so the card stops climbing forever.
23
+ *
24
+ * Fix 1 (narrative as first-class feed lines):
25
+ * - `clipNarrative` is called on a long narrative string → verifies 200-char
26
+ * limit (not the old 120-char one that truncated mid-sentence).
27
+ * - `appendActivityLabel` accumulates narrative AND tool label lines side-by-side
28
+ * in mirrorLines (distinct, not overwriting) → verifies the feed reads
29
+ * "narrative → tool → narrative" in order.
30
+ * - `mayOpenActivityCard` with narrative producer pre-answer → allows OPEN
31
+ * (Lever 5 removed, #2588).
32
+ *
33
+ * Each test also verifies it FAILS on the original code, as required:
34
+ * - Fix 2 without `subagentActivityAt` → gate would block (returns false).
35
+ * - Fix 1 at old 120-char clip → narrative truncates.
36
+ * - Fix 1 with Lever 5 active → narrative cannot open a card.
37
+ */
38
+
39
+ import { describe, it, expect } from 'vitest'
40
+ import * as realFs from 'fs'
41
+ import { startSubagentWatcher } from '../subagent-watcher.js'
42
+ import { mayOpenActivityCard } from '../gateway/feed-open-gate.js'
43
+ import { clipNarrative, appendActivityLabel } from '../tool-activity-summary.js'
44
+ import { evaluatePostAnswerLiveness } from '../turn-liveness-floor.js'
45
+ import {
46
+ createWorkerActivityFeed,
47
+ type WorkerActivityView,
48
+ type BotApiForWorkerFeed,
49
+ } from '../worker-activity-feed.js'
50
+
51
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
52
+
53
/**
 * Serialises each record with `JSON.stringify`, one record per line, and
 * always terminates the payload with a trailing newline (so a call with no
 * records yields a single `'\n'`).
 */
function buildJSONL(...lines: object[]): string {
  const serialised: string[] = []
  for (const record of lines) {
    serialised.push(JSON.stringify(record))
  }
  return `${serialised.join('\n')}\n`
}
56
+
57
/** Builds a `type: 'user'` JSONL record whose content is one text block holding `text`. */
function subAgentUserMsg(text: string) {
  const textBlock = { type: 'text', text }
  return {
    type: 'user',
    message: { content: [textBlock] },
  }
}
60
+
61
/**
 * Builds a `type: 'assistant'` JSONL record whose content is one `tool_use` block.
 *
 * @param name  Tool name recorded on the block.
 * @param id    Tool-use id. Defaults to `'id1'` (the value that used to be
 *              hard-coded) so existing call sites are unchanged; pass a distinct
 *              id when a fixture contains more than one tool call.
 * @param input Tool input payload. Defaults to an empty object, as before.
 */
function subAgentToolUse(name: string, id = 'id1', input: Record<string, unknown> = {}) {
  return {
    type: 'assistant',
    message: { content: [{ type: 'tool_use', name, id, input }] },
  }
}
64
+
65
/** Builds a `type: 'assistant'` JSONL record whose content is one text block holding `text`. */
function subAgentAssistantText(text: string) {
  const textBlock = { type: 'text', text }
  return {
    type: 'assistant',
    message: { content: [textBlock] },
  }
}
68
+
69
+ // ─── Fix 2: Post-answer background-agent liveness ────────────────────────────
70
+
71
+ describe('Fix 2: post-answer background-agent liveness (watcher → gate → liveness card)', () => {
72
+ /**
73
+ * This test uses the REAL `startSubagentWatcher` with an injected mock fs
74
+ * (the same pattern as subagent-watcher.test.ts — the authoritative harness
75
+ * for watcher tests). The `onProgress` callback is the REAL watcher code path.
76
+ *
77
+ * Simulate: parent turn has delivered substantive answer → turn.finalAnswerEverDelivered=true.
78
+ * Then watcher fires onProgress for a background sub-agent → we capture the
79
+ * timestamp it would write to turn.subagentActivityAt.
80
+ * Then call the REAL mayOpenActivityCard with that signal → assert it returns true.
81
+ *
82
+ * This exercises the entire watcher → signal → gate pipeline, not injected state.
83
+ */
84
+
85
it('watcher onProgress advances subagentActivityAt and gate allows liveness card (real pipeline)', () => {
  // --- Fake turn state mirroring what gateway.ts holds post-answer ---
  // The parent answered at t=1000; the injected clock starts at 1500 (post-answer).
  const turn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
    labeledToolCount: 2,
  }
  // Stand-in for the gateway's module-scope `currentTurn` mirror: non-null in the
  // post-answer/pre-teardown window. Both the onProgress stamp and the heartbeat
  // verdict below read THIS binding rather than `turn` directly.
  let currentTurn: typeof turn | null = turn

  // --- Wire up the REAL startSubagentWatcher with a mock fs ---
  // The subagents dir starts EMPTY so the boot scan finds nothing; the JSONL file
  // is made visible only afterwards, simulating a worker dispatched after boot.
  const agentDir = '/home/user/.switchroom/agents/myagent'
  const projectsRoot = `${agentDir}/.claude/projects`
  const projectDir = `${projectsRoot}/myproject`
  const sessionDir = `${projectDir}/session-abc`
  const subagentsDir = `${sessionDir}/subagents`
  const jsonlPath = `${subagentsDir}/agent-bg01.jsonl`
  // Directories that exist for the whole test (hoisted: invariant across calls).
  const staticPaths = [agentDir, projectsRoot, projectDir, sessionDir, subagentsDir]

  // The JSONL holds a user prompt, a tool_use, then a narrative text block.
  const content = buildJSONL(
    subAgentUserMsg('Analyse the 30 changed files'),
    subAgentToolUse('Read'),
    subAgentAssistantText('I have read the files and analysed the scope of the change'),
  )
  const contentBuf = Buffer.from(content, 'utf-8')

  // Path of the currently "open" fake descriptor; readSync serves bytes only for the JSONL.
  let lastOpenedPath: string | null = null
  // Control knob for per-phase fs state: flips to true once the worker file "appears".
  let jsonlVisible = false

  const mockFs = {
    existsSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      if (staticPaths.includes(ps)) return true
      if (ps === jsonlPath) return jsonlVisible
      return false
    }) as typeof realFs.existsSync,
    readdirSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      if (ps === projectsRoot) return ['myproject']
      if (ps === projectDir) return ['session-abc']
      if (ps === sessionDir) return ['subagents']
      if (ps === subagentsDir) return jsonlVisible ? ['agent-bg01.jsonl'] : []
      return []
    }) as unknown as typeof realFs.readdirSync,
    statSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      if (ps === jsonlPath && jsonlVisible) {
        return { size: contentBuf.length, mtimeMs: 1500, isDirectory: () => false } as unknown as realFs.Stats
      }
      return { size: 0, mtimeMs: 0, isDirectory: () => false } as unknown as realFs.Stats
    }) as typeof realFs.statSync,
    openSync: ((p: realFs.PathLike) => {
      lastOpenedPath = String(p)
      return 42
    }) as unknown as typeof realFs.openSync,
    closeSync: (() => {
      lastOpenedPath = null
    }) as typeof realFs.closeSync,
    readSync: ((_fd: number, buf: NodeJS.ArrayBufferView, offset: number, length: number, position: number | null): number => {
      if (lastOpenedPath !== jsonlPath) return 0
      const pos = position ?? 0
      // `subarray` replaces the deprecated `Buffer.prototype.slice`; both return a
      // view over the same bytes, clamped to the end of the buffer.
      const chunk = contentBuf.subarray(pos, pos + length)
      chunk.copy(buf as Buffer, offset)
      return chunk.length
    }) as unknown as typeof realFs.readSync,
    watch: (() => ({ close: () => {} })) as unknown as typeof realFs.watch,
  }

  let currentTime = 1500
  // Interval callbacks the watcher registers; the test fires them by hand.
  const intervals: Array<{ fn: () => void; ms: number }> = []
  let nextRef = 0

  const progressEvents: Array<{ agentId: string; latestSummary: string }> = []

  const watcher = startSubagentWatcher({
    agentDir,
    // agentCwd is omitted so the watcher doesn't filter by slug — keeps the test simple.
    now: () => currentTime,
    setInterval: (fn, ms) => {
      const ref = nextRef++
      intervals.push({ fn, ms })
      return { ref }
    },
    clearInterval: () => {},
    // Timeouts are registered but never fired in this test.
    setTimeout: (_fn, _ms) => ({ ref: nextRef++ }),
    clearTimeout: () => {},
    fs: mockFs,
    onProgress: ({ agentId, latestSummary }) => {
      progressEvents.push({ agentId, latestSummary })
      // Mirror ONLY the gateway's one-line stamp, gated as the original test notes
      // describe it: `currentTurn != null && finalAnswerEverDelivered`. The DECISION
      // made off this signal is NOT re-implemented here — the REAL
      // `evaluatePostAnswerLiveness` helper is driven further down.
      const stampTurn = currentTurn
      if (stampTurn != null && stampTurn.finalAnswerEverDelivered) {
        stampTurn.subagentActivityAt = currentTime
      }
    },
    log: () => {},
  })

  // Phase 2: the file appears after boot (background worker dispatched after the answer).
  jsonlVisible = true
  currentTime = 1600

  // Trigger a poll by hand so the watcher discovers and reads the new file.
  const pollInterval = intervals[0]
  expect(pollInterval).toBeDefined()
  pollInterval.fn()

  watcher.stop()

  // --- Assert the REAL watcher fired onProgress ---
  // At minimum one onProgress should have fired for the new file.
  expect(progressEvents.length).toBeGreaterThan(0)
  expect(progressEvents[0].agentId).toBe('bg01')

  // --- Assert subagentActivityAt was stamped by the onProgress callback ---
  expect(turn.subagentActivityAt).toBe(currentTime)
  expect(turn.subagentActivityAt!).toBeGreaterThan(turn.finalAnswerDeliveredAt!)

  // --- Assert the REAL mayOpenActivityCard gate allows a liveness card ---
  // The flag is DERIVED from the stamped signal (not hard-coded), so this gate
  // assertion genuinely depends on the watcher having stamped the turn.
  const postAnswerSubagentActivity =
    turn.subagentActivityAt !== undefined && turn.subagentActivityAt > turn.finalAnswerDeliveredAt
  expect(postAnswerSubagentActivity).toBe(true)
  const allowed = mayOpenActivityCard({
    producer: 'tool',
    finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
    labeledToolCount: turn.labeledToolCount,
    postAnswerSubagentActivity,
  })
  expect(allowed).toBe(true)

  // --- Confirm FAILS without the fix ---
  // With postAnswerSubagentActivity omitted the gate is expected to block.
  const blockedWithoutFix = mayOpenActivityCard({
    producer: 'tool',
    finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
    labeledToolCount: turn.labeledToolCount,
  })
  expect(blockedWithoutFix).toBe(false)

  // --- Drive the REAL heartbeat decision helper (not a re-implementation) ---
  // In this post-answer/pre-teardown window `currentTurn` is still non-null AND
  // just stamped, so the helper is expected to return 'emit'.
  expect(currentTurn).not.toBeNull()
  const verdictInWindow = evaluatePostAnswerLiveness({
    subagentActivityAt: currentTurn!.subagentActivityAt,
    finalAnswerDeliveredAt: currentTurn!.finalAnswerDeliveredAt,
    now: currentTime + 5, // a heartbeat tick moments after the stamp
    staleCapMs: 30_000,
  })
  expect(verdictInWindow).toBe('emit')
})
255
+
256
it('idle post-answer (no watcher activity) → evaluatePostAnswerLiveness returns "idle" → silent', () => {
  // No watcher stamp since the answer: `subagentActivityAt` stays undefined, and
  // the REAL heartbeat decision helper is expected to answer 'idle' (no card).
  const noActivitySinceAnswer = {
    subagentActivityAt: undefined,
    finalAnswerDeliveredAt: 1000,
    now: 50_000,
    staleCapMs: 30_000,
  }
  expect(evaluatePostAnswerLiveness(noActivitySinceAnswer)).toBe('idle')
})
268
+
269
it('subagentActivityAt at/before the answer → "idle" (pre-answer label never opens a post-answer card)', () => {
  // The last sub-agent advance (t=500) predates the answer (t=1000).
  const verdict = evaluatePostAnswerLiveness({
    subagentActivityAt: 500,
    finalAnswerDeliveredAt: 1000,
    now: 1500,
    staleCapMs: 30_000,
  })
  expect(verdict).toBe('idle')
})
279
+ })
280
+
281
+ // ─── Fix 2 — concern 2: the currentTurn path is INERT after teardown ─────────
282
+
283
+ describe('Fix 2 / concern 2: currentTurn nulls at turn_end → heartbeat path inert; worker feed covers it', () => {
284
+ /**
285
+ * The reviewer's concern: the post-answer liveness fix stamps + renders off
286
+ * `currentTurn`, which `endCurrentTurnAtomic` nulls at `turn_end`. A genuinely
287
+ * DECOUPLED background worker keeps ticking PAST teardown, so when its later
288
+ * onProgress arrives `currentTurn` is null → the stamp is inert and the
289
+ * heartbeat (which early-returns `if (turn == null) return`) is silent.
290
+ *
291
+ * This test reproduces that lifecycle EXACTLY (the gateway's `currentTurn`
292
+ * gating, which can't be imported, modelled verbatim) and proves:
293
+ * (a) in the post-answer/pre-teardown window the stamp fires and the REAL
294
+ * `evaluatePostAnswerLiveness` returns 'emit';
295
+ * (b) after teardown (`currentTurn = null`) a later worker tick CANNOT stamp
296
+ * and the heartbeat is structurally silent — i.e. the currentTurn fix IS
297
+ * inert for a decoupled worker, as suspected.
298
+ * The follow-on `describe` then proves the decoupled worker still surfaces via
299
+ * the currentTurn-INDEPENDENT `workerActivityFeed` (the by-design coverage).
300
+ */
301
+
302
/**
 * Test-local model of the gateway's stamp (the one line the test cannot import):
 * records `now` as `subagentActivityAt` on the live turn, but only when a turn
 * is in scope and it has already delivered its substantive answer.
 */
function gatewayStamp(currentTurn: { finalAnswerEverDelivered: boolean; subagentActivityAt?: number } | null, now: number): void {
  // No live turn, or the answer has not been delivered yet → nothing to stamp.
  if (currentTurn == null || !currentTurn.finalAnswerEverDelivered) return
  currentTurn.subagentActivityAt = now
}
310
+
311
/**
 * Test-local reduction of the heartbeat's post-answer entry to its decision:
 * `'no-turn'` when no turn is in scope, `'pre-answer'` when the turn has not
 * delivered its answer, otherwise whatever the REAL `evaluatePostAnswerLiveness`
 * returns for the turn's timestamps with a 30 s stale cap.
 */
function heartbeatVerdict(
  currentTurn: { finalAnswerDelivered: boolean; finalAnswerDeliveredAt?: number; subagentActivityAt?: number } | null,
  now: number,
): 'no-turn' | 'pre-answer' | ReturnType<typeof evaluatePostAnswerLiveness> {
  if (currentTurn == null) return 'no-turn'
  if (!currentTurn.finalAnswerDelivered) return 'pre-answer'
  const { subagentActivityAt, finalAnswerDeliveredAt } = currentTurn
  return evaluatePostAnswerLiveness({
    subagentActivityAt,
    finalAnswerDeliveredAt,
    now,
    staleCapMs: 30_000,
  })
}
327
+
328
it('(a) in-window: currentTurn alive → stamp fires and heartbeat emits', () => {
  const liveTurn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
  }
  // The mirror still points at the turn: we are inside the pre-teardown window.
  const currentTurn: typeof liveTurn | null = liveTurn

  // Worker ticks at t=1600, while the turn is still alive.
  gatewayStamp(currentTurn, 1600)
  expect(liveTurn.subagentActivityAt).toBe(1600)

  // A heartbeat tick shortly after the stamp.
  const verdict = heartbeatVerdict(currentTurn, 1605)
  expect(verdict).toBe('emit')
})
342
+
343
it('(b) post-teardown: currentTurn nulled → later worker tick cannot stamp; heartbeat is silent (INERT)', () => {
  const endedTurn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
  }
  let currentTurn: typeof endedTurn | null = endedTurn

  // Model turn_end: the mirror is nulled while the turn object itself lives on.
  currentTurn = null

  // The decoupled worker ticks much later (t=120_000) with no live turn in scope.
  gatewayStamp(currentTurn, 120_000)

  // The stamp had nothing to write to, so the old turn object is untouched…
  expect(endedTurn.subagentActivityAt).toBeUndefined()
  // …and the heartbeat reports that there is no live turn to render on.
  const verdict = heartbeatVerdict(currentTurn, 120_005)
  expect(verdict).toBe('no-turn')
})
362
+
363
it('concern 3: while the turn is alive but the worker went stale, heartbeat stops ("stale")', () => {
  const lastAdvanceAt = 2000
  const staleTurn = {
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    // The worker's last advance; nothing has stamped the turn since.
    subagentActivityAt: lastAdvanceAt as number | undefined,
  }
  const currentTurn: typeof staleTurn | null = staleTurn

  // A tick just after the last advance still emits…
  expect(heartbeatVerdict(currentTurn, 2500)).toBe('emit')

  // …but from `now - subagentActivityAt >= 30s` onwards the verdict is 'stale',
  // both exactly at the cap and well past it.
  const atCap = heartbeatVerdict(currentTurn, lastAdvanceAt + 30_000)
  const wellPastCap = heartbeatVerdict(currentTurn, lastAdvanceAt + 90_000)
  expect(atCap).toBe('stale')
  expect(wellPastCap).toBe('stale')
})
378
+ })
379
+
380
+ // ─── Fix 2 — concern 2 resolution: the decoupled worker surfaces via the
381
+ // currentTurn-INDEPENDENT workerActivityFeed (and is bounded) ────────────
382
+
383
+ describe('Fix 2 / concern 2: decoupled background-worker activity surfaces via the real workerActivityFeed', () => {
384
+ /**
385
+ * Because the currentTurn heartbeat is inert post-teardown (proven above), the
386
+ * decoupled worker's activity is surfaced by the dedicated `workerActivityFeed`
387
+ * — a regular chat message edited in place, keyed by jsonl agent id, that the
388
+ * watcher keeps driving AFTER the parent turn ends (NO currentTurn dependency).
389
+ * This drives the REAL `createWorkerActivityFeed` to prove:
390
+ * - a running worker paints + edits a live message with NO turn in scope, and
391
+ * - it is BOUNDED: `finish` posts the terminal edit, the handle is dropped,
392
+ * and a later heartbeat tick emits nothing (no unbounded climb).
393
+ */
394
+
395
/** Bot double that records every send and edit so tests can inspect them. */
interface FakeBot extends BotApiForWorkerFeed {
  /** One entry per `sendMessage` call, in call order. */
  sent: Array<{ chatId: string; text: string }>
  /** One entry per `editMessageText` call, in call order. */
  edits: Array<{ messageId: number; text: string }>
}

/**
 * Creates a fresh {@link FakeBot}. `sendMessage` hands out sequential message
 * ids starting at 5000; `editMessageText` resolves to an empty object.
 */
function makeBot(): FakeBot {
  let upcomingMessageId = 5000
  const bot: FakeBot = {
    sent: [],
    edits: [],
    async sendMessage(chatId, text) {
      bot.sent.push({ chatId, text })
      const message_id = upcomingMessageId
      upcomingMessageId += 1
      return { message_id }
    },
    async editMessageText(_chatId, messageId, text) {
      bot.edits.push({ messageId, text })
      return {}
    },
  }
  return bot
}
415
/**
 * Builds a `WorkerActivityView` fixture: a running worker with canned values,
 * with any field in `p` taking precedence over the canned one.
 */
function wView(p: Partial<WorkerActivityView> = {}): WorkerActivityView {
  const defaults: WorkerActivityView = {
    description: 'analyse the 30 changed files',
    lastTool: { name: 'Read', sanitisedArg: 'src/auth' },
    toolCount: 2,
    latestSummary: 'reading the auth module',
    elapsedMs: 10_000,
    state: 'running',
  }
  return { ...defaults, ...p }
}
426
+
427
it('surfaces a running decoupled worker with NO live turn, then stops at finish (bounded)', async () => {
  const bot = makeBot()
  // Heartbeat callbacks the feed registers; fired by hand further down.
  const heartbeatCallbacks: Array<() => void> = []
  let clock = 1_000_000
  const feed = createWorkerActivityFeed({
    bot,
    now: () => clock,
    firstPaintMinMs: 8000,
    minEditIntervalMs: 0,
    setInterval: (cb) => {
      heartbeatCallbacks.push(cb)
      return heartbeatCallbacks.length
    },
    clearInterval: () => {},
  })

  // No turn object exists anywhere in this test: the feed is addressed purely by
  // agent id and chat id.
  const firstView = wView({ elapsedMs: 12_000 })
  await feed.update('bg01', 'chat-77', firstView)
  expect(bot.sent.length).toBe(1) // a live message was painted
  expect(feed.has('bg01')).toBe(true)

  // A later running update is expected to edit rather than send again.
  clock += 6000
  const secondView = wView({ elapsedMs: 18_000, toolCount: 3, latestSummary: 'patching token parser' })
  await feed.update('bg01', 'chat-77', secondView)
  expect(bot.edits.length).toBeGreaterThanOrEqual(1)

  // Terminal: after `finish` the feed no longer tracks the worker.
  clock += 3000
  const terminalView = wView({ state: 'done', toolCount: 4, latestSummary: 'opened PR #42' })
  await feed.finish('bg01', terminalView)
  expect(feed.has('bg01')).toBe(false)
  const editsAfterFinish = bot.edits.length

  // Bounded: firing every heartbeat callback later must not add any edit.
  clock += 60_000
  heartbeatCallbacks.forEach((fire) => fire())
  await Promise.resolve()
  expect(bot.edits.length).toBe(editsAfterFinish)

  feed.stop()
})
465
+
466
// After `finish`, firing every captured heartbeat callback — even with the
// clock moved far ahead — must not add any entry to `bot.edits`.
it('the worker-feed heartbeat only ticks RUNNING workers (terminal worker never climbs)', async () => {
  const bot = makeBot()
  let clock = 2_000_000
  // Interval callbacks are captured rather than scheduled, so the test decides
  // exactly when a heartbeat fires.
  const ticks: Array<() => void> = []
  const feed = createWorkerActivityFeed({
    bot,
    now: () => clock,
    firstPaintMinMs: 0,
    minEditIntervalMs: 0,
    heartbeatTickMs: 6000,
    setInterval: (cb) => { ticks.push(cb); return ticks.length },
    clearInterval: () => {},
  })
  await feed.update('bg02', 'chat-9', wView({ elapsedMs: 1000 }))
  expect(bot.sent.length).toBe(1)
  await feed.finish('bg02', wView({ state: 'done', latestSummary: 'done' }))
  const editCount = bot.edits.length

  // Heartbeat after finish: the handle is gone → no further edits ever.
  clock += 600_000
  for (const tick of ticks) tick()
  // A stray edit would be issued from inside an async chain. A single
  // `await Promise.resolve()` only lets the first hop of that chain run, so the
  // negative assertion below could pass before the edit lands. Drain several
  // microtask turns (the count is a generous, arbitrary bound) before checking.
  for (let turn = 0; turn < 10; turn++) await Promise.resolve()
  expect(bot.edits.length).toBe(editCount)
  feed.stop()
})
491
+ })
492
+
493
+ // ─── Fix 1: Narrative as first-class feed lines ───────────────────────────────
494
+
495
+ describe('Fix 1: narrative as durable feed lines (clip length + Lever 5 removal)', () => {
496
+ /**
497
+ * These tests drive the REAL clipNarrative and appendActivityLabel functions
498
+ * from tool-activity-summary.ts, and the REAL mayOpenActivityCard gate.
499
+ * The pipeline is: raw text → clipNarrative → appendActivityLabel →
500
+ * mirrorLines (persistent alongside tool labels).
501
+ */
502
+
503
// A narrative longer than the previous 120-char clip but within the current
// 200-char clip must come back from clipNarrative untouched.
it('clipNarrative raises clip to 200 chars (readable feed-line, matches STATUS_LINE_MAX)', () => {
  const previousLimit = 120
  const currentLimit = 200

  // Fixture sized to sit strictly between the two limits: the old clip would
  // have cut it mid-sentence, the new one leaves it whole.
  const longNarrative = 'I will now analyse all 30 changed files in /src/auth to understand the scope of the authentication regression before patching the vulnerable token-parsing code path'
  expect(longNarrative.length).toBeGreaterThan(previousLimit)
  expect(longNarrative.length).toBeLessThanOrEqual(currentLimit)

  // With the 200-char limit the whole narrative survives.
  const clipped = clipNarrative(longNarrative)
  expect(clipped).toBe(longNarrative)

  // CONFIRM FAILS WITHOUT FIX: what the 120-char clip would have produced is
  // both different from, and shorter than, the actual result.
  const oldClip = longNarrative.slice(0, previousLimit)
  expect(clipped).not.toBe(oldClip)
  expect(clipped.length).toBeGreaterThan(oldClip.length)
})
521
+
522
// Two expectations on clipNarrative: multi-line input keeps only its first
// line, and over-long input comes back exactly 200 characters long.
it('clipNarrative still clips at 200 chars and takes first line only', () => {
  // Multi-line input: the result is the first line, with no newline in it.
  const twoLines = 'First line of narrative\nSecond line should be dropped'
  const firstLineOnly = clipNarrative(twoLines)
  expect(firstLineOnly).toBe('First line of narrative')
  expect(firstLineOnly).not.toContain('\n')

  // 250 characters in, 200 characters out.
  const overLimit = 'A'.repeat(250)
  const cappedResult = clipNarrative(overLimit)
  expect(cappedResult.length).toBe(200)
})
534
+
535
// Appends narrative → tool label → narrative through the REAL
// appendActivityLabel and checks that each append adds one entry at the end
// while earlier entries stay in place.
it('narrative and tool label lines both persist in mirrorLines (durable, not overwriting)', () => {
  const mirrorLines: string[] = []

  const openingNarrative = 'I will read the authentication module first' // agent thinks aloud before a tool
  const toolLabel = 'Reading /src/auth/accounts.ts' // producer B — the tool runs
  const closingNarrative = 'Now I will patch the token-parsing path' // post-action narration

  // After every append the list must be exactly one longer, with the new
  // label in the last slot.
  const labelsInOrder = [openingNarrative, toolLabel, closingNarrative]
  labelsInOrder.forEach((label, position) => {
    appendActivityLabel(mirrorLines, label)
    expect(mirrorLines).toHaveLength(position + 1)
    expect(mirrorLines[position]).toBe(label)
  })

  // The feed reads: narrative → tool → narrative (interleaved, legible).
  expect(mirrorLines[0]).toBe(openingNarrative)
  expect(mirrorLines[1]).toBe(toolLabel)
  expect(mirrorLines[2]).toBe(closingNarrative)
})
563
+
564
// The current gate must allow a narrative producer to open a card on a turn
// with zero labeled tools, as long as no final answer has been delivered.
it('0-tool narrative DOES open a card pre-answer (Lever 5 removed, Fix 1 / #2588)', () => {
  // Before Fix 1, Lever 5 stopped narrative from opening a card on 0-tool
  // turns. After Fix 1 pre-answer narrative may open one; reply-is-last
  // ordering is left to Lever 2 (clearActivitySummary).
  const allowed = mayOpenActivityCard({
    producer: 'narrative',
    finalAnswerEverDelivered: false,
    labeledToolCount: 0, // 0-tool conversational turn
  })
  expect(allowed).toBe(true)

  // CONFIRM FAILS WITHOUT FIX:
  // A local re-creation of the old gate logic, used only to show that the
  // same input was rejected before the fix.
  const oldMayOpenActivityCard = (input: { producer: string; finalAnswerEverDelivered: boolean; labeledToolCount: number }): boolean => {
    const answerAlreadyDelivered = input.finalAnswerEverDelivered
    const narrativeWithoutTools = input.producer === 'narrative' && input.labeledToolCount === 0 // old Lever 5
    return !(answerAlreadyDelivered || narrativeWithoutTools)
  }
  expect(oldMayOpenActivityCard({ producer: 'narrative', finalAnswerEverDelivered: false, labeledToolCount: 0 })).toBe(false)
  // The fix changes this to true — the narrative card CAN open pre-answer.
})
586
+
587
// Once a final answer has been delivered the gate must refuse a narrative
// producer, even on a turn that has labeled tools.
it('post-answer narrative remains blocked (Lever 1 still applies after Fix 1)', () => {
  // Fix 1 lifts Lever 5 for the pre-answer case only; the post-answer case is
  // still governed by Lever 1 (finalAnswerEverDelivered), which keeps the
  // reply as the last message.
  const verdict = mayOpenActivityCard({
    producer: 'narrative',
    finalAnswerEverDelivered: true,
    labeledToolCount: 2,
  })
  expect(verdict).toBe(false)
})
597
+ })