switchroom 0.15.44 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3249 -1241
  10. package/dist/cli/ui/index.html +1 -1
  11. package/dist/host-control/main.js +2833 -355
  12. package/dist/vault/approvals/kernel-server.js +7482 -7439
  13. package/dist/vault/broker/server.js +11315 -11272
  14. package/examples/minimal.yaml +1 -0
  15. package/examples/switchroom.yaml +1 -0
  16. package/package.json +3 -3
  17. package/profiles/_base/start.sh.hbs +88 -1
  18. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  19. package/profiles/default/CLAUDE.md.hbs +3 -22
  20. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  21. package/telegram-plugin/answer-stream-flag.ts +12 -49
  22. package/telegram-plugin/answer-stream.ts +5 -150
  23. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  24. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  25. package/telegram-plugin/context-exhaustion.ts +12 -0
  26. package/telegram-plugin/demo-mask.ts +154 -0
  27. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  28. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  29. package/telegram-plugin/dist/server.js +215 -172
  30. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  31. package/telegram-plugin/draft-stream.ts +47 -410
  32. package/telegram-plugin/final-answer-detect.ts +17 -12
  33. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  34. package/telegram-plugin/format.ts +56 -19
  35. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  36. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  37. package/telegram-plugin/gateway/auth-command.ts +70 -14
  38. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  39. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  40. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  41. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  42. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  43. package/telegram-plugin/gateway/effort-command.ts +8 -3
  44. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  45. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  46. package/telegram-plugin/gateway/gateway.ts +1837 -291
  47. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  48. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  49. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  50. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  51. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  52. package/telegram-plugin/history.ts +33 -11
  53. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  54. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  55. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  56. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  57. package/telegram-plugin/issues-card.ts +4 -0
  58. package/telegram-plugin/model-unavailable.ts +124 -0
  59. package/telegram-plugin/narrative-dedup.ts +69 -0
  60. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  61. package/telegram-plugin/package.json +3 -3
  62. package/telegram-plugin/pending-work-progress.ts +12 -0
  63. package/telegram-plugin/permission-rule.ts +32 -5
  64. package/telegram-plugin/permission-title.ts +152 -9
  65. package/telegram-plugin/quota-check.ts +13 -0
  66. package/telegram-plugin/quota-watch.ts +135 -7
  67. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  68. package/telegram-plugin/registry/turns-schema.ts +9 -0
  69. package/telegram-plugin/runtime-metrics.ts +13 -0
  70. package/telegram-plugin/session-tail.ts +96 -11
  71. package/telegram-plugin/silence-poke.ts +170 -24
  72. package/telegram-plugin/slot-banner-driver.ts +3 -0
  73. package/telegram-plugin/status-no-truncate.ts +44 -0
  74. package/telegram-plugin/status-reactions.ts +20 -3
  75. package/telegram-plugin/stream-controller.ts +4 -23
  76. package/telegram-plugin/stream-reply-handler.ts +6 -24
  77. package/telegram-plugin/streaming-metrics.ts +91 -0
  78. package/telegram-plugin/subagent-watcher.ts +212 -66
  79. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  80. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  81. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  82. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  83. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  84. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  85. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  86. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  87. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  88. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  89. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  90. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  91. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  92. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  93. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  94. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  95. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  96. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  97. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  98. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  99. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  100. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  101. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  102. package/telegram-plugin/tests/history.test.ts +60 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -0,0 +1,58 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { clipNarrative } from '../tool-activity-summary.js'
3
+
4
+ // `clipNarrative` is the shared narrative-clip primitive used by both the
5
+ // main-agent gateway path (showNarrativeStep) and the sub-agent watcher. It
6
+ // collapses multi-line text to the first line, trims, and slices to 200 chars
7
+ // (= STATUS_LINE_MAX). Fix 1 raised the cap from 120 → 200 to match the tool-
8
+ // label cap so a narrative line reads as legibly as a tool step (no
9
+ // mid-sentence truncation on typical agent narration lengths of 130–180 chars).
10
+
11
describe('clipNarrative — shared subagent-watcher narrative clip', () => {
  // The character limit every assertion in this suite pins.
  const CAP = 200

  it('collapses a multi-line block to its first line', () => {
    const head = 'On it. Let me find the repo…'
    const clipped = clipNarrative(head + '\nthen build it\nand run the tests')
    expect(clipped).toBe(head)
  })

  it('trims surrounding whitespace on the first line', () => {
    const clipped = clipNarrative(' Found both repos: \nmore')
    expect(clipped).toBe('Found both repos:')
  })

  it('caps the first line at 200 characters (Fix 1: raised from 120 to match STATUS_LINE_MAX)', () => {
    // A 250-character single line must come back as exactly its first 200 characters.
    const clipped = clipNarrative('x'.repeat(250))
    expect(clipped.length).toBe(CAP)
    expect(clipped).toBe('x'.repeat(CAP))
  })

  it('a first line under 200 chars is returned verbatim', () => {
    const brief = 'short narrative line'
    expect(clipNarrative(brief)).toBe(brief)
  })

  it('a narrative between 120 and 200 chars is NOT truncated (the Fix 1 improvement)', () => {
    // The old 120-character clip cut narration of this length; the 200 cap must not.
    const narrative = 'I will now analyse all 30 changed files in /src/auth to understand the scope before patching the vulnerable token-parsing code path'

    // Guard the fixture itself: it has to sit in the (120, 200] window to be meaningful.
    expect(narrative.length).toBeGreaterThan(120)
    expect(narrative.length).toBeLessThanOrEqual(CAP)

    expect(clipNarrative(narrative)).toBe(narrative) // not truncated
  })

  it('matches the new 200-char cap expression (STATUS_LINE_MAX equivalence)', () => {
    const STATUS_LINE_MAX = 200
    // Reference expression: first line, trimmed, sliced to the cap.
    const referenceClip = (raw: string): string => raw.split('\n')[0].trim().slice(0, STATUS_LINE_MAX)
    const samples = [
      'line one\nline two',
      ' padded \nrest',
      'a'.repeat(150),
      'b'.repeat(250),
      'single',
      '',
      'tab\tand spaces \nnext',
    ]

    samples.forEach((sample) => {
      expect(clipNarrative(sample)).toBe(referenceClip(sample))
    })
  })
})
@@ -90,6 +90,16 @@ function writeMeta(agentType: string, description: string): string {
90
90
  return jsonlPath
91
91
  }
92
92
 
93
/**
 * Test fixture for the Bug 1 fix (direct PK lookup): writes `worker.meta.json`
 * into `tempDir` carrying `agentType`, `description` and `toolUseId`.
 * Per the original note, modern Claude Code versions write `toolUseId`
 * alongside the other two fields.
 *
 * @returns the path of the sibling `worker.jsonl` (only the path is computed;
 *          this helper does not create that file).
 */
function writeMetaWithToolUseId(toolUseId: string, agentType: string, description: string): string {
  const metaPath = join(tempDir, 'worker.meta.json')
  const payload = JSON.stringify({ agentType, description, toolUseId })
  writeFileSync(metaPath, payload)
  return join(tempDir, 'worker.jsonl')
}
102
+
93
103
  function readSub(id: string) {
94
104
  return db.prepare('SELECT jsonl_agent_id, parent_turn_key FROM subagents WHERE id = ?').get(id) as
95
105
  | { jsonl_agent_id: string | null; parent_turn_key: string | null }
@@ -154,6 +164,57 @@ describe('backfillJsonlAgentId — parent_turn_key resolution', () => {
154
164
  })
155
165
  })
156
166
 
167
+ // ─── #2501: direct toolUseId PK lookup (Bug 1 fix) ───────────────────────────
168
+ // Claude Code writes `toolUseId` to meta.json so backfillJsonlAgentId can
169
+ // match the row by its primary key instead of the fuzzy (agentType, description)
170
+ // match that fails when descriptions collide or are null.
171
describe('backfillJsonlAgentId — toolUseId direct PK lookup (#2501)', () => {
  // Stand-in for a NULL text column; the double cast presumably exists because
  // insertSub types these fields as `string` — confirm against the helper.
  const NULL_TEXT = null as unknown as string

  it('uses toolUseId from meta.json to link the row directly by PK', () => {
    const subId = 'toolu_direct'
    insertTurn({ turnKey: '555:10', chatId: '555', startedAt: 1000, endedAt: 2000 })
    insertSub({ id: subId, agentType: 'general-purpose', description: NULL_TEXT, startedAt: 1500 })

    const jsonlPath = writeMetaWithToolUseId(subId, 'general-purpose', '')
    backfillJsonlAgentId(db, jsonlPath, 'agentstem_direct')

    expect(readSub(subId)?.jsonl_agent_id).toBe('agentstem_direct')
  })

  it('direct PK lookup succeeds even when description is null (fuzzy match would fail)', () => {
    // Same agentType + NULL description on both rows: a fuzzy match cannot tell them apart.
    const older = 'toolu_null_desc_1'
    const newer = 'toolu_null_desc_2'
    insertSub({ id: older, agentType: 'general-purpose', description: NULL_TEXT, startedAt: 1000 })
    insertSub({ id: newer, agentType: 'general-purpose', description: NULL_TEXT, startedAt: 2000 })

    // meta.json names the older row explicitly.
    const jsonlPath = writeMetaWithToolUseId(older, 'general-purpose', '')
    backfillJsonlAgentId(db, jsonlPath, 'agentstem_null_desc')

    // The named row is linked; the more recent one is left untouched.
    expect(readSub(older)?.jsonl_agent_id).toBe('agentstem_null_desc')
    expect(readSub(newer)?.jsonl_agent_id).toBeNull()
  })

  it('falls back to fuzzy match when toolUseId in meta.json does not match any row', () => {
    const subId = 'toolu_fuzzy'
    insertSub({ id: subId, agentType: 'researcher', description: 'Find stuff', startedAt: 1000 })

    // The toolUseId refers to no row, so only (agentType, description) can match.
    const jsonlPath = writeMetaWithToolUseId('toolu_NONEXISTENT', 'researcher', 'Find stuff')
    backfillJsonlAgentId(db, jsonlPath, 'agentstem_fuzzy_fallback')

    expect(readSub(subId)?.jsonl_agent_id).toBe('agentstem_fuzzy_fallback')
  })

  it('meta.json with only toolUseId and no agentType/description still links the row', () => {
    const subId = 'toolu_id_only'
    insertSub({ id: subId, agentType: NULL_TEXT, description: NULL_TEXT, startedAt: 1000 })

    // Written by hand so the meta file carries nothing but the toolUseId.
    writeFileSync(join(tempDir, 'worker.meta.json'), JSON.stringify({ toolUseId: 'toolu_id_only' }))
    const jsonlPath = join(tempDir, 'worker.jsonl')
    backfillJsonlAgentId(db, jsonlPath, 'agentstem_id_only')

    expect(readSub(subId)?.jsonl_agent_id).toBe('agentstem_id_only')
  })
})
217
+
157
218
  // ─── #2081: overlapping windows + hook-stamped value precedence ───────────────
158
219
  // The backfill is now only a FALLBACK — the PreToolUse hook stamps
159
220
  // parent_turn_key from the live turn-active marker at dispatch
@@ -203,3 +264,44 @@ describe('backfillJsonlAgentId — overlapping windows / hook precedence (#2081)
203
264
  expect(row?.parent_turn_key).toBe('-100:7:1400')
204
265
  })
205
266
  })
267
+
268
+ // ─── #2506: null meta.json guard ─────────────────────────────────────────────
269
+ // JSON.parse('null') succeeds and returns null. Before the fix, the enclosing
270
+ // try/catch only covered the read+parse, so execution fell through to
271
+ // `if (!meta.agentType …)` which threw TypeError: Cannot read properties of
272
+ // null. The fix guards with `if (!meta || …)` so a literal-null meta.json
273
+ // degrades to the same clean skip as any other "no usable fields" case.
274
describe('backfillJsonlAgentId — null meta.json guard (#2506)', () => {
  it('meta.json containing literal null → clean skip, no throw, row stays unlinked', () => {
    insertSub({ id: 'toolu_null_meta', agentType: 'worker', description: 'task', startedAt: 1000 })

    // A meta file whose entire content is the JSON literal `null`.
    writeFileSync(join(tempDir, 'null_meta_worker.meta.json'), 'null')
    const jsonlPath = join(tempDir, 'null_meta_worker.jsonl')

    const logs: string[] = []
    const runBackfill = (): void => {
      backfillJsonlAgentId(db, jsonlPath, 'agentstem_null_meta', (m) => logs.push(m))
    }
    // Per the regression note, this used to raise
    // "TypeError: Cannot read properties of null (reading 'agentType')".
    expect(runBackfill).not.toThrow()

    // Skip path, not link path: the row keeps a NULL jsonl_agent_id.
    expect(readSub('toolu_null_meta')?.jsonl_agent_id).toBeNull()

    // The outcome is reported as a skip that names the agent stem.
    const sawSkip = logs.some(
      (line) => line.includes('backfill skip') && line.includes('agentstem_null_meta'),
    )
    expect(sawSkip).toBe(true)
  })

  it('meta.json containing literal null → skip log does NOT contain "error"', () => {
    // Expected wording is a clean "backfill skip", never a "backfill error".
    writeFileSync(join(tempDir, 'null_meta_worker2.meta.json'), 'null')
    const jsonlPath = join(tempDir, 'null_meta_worker2.jsonl')

    const logs: string[] = []
    backfillJsonlAgentId(db, jsonlPath, 'agentstem_null_meta2', (m) => logs.push(m))

    const mentionsError = logs.some((line) => line.toLowerCase().includes('error'))
    const mentionsSkip = logs.some((line) => line.includes('backfill skip'))
    expect(mentionsError).toBe(false)
    expect(mentionsSkip).toBe(true)
  })
})
@@ -0,0 +1,225 @@
1
+ /**
2
+ * Regression test for workflow sub-agent feed visibility.
3
+ *
4
+ * Root cause: rescanSubagentDirs() did a flat readdir on subagents/ and only
5
+ * matched entries whose names started with "agent-" and ended with ".jsonl".
6
+ * Workflow sub-agents (spawned by the Workflow tool) write to a nested path:
7
+ * <session>/subagents/workflows/wf_<id>/agent-<id>.jsonl
8
+ * The "workflows" directory entry doesn't match the agent-*.jsonl filter, so
9
+ * it was silently skipped and no workflow agent was ever registered or tailed.
10
+ *
11
+ * Fix: descend one level into subagents/workflows/wf_NNN dirs using the same
12
+ * watchAndScan helper so all downstream tiers (registry, tailing, stall
13
+ * detection, historical suppression, terminatedAgentIds dedup) apply
14
+ * identically.
15
+ */
16
+
17
+ import { describe, it, expect, afterEach } from 'vitest'
18
+ import { mkdtempSync, mkdirSync, writeFileSync } from 'fs'
19
+ import { tmpdir } from 'os'
20
+ import { join } from 'path'
21
+ import { rmSync } from 'fs'
22
+ import { startSubagentWatcher } from '../subagent-watcher.js'
23
+
24
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
25
+
26
/**
 * Serialises each argument as one JSON document per line (JSONL).
 *
 * Every record, including the last, is terminated by a newline. With no
 * records the result is the empty string, so a fixture written from it is an
 * empty file rather than a file holding a single blank line.
 *
 * @param lines - records to serialise, in output order
 * @returns the JSONL text for `lines`
 */
function buildJSONL(...lines: object[]): string {
  // Zero records means an empty document, not a lone "\n".
  if (lines.length === 0) return ''
  return lines.map((record) => JSON.stringify(record)).join('\n') + '\n'
}
29
+
30
/**
 * Smallest transcript the tests hand to the watcher: a single `user` record
 * whose content is one text part.
 */
function minimalAgentJsonl(): string {
  const openingTurn = {
    type: 'user',
    message: { content: [{ type: 'text', text: 'Implement the workflow step' }] },
  }
  return buildJSONL(openingTurn)
}
35
+
36
+ // ─── Test suite ───────────────────────────────────────────────────────────────
37
+
38
describe('workflow sub-agent feed visibility', () => {
  type Watcher = ReturnType<typeof startSubagentWatcher>
  type StubTimer = { fn: () => void; ref: number }

  // Per-test scratch root; empty string means "nothing to clean up".
  let tmpRoot = ''
  // Every watcher started by a test, so afterEach can stop them all.
  const started: Watcher[] = []

  afterEach(() => {
    // Stop watchers newest-first; a failing stop() must not block the rest of teardown.
    for (let w = started.pop(); w !== undefined; w = started.pop()) {
      try { w.stop() } catch { /* ignore */ }
    }
    if (!tmpRoot) return
    try { rmSync(tmpRoot, { recursive: true, force: true }) } catch { /* ignore */ }
    tmpRoot = ''
  })

  /** Creates this test's scratch root (recorded in `tmpRoot`) and returns its `agent` dir path. */
  function freshAgentDir(prefix: string): string {
    tmpRoot = mkdtempSync(join(tmpdir(), prefix))
    return join(tmpRoot, 'agent')
  }

  /** Path of the session's `subagents` directory under the given agent dir. */
  function subagentsDirOf(agentDir: string): string {
    return join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
  }

  /** Ensures `wfDir` exists and drops a minimal `agent-<id>.jsonl` transcript into it. */
  function plantAgent(wfDir: string, agentId: string): void {
    mkdirSync(wfDir, { recursive: true })
    writeFileSync(join(wfDir, `agent-${agentId}.jsonl`), minimalAgentJsonl())
  }

  /**
   * Starts a watcher over a real temp directory with all four timer hooks
   * stubbed: callbacks are only recorded, never scheduled, so nothing runs
   * until the caller invokes `poll()`.
   *
   * `poll()` runs the first interval callback still registered, if any.
   * `registered` collects a `finish:<agentId>` entry per onFinish call.
   */
  function startWatcher(agentDir: string): {
    poll: () => void
    watcher: Watcher
    registered: string[]
  } {
    const registered: string[] = []
    const intervals: StubTimer[] = []
    const timeouts: StubTimer[] = []
    let nextRef = 1

    // Record a callback in the given pool and hand back an opaque handle.
    const arm = (pool: StubTimer[], fn: () => void): { ref: number } => {
      const ref = nextRef++
      pool.push({ fn, ref })
      return { ref }
    }
    // Remove the callback a handle refers to; unknown handles are a no-op.
    const disarm = (pool: StubTimer[], handle: unknown): void => {
      const { ref } = handle as { ref: number }
      const at = pool.findIndex((timer) => timer.ref === ref)
      if (at !== -1) pool.splice(at, 1)
    }

    const watcher = startSubagentWatcher({
      agentDir,
      onFinish: (info) => { registered.push(`finish:${info.agentId}`) },
      stallThresholdMs: 60_000,
      silentSynthesisStallThresholdMs: 60_000,
      rescanMs: 500,
      now: () => Date.now(),
      setInterval: (fn) => arm(intervals, fn),
      clearInterval: (handle) => disarm(intervals, handle),
      setTimeout: (fn) => arm(timeouts, fn),
      clearTimeout: (handle) => disarm(timeouts, handle),
      log: () => {},
    })
    started.push(watcher)

    return {
      poll: () => intervals[0]?.fn(),
      watcher,
      registered,
    }
  }

  it('registers a workflow agent at subagents/workflows/wf_<id>/agent-<id>.jsonl', () => {
    const agentDir = freshAgentDir('switchroom-workflow-test-')
    const agentId = 'abcdef1234567890'

    // The nested layout the Workflow tool produces.
    plantAgent(join(subagentsDirOf(agentDir), 'workflows', 'wf_test'), agentId)

    const { poll, watcher } = startWatcher(agentDir)
    poll()

    const entry = watcher.getRegistry().get(agentId)
    expect(entry, 'workflow agent should be registered').toBeDefined()
    // Freshly written file: per the original note, the boot-promotion path
    // treats it as live (historical=false), hence 'running'.
    expect(entry?.state).toBe('running')
  })

  it('does NOT register a sibling journal.jsonl in the wf dir', () => {
    const agentDir = freshAgentDir('switchroom-workflow-journal-test-')
    const agentId = 'deadbeefcafe0001'
    const wfDir = join(subagentsDirOf(agentDir), 'workflows', 'wf_test')

    plantAgent(wfDir, agentId)
    // The Workflow tool writes journal.jsonl next to the agent transcript.
    writeFileSync(join(wfDir, 'journal.jsonl'), buildJSONL({ type: 'workflow_event', step: 1 }))

    const { poll, watcher } = startWatcher(agentDir)
    poll()

    const registry = watcher.getRegistry()
    expect(registry.get(agentId), 'agent entry must exist').toBeDefined()
    // 'journal' is the id the file would get if it were registered by mistake.
    const journalKey = 'journal'
    expect(registry.get(journalKey), 'journal.jsonl must not be registered').toBeUndefined()
    // Exactly one entry: the real agent.
    expect(registry.size).toBe(1)
  })

  it('registers multiple workflow agents across different wf_* dirs', () => {
    const agentDir = freshAgentDir('switchroom-workflow-multi-test-')
    const workflowsBase = join(subagentsDirOf(agentDir), 'workflows')

    const agentIds = ['wf_step1', 'wf_step2', 'wf_step3'].map((wfId) => {
      const wfDir = join(workflowsBase, wfId)
      const agentId = `agent${wfId.replace('wf_', '')}cafebabe`
      plantAgent(wfDir, agentId)
      // Each run dir also holds a journal.jsonl, which must be ignored.
      writeFileSync(join(wfDir, 'journal.jsonl'), buildJSONL({ step: wfId }))
      return agentId
    })

    const { poll, watcher } = startWatcher(agentDir)
    poll()

    for (const agentId of agentIds) {
      expect(watcher.getRegistry().get(agentId), `${agentId} should be registered`).toBeDefined()
    }
    // Three agents, zero journals.
    expect(watcher.getRegistry().size).toBe(3)
  })

  it('skips a stray non-directory file sitting directly in workflows/', () => {
    const agentDir = freshAgentDir('switchroom-workflow-stray-test-')
    const workflowsBase = join(subagentsDirOf(agentDir), 'workflows')
    mkdirSync(workflowsBase, { recursive: true })

    // A regular file directly under workflows/ (e.g. an index or lock file).
    // It must be skipped: not registered, and not allowed to break the scan.
    writeFileSync(join(workflowsBase, 'index.json'), '{"runs":[]}')

    // A genuine workflow agent next to the stray file must still register.
    const agentId = 'feedface00001111'
    plantAgent(join(workflowsBase, 'wf_real'), agentId)

    const { poll, watcher } = startWatcher(agentDir)
    poll()

    expect(watcher.getRegistry().get(agentId), 'real workflow agent should register').toBeDefined()
    // The stray file contributed no entry.
    expect(watcher.getRegistry().size).toBe(1)
  })

  it('discovers a wf_* dir created AFTER the watcher starts (runtime poll)', () => {
    const agentDir = freshAgentDir('switchroom-workflow-late-test-')

    // At boot the session has a subagents dir but no workflows/ run yet.
    const subagents = subagentsDirOf(agentDir)
    mkdirSync(subagents, { recursive: true })

    const { poll, watcher } = startWatcher(agentDir)
    poll() // boot scan: nothing to register
    expect(watcher.getRegistry().size).toBe(0)

    // The workflow run only appears once the watcher is already live.
    const agentId = 'lateb00b1234abcd'
    plantAgent(join(subagents, 'workflows', 'wf_late'), agentId)

    poll() // the next tick must pick up the new run dir
    expect(watcher.getRegistry().get(agentId), 'late workflow agent should be discovered on poll').toBeDefined()
  })
})
@@ -519,6 +519,153 @@ describe('startSubagentWatcher', () => {
519
519
  expect(toolTick?.latestSummary).toBe('')
520
520
  })
521
521
 
522
+ it('narrative gate: a draft-then-reply sub_agent_text is SUPPRESSED (no progress cue)', () => {
523
+ // The worker composes its answer as a text block, then calls
524
+ // stream_reply with near-identical text. The narrative cue must be
525
+ // suppressed so the answer is not double-surfaced.
526
+ const narrativeCues: string[] = []
527
+ const agentDir = join(tmpRoot, 'agent')
528
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
529
+ mkdirSync(subagentsDir, { recursive: true })
530
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
531
+ const h = startWatcherSync({
532
+ agentDir,
533
+ onProgress: ({ progressLine, latestSummary }) => {
534
+ // Narrative ticks carry NO progressLine (tool ticks do); record only the narrative ones.
535
+ if (progressLine == null) narrativeCues.push(latestSummary)
536
+ },
537
+ })
538
+ writeFileSync(jsonlPath, buildJSONL(subAgentUserMsg('Find the repo path')))
539
+ h.poll()
540
+ const answer = 'The repo is at /home/user/code/switchroom.'
541
+ appendFileSync(jsonlPath, buildJSONL(subAgentAssistantText(answer)))
542
+ h.poll()
543
+ // The draft is staged, not yet resolved — no cue yet.
544
+ // The very next event is a reply tool with matching text → SUPPRESS.
545
+ appendFileSync(jsonlPath, buildJSONL({
546
+ type: 'assistant',
547
+ message: { content: [{ type: 'tool_use', name: 'stream_reply', id: 'r1', input: { text: answer } }] },
548
+ }))
549
+ h.poll()
550
+ expect(narrativeCues.length).toBe(0)
551
+ })
552
+
553
+ it('narrative gate: pure working narration is SHOWN (cue fires)', () => {
554
+ // "On it. Let me find the repo…" followed by a Bash tool — the next
555
+ // event is a non-reply tool, so the narration is SHOWN.
556
+ const narrativeCues: string[] = []
557
+ const agentDir = join(tmpRoot, 'agent')
558
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
559
+ mkdirSync(subagentsDir, { recursive: true })
560
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
561
+ const h = startWatcherSync({
562
+ agentDir,
563
+ onProgress: ({ progressLine, latestSummary }) => {
564
+ if (progressLine == null) narrativeCues.push(latestSummary)
565
+ },
566
+ })
567
+ writeFileSync(jsonlPath, buildJSONL(subAgentUserMsg('Find the repo')))
568
+ h.poll()
569
+ appendFileSync(jsonlPath, buildJSONL(subAgentAssistantText('On it. Let me find the repo.')))
570
+ h.poll()
571
+ appendFileSync(jsonlPath, buildJSONL(subAgentToolUse('Bash', 'b1')))
572
+ h.poll()
573
+ // The staged narration was resolved by the non-reply Bash → SHOWN.
574
+ expect(narrativeCues.length).toBe(1)
575
+ expect(narrativeCues[0]).toContain('find the repo')
576
+ })
577
+
578
+ it('narrative gate: trailing narration at turn_end is SHOWN', () => {
579
+ const narrativeCues: string[] = []
580
+ const agentDir = join(tmpRoot, 'agent')
581
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
582
+ mkdirSync(subagentsDir, { recursive: true })
583
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
584
+ const h = startWatcherSync({
585
+ agentDir,
586
+ onProgress: ({ progressLine, latestSummary }) => {
587
+ if (progressLine == null) narrativeCues.push(latestSummary)
588
+ },
589
+ })
590
+ writeFileSync(jsonlPath, buildJSONL(subAgentUserMsg('Do the task')))
591
+ h.poll()
592
+ // A trailing narration block, then turn_end (turn_duration). The
593
+ // trailing block has no reply after it → SHOWN at turn_end.
594
+ appendFileSync(jsonlPath, buildJSONL(subAgentAssistantText('Done. All green.')))
595
+ h.poll()
596
+ appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
597
+ h.poll()
598
+ expect(narrativeCues.length).toBe(1)
599
+ expect(narrativeCues[0]).toContain('All green')
600
+ })
601
+
602
+ it('NIT 3: trailing narration that DRAFTS a prior stream_reply is SUPPRESSED at turn_end', () => {
603
+ // A FOREGROUND sub-agent calls stream_reply as its final tool, THEN
604
+ // emits a trailing text block that is a draft of that delivered answer,
605
+ // then turn_end. Before the fix, turn_end SHOWed pending narration
606
+ // unconditionally (toolName === null skipped dedup), double-surfacing a
607
+ // draft of the answer. Now turn_end suppresses a trailing draft, mirroring
607
+ // main-agent step 3.
609
+ const narrativeCues: string[] = []
610
+ const agentDir = join(tmpRoot, 'agent')
611
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
612
+ mkdirSync(subagentsDir, { recursive: true })
613
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
614
+ const h = startWatcherSync({
615
+ agentDir,
616
+ onProgress: ({ progressLine, latestSummary }) => {
617
+ if (progressLine == null) narrativeCues.push(latestSummary)
618
+ },
619
+ })
620
+ writeFileSync(jsonlPath, buildJSONL(subAgentUserMsg('Summarise the diff')))
621
+ h.poll()
622
+ const answer = 'The fix touches three files and adds a unit test for the double-Done case.'
623
+ // Final tool of the turn is stream_reply carrying the answer.
624
+ appendFileSync(jsonlPath, buildJSONL({
625
+ type: 'assistant',
626
+ message: { content: [{ type: 'tool_use', name: 'stream_reply', id: 'r1', input: { text: answer } }] },
627
+ }))
628
+ h.poll()
629
+ // Trailing text block (separate message) that drafts the delivered answer.
630
+ appendFileSync(jsonlPath, buildJSONL(subAgentAssistantText(answer)))
631
+ h.poll()
632
+ appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
633
+ h.poll()
634
+ // SUPPRESSED: the trailing draft of the reply is not surfaced.
635
+ expect(narrativeCues.length).toBe(0)
636
+ })
637
+
638
+ it('NIT 3: genuine trailing narration AFTER a stream_reply is still SHOWN', () => {
639
+ // Symmetric guard: after delivering its answer the sub-agent emits a
640
+ // DIFFERENT trailing line ("Done — cleaning up."). That is genuine
641
+ // liveness, not a draft of the answer, so it must still SHOW.
642
+ const narrativeCues: string[] = []
643
+ const agentDir = join(tmpRoot, 'agent')
644
+ const subagentsDir = join(agentDir, '.claude', 'projects', 'p1', 'session-abc', 'subagents')
645
+ mkdirSync(subagentsDir, { recursive: true })
646
+ const jsonlPath = join(subagentsDir, 'agent-deadbeef.jsonl')
647
+ const h = startWatcherSync({
648
+ agentDir,
649
+ onProgress: ({ progressLine, latestSummary }) => {
650
+ if (progressLine == null) narrativeCues.push(latestSummary)
651
+ },
652
+ })
653
+ writeFileSync(jsonlPath, buildJSONL(subAgentUserMsg('Summarise the diff')))
654
+ h.poll()
655
+ const answer = 'The fix touches three files and adds a unit test for the double-Done case.'
656
+ appendFileSync(jsonlPath, buildJSONL({
657
+ type: 'assistant',
658
+ message: { content: [{ type: 'tool_use', name: 'stream_reply', id: 'r1', input: { text: answer } }] },
659
+ }))
660
+ h.poll()
661
+ appendFileSync(jsonlPath, buildJSONL(subAgentAssistantText('Done — cleaning up the worktree now.')))
662
+ h.poll()
663
+ appendFileSync(jsonlPath, buildJSONL(subAgentTurnDuration()))
664
+ h.poll()
665
+ expect(narrativeCues.length).toBe(1)
666
+ expect(narrativeCues[0]).toContain('cleaning up')
667
+ })
668
+
522
669
  it('captures the full last narrative line into lastResultText (handback)', () => {
523
670
  // lastSummaryLine keeps only the first line, 120 chars — a progress
524
671
  // preview. lastResultText keeps the full last narrative emission: