switchroom 0.14.14 → 0.14.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -340,4 +340,83 @@ describe('StatusReactionController', () => {
340
340
  await flush()
341
341
  expect(calls).toEqual(['👀'])
342
342
  })
343
+
344
+ // hold(): freeze on a WORKING glyph while background sub-agent workers
345
+ // outlive the parent turn, deferring the terminal 👍 (worker-reaction fix).
346
+ describe('hold() — defer 👍 while a background worker runs', () => {
347
+ it('suppresses stall promotion (no 🥱/😨) while held', async () => {
348
+ const { emit, calls } = makeEmitter()
349
+ const ctrl = new StatusReactionController(emit)
350
+ ctrl.setQueued()
351
+ ctrl.setTool('Bash') // working: 👨‍💻
352
+ vi.advanceTimersByTime(3500)
353
+ await flush()
354
+
355
+ ctrl.hold()
356
+ await flush()
357
+ // Well past both stall thresholds — held must not yawn or panic.
358
+ vi.advanceTimersByTime(120000)
359
+ await flush()
360
+ expect(calls).not.toContain('🥱')
361
+ expect(calls).not.toContain('😨')
362
+ })
363
+
364
+ it('promotes a read/thinking glyph to a working glyph on hold', async () => {
365
+ const { emit, calls } = makeEmitter()
366
+ const ctrl = new StatusReactionController(emit)
367
+ ctrl.setQueued() // 👀 (read-receipt)
368
+ await flush()
369
+ expect(calls).toEqual(['👀'])
370
+
371
+ ctrl.hold() // should paint an explicit WORKING glyph (✍️)
372
+ await flush()
373
+ expect(calls[calls.length - 1]).toBe('✍')
374
+ })
375
+
376
+ it('finalize() still terminates to 👍 after hold (deferred terminal)', async () => {
377
+ const { emit, calls } = makeEmitter()
378
+ const ctrl = new StatusReactionController(emit)
379
+ ctrl.setQueued()
380
+ ctrl.setTool() // ✍
381
+ vi.advanceTimersByTime(3500)
382
+ await flush()
383
+
384
+ ctrl.hold()
385
+ await flush()
386
+ // Worker runs for a while, then completes → gateway finalizes.
387
+ vi.advanceTimersByTime(60000)
388
+ await flush()
389
+ ctrl.finalize('done')
390
+ await flush()
391
+ expect(calls[calls.length - 1]).toBe('👍')
392
+ })
393
+
394
+ it('does not double-paint when already on a working glyph', async () => {
395
+ const { emit, calls } = makeEmitter()
396
+ const ctrl = new StatusReactionController(emit)
397
+ ctrl.setQueued()
398
+ ctrl.setTool() // ✍
399
+ vi.advanceTimersByTime(3500)
400
+ await flush()
401
+ const before = calls.length
402
+
403
+ ctrl.hold() // already on ✍ → no new emit
404
+ await flush()
405
+ expect(calls.length).toBe(before)
406
+ })
407
+
408
+ it('hold() after finalize is a no-op (cannot resurrect a finished controller)', async () => {
409
+ const { emit, calls } = makeEmitter()
410
+ const ctrl = new StatusReactionController(emit)
411
+ ctrl.setQueued()
412
+ ctrl.finalize('done')
413
+ await flush()
414
+ const snapshot = [...calls]
415
+
416
+ ctrl.hold()
417
+ vi.advanceTimersByTime(120000)
418
+ await flush()
419
+ expect(calls).toEqual(snapshot)
420
+ })
421
+ })
343
422
  })
@@ -0,0 +1,256 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import {
3
+ renderWorkerActivity,
4
+ createWorkerActivityFeed,
5
+ type WorkerActivityView,
6
+ type BotApiForWorkerFeed,
7
+ } from '../worker-activity-feed.js'
8
+
9
+ function view(partial: Partial<WorkerActivityView> = {}): WorkerActivityView {
10
+ return {
11
+ description: 'research competitors',
12
+ lastTool: { name: 'Bash', sanitisedArg: 'grep -r pricing' },
13
+ toolCount: 3,
14
+ latestSummary: 'scanning vendor pages',
15
+ elapsedMs: 10_000,
16
+ state: 'running',
17
+ ...partial,
18
+ }
19
+ }
20
+
21
+ interface FakeBot extends BotApiForWorkerFeed {
22
+ sent: Array<{ chatId: string; text: string; opts?: Record<string, unknown> }>
23
+ edits: Array<{ messageId: number; text: string }>
24
+ failNextSendWith?: unknown
25
+ failNextEditWith?: unknown
26
+ }
27
+
28
+ function makeFakeBot(): FakeBot {
29
+ let nextId = 1000
30
+ const fb: FakeBot = {
31
+ sent: [],
32
+ edits: [],
33
+ sendMessage: async (chatId, text, opts) => {
34
+ if (fb.failNextSendWith != null) {
35
+ const e = fb.failNextSendWith
36
+ fb.failNextSendWith = undefined
37
+ throw e
38
+ }
39
+ fb.sent.push({ chatId, text, opts })
40
+ return { message_id: nextId++ }
41
+ },
42
+ editMessageText: async (_chatId, messageId, text) => {
43
+ if (fb.failNextEditWith != null) {
44
+ const e = fb.failNextEditWith
45
+ fb.failNextEditWith = undefined
46
+ throw e
47
+ }
48
+ fb.edits.push({ messageId, text })
49
+ return {}
50
+ },
51
+ }
52
+ return fb
53
+ }
54
+
55
+ // ─── renderWorkerActivity (pure) ─────────────────────────────────────────────
56
+
57
+ describe('renderWorkerActivity', () => {
58
+ it('renders running header + tool activity line + summary', () => {
59
+ const out = renderWorkerActivity(view())
60
+ expect(out).toContain('🔧 <b>Worker</b> · <i>research competitors</i>')
61
+ expect(out).toContain('⚡ <code>Bash</code> grep -r pricing')
62
+ expect(out).toContain('(3 tools · ')
63
+ expect(out).toContain('↳ <i>scanning vendor pages</i>')
64
+ })
65
+
66
+ it('shows a "starting…" line when no tool has run yet', () => {
67
+ const out = renderWorkerActivity(view({ lastTool: null, latestSummary: '' }))
68
+ expect(out).toContain('🔧 <b>Worker</b>')
69
+ expect(out).toContain('starting…')
70
+ expect(out).not.toContain('⚡')
71
+ })
72
+
73
+ it('omits the summary line when latestSummary is blank', () => {
74
+ const out = renderWorkerActivity(view({ latestSummary: ' ' }))
75
+ expect(out).not.toContain('↳')
76
+ })
77
+
78
+ it('uses singular "tool" for a single tool call', () => {
79
+ const out = renderWorkerActivity(view({ toolCount: 1 }))
80
+ expect(out).toContain('(1 tool · ')
81
+ })
82
+
83
+ it('renders a done terminal recap', () => {
84
+ const out = renderWorkerActivity(view({ state: 'done', toolCount: 5 }))
85
+ expect(out).toContain('✅ <b>Worker done</b> · <i>research competitors</i>')
86
+ expect(out).toContain('5 tools · ')
87
+ expect(out).not.toContain('⚡')
88
+ })
89
+
90
+ it('renders a failed terminal recap', () => {
91
+ const out = renderWorkerActivity(view({ state: 'failed' }))
92
+ expect(out).toContain('⚠️ <b>Worker failed</b>')
93
+ })
94
+
95
+ it('escapes HTML in description, tool, arg, and summary', () => {
96
+ const out = renderWorkerActivity(
97
+ view({
98
+ description: 'a <b>bold</b> task',
99
+ lastTool: { name: 'Ba<sh', sanitisedArg: 'x & y' },
100
+ latestSummary: 'a > b',
101
+ }),
102
+ )
103
+ expect(out).toContain('a &lt;b&gt;bold&lt;/b&gt; task')
104
+ expect(out).toContain('Ba&lt;sh')
105
+ expect(out).toContain('x &amp; y')
106
+ expect(out).toContain('a &gt; b')
107
+ })
108
+ })
109
+
110
+ // ─── createWorkerActivityFeed (lifecycle) ────────────────────────────────────
111
+
112
+ describe('createWorkerActivityFeed', () => {
113
+ it('holds first paint until the worker has run firstPaintMinMs', async () => {
114
+ const bot = makeFakeBot()
115
+ let clock = 0
116
+ const feed = createWorkerActivityFeed({
117
+ bot,
118
+ now: () => clock,
119
+ firstPaintMinMs: 8000,
120
+ })
121
+ clock = 5000
122
+ await feed.update('w1', 'chat', view({ elapsedMs: 5000 }))
123
+ expect(bot.sent).toHaveLength(0)
124
+ expect(feed.has('w1')).toBe(false)
125
+
126
+ clock = 9000
127
+ await feed.update('w1', 'chat', view({ elapsedMs: 9000 }))
128
+ expect(bot.sent).toHaveLength(1)
129
+ expect(bot.sent[0].chatId).toBe('chat')
130
+ expect(bot.sent[0].opts?.parse_mode).toBe('HTML')
131
+ expect(feed.has('w1')).toBe(true)
132
+ })
133
+
134
+ it('dedups an identical body (no edit)', async () => {
135
+ const bot = makeFakeBot()
136
+ let clock = 10_000
137
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
138
+ await feed.update('w1', 'chat', view())
139
+ expect(bot.sent).toHaveLength(1)
140
+ clock = 20_000
141
+ await feed.update('w1', 'chat', view()) // same body
142
+ expect(bot.edits).toHaveLength(0)
143
+ })
144
+
145
+ it('throttles edits inside minEditIntervalMs but lets them through after', async () => {
146
+ const bot = makeFakeBot()
147
+ let clock = 10_000
148
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 2500 })
149
+ await feed.update('w1', 'chat', view({ toolCount: 1 }))
150
+ expect(bot.sent).toHaveLength(1)
151
+
152
+ clock = 11_000 // +1000 < 2500
153
+ await feed.update('w1', 'chat', view({ toolCount: 2 }))
154
+ expect(bot.edits).toHaveLength(0)
155
+
156
+ clock = 13_000 // +3000 since last edit > 2500
157
+ await feed.update('w1', 'chat', view({ toolCount: 3 }))
158
+ expect(bot.edits).toHaveLength(1)
159
+ expect(bot.edits[0].text).toContain('(3 tools · ')
160
+ })
161
+
162
+ it('forces a terminal edit on finish, skipping the throttle', async () => {
163
+ const bot = makeFakeBot()
164
+ let clock = 10_000
165
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 9_999_999 })
166
+ await feed.update('w1', 'chat', view())
167
+ expect(bot.sent).toHaveLength(1)
168
+
169
+ clock = 10_500 // well within the throttle window
170
+ await feed.finish('w1', view({ state: 'done', toolCount: 5 }))
171
+ expect(bot.edits).toHaveLength(1)
172
+ expect(bot.edits[0].text).toContain('✅ <b>Worker done</b>')
173
+ // finish forgets the worker.
174
+ expect(feed.has('w1')).toBe(false)
175
+ expect(feed.size).toBe(0)
176
+ })
177
+
178
+ it('finish is a no-op when no message was ever posted', async () => {
179
+ const bot = makeFakeBot()
180
+ let clock = 0
181
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, firstPaintMinMs: 8000 })
182
+ clock = 2000
183
+ await feed.update('w1', 'chat', view({ elapsedMs: 2000 })) // too short to paint
184
+ expect(bot.sent).toHaveLength(0)
185
+ await feed.finish('w1', view({ state: 'done' }))
186
+ expect(bot.edits).toHaveLength(0)
187
+ expect(bot.sent).toHaveLength(0)
188
+ })
189
+
190
+ it('drop forgets a worker without editing', async () => {
191
+ const bot = makeFakeBot()
192
+ let clock = 10_000
193
+ const feed = createWorkerActivityFeed({ bot, now: () => clock })
194
+ await feed.update('w1', 'chat', view())
195
+ expect(feed.has('w1')).toBe(true)
196
+ feed.drop('w1')
197
+ expect(feed.has('w1')).toBe(false)
198
+ expect(feed.size).toBe(0)
199
+ await feed.finish('w1', view({ state: 'done' }))
200
+ expect(bot.edits).toHaveLength(0)
201
+ })
202
+
203
+ it('honours a 429 cooldown before retrying the first paint', async () => {
204
+ const bot = makeFakeBot()
205
+ let clock = 10_000
206
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, firstPaintMinMs: 0 })
207
+ bot.failNextSendWith = { error_code: 429, parameters: { retry_after: 2 } }
208
+ await feed.update('w1', 'chat', view())
209
+ expect(bot.sent).toHaveLength(0) // failed send
210
+
211
+ clock = 11_000 // still inside cooldown (10_000 + 2000 + 500 jitter = 12_500)
212
+ await feed.update('w1', 'chat', view())
213
+ expect(bot.sent).toHaveLength(0)
214
+
215
+ clock = 13_000 // past cooldown
216
+ await feed.update('w1', 'chat', view())
217
+ expect(bot.sent).toHaveLength(1)
218
+ })
219
+
220
+ it('re-posts after a stale-message edit failure', async () => {
221
+ const bot = makeFakeBot()
222
+ let clock = 10_000
223
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
224
+ await feed.update('w1', 'chat', view({ toolCount: 1 }))
225
+ expect(bot.sent).toHaveLength(1)
226
+
227
+ clock = 20_000
228
+ bot.failNextEditWith = new Error('Bad Request: message to edit not found')
229
+ await feed.update('w1', 'chat', view({ toolCount: 2 }))
230
+ expect(bot.edits).toHaveLength(0) // edit threw
231
+ expect(feed.has('w1')).toBe(false) // messageId reset
232
+
233
+ clock = 30_000
234
+ await feed.update('w1', 'chat', view({ toolCount: 3 }))
235
+ expect(bot.sent).toHaveLength(2) // re-posted
236
+ expect(feed.has('w1')).toBe(true)
237
+ })
238
+
239
+ it('skips entirely when chatId is empty (owner DM unconfigured)', async () => {
240
+ const bot = makeFakeBot()
241
+ let clock = 10_000
242
+ const feed = createWorkerActivityFeed({ bot, now: () => clock })
243
+ await feed.update('w1', '', view())
244
+ expect(bot.sent).toHaveLength(0)
245
+ expect(feed.has('w1')).toBe(false)
246
+ expect(feed.size).toBe(0)
247
+ })
248
+
249
+ it('forwards threadId as message_thread_id on send', async () => {
250
+ const bot = makeFakeBot()
251
+ let clock = 10_000
252
+ const feed = createWorkerActivityFeed({ bot, now: () => clock })
253
+ await feed.update('w1', 'chat', view(), 42)
254
+ expect(bot.sent[0].opts?.message_thread_id).toBe(42)
255
+ })
256
+ })
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Live worker-activity feed (#2000) — UAT.
3
+ *
4
+ * A *background* sub-agent decouples from the parent turn; when the turn
5
+ * ends nothing surfaces its ongoing jsonl activity and a long worker
6
+ * reads as silence. The feed (flag `SWITCHROOM_WORKER_ACTIVITY_FEED=1`,
7
+ * set on the test-harness agent for this run) posts ONE regular Telegram
8
+ * message per background worker and edits it in place — current tool +
9
+ * short summary + elapsed — finalizing with a recap on completion.
10
+ *
11
+ * This scenario dispatches a real background worker (~60s of paced
12
+ * sleep/echo work, so it narrates between tools and the feed can paint
13
+ * + edit), then asserts:
14
+ *
15
+ * 1. a worker-feed message appears (🔧 Worker · …), distinct from the
16
+ * parent's ack reply — proving background activity surfaces after
17
+ * the parent turn closed;
18
+ * 2. the message edits in place while work is in flight (body changes
19
+ * across a window) — proving it's live, not a one-shot post;
20
+ * 3. it finalizes to the terminal recap (✅ Worker done · … / N tools).
21
+ *
22
+ * It logs every observed body so a human can read the real rendered UX.
23
+ *
24
+ * Prompt is the deterministic Option-1 dispatch from
25
+ * `bg-sub-agent-dispatch-dm.test.ts` (naming the Agent tool + arg keeps
26
+ * the model from running the sleeps inline via Bash).
27
+ */
28
+
29
+ import { describe, expect, it } from "vitest";
30
+ import { spinUp } from "../harness.js";
31
+
32
+ // The worker must keep its jsonl ticking faster than the *test-harness*
33
+ // stall window (SWITCHROOM_SUBAGENT_STALL_MS=5000 /
34
+ // SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS=10000 in switchroom.yaml — see
35
+ // PR #1110): a worker silent for >15s gets a *synthesized* terminal
36
+ // turn_end mid-flight, which flips the watcher entry to `done` and
37
+ // suppresses every later onProgress (the feed then never paints). Long
38
+ // silent `sleep 20`s tripped exactly that. So we drive ~10 short steps,
39
+ // each its own Bash call with a one-line narration, keeping the gap
40
+ // between jsonl emissions ~2s — well under the 5s stall floor — for
41
+ // ~30-40s total: long enough to clear the 8s first-paint, throttle, and
42
+ // land several in-place edits before the real end_turn.
43
+ const BG_DISPATCH_PROMPT =
44
+ `Use the Agent tool with subagent_type "general-purpose" and ` +
45
+ `run_in_background: true to dispatch a worker with this exact task: ` +
46
+ `"Do ten steps, ONE AT A TIME, k = 1 through 10. Before each step ` +
47
+ `write a brief one-sentence narration of what you are about to do, ` +
48
+ `then run \`sleep 2\` via the Bash tool, then run \`echo step-k\` via ` +
49
+ `the Bash tool (substitute the real number for k). Run every sleep and ` +
50
+ `every echo as its OWN separate Bash call — never batch or chain them ` +
51
+ `with && โ€” and narrate before each so progress surfaces incrementally. ` +
52
+ `Do not stop early; complete all ten steps." After dispatching, send a ` +
53
+ `brief reply saying you've kicked off the background worker so I can ` +
54
+ `watch its progress.`;
55
+
56
+ // The feed header rendered in Telegram: "🔧 Worker · <desc>" (running)
57
+ // or "✅ Worker done · …" / "⚠️ Worker failed · …" (terminal).
58
+ const WORKER_FEED_RE = /🔧\s*Worker|Worker done|Worker failed|⚡/i;
59
+ const WORKER_DONE_RE = /✅\s*Worker done|⚠️\s*Worker failed/i;
60
+
61
+ describe("uat: live worker-activity feed (#2000)", () => {
62
+ it(
63
+ "surfaces a background worker as a live, editing message that finalizes",
64
+ async () => {
65
+ const sc = await spinUp({ agent: "test-harness" });
66
+ try {
67
+ await sc.sendDM(BG_DISPATCH_PROMPT);
68
+
69
+ // Parent ack — some bot reply so we know the parent turn closed.
70
+ const ack = await sc.expectMessage(/.+/, {
71
+ from: "bot",
72
+ timeout: 45_000,
73
+ });
74
+ console.log(`[worker-feed UAT] parent ack: ${JSON.stringify(ack.text)}`);
75
+
76
+ // The worker-feed message. May arrive after the parent ack since
77
+ // first-paint waits for the worker to run ~8s and narrate.
78
+ const feed = await sc.expectMessage(WORKER_FEED_RE, {
79
+ from: "bot",
80
+ timeout: 75_000,
81
+ });
82
+ console.log(
83
+ `[worker-feed UAT] first feed paint (id=${feed.messageId}): ${JSON.stringify(feed.text)}`,
84
+ );
85
+ expect(feed.messageId).toBeGreaterThan(0);
86
+
87
+ // Live edit: snapshot, wait past the throttle + a heartbeat, and
88
+ // re-fetch the SAME message. Body should change as work advances.
89
+ // Soft: a very terse worker might narrate only once; we still
90
+ // require the terminal recap below, which is the load-bearing
91
+ // proof. Log either way so the real cadence is visible.
92
+ const before = feed.text;
93
+ await new Promise((r) => setTimeout(r, 12_000));
94
+ const mid = await sc.driver.getMessage(sc.botUserId, feed.messageId);
95
+ console.log(
96
+ `[worker-feed UAT] after 12s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
97
+ );
98
+ expect(mid, "worker-feed message vanished mid-flight").not.toBeNull();
99
+
100
+ // Terminal recap — poll the same message until it flips to the
101
+ // done/failed header. Generous budget: ~60s of work + finalize.
102
+ let doneText: string | null = null;
103
+ const deadline = Date.now() + 120_000;
104
+ while (Date.now() < deadline) {
105
+ const m = await sc.driver.getMessage(sc.botUserId, feed.messageId);
106
+ if (m != null && WORKER_DONE_RE.test(m.text)) {
107
+ doneText = m.text;
108
+ break;
109
+ }
110
+ await new Promise((r) => setTimeout(r, 5_000));
111
+ }
112
+ console.log(
113
+ `[worker-feed UAT] terminal (id=${feed.messageId}): ${JSON.stringify(doneText)}`,
114
+ );
115
+ expect(doneText, "worker-feed never reached a terminal recap").not.toBeNull();
116
+ expect(doneText!).toMatch(/tools?|tool ·/i);
117
+ // Did the body actually move between first paint and terminal?
118
+ expect(doneText).not.toBe(before);
119
+ } finally {
120
+ await sc.tearDown();
121
+ }
122
+ },
123
+ 240_000,
124
+ );
125
+ });