switchroom 0.14.14 → 0.14.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +448 -162
- package/telegram-plugin/gateway/gateway.ts +144 -8
- package/telegram-plugin/reaction-defer.ts +98 -0
- package/telegram-plugin/status-reactions.ts +31 -1
- package/telegram-plugin/subagent-watcher.ts +13 -0
- package/telegram-plugin/tests/reaction-defer.test.ts +187 -0
- package/telegram-plugin/tests/status-reactions.test.ts +79 -0
- package/telegram-plugin/tests/worker-activity-feed.test.ts +256 -0
- package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-dm.test.ts +125 -0
- package/telegram-plugin/worker-activity-feed.ts +314 -0
|
@@ -340,4 +340,83 @@ describe('StatusReactionController', () => {
|
|
|
340
340
|
await flush()
|
|
341
341
|
expect(calls).toEqual(['๐'])
|
|
342
342
|
})
|
|
343
|
+
|
|
344
|
+
// hold(): freeze on a WORKING glyph while background sub-agent workers
|
|
345
|
+
// outlive the parent turn, deferring the terminal ๐ (worker-reaction fix).
|
|
346
|
+
describe('hold() โ defer ๐ while a background worker runs', () => {
|
|
347
|
+
it('suppresses stall promotion (no ๐ฅฑ/๐จ) while held', async () => {
|
|
348
|
+
const { emit, calls } = makeEmitter()
|
|
349
|
+
const ctrl = new StatusReactionController(emit)
|
|
350
|
+
ctrl.setQueued()
|
|
351
|
+
ctrl.setTool('Bash') // working: ๐จโ๐ป
|
|
352
|
+
vi.advanceTimersByTime(3500)
|
|
353
|
+
await flush()
|
|
354
|
+
|
|
355
|
+
ctrl.hold()
|
|
356
|
+
await flush()
|
|
357
|
+
// Well past both stall thresholds — held must not yawn or panic.
|
|
358
|
+
vi.advanceTimersByTime(120000)
|
|
359
|
+
await flush()
|
|
360
|
+
expect(calls).not.toContain('๐ฅฑ')
|
|
361
|
+
expect(calls).not.toContain('๐จ')
|
|
362
|
+
})
|
|
363
|
+
|
|
364
|
+
it('promotes a read/thinking glyph to a working glyph on hold', async () => {
|
|
365
|
+
const { emit, calls } = makeEmitter()
|
|
366
|
+
const ctrl = new StatusReactionController(emit)
|
|
367
|
+
ctrl.setQueued() // ๐ (read-receipt)
|
|
368
|
+
await flush()
|
|
369
|
+
expect(calls).toEqual(['๐'])
|
|
370
|
+
|
|
371
|
+
ctrl.hold() // should paint an explicit WORKING glyph (โ๏ธ)
|
|
372
|
+
await flush()
|
|
373
|
+
expect(calls[calls.length - 1]).toBe('โ')
|
|
374
|
+
})
|
|
375
|
+
|
|
376
|
+
it('finalize() still terminates to ๐ after hold (deferred terminal)', async () => {
|
|
377
|
+
const { emit, calls } = makeEmitter()
|
|
378
|
+
const ctrl = new StatusReactionController(emit)
|
|
379
|
+
ctrl.setQueued()
|
|
380
|
+
ctrl.setTool() // โ
|
|
381
|
+
vi.advanceTimersByTime(3500)
|
|
382
|
+
await flush()
|
|
383
|
+
|
|
384
|
+
ctrl.hold()
|
|
385
|
+
await flush()
|
|
386
|
+
// Worker runs for a while, then completes โ gateway finalizes.
|
|
387
|
+
vi.advanceTimersByTime(60000)
|
|
388
|
+
await flush()
|
|
389
|
+
ctrl.finalize('done')
|
|
390
|
+
await flush()
|
|
391
|
+
expect(calls[calls.length - 1]).toBe('๐')
|
|
392
|
+
})
|
|
393
|
+
|
|
394
|
+
it('does not double-paint when already on a working glyph', async () => {
|
|
395
|
+
const { emit, calls } = makeEmitter()
|
|
396
|
+
const ctrl = new StatusReactionController(emit)
|
|
397
|
+
ctrl.setQueued()
|
|
398
|
+
ctrl.setTool() // โ
|
|
399
|
+
vi.advanceTimersByTime(3500)
|
|
400
|
+
await flush()
|
|
401
|
+
const before = calls.length
|
|
402
|
+
|
|
403
|
+
ctrl.hold() // already on โ โ no new emit
|
|
404
|
+
await flush()
|
|
405
|
+
expect(calls.length).toBe(before)
|
|
406
|
+
})
|
|
407
|
+
|
|
408
|
+
it('hold() after finalize is a no-op (cannot resurrect a finished controller)', async () => {
|
|
409
|
+
const { emit, calls } = makeEmitter()
|
|
410
|
+
const ctrl = new StatusReactionController(emit)
|
|
411
|
+
ctrl.setQueued()
|
|
412
|
+
ctrl.finalize('done')
|
|
413
|
+
await flush()
|
|
414
|
+
const snapshot = [...calls]
|
|
415
|
+
|
|
416
|
+
ctrl.hold()
|
|
417
|
+
vi.advanceTimersByTime(120000)
|
|
418
|
+
await flush()
|
|
419
|
+
expect(calls).toEqual(snapshot)
|
|
420
|
+
})
|
|
421
|
+
})
|
|
343
422
|
})
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
renderWorkerActivity,
|
|
4
|
+
createWorkerActivityFeed,
|
|
5
|
+
type WorkerActivityView,
|
|
6
|
+
type BotApiForWorkerFeed,
|
|
7
|
+
} from '../worker-activity-feed.js'
|
|
8
|
+
|
|
9
|
+
function view(partial: Partial<WorkerActivityView> = {}): WorkerActivityView {
|
|
10
|
+
return {
|
|
11
|
+
description: 'research competitors',
|
|
12
|
+
lastTool: { name: 'Bash', sanitisedArg: 'grep -r pricing' },
|
|
13
|
+
toolCount: 3,
|
|
14
|
+
latestSummary: 'scanning vendor pages',
|
|
15
|
+
elapsedMs: 10_000,
|
|
16
|
+
state: 'running',
|
|
17
|
+
...partial,
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
interface FakeBot extends BotApiForWorkerFeed {
|
|
22
|
+
sent: Array<{ chatId: string; text: string; opts?: Record<string, unknown> }>
|
|
23
|
+
edits: Array<{ messageId: number; text: string }>
|
|
24
|
+
failNextSendWith?: unknown
|
|
25
|
+
failNextEditWith?: unknown
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
function makeFakeBot(): FakeBot {
|
|
29
|
+
let nextId = 1000
|
|
30
|
+
const fb: FakeBot = {
|
|
31
|
+
sent: [],
|
|
32
|
+
edits: [],
|
|
33
|
+
sendMessage: async (chatId, text, opts) => {
|
|
34
|
+
if (fb.failNextSendWith != null) {
|
|
35
|
+
const e = fb.failNextSendWith
|
|
36
|
+
fb.failNextSendWith = undefined
|
|
37
|
+
throw e
|
|
38
|
+
}
|
|
39
|
+
fb.sent.push({ chatId, text, opts })
|
|
40
|
+
return { message_id: nextId++ }
|
|
41
|
+
},
|
|
42
|
+
editMessageText: async (_chatId, messageId, text) => {
|
|
43
|
+
if (fb.failNextEditWith != null) {
|
|
44
|
+
const e = fb.failNextEditWith
|
|
45
|
+
fb.failNextEditWith = undefined
|
|
46
|
+
throw e
|
|
47
|
+
}
|
|
48
|
+
fb.edits.push({ messageId, text })
|
|
49
|
+
return {}
|
|
50
|
+
},
|
|
51
|
+
}
|
|
52
|
+
return fb
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// โโโ renderWorkerActivity (pure) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
56
|
+
|
|
57
|
+
describe('renderWorkerActivity', () => {
|
|
58
|
+
it('renders running header + tool activity line + summary', () => {
|
|
59
|
+
const out = renderWorkerActivity(view())
|
|
60
|
+
expect(out).toContain('๐ง <b>Worker</b> ยท <i>research competitors</i>')
|
|
61
|
+
expect(out).toContain('โก <code>Bash</code> grep -r pricing')
|
|
62
|
+
expect(out).toContain('(3 tools ยท ')
|
|
63
|
+
expect(out).toContain('โณ <i>scanning vendor pages</i>')
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
it('shows a "startingโฆ" line when no tool has run yet', () => {
|
|
67
|
+
const out = renderWorkerActivity(view({ lastTool: null, latestSummary: '' }))
|
|
68
|
+
expect(out).toContain('๐ง <b>Worker</b>')
|
|
69
|
+
expect(out).toContain('startingโฆ')
|
|
70
|
+
expect(out).not.toContain('โก')
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
it('omits the summary line when latestSummary is blank', () => {
|
|
74
|
+
const out = renderWorkerActivity(view({ latestSummary: ' ' }))
|
|
75
|
+
expect(out).not.toContain('โณ')
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
it('uses singular "tool" for a single tool call', () => {
|
|
79
|
+
const out = renderWorkerActivity(view({ toolCount: 1 }))
|
|
80
|
+
expect(out).toContain('(1 tool ยท ')
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('renders a done terminal recap', () => {
|
|
84
|
+
const out = renderWorkerActivity(view({ state: 'done', toolCount: 5 }))
|
|
85
|
+
expect(out).toContain('โ
<b>Worker done</b> ยท <i>research competitors</i>')
|
|
86
|
+
expect(out).toContain('5 tools ยท ')
|
|
87
|
+
expect(out).not.toContain('โก')
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
it('renders a failed terminal recap', () => {
|
|
91
|
+
const out = renderWorkerActivity(view({ state: 'failed' }))
|
|
92
|
+
expect(out).toContain('โ ๏ธ <b>Worker failed</b>')
|
|
93
|
+
})
|
|
94
|
+
|
|
95
|
+
it('escapes HTML in description, tool, arg, and summary', () => {
|
|
96
|
+
const out = renderWorkerActivity(
|
|
97
|
+
view({
|
|
98
|
+
description: 'a <b>bold</b> task',
|
|
99
|
+
lastTool: { name: 'Ba<sh', sanitisedArg: 'x & y' },
|
|
100
|
+
latestSummary: 'a > b',
|
|
101
|
+
}),
|
|
102
|
+
)
|
|
103
|
+
expect(out).toContain('a <b>bold</b> task')
|
|
104
|
+
expect(out).toContain('Ba<sh')
|
|
105
|
+
expect(out).toContain('x & y')
|
|
106
|
+
expect(out).toContain('a > b')
|
|
107
|
+
})
|
|
108
|
+
})
|
|
109
|
+
|
|
110
|
+
// โโโ createWorkerActivityFeed (lifecycle) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
111
|
+
|
|
112
|
+
describe('createWorkerActivityFeed', () => {
|
|
113
|
+
it('holds first paint until the worker has run firstPaintMinMs', async () => {
|
|
114
|
+
const bot = makeFakeBot()
|
|
115
|
+
let clock = 0
|
|
116
|
+
const feed = createWorkerActivityFeed({
|
|
117
|
+
bot,
|
|
118
|
+
now: () => clock,
|
|
119
|
+
firstPaintMinMs: 8000,
|
|
120
|
+
})
|
|
121
|
+
clock = 5000
|
|
122
|
+
await feed.update('w1', 'chat', view({ elapsedMs: 5000 }))
|
|
123
|
+
expect(bot.sent).toHaveLength(0)
|
|
124
|
+
expect(feed.has('w1')).toBe(false)
|
|
125
|
+
|
|
126
|
+
clock = 9000
|
|
127
|
+
await feed.update('w1', 'chat', view({ elapsedMs: 9000 }))
|
|
128
|
+
expect(bot.sent).toHaveLength(1)
|
|
129
|
+
expect(bot.sent[0].chatId).toBe('chat')
|
|
130
|
+
expect(bot.sent[0].opts?.parse_mode).toBe('HTML')
|
|
131
|
+
expect(feed.has('w1')).toBe(true)
|
|
132
|
+
})
|
|
133
|
+
|
|
134
|
+
it('dedups an identical body (no edit)', async () => {
|
|
135
|
+
const bot = makeFakeBot()
|
|
136
|
+
let clock = 10_000
|
|
137
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
|
|
138
|
+
await feed.update('w1', 'chat', view())
|
|
139
|
+
expect(bot.sent).toHaveLength(1)
|
|
140
|
+
clock = 20_000
|
|
141
|
+
await feed.update('w1', 'chat', view()) // same body
|
|
142
|
+
expect(bot.edits).toHaveLength(0)
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
it('throttles edits inside minEditIntervalMs but lets them through after', async () => {
|
|
146
|
+
const bot = makeFakeBot()
|
|
147
|
+
let clock = 10_000
|
|
148
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 2500 })
|
|
149
|
+
await feed.update('w1', 'chat', view({ toolCount: 1 }))
|
|
150
|
+
expect(bot.sent).toHaveLength(1)
|
|
151
|
+
|
|
152
|
+
clock = 11_000 // +1000 < 2500
|
|
153
|
+
await feed.update('w1', 'chat', view({ toolCount: 2 }))
|
|
154
|
+
expect(bot.edits).toHaveLength(0)
|
|
155
|
+
|
|
156
|
+
clock = 13_000 // +3000 since last edit > 2500
|
|
157
|
+
await feed.update('w1', 'chat', view({ toolCount: 3 }))
|
|
158
|
+
expect(bot.edits).toHaveLength(1)
|
|
159
|
+
expect(bot.edits[0].text).toContain('(3 tools ยท ')
|
|
160
|
+
})
|
|
161
|
+
|
|
162
|
+
it('forces a terminal edit on finish, skipping the throttle', async () => {
|
|
163
|
+
const bot = makeFakeBot()
|
|
164
|
+
let clock = 10_000
|
|
165
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 9_999_999 })
|
|
166
|
+
await feed.update('w1', 'chat', view())
|
|
167
|
+
expect(bot.sent).toHaveLength(1)
|
|
168
|
+
|
|
169
|
+
clock = 10_500 // well within the throttle window
|
|
170
|
+
await feed.finish('w1', view({ state: 'done', toolCount: 5 }))
|
|
171
|
+
expect(bot.edits).toHaveLength(1)
|
|
172
|
+
expect(bot.edits[0].text).toContain('โ
<b>Worker done</b>')
|
|
173
|
+
// finish forgets the worker.
|
|
174
|
+
expect(feed.has('w1')).toBe(false)
|
|
175
|
+
expect(feed.size).toBe(0)
|
|
176
|
+
})
|
|
177
|
+
|
|
178
|
+
it('finish is a no-op when no message was ever posted', async () => {
|
|
179
|
+
const bot = makeFakeBot()
|
|
180
|
+
let clock = 0
|
|
181
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, firstPaintMinMs: 8000 })
|
|
182
|
+
clock = 2000
|
|
183
|
+
await feed.update('w1', 'chat', view({ elapsedMs: 2000 })) // too short to paint
|
|
184
|
+
expect(bot.sent).toHaveLength(0)
|
|
185
|
+
await feed.finish('w1', view({ state: 'done' }))
|
|
186
|
+
expect(bot.edits).toHaveLength(0)
|
|
187
|
+
expect(bot.sent).toHaveLength(0)
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
it('drop forgets a worker without editing', async () => {
|
|
191
|
+
const bot = makeFakeBot()
|
|
192
|
+
let clock = 10_000
|
|
193
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock })
|
|
194
|
+
await feed.update('w1', 'chat', view())
|
|
195
|
+
expect(feed.has('w1')).toBe(true)
|
|
196
|
+
feed.drop('w1')
|
|
197
|
+
expect(feed.has('w1')).toBe(false)
|
|
198
|
+
expect(feed.size).toBe(0)
|
|
199
|
+
await feed.finish('w1', view({ state: 'done' }))
|
|
200
|
+
expect(bot.edits).toHaveLength(0)
|
|
201
|
+
})
|
|
202
|
+
|
|
203
|
+
it('honours a 429 cooldown before retrying the first paint', async () => {
|
|
204
|
+
const bot = makeFakeBot()
|
|
205
|
+
let clock = 10_000
|
|
206
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, firstPaintMinMs: 0 })
|
|
207
|
+
bot.failNextSendWith = { error_code: 429, parameters: { retry_after: 2 } }
|
|
208
|
+
await feed.update('w1', 'chat', view())
|
|
209
|
+
expect(bot.sent).toHaveLength(0) // failed send
|
|
210
|
+
|
|
211
|
+
clock = 11_000 // still inside cooldown (10_000 + 2000 + 500 jitter = 12_500)
|
|
212
|
+
await feed.update('w1', 'chat', view())
|
|
213
|
+
expect(bot.sent).toHaveLength(0)
|
|
214
|
+
|
|
215
|
+
clock = 13_000 // past cooldown
|
|
216
|
+
await feed.update('w1', 'chat', view())
|
|
217
|
+
expect(bot.sent).toHaveLength(1)
|
|
218
|
+
})
|
|
219
|
+
|
|
220
|
+
it('re-posts after a stale-message edit failure', async () => {
|
|
221
|
+
const bot = makeFakeBot()
|
|
222
|
+
let clock = 10_000
|
|
223
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
|
|
224
|
+
await feed.update('w1', 'chat', view({ toolCount: 1 }))
|
|
225
|
+
expect(bot.sent).toHaveLength(1)
|
|
226
|
+
|
|
227
|
+
clock = 20_000
|
|
228
|
+
bot.failNextEditWith = new Error('Bad Request: message to edit not found')
|
|
229
|
+
await feed.update('w1', 'chat', view({ toolCount: 2 }))
|
|
230
|
+
expect(bot.edits).toHaveLength(0) // edit threw
|
|
231
|
+
expect(feed.has('w1')).toBe(false) // messageId reset
|
|
232
|
+
|
|
233
|
+
clock = 30_000
|
|
234
|
+
await feed.update('w1', 'chat', view({ toolCount: 3 }))
|
|
235
|
+
expect(bot.sent).toHaveLength(2) // re-posted
|
|
236
|
+
expect(feed.has('w1')).toBe(true)
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
it('skips entirely when chatId is empty (owner DM unconfigured)', async () => {
|
|
240
|
+
const bot = makeFakeBot()
|
|
241
|
+
let clock = 10_000
|
|
242
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock })
|
|
243
|
+
await feed.update('w1', '', view())
|
|
244
|
+
expect(bot.sent).toHaveLength(0)
|
|
245
|
+
expect(feed.has('w1')).toBe(false)
|
|
246
|
+
expect(feed.size).toBe(0)
|
|
247
|
+
})
|
|
248
|
+
|
|
249
|
+
it('forwards threadId as message_thread_id on send', async () => {
|
|
250
|
+
const bot = makeFakeBot()
|
|
251
|
+
let clock = 10_000
|
|
252
|
+
const feed = createWorkerActivityFeed({ bot, now: () => clock })
|
|
253
|
+
await feed.update('w1', 'chat', view(), 42)
|
|
254
|
+
expect(bot.sent[0].opts?.message_thread_id).toBe(42)
|
|
255
|
+
})
|
|
256
|
+
})
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live worker-activity feed (#2000) — UAT.
|
|
3
|
+
*
|
|
4
|
+
* A *background* sub-agent decouples from the parent turn; when the turn
|
|
5
|
+
* ends nothing surfaces its ongoing jsonl activity and a long worker
|
|
6
|
+
* reads as silence. The feed (flag `SWITCHROOM_WORKER_ACTIVITY_FEED=1`,
|
|
7
|
+
* set on the test-harness agent for this run) posts ONE regular Telegram
|
|
8
|
+
* message per background worker and edits it in place — current tool +
|
|
9
|
+
* short summary + elapsed — finalizing with a recap on completion.
|
|
10
|
+
*
|
|
11
|
+
* This scenario dispatches a real background worker (~60s of paced
|
|
12
|
+
* sleep/echo work, so it narrates between tools and the feed can paint
|
|
13
|
+
* + edit), then asserts:
|
|
14
|
+
*
|
|
15
|
+
* 1. a worker-feed message appears (🔧 Worker · …), distinct from the
|
|
16
|
+
* parent's ack reply — proving background activity surfaces after
|
|
17
|
+
* the parent turn closed;
|
|
18
|
+
* 2. the message edits in place while work is in flight (body changes
|
|
19
|
+
* across a window) — proving it's live, not a one-shot post;
|
|
20
|
+
* 3. it finalizes to the terminal recap (✅ Worker done · … / N tools).
|
|
21
|
+
*
|
|
22
|
+
* It logs every observed body so a human can read the real rendered UX.
|
|
23
|
+
*
|
|
24
|
+
* Prompt is the deterministic Option-1 dispatch from
|
|
25
|
+
* `bg-sub-agent-dispatch-dm.test.ts` (naming the Agent tool + arg keeps
|
|
26
|
+
* the model from running the sleeps inline via Bash).
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { describe, expect, it } from "vitest";
|
|
30
|
+
import { spinUp } from "../harness.js";
|
|
31
|
+
|
|
32
|
+
// The worker must keep its jsonl ticking faster than the *test-harness*
|
|
33
|
+
// stall window (SWITCHROOM_SUBAGENT_STALL_MS=5000 /
|
|
34
|
+
// SWITCHROOM_SUBAGENT_STALL_TERMINAL_MS=10000 in switchroom.yaml — see
|
|
35
|
+
// PR #1110): a worker silent for >15s gets a *synthesized* terminal
|
|
36
|
+
// turn_end mid-flight, which flips the watcher entry to `done` and
|
|
37
|
+
// suppresses every later onProgress (the feed then never paints). Long
|
|
38
|
+
// silent `sleep 20`s tripped exactly that. So we drive ~10 short steps,
|
|
39
|
+
// each its own Bash call with a one-line narration, keeping the gap
|
|
40
|
+
// between jsonl emissions ~2s — well under the 5s stall floor — for
|
|
41
|
+
// ~30-40s total: long enough to clear the 8s first-paint, throttle, and
|
|
42
|
+
// land several in-place edits before the real end_turn.
|
|
43
|
+
const BG_DISPATCH_PROMPT =
|
|
44
|
+
`Use the Agent tool with subagent_type "general-purpose" and ` +
|
|
45
|
+
`run_in_background: true to dispatch a worker with this exact task: ` +
|
|
46
|
+
`"Do ten steps, ONE AT A TIME, k = 1 through 10. Before each step ` +
|
|
47
|
+
`write a brief one-sentence narration of what you are about to do, ` +
|
|
48
|
+
`then run \`sleep 2\` via the Bash tool, then run \`echo step-k\` via ` +
|
|
49
|
+
`the Bash tool (substitute the real number for k). Run every sleep and ` +
|
|
50
|
+
`every echo as its OWN separate Bash call โ never batch or chain them ` +
|
|
51
|
+
`with && โ and narrate before each so progress surfaces incrementally. ` +
|
|
52
|
+
`Do not stop early; complete all ten steps." After dispatching, send a ` +
|
|
53
|
+
`brief reply saying you've kicked off the background worker so I can ` +
|
|
54
|
+
`watch its progress.`;
|
|
55
|
+
|
|
56
|
+
// The feed header rendered in Telegram: "🔧 Worker · <desc>" (running)
|
|
57
|
+
// or "✅ Worker done · …" / "⚠️ Worker failed · …" (terminal).
|
|
58
|
+
const WORKER_FEED_RE = /๐ง\s*Worker|Worker done|Worker failed|โก/i;
|
|
59
|
+
const WORKER_DONE_RE = /โ
\s*Worker done|โ ๏ธ\s*Worker failed/i;
|
|
60
|
+
|
|
61
|
+
describe("uat: live worker-activity feed (#2000)", () => {
|
|
62
|
+
it(
|
|
63
|
+
"surfaces a background worker as a live, editing message that finalizes",
|
|
64
|
+
async () => {
|
|
65
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
66
|
+
try {
|
|
67
|
+
await sc.sendDM(BG_DISPATCH_PROMPT);
|
|
68
|
+
|
|
69
|
+
// Parent ack — some bot reply so we know the parent turn closed.
|
|
70
|
+
const ack = await sc.expectMessage(/.+/, {
|
|
71
|
+
from: "bot",
|
|
72
|
+
timeout: 45_000,
|
|
73
|
+
});
|
|
74
|
+
console.log(`[worker-feed UAT] parent ack: ${JSON.stringify(ack.text)}`);
|
|
75
|
+
|
|
76
|
+
// The worker-feed message. May arrive after the parent ack since
|
|
77
|
+
// first-paint waits for the worker to run ~8s and narrate.
|
|
78
|
+
const feed = await sc.expectMessage(WORKER_FEED_RE, {
|
|
79
|
+
from: "bot",
|
|
80
|
+
timeout: 75_000,
|
|
81
|
+
});
|
|
82
|
+
console.log(
|
|
83
|
+
`[worker-feed UAT] first feed paint (id=${feed.messageId}): ${JSON.stringify(feed.text)}`,
|
|
84
|
+
);
|
|
85
|
+
expect(feed.messageId).toBeGreaterThan(0);
|
|
86
|
+
|
|
87
|
+
// Live edit: snapshot, wait past the throttle + a heartbeat, and
|
|
88
|
+
// re-fetch the SAME message. Body should change as work advances.
|
|
89
|
+
// Soft: a very terse worker might narrate only once; we still
|
|
90
|
+
// require the terminal recap below, which is the load-bearing
|
|
91
|
+
// proof. Log either way so the real cadence is visible.
|
|
92
|
+
const before = feed.text;
|
|
93
|
+
await new Promise((r) => setTimeout(r, 12_000));
|
|
94
|
+
const mid = await sc.driver.getMessage(sc.botUserId, feed.messageId);
|
|
95
|
+
console.log(
|
|
96
|
+
`[worker-feed UAT] after 12s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
|
|
97
|
+
);
|
|
98
|
+
expect(mid, "worker-feed message vanished mid-flight").not.toBeNull();
|
|
99
|
+
|
|
100
|
+
// Terminal recap — poll the same message until it flips to the
|
|
101
|
+
// done/failed header. Generous budget: ~60s of work + finalize.
|
|
102
|
+
let doneText: string | null = null;
|
|
103
|
+
const deadline = Date.now() + 120_000;
|
|
104
|
+
while (Date.now() < deadline) {
|
|
105
|
+
const m = await sc.driver.getMessage(sc.botUserId, feed.messageId);
|
|
106
|
+
if (m != null && WORKER_DONE_RE.test(m.text)) {
|
|
107
|
+
doneText = m.text;
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
110
|
+
await new Promise((r) => setTimeout(r, 5_000));
|
|
111
|
+
}
|
|
112
|
+
console.log(
|
|
113
|
+
`[worker-feed UAT] terminal (id=${feed.messageId}): ${JSON.stringify(doneText)}`,
|
|
114
|
+
);
|
|
115
|
+
expect(doneText, "worker-feed never reached a terminal recap").not.toBeNull();
|
|
116
|
+
expect(doneText!).toMatch(/tools?|tool ยท/i);
|
|
117
|
+
// Did the body actually move between first paint and terminal?
|
|
118
|
+
expect(doneText).not.toBe(before);
|
|
119
|
+
} finally {
|
|
120
|
+
await sc.tearDown();
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
240_000,
|
|
124
|
+
);
|
|
125
|
+
});
|