switchroom 0.14.18 → 0.14.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,6 +92,42 @@ describe('renderWorkerActivity', () => {
92
92
  expect(out).toContain('⚠️ <b>Worker failed</b>')
93
93
  })
94
94
 
95
it('grows a narrative block when narrativeLines is present', () => {
  // One source of truth for the expected block, oldest → newest.
  const narrativeLines = ['read the brief', 'scanned vendor A', 'scanned vendor B']
  const rendered = renderWorkerActivity(
    view({ latestSummary: 'newest only — should be ignored', narrativeLines }),
  )

  // Every accumulated line is rendered as its own ↳ row.
  for (const line of narrativeLines) {
    expect(rendered).toContain(`↳ <i>${line}</i>`)
  }
  // With a block present, the single-line latestSummary fallback is NOT used.
  expect(rendered).not.toContain('newest only')
  // One ↳ per narrative line, nothing extra.
  expect(rendered.match(/↳/g) ?? []).toHaveLength(narrativeLines.length)
})
110
+
111
it('falls back to latestSummary when narrativeLines is empty', () => {
  // An empty narrative list must behave like an absent one.
  const input = view({ narrativeLines: [], latestSummary: 'one line' })
  const rendered = renderWorkerActivity(input)
  const arrows = rendered.match(/↳/g) ?? []

  expect(rendered).toContain('↳ <i>one line</i>')
  expect(arrows).toHaveLength(1)
})
116
+
117
it('drops blank narrative lines from the block', () => {
  // The whitespace-only middle entry must not produce a ↳ row.
  const input = view({ narrativeLines: ['kept', ' ', 'also kept'] })
  const rendered = renderWorkerActivity(input)
  const arrows = rendered.match(/↳/g) ?? []

  expect(rendered).toContain('↳ <i>kept</i>')
  expect(rendered).toContain('↳ <i>also kept</i>')
  expect(arrows).toHaveLength(2)
})
125
+
126
it('escapes HTML inside narrative lines', () => {
  // Markup and ampersands in a narrative line must reach the output escaped.
  const rawLine = 'a <b>x</b> & y'
  const rendered = renderWorkerActivity(view({ narrativeLines: [rawLine] }))
  expect(rendered).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
})
130
+
95
131
  it('escapes HTML in description, tool, arg, and summary', () => {
96
132
  const out = renderWorkerActivity(
97
133
  view({
@@ -246,6 +282,83 @@ describe('createWorkerActivityFeed', () => {
246
282
  expect(feed.size).toBe(0)
247
283
  })
248
284
 
285
it('accumulates distinct narrative lines into a growing block across ticks', async () => {
  const bot = makeFakeBot()
  let clock = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })

  // First tick paints the message with the first narrative line.
  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
  expect(bot.sent).toHaveLength(1)
  expect(bot.sent[0].text).toContain('↳ <i>read the brief</i>')

  // Two further ticks, each carrying a new narrative line.
  clock = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
  clock = 12_000
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))

  // Guard explicitly: a missing edit should fail with a readable message
  // rather than a TypeError from dereferencing `undefined`.
  const last = bot.edits.at(-1)
  if (last === undefined) throw new Error('expected at least one edit after the follow-up ticks')

  // The latest edit carries the whole block, oldest → newest.
  expect(last.text).toContain('↳ <i>read the brief</i>')
  expect(last.text).toContain('↳ <i>scanned vendor A</i>')
  expect(last.text).toContain('↳ <i>scanned vendor B</i>')
  expect(last.text.match(/↳/g) ?? []).toHaveLength(3)
})
305
+
306
it('dedups a repeated narrative line so the block does not duplicate', async () => {
  const bot = makeFakeBot()
  let clock = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })

  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'same line' }))
  // Repeated narrative but a changed tool count → body differs, edit fires,
  // but the narrative block must not gain a duplicate line.
  clock = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))

  // The test's premise is that the second tick produced an edit; check it
  // explicitly so a missing edit is reported as such, not as a TypeError.
  const last = bot.edits.at(-1)
  if (last === undefined) throw new Error('expected the changed tool count to trigger an edit')

  expect(last.text.match(/↳/g) ?? []).toHaveLength(1)
})
320
+
321
it('caps the narrative block to the last 6 lines', async () => {
  const bot = makeFakeBot()
  let clock = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })

  // Nine distinct lines, one per tick — three more than the cap.
  for (let i = 1; i <= 9; i++) {
    clock += 1000
    await feed.update('w1', 'chat', view({ toolCount: i, latestSummary: `line ${i}` }))
  }

  // Guard explicitly so a missing edit fails with a readable message.
  const last = bot.edits.at(-1)
  if (last === undefined) throw new Error('expected at least one edit after nine ticks')

  expect(last.text.match(/↳/g) ?? []).toHaveLength(6)
  // Oldest lines evicted (1–3); newest retained (4–9). Check every line so a
  // gap in the middle of the window cannot slip through.
  for (let i = 1; i <= 3; i++) {
    expect(last.text).not.toContain(`line ${i}`)
  }
  for (let i = 4; i <= 9; i++) {
    expect(last.text).toContain(`line ${i}`)
  }
})
339
+
340
it('grows the narrative even while throttled (line surfaces on next edit)', async () => {
  const bot = makeFakeBot()
  let clock = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 2500 })

  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'line A' }))
  expect(bot.sent).toHaveLength(1)

  // Throttled tick — no edit, but the line must still be accumulated.
  clock = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'line B' }))
  expect(bot.edits).toHaveLength(0)

  // Past the throttle — the edit now carries BOTH earlier lines plus the new one.
  clock = 13_000
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))

  // Guard explicitly: if the post-throttle tick produced no edit, say so
  // instead of crashing on `undefined.text`.
  const last = bot.edits.at(-1)
  if (last === undefined) throw new Error('expected an edit once the throttle window had passed')

  expect(last.text).toContain('↳ <i>line A</i>')
  expect(last.text).toContain('↳ <i>line B</i>')
  expect(last.text).toContain('↳ <i>line C</i>')
})
361
+
249
362
  it('forwards threadId as message_thread_id on send', async () => {
250
363
  const bot = makeFakeBot()
251
364
  let clock = 10_000
@@ -0,0 +1,158 @@
1
+ /**
2
+ * JTBD scenario — forwarded burst / split paste coalesces into ONE turn.
3
+ *
4
+ * Serves: `reference/steer-or-queue-mid-flight.md` — the "Forwarded
5
+ * burst / split paste" UAT prompt. When several messages land in quick
6
+ * succession from the same sender (a forward of 3-4 messages, or a long
7
+ * paste Telegram split into chunks), inbound coalescing must merge them
8
+ * into a SINGLE Claude turn with shared context — not reply to each
9
+ * fragment in isolation.
10
+ *
11
+ * This is the end-to-end gate for the A1 coalescing work shipped in
12
+ * v0.14.18 (#2007). The merge logic itself is covered by unit + fuzz
13
+ * tests (`inbound-coalesce.test.ts`, `pending-inbound-buffer.test.ts`),
14
+ * but only this scenario exercises the real inbound → gateway coalescer
15
+ * → claude → outbound path over a live Telegram chat.
16
+ *
17
+ * ## How the signal is constructed
18
+ *
19
+ * Naively asking the agent to "combine facts from several messages"
20
+ * does NOT distinguish coalesced from fanned-out: even when each
21
+ * message becomes its own turn, every later turn carries the PRIOR
22
+ * turns in its conversation history, so the model could still answer
23
+ * from history. History bleed makes a content-combination assertion
24
+ * useless as a coalescing probe.
25
+ *
26
+ * The distinguishing fact is whether a SINGLE turn saw all the parts in
27
+ * ONE incoming message. So we anchor the instruction on "this single
28
+ * message": three messages are fired near-simultaneously (so they land
29
+ * inside the default 500ms coalesce window), each carrying a distinct
30
+ * code token, and the instruction (in the last part) asks the agent to
31
+ * echo every token it received *in this one incoming message*.
32
+ *
33
+ * - Coalesced → the merged turn's single message contains ALPHA,
34
+ * BRAVO and CHARLIE → the reply names all three.
35
+ * - Fanned out → the message carrying the instruction contains only
36
+ * its own token → the reply names just that one. (And the other
37
+ * two tokens arrive as their own separate turns.)
38
+ *
39
+ * Tokens are deliberately odd uppercase strings so a substring match is
40
+ * unambiguous and won't collide with incidental words in the reply.
41
+ *
42
+ * Order-independence: the three sends are dispatched concurrently to
43
+ * guarantee they share a coalesce window, which means Telegram may
44
+ * deliver them in any order. We assert SET membership (all three
45
+ * present), never order.
46
+ */
47
+
48
+ import { describe, it, expect } from "vitest";
49
+ import { spinUp } from "../harness.js";
50
+ import { pollUntil } from "../assertions.js";
51
+ import type { ObservedMessage } from "../driver.js";
52
+
53
const AGENT = "test-harness";

// Code tokens the agent is asked to echo back — chosen to be unlikely to
// show up in a reply by accident.
const TOKENS = ["ALPHA", "BRAVO", "CHARLIE"] as const;

// The three-part burst. Each part carries one token; the final part also
// carries the instruction, anchored on "THIS SINGLE incoming message".
const BURST: string[] = [
  `BURST-PROBE part 1 of 3. Code token: ${TOKENS[0]}.`,
  `BURST-PROBE part 2 of 3. Code token: ${TOKENS[1]}.`,
  `BURST-PROBE part 3 of 3. Code token: ${TOKENS[2]}. ` +
    `Reply with ONLY the BURST-PROBE code tokens contained in THIS SINGLE ` +
    `incoming message, slash-separated. If this message contains just one ` +
    `token, reply with only that token.`,
];

// Generous budget: TTFO on the test-harness is ~7s warm; coalescing
// adds the (sub-second) window plus normal model latency.
const ANSWER_TIMEOUT_MS = 40_000;

/**
 * Lists the probe tokens that occur in `text`.
 *
 * Matching is a case-insensitive substring test (the text is upper-cased
 * before comparison). The result follows TOKENS order, not the order in
 * which the tokens appear in `text`.
 */
function tokensIn(text: string): string[] {
  const haystack = text.toUpperCase();
  const found: string[] = [];
  for (const token of TOKENS) {
    if (haystack.includes(token)) {
      found.push(token);
    }
  }
  return found;
}
75
+
76
describe("uat: forwarded burst / split paste coalesces into one turn", () => {
  // Renders replies as `#<messageId>=[TOKEN,...]` pairs for the failure
  // message and the partial-fan-out warning.
  const summarize = (replies: ObservedMessage[]): string =>
    replies
      .map((reply) => `#${reply.messageId}=[${tokensIn(reply.text).join(",")}]`)
      .join(" ");

  it(
    "a 3-message burst is answered as ONE shared-context turn",
    async () => {
      const session = await spinUp({ agent: AGENT });
      try {
        // Subscribe BEFORE sending: observeMessages only sees live updates,
        // not history. Keying by message id means streamed edits overwrite
        // each other and only the latest text per message is kept.
        const latestByMessageId = new Map<number, ObservedMessage>();
        const observed = session.driver.observeMessages(session.botUserId);
        const drained = (async () => {
          for await (const message of observed) {
            // Skip our own sends; keep everything else.
            if (message.senderUserId !== session.driverUserId) {
              latestByMessageId.set(message.messageId, message);
            }
          }
        })();

        // Dispatch all parts concurrently so they share one coalesce window.
        // Serial awaits would add a round-trip between parts and could
        // exceed the 500ms default on a slow link.
        await Promise.all(BURST.map((part) => session.sendDM(part)));

        // Poll for a single bot message that names every token — the proof
        // that one turn saw the whole burst as one incoming message. A
        // rejection from pollUntil is mapped to `undefined` so the detailed
        // error below is raised instead.
        const completeReply = await pollUntil(
          () =>
            [...latestByMessageId.values()].find(
              (message) => tokensIn(message.text).length === TOKENS.length,
            ),
          { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
        ).catch(() => undefined);

        // Close the observer stream and wait for the drain loop to finish.
        await observed[Symbol.asyncIterator]().return?.(undefined as never);
        await drained;

        const tokenBearing = [...latestByMessageId.values()].filter(
          (message) => tokensIn(message.text).length > 0,
        );

        if (!completeReply) {
          const seen = summarize(tokenBearing);
          throw new Error(
            `[forwarded-burst] No single bot reply named all of ` +
              `${TOKENS.join("/")}. This is the coalescing regression: the ` +
              `burst fanned out into separate turns so no turn saw the full ` +
              `message. Token-bearing replies: ${seen || "(none)"}.`,
          );
        }

        // Set membership only — delivery order of the burst is not guaranteed.
        expect(tokensIn(completeReply.text).sort()).toEqual([...TOKENS].sort());

        // Forensic: a coalesced burst should yield ONE token-naming answer.
        // More than one hints at a partial fan-out even though one reply was
        // complete — warn now, before it becomes a hard failure.
        if (tokenBearing.length > 1) {
          console.warn(
            `[forwarded-burst] ${tokenBearing.length} token-bearing replies ` +
              `observed (expected 1). Possible partial fan-out: ` +
              summarize(tokenBearing),
          );
        } else {
          console.log(
            `[forwarded-burst] One shared-context reply named all tokens ` +
              `(#${completeReply.messageId}). Coalescing healthy.`,
          );
        }
      } finally {
        await session.tearDown();
      }
    },
    ANSWER_TIMEOUT_MS + 20_000,
  );
});
@@ -46,6 +46,14 @@ export interface WorkerActivityView {
46
46
  toolCount: number
47
47
  /** The worker's latest narrative line, if any (already capped upstream). */
48
48
  latestSummary: string
49
+ /**
50
+ * Accumulated narrative lines, oldest→newest, already deduped + capped by
51
+ * the feed manager. When present and non-empty, the render grows a block
52
+ * of `↳` lines (mirroring the main agent's live answer) instead of
53
+ * collapsing to the single `latestSummary` line. Absent/empty → the
54
+ * single-line fallback (back-compat for direct render callers).
55
+ */
56
+ narrativeLines?: string[]
49
57
  /** Wall-clock since dispatch, ms. */
50
58
  elapsedMs: number
51
59
  state: WorkerActivityState
@@ -68,6 +76,13 @@ export interface BotApiForWorkerFeed {
68
76
  const DESC_MAX = 80
69
77
  const TOOL_ARG_MAX = 64
70
78
  const SUMMARY_MAX = 100
79
+ /**
80
+ * How many trailing narrative lines the live feed keeps visible. The feed
81
+ * grows like the main agent's answer but can't grow unbounded — Telegram
82
+ * caps message length and a wall of stale lines buries the live one. Six
83
+ * keeps recent context without dominating the chat.
84
+ */
85
+ const NARRATIVE_MAX_LINES = 6
71
86
 
72
87
  /**
73
88
  * Render the worker-activity message body as Telegram HTML.
@@ -105,10 +120,23 @@ export function renderWorkerActivity(v: WorkerActivityView): string {
105
120
  activity = `<i>starting… (${elapsed})</i>`
106
121
  }
107
122
 
108
- const summary = v.latestSummary.trim()
109
123
  const lines = [header, activity]
110
- if (summary.length > 0) {
111
- lines.push(` <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
124
+
125
+ // Growing narrative block when the manager has accumulated lines; the feed
126
+ // reads like the main agent's live answer rather than a single replaced
127
+ // status line. Fall back to the single latestSummary line otherwise.
128
+ const narrative = (v.narrativeLines ?? [])
129
+ .map((s) => s.trim())
130
+ .filter((s) => s.length > 0)
131
+ if (narrative.length > 0) {
132
+ for (const line of narrative) {
133
+ lines.push(` ↳ <i>${escapeHtml(truncate(line, SUMMARY_MAX))}</i>`)
134
+ }
135
+ } else {
136
+ const summary = v.latestSummary.trim()
137
+ if (summary.length > 0) {
138
+ lines.push(` ↳ <i>${escapeHtml(truncate(summary, SUMMARY_MAX))}</i>`)
139
+ }
112
140
  }
113
141
  return lines.join('\n')
114
142
  }
@@ -142,6 +170,12 @@ interface WorkerHandle {
142
170
  lastBody: string | null
143
171
  lastEditAt: number
144
172
  cooldownUntil: number
173
+ /**
174
+ * Accumulated narrative lines (oldest→newest), deduped against the
175
+ * immediately-preceding line and capped to NARRATIVE_MAX_LINES. Grows the
176
+ * live render so the feed reads like the main agent's answer.
177
+ */
178
+ narrative: string[]
145
179
  /** Per-worker serialization chain so ticks can't interleave sends. */
146
180
  chain: Promise<void>
147
181
  }
@@ -203,9 +237,24 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
203
237
  log(`worker-feed: ${label} 429 — backing off ${retryAfter}s`)
204
238
  }
205
239
 
240
/**
 * Folds the view's latest narrative line into the handle's rolling history.
 *
 * The trimmed `latestSummary` is appended to `h.narrative` unless it is
 * blank or identical to the line appended last. The array is mutated in
 * place and trimmed from the front so it never holds more than
 * NARRATIVE_MAX_LINES entries.
 */
function accumulateNarrative(h: WorkerHandle, view: WorkerActivityView): void {
  const incoming = view.latestSummary.trim()
  const previous = h.narrative[h.narrative.length - 1]
  // Nothing to add when the line is blank. A line equal to the previous one
  // is the watcher re-emitting the same narrative across ticks while a tool
  // runs — only grow on change.
  if (incoming.length === 0 || incoming === previous) return

  h.narrative.push(incoming)

  // Evict the oldest entries once the cap is exceeded.
  const overflow = h.narrative.length - NARRATIVE_MAX_LINES
  if (overflow > 0) {
    h.narrative.splice(0, overflow)
  }
}
251
+
206
252
  async function doUpdate(h: WorkerHandle, view: WorkerActivityView): Promise<void> {
253
+ // Accumulate before any gate so a throttled/cooled-down tick still grows
254
+ // the narrative — the line surfaces on the next edit that does fire.
255
+ accumulateNarrative(h, view)
207
256
  if (nowFn() < h.cooldownUntil) return
208
- const body = renderWorkerActivity(view)
257
+ const body = renderWorkerActivity({ ...view, narrativeLines: h.narrative })
209
258
 
210
259
  // First paint: hold off until the worker has run long enough to be
211
260
  // worth a message; trivial workers stay silent (handback covers them).
@@ -284,6 +333,7 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
284
333
  lastBody: null,
285
334
  lastEditAt: 0,
286
335
  cooldownUntil: 0,
336
+ narrative: [],
287
337
  chain: Promise.resolve(),
288
338
  }
289
339
  handles.set(agentId, h)