switchroom 0.14.19 → 0.14.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Unit tests for the A2 multi-attachment helpers
3
+ * (telegram-plugin/gateway/coalesce-attachments.ts).
4
+ *
5
+ * These pin the two pure pieces of the multi-attachment fold-in that live
6
+ * outside gateway.ts so they can be exercised without loadAccess()/IPC:
7
+ * 1. splitCoalescedAttachments — primary + capped extras, arrival order.
8
+ * 2. buildExtraAttachmentMeta — numbered meta fields starting at _2.
9
+ *
10
+ * The default cap (1) MUST reproduce the historical single-attachment shape:
11
+ * primary only, no extras, no numbered meta.
12
+ */
13
+
14
+ import { describe, expect, it } from 'vitest'
15
+ import {
16
+ splitCoalescedAttachments,
17
+ buildExtraAttachmentMeta,
18
+ type ResolvedExtraAttachment,
19
+ } from '../gateway/coalesce-attachments.js'
20
+
21
+ interface Entry { // fixture shape for the entries passed to splitCoalescedAttachments in this file
22
+ text: string
23
+ att?: string // attachment marker; omitted on text-only entries
24
+ }
25
+
26
+ const has = (e: Entry): boolean => e.att != null // predicate handed to splitCoalescedAttachments: true when `att` is neither null nor undefined
27
+
28
+ describe('splitCoalescedAttachments', () => {
29
+ it('default cap 1: keeps only the first attachment as primary, no extras', () => {
30
+ const entries: Entry[] = [
31
+ { text: 'a', att: 'photo-1' },
32
+ { text: 'b', att: 'photo-2' },
33
+ ]
34
+ const { primary, extras } = splitCoalescedAttachments(entries, has, 1)
35
+ expect(primary).toEqual({ text: 'a', att: 'photo-1' })
36
+ expect(extras).toEqual([])
37
+ })
38
+
39
+ it('picks the FIRST attachment-bearing entry as primary even when text-only entries precede it', () => {
40
+ const entries: Entry[] = [
41
+ { text: 'look' },
42
+ { text: 'at this', att: 'photo-1' },
43
+ { text: 'and this', att: 'photo-2' },
44
+ ]
45
+ const { primary, extras } = splitCoalescedAttachments(entries, has, 3)
46
+ expect(primary?.att).toBe('photo-1')
47
+ expect(extras.map((e) => e.att)).toEqual(['photo-2'])
48
+ })
49
+
50
+ it('preserves arrival order of extras', () => {
51
+ const entries: Entry[] = [
52
+ { text: '1', att: 'a' },
53
+ { text: '2', att: 'b' },
54
+ { text: '3', att: 'c' },
55
+ ]
56
+ const { primary, extras } = splitCoalescedAttachments(entries, has, 5)
57
+ expect(primary?.att).toBe('a')
58
+ expect(extras.map((e) => e.att)).toEqual(['b', 'c'])
59
+ })
60
+
61
+ it('caps extras at maxAttachments (overflow dropped here; bypassed upstream)', () => {
62
+ const entries: Entry[] = [
63
+ { text: '1', att: 'a' },
64
+ { text: '2', att: 'b' },
65
+ { text: '3', att: 'c' },
66
+ { text: '4', att: 'd' },
67
+ ]
68
+ const { primary, extras } = splitCoalescedAttachments(entries, has, 2)
69
+ expect(primary?.att).toBe('a')
70
+ expect(extras.map((e) => e.att)).toEqual(['b']) // total = cap of 2
71
+ })
72
+
73
+ it('returns undefined primary when no entry carries an attachment', () => {
74
+ const entries: Entry[] = [{ text: 'just' }, { text: 'text' }]
75
+ const { primary, extras } = splitCoalescedAttachments(entries, has, 3)
76
+ expect(primary).toBeUndefined()
77
+ expect(extras).toEqual([])
78
+ })
79
+
80
+ it('floors a cap of 0 / negative at 1 so the only attachment is never stripped', () => { // a non-positive cap must still yield the first attachment as primary
81
+ const entries: Entry[] = [{ text: '1', att: 'a' }, { text: '2', att: 'b' }]
82
+ expect(splitCoalescedAttachments(entries, has, 0).primary?.att).toBe('a')
83
+ expect(splitCoalescedAttachments(entries, has, -5).primary?.att).toBe('a')
84
+ expect(splitCoalescedAttachments(entries, has, 0).extras).toEqual([]) // NOTE(review): extras are asserted for cap 0 only; the -5 case checks primary alone
85
+ })
86
+ })
87
+
88
+ describe('buildExtraAttachmentMeta', () => {
89
+ it('returns an empty object for no extras (default single-attachment turn)', () => {
90
+ expect(buildExtraAttachmentMeta([])).toEqual({})
91
+ })
92
+
93
+ it('numbers a single photo extra as _2', () => {
94
+ const resolved: ResolvedExtraAttachment[] = [{ imagePath: '/inbox/p2.jpg' }]
95
+ expect(buildExtraAttachmentMeta(resolved)).toEqual({ image_path_2: '/inbox/p2.jpg' })
96
+ })
97
+
98
+ it('numbers multiple extras incrementally from _2', () => {
99
+ const resolved: ResolvedExtraAttachment[] = [
100
+ { imagePath: '/inbox/p2.jpg' },
101
+ { imagePath: '/inbox/p3.jpg' },
102
+ ]
103
+ expect(buildExtraAttachmentMeta(resolved)).toEqual({
104
+ image_path_2: '/inbox/p2.jpg',
105
+ image_path_3: '/inbox/p3.jpg',
106
+ })
107
+ })
108
+
109
+ it('emits full attachment metadata fields for a document extra', () => {
110
+ const resolved: ResolvedExtraAttachment[] = [
111
+ {
112
+ attachment: {
113
+ kind: 'document',
114
+ file_id: 'FID2',
115
+ size: 1234,
116
+ mime: 'application/pdf',
117
+ name: 'spec.pdf',
118
+ },
119
+ },
120
+ ]
121
+ expect(buildExtraAttachmentMeta(resolved)).toEqual({
122
+ attachment_kind_2: 'document',
123
+ attachment_file_id_2: 'FID2',
124
+ attachment_size_2: '1234',
125
+ attachment_mime_2: 'application/pdf',
126
+ attachment_name_2: 'spec.pdf',
127
+ })
128
+ })
129
+
130
+ it('omits optional metadata fields that are absent', () => {
131
+ const resolved: ResolvedExtraAttachment[] = [
132
+ { attachment: { kind: 'voice', file_id: 'FID2' } },
133
+ ]
134
+ expect(buildExtraAttachmentMeta(resolved)).toEqual({
135
+ attachment_kind_2: 'voice',
136
+ attachment_file_id_2: 'FID2',
137
+ })
138
+ })
139
+
140
+ it('handles a mix of photo and document extras with correct numbering', () => {
141
+ const resolved: ResolvedExtraAttachment[] = [
142
+ { imagePath: '/inbox/p2.jpg' },
143
+ { attachment: { kind: 'document', file_id: 'FID3', mime: 'text/plain' } },
144
+ ]
145
+ expect(buildExtraAttachmentMeta(resolved)).toEqual({
146
+ image_path_2: '/inbox/p2.jpg',
147
+ attachment_kind_3: 'document',
148
+ attachment_file_id_3: 'FID3',
149
+ attachment_mime_3: 'text/plain',
150
+ })
151
+ })
152
+ })
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Unit tests for the deferred safe-boundary interrupt core (Problem B).
3
+ *
4
+ * The gateway-side wiring (timer, SIGINT-via-tmux, sendToAgent resume,
5
+ * coalescing) is exercised by integration; these pin the pure decision:
6
+ * - ToolFlightTracker correctly tracks open tool calls by toolUseId and
7
+ * clears on turn_end / a fresh enqueue.
8
+ * - decideInterruptTiming returns fire-now unless the flag is on AND a tool
9
+ * is in flight.
10
+ * - resolveInterruptMaxWaitMs never yields a non-positive / forever wait.
11
+ */
12
+
13
+ import { describe, it, expect } from 'vitest'
14
+ import {
15
+ ToolFlightTracker,
16
+ decideInterruptTiming,
17
+ resolveInterruptMaxWaitMs,
18
+ DEFAULT_INTERRUPT_MAX_WAIT_MS,
19
+ } from '../gateway/interrupt-defer.js'
20
+
21
+ describe('ToolFlightTracker', () => {
22
+ it('starts at a safe boundary (no tools in flight)', () => {
23
+ const t = new ToolFlightTracker()
24
+ expect(t.isMidToolCall()).toBe(false)
25
+ expect(t.inFlightCount()).toBe(0)
26
+ })
27
+
28
+ it('a tool_use opens an unsafe boundary; its tool_result closes it', () => {
29
+ const t = new ToolFlightTracker()
30
+ t.onEvent({ kind: 'tool_use', toolUseId: 'tu_1' })
31
+ expect(t.isMidToolCall()).toBe(true)
32
+ t.onEvent({ kind: 'tool_result', toolUseId: 'tu_1' })
33
+ expect(t.isMidToolCall()).toBe(false)
34
+ })
35
+
36
+ it('stays unsafe while ANY of several parallel tools is open', () => {
37
+ const t = new ToolFlightTracker()
38
+ t.onEvent({ kind: 'tool_use', toolUseId: 'a' })
39
+ t.onEvent({ kind: 'tool_use', toolUseId: 'b' })
40
+ t.onEvent({ kind: 'tool_use', toolUseId: 'c' })
41
+ expect(t.inFlightCount()).toBe(3)
42
+ t.onEvent({ kind: 'tool_result', toolUseId: 'b' })
43
+ t.onEvent({ kind: 'tool_result', toolUseId: 'a' })
44
+ expect(t.isMidToolCall()).toBe(true) // c still open
45
+ t.onEvent({ kind: 'tool_result', toolUseId: 'c' })
46
+ expect(t.isMidToolCall()).toBe(false)
47
+ })
48
+
49
+ it('turn_end clears any residual in-flight tools', () => {
50
+ const t = new ToolFlightTracker()
51
+ t.onEvent({ kind: 'tool_use', toolUseId: 'tu_1' })
52
+ t.onEvent({ kind: 'tool_use', toolUseId: 'tu_2' })
53
+ t.onEvent({ kind: 'turn_end' })
54
+ expect(t.isMidToolCall()).toBe(false)
55
+ })
56
+
57
+ it('a fresh enqueue clears the slate (new turn starts clean)', () => {
58
+ const t = new ToolFlightTracker()
59
+ t.onEvent({ kind: 'tool_use', toolUseId: 'tu_1' })
60
+ t.onEvent({ kind: 'enqueue' })
61
+ expect(t.isMidToolCall()).toBe(false)
62
+ })
63
+
64
+ it('ignores sub-agent and non-tool events', () => {
65
+ const t = new ToolFlightTracker()
66
+ t.onEvent({ kind: 'sub_agent_tool_use', toolUseId: 'sub_1' })
67
+ t.onEvent({ kind: 'thinking' })
68
+ t.onEvent({ kind: 'text' })
69
+ t.onEvent({ kind: 'tool_label', toolUseId: 'tu_x' })
70
+ expect(t.isMidToolCall()).toBe(false)
71
+ })
72
+
73
+ it('ignores tool_use with a missing / empty toolUseId', () => {
74
+ const t = new ToolFlightTracker()
75
+ t.onEvent({ kind: 'tool_use' })
76
+ t.onEvent({ kind: 'tool_use', toolUseId: null })
77
+ t.onEvent({ kind: 'tool_use', toolUseId: '' })
78
+ expect(t.isMidToolCall()).toBe(false)
79
+ })
80
+
81
+ it('tool_result for an unknown id is a harmless no-op', () => {
82
+ const t = new ToolFlightTracker()
83
+ t.onEvent({ kind: 'tool_use', toolUseId: 'real' })
84
+ t.onEvent({ kind: 'tool_result', toolUseId: 'never-opened' })
85
+ expect(t.isMidToolCall()).toBe(true) // 'real' still open
86
+ })
87
+
88
+ it('clear() resets the tracker', () => {
89
+ const t = new ToolFlightTracker()
90
+ t.onEvent({ kind: 'tool_use', toolUseId: 'tu_1' })
91
+ t.clear()
92
+ expect(t.isMidToolCall()).toBe(false)
93
+ })
94
+ })
95
+
96
+ describe('decideInterruptTiming', () => {
97
+ it('fires now when the flag is off, even mid-tool-call', () => {
98
+ expect(
99
+ decideInterruptTiming({ safeBoundaryEnabled: false, midToolCall: true }),
100
+ ).toBe('fire-now')
101
+ })
102
+
103
+ it('fires now when the flag is on but no tool is in flight', () => {
104
+ expect(
105
+ decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: false }),
106
+ ).toBe('fire-now')
107
+ })
108
+
109
+ it('defers only when the flag is on AND a tool is in flight', () => {
110
+ expect(
111
+ decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: true }),
112
+ ).toBe('defer')
113
+ })
114
+
115
+ it('fires now in the fully-off case', () => {
116
+ expect(
117
+ decideInterruptTiming({ safeBoundaryEnabled: false, midToolCall: false }),
118
+ ).toBe('fire-now')
119
+ })
120
+ })
121
+
122
+ describe('resolveInterruptMaxWaitMs', () => {
123
+ it('uses the configured value when positive', () => {
124
+ expect(resolveInterruptMaxWaitMs(3000)).toBe(3000)
125
+ })
126
+
127
+ it('falls back to the default when undefined', () => {
128
+ expect(resolveInterruptMaxWaitMs(undefined)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
129
+ })
130
+
131
+ it('never returns a non-positive wait (no forever-wait)', () => {
132
+ expect(resolveInterruptMaxWaitMs(0)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
133
+ expect(resolveInterruptMaxWaitMs(-1)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
134
+ })
135
+
136
+ it('models the lifecycle: open tool → defer; tool_result → safe → fire', () => { // NOTE(review): combines ToolFlightTracker + decideInterruptTiming and never calls resolveInterruptMaxWaitMs, yet sits in that describe — consider relocating
137
+ const t = new ToolFlightTracker()
138
+ t.onEvent({ kind: 'tool_use', toolUseId: 'w1' })
139
+ // `!` lands here, flag on:
140
+ expect(
141
+ decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: t.isMidToolCall() }),
142
+ ).toBe('defer')
143
+ // tool completes:
144
+ t.onEvent({ kind: 'tool_result', toolUseId: 'w1' })
145
+ expect(t.isMidToolCall()).toBe(false) // gateway fires the parked interrupt here (the "fire" step is implied by the safe boundary; decideInterruptTiming is not re-invoked)
146
+ })
147
+ })
@@ -450,6 +450,42 @@ describe('planBufferedRedelivery — merge-on-drain (forwarded-burst across a tu
450
450
  expect(plan[0]!.merged.imagePath).toBe('/tmp/p.jpg')
451
451
  })
452
452
 
453
+ it('splices attachment meta from the media entry when it is NOT the anchor (A2 numbered fields survive)', () => {
454
+ // A coalesced multi-attachment message buffered, then a text-only
455
+ // follow-up. mergeRun anchors on `last` (the text), whose meta has no
456
+ // attachment fields — so the owning entry's image_path + numbered
457
+ // siblings + attachment_count must be spliced into the merged meta or
458
+ // the agent would never see the photos.
459
+ const photo = userMsg({ text: 'look', ts: 1, imagePath: '/tmp/a.jpg' })
460
+ photo.meta = {
461
+ image_path: '/tmp/a.jpg',
462
+ image_path_2: '/tmp/b.jpg',
463
+ attachment_count: '2',
464
+ user: 'alice',
465
+ }
466
+ const txt = userMsg({ text: 'at these', ts: 2 })
467
+ txt.meta = { user: 'alice' }
468
+ const plan = planBufferedRedelivery([photo, txt])
469
+ expect(plan).toHaveLength(1)
470
+ const meta = plan[0]!.merged.meta
471
+ expect(meta.image_path).toBe('/tmp/a.jpg')
472
+ expect(meta.image_path_2).toBe('/tmp/b.jpg')
473
+ expect(meta.attachment_count).toBe('2')
474
+ // Top-level primary still re-seated for inboundHasMedia detection.
475
+ expect(plan[0]!.merged.imagePath).toBe('/tmp/a.jpg')
476
+ })
477
+
478
+ it('does not need a meta splice when the media entry IS the anchor', () => {
479
+ const txt = userMsg({ text: 'intro', ts: 1 })
480
+ txt.meta = { user: 'alice' }
481
+ const photo = userMsg({ text: 'pic', ts: 2, imagePath: '/tmp/p.jpg' })
482
+ photo.meta = { image_path: '/tmp/p.jpg', user: 'alice' }
483
+ const plan = planBufferedRedelivery([txt, photo])
484
+ expect(plan).toHaveLength(1)
485
+ // Anchor is the photo, so its meta is inherited verbatim.
486
+ expect(plan[0]!.merged.meta.image_path).toBe('/tmp/p.jpg')
487
+ })
488
+
453
489
  it('preserves the run total — sum of originals equals input length (lossless)', () => {
454
490
  const msgs = [
455
491
  userMsg({ text: 'a', ts: 1 }),
@@ -92,6 +92,42 @@ describe('renderWorkerActivity', () => {
92
92
  expect(out).toContain('⚠️ <b>Worker failed</b>')
93
93
  })
94
94
 
95
+ it('grows a narrative block when narrativeLines is present', () => {
96
+ const out = renderWorkerActivity(
97
+ view({
98
+ latestSummary: 'newest only — should be ignored',
99
+ narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
100
+ }),
101
+ )
102
+ expect(out).toContain('↳ <i>read the brief</i>')
103
+ expect(out).toContain('↳ <i>scanned vendor A</i>')
104
+ expect(out).toContain('↳ <i>scanned vendor B</i>')
105
+ // The single-line latestSummary fallback is NOT used when a block is present.
106
+ expect(out).not.toContain('newest only')
107
+ // Three narrative lines → three ↳ lines.
108
+ expect(out.match(/↳/g) ?? []).toHaveLength(3)
109
+ })
110
+
111
+ it('falls back to latestSummary when narrativeLines is empty', () => {
112
+ const out = renderWorkerActivity(view({ narrativeLines: [], latestSummary: 'one line' }))
113
+ expect(out).toContain('↳ <i>one line</i>')
114
+ expect(out.match(/↳/g) ?? []).toHaveLength(1)
115
+ })
116
+
117
+ it('drops blank narrative lines from the block', () => {
118
+ const out = renderWorkerActivity(
119
+ view({ narrativeLines: ['kept', ' ', 'also kept'] }),
120
+ )
121
+ expect(out).toContain('↳ <i>kept</i>')
122
+ expect(out).toContain('↳ <i>also kept</i>')
123
+ expect(out.match(/↳/g) ?? []).toHaveLength(2)
124
+ })
125
+
126
+ it('escapes HTML inside narrative lines', () => {
127
+ const out = renderWorkerActivity(view({ narrativeLines: ['a <b>x</b> & y'] }))
128
+ expect(out).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
129
+ })
130
+
95
131
  it('escapes HTML in description, tool, arg, and summary', () => {
96
132
  const out = renderWorkerActivity(
97
133
  view({
@@ -246,6 +282,83 @@ describe('createWorkerActivityFeed', () => {
246
282
  expect(feed.size).toBe(0)
247
283
  })
248
284
 
285
+ it('accumulates distinct narrative lines into a growing block across ticks', async () => {
286
+ const bot = makeFakeBot()
287
+ let clock = 10_000
288
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
289
+
290
+ await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
291
+ expect(bot.sent).toHaveLength(1)
292
+ expect(bot.sent[0].text).toContain('↳ <i>read the brief</i>')
293
+
294
+ clock = 11_000
295
+ await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
296
+ clock = 12_000
297
+ await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))
298
+
299
+ const last = bot.edits.at(-1)!
300
+ expect(last.text).toContain('↳ <i>read the brief</i>')
301
+ expect(last.text).toContain('↳ <i>scanned vendor A</i>')
302
+ expect(last.text).toContain('↳ <i>scanned vendor B</i>')
303
+ expect(last.text.match(/↳/g) ?? []).toHaveLength(3)
304
+ })
305
+
306
+ it('dedups a repeated narrative line so the block does not duplicate', async () => {
307
+ const bot = makeFakeBot()
308
+ let clock = 10_000
309
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
310
+
311
+ await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'same line' }))
312
+ // Repeated narrative but a changed tool count → body differs, edit fires,
313
+ // but the narrative block must not gain a duplicate line.
314
+ clock = 11_000
315
+ await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))
316
+
317
+ const last = bot.edits.at(-1)!
318
+ expect(last.text.match(/↳/g) ?? []).toHaveLength(1)
319
+ })
320
+
321
+ it('caps the narrative block to the last 6 lines', async () => { // feeds 9 distinct summaries to one worker and inspects the final edit
322
+ const bot = makeFakeBot()
323
+ let clock = 10_000
324
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 0 })
325
+
326
+ for (let i = 1; i <= 9; i++) { // one update per summary, fake clock advanced 1s each time
327
+ clock += 1000
328
+ await feed.update('w1', 'chat', view({ toolCount: i, latestSummary: `line ${i}` }))
329
+ }
330
+
331
+ const last = bot.edits.at(-1)!
332
+ expect(last.text.match(/↳/g) ?? []).toHaveLength(6)
333
+ // Oldest lines evicted; newest retained. Nine fed with a cap of 6, so lines 1–3 go and 4–9 stay (NOTE(review): 'line 2' is not asserted explicitly).
334
+ expect(last.text).not.toContain('line 1')
335
+ expect(last.text).not.toContain('line 3')
336
+ expect(last.text).toContain('line 4')
337
+ expect(last.text).toContain('line 9')
338
+ })
339
+
340
+ it('grows the narrative even while throttled (line surfaces on next edit)', async () => {
341
+ const bot = makeFakeBot()
342
+ let clock = 10_000
343
+ const feed = createWorkerActivityFeed({ bot, now: () => clock, minEditIntervalMs: 2500 })
344
+
345
+ await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'line A' }))
346
+ expect(bot.sent).toHaveLength(1)
347
+
348
+ // Throttled tick — no edit, but the line must still be accumulated.
349
+ clock = 11_000
350
+ await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'line B' }))
351
+ expect(bot.edits).toHaveLength(0)
352
+
353
+ // Past the throttle — the edit now carries BOTH lines.
354
+ clock = 13_000
355
+ await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
356
+ const last = bot.edits.at(-1)!
357
+ expect(last.text).toContain('↳ <i>line A</i>')
358
+ expect(last.text).toContain('↳ <i>line B</i>')
359
+ expect(last.text).toContain('↳ <i>line C</i>')
360
+ })
361
+
249
362
  it('forwards threadId as message_thread_id on send', async () => {
250
363
  const bot = makeFakeBot()
251
364
  let clock = 10_000
@@ -0,0 +1,158 @@
1
+ /**
2
+ * JTBD scenario — forwarded burst / split paste coalesces into ONE turn.
3
+ *
4
+ * Serves: `reference/steer-or-queue-mid-flight.md` — the "Forwarded
5
+ * burst / split paste" UAT prompt. When several messages land in quick
6
+ * succession from the same sender (a forward of 3-4 messages, or a long
7
+ * paste Telegram split into chunks), inbound coalescing must merge them
8
+ * into a SINGLE Claude turn with shared context — not reply to each
9
+ * fragment in isolation.
10
+ *
11
+ * This is the end-to-end gate for the A1 coalescing work shipped in
12
+ * v0.14.18 (#2007). The merge logic itself is covered by unit + fuzz
13
+ * tests (`inbound-coalesce.test.ts`, `pending-inbound-buffer.test.ts`),
14
+ * but only this scenario exercises the real inbound → gateway coalescer
15
+ * → claude → outbound path over a live Telegram chat.
16
+ *
17
+ * ## How the signal is constructed
18
+ *
19
+ * Naively asking the agent to "combine facts from several messages"
20
+ * does NOT distinguish coalesced from fanned-out: even when each
21
+ * message becomes its own turn, every later turn carries the PRIOR
22
+ * turns in its conversation history, so the model could still answer
23
+ * from history. History bleed makes a content-combination assertion
24
+ * useless as a coalescing probe.
25
+ *
26
+ * The distinguishing fact is whether a SINGLE turn saw all the parts in
27
+ * ONE incoming message. So we anchor the instruction on "this single
28
+ * message": three messages are fired near-simultaneously (so they land
29
+ * inside the default 500ms coalesce window), each carrying a distinct
30
+ * code token, and the instruction (in the last part) asks the agent to
31
+ * echo every token it received *in this one incoming message*.
32
+ *
33
+ * - Coalesced → the merged turn's single message contains ALPHA,
34
+ * BRAVO and CHARLIE → the reply names all three.
35
+ * - Fanned out → the message carrying the instruction contains only
36
+ * its own token → the reply names just that one. (And the other
37
+ * two tokens arrive as their own separate turns.)
38
+ *
39
+ * Tokens are deliberately odd uppercase strings so a substring match is
40
+ * unambiguous and won't collide with incidental words in the reply.
41
+ *
42
+ * Order-independence: the three sends are dispatched concurrently to
43
+ * guarantee they share a coalesce window, which means Telegram may
44
+ * deliver them in any order. We assert SET membership (all three
45
+ * present), never order.
46
+ */
47
+
48
+ import { describe, it, expect } from "vitest";
49
+ import { spinUp } from "../harness.js";
50
+ import { pollUntil } from "../assertions.js";
51
+ import type { ObservedMessage } from "../driver.js";
52
+
53
+ const AGENT = "test-harness";
54
+
55
+ // Distinctive code tokens — unlikely to appear incidentally in a reply.
56
+ const TOKENS = ["ALPHA", "BRAVO", "CHARLIE"] as const;
57
+
58
+ const BURST: string[] = [
59
+ `BURST-PROBE part 1 of 3. Code token: ${TOKENS[0]}.`,
60
+ `BURST-PROBE part 2 of 3. Code token: ${TOKENS[1]}.`,
61
+ `BURST-PROBE part 3 of 3. Code token: ${TOKENS[2]}. ` +
62
+ `Reply with ONLY the BURST-PROBE code tokens contained in THIS SINGLE ` +
63
+ `incoming message, slash-separated. If this message contains just one ` +
64
+ `token, reply with only that token.`,
65
+ ];
66
+
67
+ // Generous budget: TTFO on the test-harness is ~7s warm; coalescing
68
+ // adds the (sub-second) window plus normal model latency.
69
+ const ANSWER_TIMEOUT_MS = 40_000;
70
+
71
+ function tokensIn(text: string): string[] { // returns the TOKENS entries found in `text`, in TOKENS order
72
+ const upper = text.toUpperCase(); // case-insensitive: "alpha" counts as ALPHA
73
+ return TOKENS.filter((t) => upper.includes(t)); // plain substring test, no word boundaries (e.g. "ALPHABET" would count as ALPHA)
74
+ }
75
+
76
+ describe("uat: forwarded burst / split paste coalesces into one turn", () => { // sends BURST concurrently and passes only if ONE observed bot message names every token
77
+ it(
78
+ "a 3-message burst is answered as ONE shared-context turn",
79
+ async () => {
80
+ const sc = await spinUp({ agent: AGENT });
81
+ try {
82
+ // Start observing BEFORE the burst — observeMessages only sees
83
+ // live updates, not history. Drain into a per-message-id map so
84
+ // streamed edits collapse to the message's latest text.
85
+ const latestById = new Map<number, ObservedMessage>();
86
+ const stream = sc.driver.observeMessages(sc.botUserId);
87
+ const consume = (async () => {
88
+ for await (const m of stream) {
89
+ if (m.senderUserId === sc.driverUserId) continue; // skip our own sends
90
+ latestById.set(m.messageId, m);
91
+ }
92
+ })();
93
+
94
+ // Fire all three concurrently so they share one coalesce window.
95
+ // Concurrency (not serial awaits) is what keeps inter-arrival
96
+ // under the 500ms default — three serial round-trips could blow
97
+ // the window on a slow link.
98
+ await Promise.all(BURST.map((t) => sc.sendDM(t)));
99
+
100
+ // Wait until a single bot message names all three tokens — the
101
+ // proof that one turn saw the whole burst in one incoming
102
+ // message.
103
+ const allThree = await pollUntil(
104
+ () => {
105
+ for (const m of latestById.values()) {
106
+ if (tokensIn(m.text).length === TOKENS.length) return m;
107
+ }
108
+ return undefined;
109
+ },
110
+ { timeout: ANSWER_TIMEOUT_MS, interval: 500 },
111
+ ).catch(() => undefined); // swallow the rejection (presumably pollUntil's timeout — confirm) so the descriptive error below is thrown instead
112
+
113
+ // Close the observer stream. NOTE(review): only reached when sendDM resolves; if sendDM rejects, control jumps to `finally` and this stream is not explicitly closed — confirm tearDown() ends it.
114
+ await stream[Symbol.asyncIterator]().return?.(undefined as never);
115
+ await consume;
116
+
117
+ const botMsgs = [...latestById.values()];
118
+ const tokenBearing = botMsgs.filter((m) => tokensIn(m.text).length > 0);
119
+
120
+ if (!allThree) { // reached whenever the poll yielded nothing — includes a silent or slow bot, not only a fan-out
121
+ const seen = tokenBearing
122
+ .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
123
+ .join(" ");
124
+ throw new Error(
125
+ `[forwarded-burst] No single bot reply named all of ` +
126
+ `${TOKENS.join("/")}. This is the coalescing regression: the ` +
127
+ `burst fanned out into separate turns so no turn saw the full ` +
128
+ `message. Token-bearing replies: ${seen || "(none)"}.`,
129
+ );
130
+ }
131
+
132
+ expect(tokensIn(allThree.text).sort()).toEqual([...TOKENS].sort()); // both sides sorted: asserts set membership, not order
133
+
134
+ // Forensic: a coalesced burst should produce ONE answer that
135
+ // names the tokens. Several token-bearing replies hint at a
136
+ // partial fan-out even though one of them happened to be
137
+ // complete — worth a warning before it becomes a hard failure.
138
+ if (tokenBearing.length > 1) {
139
+ console.warn(
140
+ `[forwarded-burst] ${tokenBearing.length} token-bearing replies ` +
141
+ `observed (expected 1). Possible partial fan-out: ` +
142
+ tokenBearing
143
+ .map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`)
144
+ .join(" "),
145
+ );
146
+ } else {
147
+ console.log(
148
+ `[forwarded-burst] One shared-context reply named all tokens ` +
149
+ `(#${allThree.messageId}). Coalescing healthy.`,
150
+ );
151
+ }
152
+ } finally {
153
+ await sc.tearDown();
154
+ }
155
+ },
156
+ ANSWER_TIMEOUT_MS + 20_000, // per-test timeout: poll budget plus 20s headroom
157
+ );
158
+ });