switchroom 0.14.19 → 0.14.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +6 -1
- package/dist/auth-broker/index.js +6 -1
- package/dist/cli/notion-write-pretool.mjs +6 -1
- package/dist/cli/switchroom.js +17 -3
- package/dist/host-control/main.js +6 -1
- package/dist/vault/approvals/kernel-server.js +6 -1
- package/dist/vault/broker/server.js +6 -1
- package/package.json +1 -1
- package/telegram-plugin/README.md +7 -3
- package/telegram-plugin/bridge/bridge.ts +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +357 -152
- package/telegram-plugin/dist/server.js +1 -1
- package/telegram-plugin/gateway/coalesce-attachments.ts +70 -0
- package/telegram-plugin/gateway/gateway.ts +246 -36
- package/telegram-plugin/gateway/interrupt-defer.ts +100 -0
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +21 -4
- package/telegram-plugin/tests/coalesce-attachments.test.ts +152 -0
- package/telegram-plugin/tests/interrupt-defer.test.ts +147 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +36 -0
- package/telegram-plugin/tests/worker-activity-feed.test.ts +113 -0
- package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +158 -0
- package/telegram-plugin/worker-activity-feed.ts +54 -4
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the A2 multi-attachment helpers
|
|
3
|
+
* (telegram-plugin/gateway/coalesce-attachments.ts).
|
|
4
|
+
*
|
|
5
|
+
* These pin the two pure pieces of the multi-attachment fold-in that live
|
|
6
|
+
* outside gateway.ts so they can be exercised without loadAccess()/IPC:
|
|
7
|
+
* 1. splitCoalescedAttachments — primary + capped extras, arrival order.
|
|
8
|
+
* 2. buildExtraAttachmentMeta — numbered meta fields starting at _2.
|
|
9
|
+
*
|
|
10
|
+
* The default cap (1) MUST reproduce the historical single-attachment shape:
|
|
11
|
+
* primary only, no extras, no numbered meta.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, expect, it } from 'vitest'
|
|
15
|
+
import {
|
|
16
|
+
splitCoalescedAttachments,
|
|
17
|
+
buildExtraAttachmentMeta,
|
|
18
|
+
type ResolvedExtraAttachment,
|
|
19
|
+
} from '../gateway/coalesce-attachments.js'
|
|
20
|
+
|
|
21
|
+
/**
 * Minimal stand-in for a coalesced inbound entry used by these tests.
 * Only the two fields the helpers under test care about are modelled.
 */
interface Entry {
  /** Message text of the entry. */
  text: string
  /** Attachment identifier; absent for text-only entries. */
  att?: string
}
|
|
25
|
+
|
|
26
|
+
/** Predicate passed to splitCoalescedAttachments: true when the entry carries an attachment. */
const has = (e: Entry): boolean => !(e.att == null)
|
|
27
|
+
|
|
28
|
+
// Suite for splitCoalescedAttachments: the first attachment-bearing entry is
// the primary, later ones become extras in arrival order, and the total
// (primary + extras) is bounded by the cap argument.
describe('splitCoalescedAttachments', () => {
  /** Attachment ids of the given entries, in order. */
  const attsOf = (list: readonly Entry[]): Array<string | undefined> =>
    list.map((entry) => entry.att)

  it('default cap 1: keeps only the first attachment as primary, no extras', () => {
    const burst: Entry[] = [
      { text: 'a', att: 'photo-1' },
      { text: 'b', att: 'photo-2' },
    ]
    const split = splitCoalescedAttachments(burst, has, 1)
    expect(split.primary).toEqual({ text: 'a', att: 'photo-1' })
    expect(split.extras).toEqual([])
  })

  it('picks the FIRST attachment-bearing entry as primary even when text-only entries precede it', () => {
    const burst: Entry[] = [
      { text: 'look' },
      { text: 'at this', att: 'photo-1' },
      { text: 'and this', att: 'photo-2' },
    ]
    const split = splitCoalescedAttachments(burst, has, 3)
    expect(split.primary?.att).toBe('photo-1')
    expect(attsOf(split.extras)).toEqual(['photo-2'])
  })

  it('preserves arrival order of extras', () => {
    const burst: Entry[] = [
      { text: '1', att: 'a' },
      { text: '2', att: 'b' },
      { text: '3', att: 'c' },
    ]
    const split = splitCoalescedAttachments(burst, has, 5)
    expect(split.primary?.att).toBe('a')
    expect(attsOf(split.extras)).toEqual(['b', 'c'])
  })

  it('caps extras at maxAttachments (overflow dropped here; bypassed upstream)', () => {
    const burst: Entry[] = [
      { text: '1', att: 'a' },
      { text: '2', att: 'b' },
      { text: '3', att: 'c' },
      { text: '4', att: 'd' },
    ]
    const split = splitCoalescedAttachments(burst, has, 2)
    expect(split.primary?.att).toBe('a')
    // One primary plus one extra reaches the cap of 2.
    expect(attsOf(split.extras)).toEqual(['b'])
  })

  it('returns undefined primary when no entry carries an attachment', () => {
    const textOnly: Entry[] = [{ text: 'just' }, { text: 'text' }]
    const split = splitCoalescedAttachments(textOnly, has, 3)
    expect(split.primary).toBeUndefined()
    expect(split.extras).toEqual([])
  })

  it('floors a cap of 0 / negative at 1 so the only attachment is never stripped', () => {
    const burst: Entry[] = [{ text: '1', att: 'a' }, { text: '2', att: 'b' }]
    const withZeroCap = splitCoalescedAttachments(burst, has, 0)
    const withNegativeCap = splitCoalescedAttachments(burst, has, -5)
    expect(withZeroCap.primary?.att).toBe('a')
    expect(withNegativeCap.primary?.att).toBe('a')
    expect(splitCoalescedAttachments(burst, has, 0).extras).toEqual([])
  })
})
|
|
87
|
+
|
|
88
|
+
// Suite for buildExtraAttachmentMeta: extras are rendered as numbered meta
// fields, with the first extra using suffix _2 (the primary is unsuffixed).
describe('buildExtraAttachmentMeta', () => {
  it('returns an empty object for no extras (default single-attachment turn)', () => {
    const meta = buildExtraAttachmentMeta([])
    expect(meta).toEqual({})
  })

  it('numbers a single photo extra as _2', () => {
    const extras: ResolvedExtraAttachment[] = [{ imagePath: '/inbox/p2.jpg' }]
    const meta = buildExtraAttachmentMeta(extras)
    expect(meta).toEqual({ image_path_2: '/inbox/p2.jpg' })
  })

  it('numbers multiple extras incrementally from _2', () => {
    const extras: ResolvedExtraAttachment[] = [
      { imagePath: '/inbox/p2.jpg' },
      { imagePath: '/inbox/p3.jpg' },
    ]
    const expected = {
      image_path_2: '/inbox/p2.jpg',
      image_path_3: '/inbox/p3.jpg',
    }
    expect(buildExtraAttachmentMeta(extras)).toEqual(expected)
  })

  it('emits full attachment metadata fields for a document extra', () => {
    const extras: ResolvedExtraAttachment[] = [
      {
        attachment: {
          kind: 'document',
          file_id: 'FID2',
          size: 1234,
          mime: 'application/pdf',
          name: 'spec.pdf',
        },
      },
    ]
    // Note the numeric size is expected back as a string.
    const expected = {
      attachment_kind_2: 'document',
      attachment_file_id_2: 'FID2',
      attachment_size_2: '1234',
      attachment_mime_2: 'application/pdf',
      attachment_name_2: 'spec.pdf',
    }
    expect(buildExtraAttachmentMeta(extras)).toEqual(expected)
  })

  it('omits optional metadata fields that are absent', () => {
    const extras: ResolvedExtraAttachment[] = [
      { attachment: { kind: 'voice', file_id: 'FID2' } },
    ]
    const expected = {
      attachment_kind_2: 'voice',
      attachment_file_id_2: 'FID2',
    }
    expect(buildExtraAttachmentMeta(extras)).toEqual(expected)
  })

  it('handles a mix of photo and document extras with correct numbering', () => {
    const extras: ResolvedExtraAttachment[] = [
      { imagePath: '/inbox/p2.jpg' },
      { attachment: { kind: 'document', file_id: 'FID3', mime: 'text/plain' } },
    ]
    const expected = {
      image_path_2: '/inbox/p2.jpg',
      attachment_kind_3: 'document',
      attachment_file_id_3: 'FID3',
      attachment_mime_3: 'text/plain',
    }
    expect(buildExtraAttachmentMeta(extras)).toEqual(expected)
  })
})
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the deferred safe-boundary interrupt core (Problem B).
|
|
3
|
+
*
|
|
4
|
+
* The gateway-side wiring (timer, SIGINT-via-tmux, sendToAgent resume,
|
|
5
|
+
* coalescing) is exercised by integration; these pin the pure decision:
|
|
6
|
+
* - ToolFlightTracker correctly tracks open tool calls by toolUseId and
|
|
7
|
+
* clears on turn_end / a fresh enqueue.
|
|
8
|
+
* - decideInterruptTiming returns fire-now unless the flag is on AND a tool
|
|
9
|
+
* is in flight.
|
|
10
|
+
* - resolveInterruptMaxWaitMs never yields a non-positive / forever wait.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect } from 'vitest'
|
|
14
|
+
import {
|
|
15
|
+
ToolFlightTracker,
|
|
16
|
+
decideInterruptTiming,
|
|
17
|
+
resolveInterruptMaxWaitMs,
|
|
18
|
+
DEFAULT_INTERRUPT_MAX_WAIT_MS,
|
|
19
|
+
} from '../gateway/interrupt-defer.js'
|
|
20
|
+
|
|
21
|
+
// Suite for ToolFlightTracker: tool_use opens a tool call keyed by toolUseId,
// the matching tool_result closes it, and turn_end / enqueue / clear() wipe
// everything that is still open.
describe('ToolFlightTracker', () => {
  /** Feeds a tool_use event for `id` into the tracker. */
  const openTool = (tracker: ToolFlightTracker, id: string): void => {
    tracker.onEvent({ kind: 'tool_use', toolUseId: id })
  }

  /** Feeds a tool_result event for `id` into the tracker. */
  const closeTool = (tracker: ToolFlightTracker, id: string): void => {
    tracker.onEvent({ kind: 'tool_result', toolUseId: id })
  }

  it('starts at a safe boundary (no tools in flight)', () => {
    const tracker = new ToolFlightTracker()
    expect(tracker.isMidToolCall()).toBe(false)
    expect(tracker.inFlightCount()).toBe(0)
  })

  it('a tool_use opens an unsafe boundary; its tool_result closes it', () => {
    const tracker = new ToolFlightTracker()
    openTool(tracker, 'tu_1')
    expect(tracker.isMidToolCall()).toBe(true)
    closeTool(tracker, 'tu_1')
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('stays unsafe while ANY of several parallel tools is open', () => {
    const tracker = new ToolFlightTracker()
    for (const id of ['a', 'b', 'c']) {
      openTool(tracker, id)
    }
    expect(tracker.inFlightCount()).toBe(3)

    // Close two of the three, out of order; 'c' is still open.
    closeTool(tracker, 'b')
    closeTool(tracker, 'a')
    expect(tracker.isMidToolCall()).toBe(true)

    closeTool(tracker, 'c')
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('turn_end clears any residual in-flight tools', () => {
    const tracker = new ToolFlightTracker()
    openTool(tracker, 'tu_1')
    openTool(tracker, 'tu_2')
    tracker.onEvent({ kind: 'turn_end' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('a fresh enqueue clears the slate (new turn starts clean)', () => {
    const tracker = new ToolFlightTracker()
    openTool(tracker, 'tu_1')
    tracker.onEvent({ kind: 'enqueue' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('ignores sub-agent and non-tool events', () => {
    const tracker = new ToolFlightTracker()
    tracker.onEvent({ kind: 'sub_agent_tool_use', toolUseId: 'sub_1' })
    tracker.onEvent({ kind: 'thinking' })
    tracker.onEvent({ kind: 'text' })
    tracker.onEvent({ kind: 'tool_label', toolUseId: 'tu_x' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('ignores tool_use with a missing / empty toolUseId', () => {
    const tracker = new ToolFlightTracker()
    tracker.onEvent({ kind: 'tool_use' })
    tracker.onEvent({ kind: 'tool_use', toolUseId: null })
    tracker.onEvent({ kind: 'tool_use', toolUseId: '' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('tool_result for an unknown id is a harmless no-op', () => {
    const tracker = new ToolFlightTracker()
    openTool(tracker, 'real')
    closeTool(tracker, 'never-opened')
    // 'real' was never closed, so the tracker must still report mid-call.
    expect(tracker.isMidToolCall()).toBe(true)
  })

  it('clear() resets the tracker', () => {
    const tracker = new ToolFlightTracker()
    openTool(tracker, 'tu_1')
    tracker.clear()
    expect(tracker.isMidToolCall()).toBe(false)
  })
})
|
|
95
|
+
|
|
96
|
+
// Suite for decideInterruptTiming: covers all four combinations of
// (safeBoundaryEnabled, midToolCall). Only flag-on AND mid-tool-call defers.
describe('decideInterruptTiming', () => {
  it('fires now when the flag is off, even mid-tool-call', () => {
    const verdict = decideInterruptTiming({ safeBoundaryEnabled: false, midToolCall: true })
    expect(verdict).toBe('fire-now')
  })

  it('fires now when the flag is on but no tool is in flight', () => {
    const verdict = decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: false })
    expect(verdict).toBe('fire-now')
  })

  it('defers only when the flag is on AND a tool is in flight', () => {
    const verdict = decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: true })
    expect(verdict).toBe('defer')
  })

  it('fires now in the fully-off case', () => {
    const verdict = decideInterruptTiming({ safeBoundaryEnabled: false, midToolCall: false })
    expect(verdict).toBe('fire-now')
  })
})
|
|
121
|
+
|
|
122
|
+
// Suite for resolveInterruptMaxWaitMs: a positive configured value is used
// as-is; undefined, zero and negative values all resolve to
// DEFAULT_INTERRUPT_MAX_WAIT_MS so a parked interrupt can never wait forever.
// The final case is a small end-to-end walk through tracker + decision.
describe('resolveInterruptMaxWaitMs', () => {
  it('uses the configured value when positive', () => {
    expect(resolveInterruptMaxWaitMs(3000)).toBe(3000)
  })

  it('falls back to the default when undefined', () => {
    expect(resolveInterruptMaxWaitMs(undefined)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
  })

  it('never returns a non-positive wait (no forever-wait)', () => {
    expect(resolveInterruptMaxWaitMs(0)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
    expect(resolveInterruptMaxWaitMs(-1)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
  })

  it('models the lifecycle: open tool → defer; tool_result → safe → fire', () => {
    const t = new ToolFlightTracker()
    t.onEvent({ kind: 'tool_use', toolUseId: 'w1' })
    // `!` lands here, flag on:
    expect(
      decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: t.isMidToolCall() }),
    ).toBe('defer')
    // tool completes:
    t.onEvent({ kind: 'tool_result', toolUseId: 'w1' })
    expect(t.isMidToolCall()).toBe(false) // gateway fires the parked interrupt here
    // Pin the "→ fire" half of the title: with the tool closed, the same
    // decision must now flip from 'defer' back to 'fire-now'.
    expect(
      decideInterruptTiming({ safeBoundaryEnabled: true, midToolCall: t.isMidToolCall() }),
    ).toBe('fire-now')
  })
})
|
|
@@ -450,6 +450,42 @@ describe('planBufferedRedelivery — merge-on-drain (forwarded-burst across a tu
|
|
|
450
450
|
expect(plan[0]!.merged.imagePath).toBe('/tmp/p.jpg')
|
|
451
451
|
})
|
|
452
452
|
|
|
453
|
+
it('splices attachment meta from the media entry when it is NOT the anchor (A2 numbered fields survive)', () => {
  // Scenario: a coalesced multi-attachment message is buffered first, then a
  // text-only follow-up. The merge anchors on the last entry (the text),
  // whose meta carries no attachment fields, so image_path, its numbered
  // siblings and attachment_count have to be spliced in from the media entry
  // or the agent would never see the photos.
  const mediaEntry = userMsg({ text: 'look', ts: 1, imagePath: '/tmp/a.jpg' })
  mediaEntry.meta = {
    image_path: '/tmp/a.jpg',
    image_path_2: '/tmp/b.jpg',
    attachment_count: '2',
    user: 'alice',
  }
  const followUp = userMsg({ text: 'at these', ts: 2 })
  followUp.meta = { user: 'alice' }

  const plan = planBufferedRedelivery([mediaEntry, followUp])
  expect(plan).toHaveLength(1)

  const { merged } = plan[0]!
  expect(merged.meta.image_path).toBe('/tmp/a.jpg')
  expect(merged.meta.image_path_2).toBe('/tmp/b.jpg')
  expect(merged.meta.attachment_count).toBe('2')
  // The top-level primary is still re-seated for inboundHasMedia detection.
  expect(merged.imagePath).toBe('/tmp/a.jpg')
})
|
|
477
|
+
|
|
478
|
+
it('does not need a meta splice when the media entry IS the anchor', () => {
  const intro = userMsg({ text: 'intro', ts: 1 })
  intro.meta = { user: 'alice' }
  const media = userMsg({ text: 'pic', ts: 2, imagePath: '/tmp/p.jpg' })
  media.meta = { image_path: '/tmp/p.jpg', user: 'alice' }

  const plan = planBufferedRedelivery([intro, media])
  expect(plan).toHaveLength(1)

  // The photo is the last entry and therefore the anchor, so its meta is
  // expected to come through unchanged.
  const mergedMeta = plan[0]!.merged.meta
  expect(mergedMeta.image_path).toBe('/tmp/p.jpg')
})
|
|
488
|
+
|
|
453
489
|
it('preserves the run total — sum of originals equals input length (lossless)', () => {
|
|
454
490
|
const msgs = [
|
|
455
491
|
userMsg({ text: 'a', ts: 1 }),
|
|
@@ -92,6 +92,42 @@ describe('renderWorkerActivity', () => {
|
|
|
92
92
|
expect(out).toContain('⚠️ <b>Worker failed</b>')
|
|
93
93
|
})
|
|
94
94
|
|
|
95
|
+
it('grows a narrative block when narrativeLines is present', () => {
  const state = view({
    latestSummary: 'newest only — should be ignored',
    narrativeLines: ['read the brief', 'scanned vendor A', 'scanned vendor B'],
  })
  const out = renderWorkerActivity(state)

  expect(out).toContain('↳ <i>read the brief</i>')
  expect(out).toContain('↳ <i>scanned vendor A</i>')
  expect(out).toContain('↳ <i>scanned vendor B</i>')
  // With a narrative block present, the single-line latestSummary fallback
  // must not be rendered.
  expect(out).not.toContain('newest only')
  // One ↳ marker per narrative line.
  const arrows = out.match(/↳/g) ?? []
  expect(arrows).toHaveLength(3)
})
|
|
110
|
+
|
|
111
|
+
it('falls back to latestSummary when narrativeLines is empty', () => {
  const state = view({ narrativeLines: [], latestSummary: 'one line' })
  const out = renderWorkerActivity(state)
  expect(out).toContain('↳ <i>one line</i>')
  // Exactly one ↳ marker: the fallback summary line.
  const arrows = out.match(/↳/g) ?? []
  expect(arrows).toHaveLength(1)
})
|
|
116
|
+
|
|
117
|
+
it('drops blank narrative lines from the block', () => {
  const state = view({ narrativeLines: ['kept', ' ', 'also kept'] })
  const out = renderWorkerActivity(state)
  expect(out).toContain('↳ <i>kept</i>')
  expect(out).toContain('↳ <i>also kept</i>')
  // The whitespace-only entry must not produce a ↳ line of its own.
  const arrows = out.match(/↳/g) ?? []
  expect(arrows).toHaveLength(2)
})
|
|
125
|
+
|
|
126
|
+
it('escapes HTML inside narrative lines', () => {
  const out = renderWorkerActivity(view({ narrativeLines: ['a <b>x</b> & y'] }))
  // The narrative text must come out entity-escaped. Asserting on the raw
  // input string would also pass if no escaping happened at all, so check the
  // escaped form and that the raw markup is gone.
  // NOTE(review): assumes the renderer escapes '<', '>' and '&' (the three
  // characters Telegram's HTML parse mode requires) — confirm against the
  // escape helper in worker-activity-feed.ts.
  expect(out).toContain('a &lt;b&gt;x&lt;/b&gt; &amp; y')
  expect(out).not.toContain('<b>x</b>')
})
|
|
130
|
+
|
|
95
131
|
it('escapes HTML in description, tool, arg, and summary', () => {
|
|
96
132
|
const out = renderWorkerActivity(
|
|
97
133
|
view({
|
|
@@ -246,6 +282,83 @@ describe('createWorkerActivityFeed', () => {
|
|
|
246
282
|
expect(feed.size).toBe(0)
|
|
247
283
|
})
|
|
248
284
|
|
|
285
|
+
it('accumulates distinct narrative lines into a growing block across ticks', async () => {
  const bot = makeFakeBot()
  let nowMs = 10_000
  // minEditIntervalMs: 0 so that every update is allowed to edit.
  const feed = createWorkerActivityFeed({ bot, now: () => nowMs, minEditIntervalMs: 0 })

  // First tick sends the initial message carrying the first line.
  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'read the brief' }))
  expect(bot.sent).toHaveLength(1)
  expect(bot.sent[0].text).toContain('↳ <i>read the brief</i>')

  nowMs = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'scanned vendor A' }))
  nowMs = 12_000
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'scanned vendor B' }))

  // The most recent edit must show all three lines, one ↳ each.
  const finalEdit = bot.edits.at(-1)!
  expect(finalEdit.text).toContain('↳ <i>read the brief</i>')
  expect(finalEdit.text).toContain('↳ <i>scanned vendor A</i>')
  expect(finalEdit.text).toContain('↳ <i>scanned vendor B</i>')
  const arrows = finalEdit.text.match(/↳/g) ?? []
  expect(arrows).toHaveLength(3)
})
|
|
305
|
+
|
|
306
|
+
it('dedups a repeated narrative line so the block does not duplicate', async () => {
  const bot = makeFakeBot()
  let nowMs = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => nowMs, minEditIntervalMs: 0 })

  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'same line' }))

  // Same narrative, different tool count: the body changes so an edit fires,
  // yet the narrative block must still hold the line only once.
  nowMs = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'same line' }))

  const finalEdit = bot.edits.at(-1)!
  const arrows = finalEdit.text.match(/↳/g) ?? []
  expect(arrows).toHaveLength(1)
})
|
|
320
|
+
|
|
321
|
+
it('caps the narrative block to the last 6 lines', async () => {
  const bot = makeFakeBot()
  let nowMs = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => nowMs, minEditIntervalMs: 0 })

  // Push nine distinct lines, one second apart, strictly in sequence.
  const tickNumbers = Array.from({ length: 9 }, (_, k) => k + 1)
  for (const tick of tickNumbers) {
    nowMs += 1000
    await feed.update('w1', 'chat', view({ toolCount: tick, latestSummary: `line ${tick}` }))
  }

  const finalEdit = bot.edits.at(-1)!
  const arrows = finalEdit.text.match(/↳/g) ?? []
  expect(arrows).toHaveLength(6)
  // Lines 1-3 fall out of the window; lines 4-9 stay.
  expect(finalEdit.text).not.toContain('line 1')
  expect(finalEdit.text).not.toContain('line 3')
  expect(finalEdit.text).toContain('line 4')
  expect(finalEdit.text).toContain('line 9')
})
|
|
339
|
+
|
|
340
|
+
it('grows the narrative even while throttled (line surfaces on next edit)', async () => {
  const bot = makeFakeBot()
  let nowMs = 10_000
  const feed = createWorkerActivityFeed({ bot, now: () => nowMs, minEditIntervalMs: 2500 })

  await feed.update('w1', 'chat', view({ toolCount: 1, latestSummary: 'line A' }))
  expect(bot.sent).toHaveLength(1)

  // 1s later is inside the 2.5s throttle: no edit goes out, but 'line B'
  // still has to be remembered.
  nowMs = 11_000
  await feed.update('w1', 'chat', view({ toolCount: 2, latestSummary: 'line B' }))
  expect(bot.edits).toHaveLength(0)

  // 3s after the send the throttle has elapsed; this edit must carry the
  // throttled line as well as the new one.
  nowMs = 13_000
  await feed.update('w1', 'chat', view({ toolCount: 3, latestSummary: 'line C' }))
  const finalEdit = bot.edits.at(-1)!
  for (const expectedLine of ['↳ <i>line A</i>', '↳ <i>line B</i>', '↳ <i>line C</i>']) {
    expect(finalEdit.text).toContain(expectedLine)
  }
})
|
|
361
|
+
|
|
249
362
|
it('forwards threadId as message_thread_id on send', async () => {
|
|
250
363
|
const bot = makeFakeBot()
|
|
251
364
|
let clock = 10_000
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD scenario — forwarded burst / split paste coalesces into ONE turn.
|
|
3
|
+
*
|
|
4
|
+
* Serves: `reference/steer-or-queue-mid-flight.md` — the "Forwarded
|
|
5
|
+
* burst / split paste" UAT prompt. When several messages land in quick
|
|
6
|
+
* succession from the same sender (a forward of 3-4 messages, or a long
|
|
7
|
+
* paste Telegram split into chunks), inbound coalescing must merge them
|
|
8
|
+
* into a SINGLE Claude turn with shared context — not reply to each
|
|
9
|
+
* fragment in isolation.
|
|
10
|
+
*
|
|
11
|
+
* This is the end-to-end gate for the A1 coalescing work shipped in
|
|
12
|
+
* v0.14.18 (#2007). The merge logic itself is covered by unit + fuzz
|
|
13
|
+
* tests (`inbound-coalesce.test.ts`, `pending-inbound-buffer.test.ts`),
|
|
14
|
+
* but only this scenario exercises the real inbound → gateway coalescer
|
|
15
|
+
* → claude → outbound path over a live Telegram chat.
|
|
16
|
+
*
|
|
17
|
+
* ## How the signal is constructed
|
|
18
|
+
*
|
|
19
|
+
* Naively asking the agent to "combine facts from several messages"
|
|
20
|
+
* does NOT distinguish coalesced from fanned-out: even when each
|
|
21
|
+
* message becomes its own turn, every later turn carries the PRIOR
|
|
22
|
+
* turns in its conversation history, so the model could still answer
|
|
23
|
+
* from history. History bleed makes a content-combination assertion
|
|
24
|
+
* useless as a coalescing probe.
|
|
25
|
+
*
|
|
26
|
+
* The distinguishing fact is whether a SINGLE turn saw all the parts in
|
|
27
|
+
* ONE incoming message. So we anchor the instruction on "this single
|
|
28
|
+
* message": three messages are fired near-simultaneously (so they land
|
|
29
|
+
* inside the default 500ms coalesce window), each carrying a distinct
|
|
30
|
+
* code token, and the instruction (in the last part) asks the agent to
|
|
31
|
+
* echo every token it received *in this one incoming message*.
|
|
32
|
+
*
|
|
33
|
+
* - Coalesced → the merged turn's single message contains ALPHA,
|
|
34
|
+
* BRAVO and CHARLIE → the reply names all three.
|
|
35
|
+
* - Fanned out → the message carrying the instruction contains only
|
|
36
|
+
* its own token → the reply names just that one. (And the other
|
|
37
|
+
* two tokens arrive as their own separate turns.)
|
|
38
|
+
*
|
|
39
|
+
* Tokens are deliberately odd uppercase strings so a substring match is
|
|
40
|
+
* unambiguous and won't collide with incidental words in the reply.
|
|
41
|
+
*
|
|
42
|
+
* Order-independence: the three sends are dispatched concurrently to
|
|
43
|
+
* guarantee they share a coalesce window, which means Telegram may
|
|
44
|
+
* deliver them in any order. We assert SET membership (all three
|
|
45
|
+
* present), never order.
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
import { describe, it, expect } from "vitest";
|
|
49
|
+
import { spinUp } from "../harness.js";
|
|
50
|
+
import { pollUntil } from "../assertions.js";
|
|
51
|
+
import type { ObservedMessage } from "../driver.js";
|
|
52
|
+
|
|
53
|
+
/** Name of the agent the harness spins up for this scenario. */
const AGENT = "test-harness";

/** Distinctive code tokens, unlikely to show up by accident in a reply. */
const TOKENS = ["ALPHA", "BRAVO", "CHARLIE"] as const;

/** Opening sentence shared by every burst part (1-based part index). */
const burstPartHeader = (part: number): string =>
  `BURST-PROBE part ${part} of 3. Code token: ${TOKENS[part - 1]}.`;

/** Instruction appended to the last part only. */
const BURST_INSTRUCTION = [
  "Reply with ONLY the BURST-PROBE code tokens contained in THIS SINGLE",
  "incoming message, slash-separated. If this message contains just one",
  "token, reply with only that token.",
].join(" ");

/** The three messages of the burst; only the last one carries the instruction. */
const BURST: string[] = [
  burstPartHeader(1),
  burstPartHeader(2),
  `${burstPartHeader(3)} ${BURST_INSTRUCTION}`,
];

// Generous budget: time to first output on the test-harness is ~7s warm, and
// coalescing adds its sub-second window on top of normal model latency.
const ANSWER_TIMEOUT_MS = 40_000;
|
|
70
|
+
|
|
71
|
+
/**
 * Lists the probe tokens that occur in `text`.
 *
 * Matching is a case-insensitive substring check (the text is upper-cased
 * first). The result follows the order of TOKENS, not the order in which the
 * tokens appear in the text.
 */
function tokensIn(text: string): string[] {
  const haystack = text.toUpperCase();
  const found: string[] = [];
  for (const token of TOKENS) {
    if (haystack.includes(token)) {
      found.push(token);
    }
  }
  return found;
}
|
|
75
|
+
|
|
76
|
+
describe("uat: forwarded burst / split paste coalesces into one turn", () => {
  /** Renders messages as "#<id>=[TOKEN,TOKEN]" entries, space-separated, for diagnostics. */
  const summarize = (msgs: ObservedMessage[]): string =>
    msgs.map((m) => `#${m.messageId}=[${tokensIn(m.text).join(",")}]`).join(" ");

  it(
    "a 3-message burst is answered as ONE shared-context turn",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        // Observation has to begin BEFORE the burst is sent, because
        // observeMessages yields live updates only, never history. Keying by
        // message id makes streamed edits collapse to the latest text.
        const latestById = new Map<number, ObservedMessage>();
        const stream = sc.driver.observeMessages(sc.botUserId);
        const consume = (async () => {
          for await (const m of stream) {
            // Our own sends are not bot replies; ignore them.
            if (m.senderUserId !== sc.driverUserId) {
              latestById.set(m.messageId, m);
            }
          }
        })();

        // All three parts are dispatched concurrently so that they share one
        // coalesce window: three serial round-trips could exceed the 500ms
        // default on a slow link.
        await Promise.all(BURST.map((part) => sc.sendDM(part)));

        // Poll for a single bot message that names every token — the proof
        // that one turn saw the whole burst as one incoming message. A poll
        // timeout is mapped to undefined so the richer error below is thrown.
        const findComplete = (): ObservedMessage | undefined =>
          [...latestById.values()].find(
            (m) => tokensIn(m.text).length === TOKENS.length,
          );
        const allThree = await pollUntil(findComplete, {
          timeout: ANSWER_TIMEOUT_MS,
          interval: 500,
        }).catch(() => undefined);

        // Shut the observer stream and let the consumer loop finish.
        await stream[Symbol.asyncIterator]().return?.(undefined as never);
        await consume;

        const botMsgs = [...latestById.values()];
        const tokenBearing = botMsgs.filter((m) => tokensIn(m.text).length > 0);

        if (!allThree) {
          const seen = summarize(tokenBearing);
          throw new Error(
            `[forwarded-burst] No single bot reply named all of ` +
              `${TOKENS.join("/")}. This is the coalescing regression: the ` +
              `burst fanned out into separate turns so no turn saw the full ` +
              `message. Token-bearing replies: ${seen || "(none)"}.`,
          );
        }

        expect(tokensIn(allThree.text).sort()).toEqual([...TOKENS].sort());

        // Forensics: a healthy coalesce yields exactly ONE token-bearing
        // answer. More than one hints at a partial fan-out even though one
        // reply happened to be complete, so it is logged as a warning rather
        // than failing the test.
        if (tokenBearing.length > 1) {
          console.warn(
            `[forwarded-burst] ${tokenBearing.length} token-bearing replies ` +
              `observed (expected 1). Possible partial fan-out: ` +
              summarize(tokenBearing),
          );
        } else {
          console.log(
            `[forwarded-burst] One shared-context reply named all tokens ` +
              `(#${allThree.messageId}). Coalescing healthy.`,
          );
        }
      } finally {
        await sc.tearDown();
      }
    },
    ANSWER_TIMEOUT_MS + 20_000,
  );
});
|