@zooid/transport-matrix 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -995,6 +995,100 @@ describe('tool-call and plan event bridging', () => {
995
995
  })
996
996
  })
997
997
 
998
describe('agent_message_chunk message-boundary buffering', () => {
  type OnEvent = (agentName: string, event: unknown) => unknown
  type SendMessageSpy = { sendMessage: { mock: { calls: unknown[][] } } }
  /** One streamed chunk: its text and the (optional) message id it is tagged with. */
  type Chunk = [text: string, messageId: string | undefined]

  /**
   * Opens a turn by posting a single text message that mentions the architect
   * agent, then hands back the handles the tests drive afterwards.
   * NOTE(review): the returned session id assumes the fake registry names
   * sessions `sess-<eventId>` — confirm against `makeTransport`.
   */
  async function startTurn(eventId: string) {
    const { transport, agents, client, finishPrompt } = makeTransport()
    const mention = {
      type: 'm.room.message',
      event_id: eventId,
      origin_server_ts: Date.now(),
      room_id: '!r:example.com',
      sender: '@user:example.com',
      content: {
        msgtype: 'm.text',
        body: 'hi',
        'm.mentions': { user_ids: ['@architect:example.com'] },
      },
    }
    await postTxn(transport.app, { events: [mention] })
    await settleTurn()
    return { agents, client, finishPrompt, sessionId: 'sess-' + eventId }
  }

  /** Feeds one text `agent_message_chunk` for the architect agent into `agents.onEvent`. */
  function emit(agents: { onEvent: unknown }, sessionId: string, text: string, messageId?: string) {
    const chunk = {
      type: 'agent_message_chunk',
      sessionId,
      content: { type: 'text', text },
      messageId,
    }
    return (agents.onEvent as OnEvent)('architect', chunk)
  }

  /** Body of the first message handed to the `sendMessage` spy. */
  function sentBody(client: SendMessageSpy): string {
    const [firstCall] = client.sendMessage.mock.calls
    const [message] = firstCall!
    return (message as { content: { body: string } }).content.body
  }

  /**
   * Runs a whole turn: start it, stream `chunks` in order, finish the prompt
   * and wait for the turn to settle. Resolves to the client whose
   * `sendMessage` spy the caller inspects.
   */
  async function streamTurn(eventId: string, chunks: Chunk[]) {
    const { agents, client, finishPrompt, sessionId } = await startTurn(eventId)
    for (const [text, messageId] of chunks) {
      await emit(agents, sessionId, text, messageId)
    }
    finishPrompt()
    await settleTurn()
    return client
  }

  it('inserts a paragraph break when messageId changes between chunks (opencode run-on)', async () => {
    // Reproduces the production failure: opencode streams "…it." under one
    // message id, then "Filed:" under a NEW id with no delimiter chunk in
    // between. Without a break the two weld into "it.Filed:".
    const client = await streamTurn('$mid1', [
      ['Let me file it.', 'msg_aaa'],
      ['Filed: done', 'msg_bbb'],
    ])
    expect(client.sendMessage).toHaveBeenCalledTimes(1)
    expect(sentBody(client)).toBe('Let me file it.\n\nFiled: done')
  })

  it('does NOT break between chunks sharing a messageId (token streaming stays intact)', async () => {
    // Tokens of a single message bring their own leading spaces, so they must
    // be concatenated raw — anything else corrupts every streamed sentence.
    const client = await streamTurn('$mid2', [
      ['Hello', 'msg_aaa'],
      [' world', 'msg_aaa'],
      ['.', 'msg_aaa'],
    ])
    expect(sentBody(client)).toBe('Hello world.')
  })

  it('breaks only once across a three-message run', async () => {
    // Exactly one paragraph break per message boundary.
    const client = await streamTurn('$mid3', [
      ['one.', 'msg_a'],
      ['two.', 'msg_b'],
      ['three.', 'msg_c'],
    ])
    expect(sentBody(client)).toBe('one.\n\ntwo.\n\nthree.')
  })

  it('still breaks on an empty delimiter chunk (agents that signal that way)', async () => {
    const client = await streamTurn('$mid4', [
      ['before', 'msg_a'],
      ['', 'msg_a'], // empty delimiter, same id
      ['after', 'msg_a'],
    ])
    expect(sentBody(client)).toBe('before\n\nafter')
  })

  it('concatenates raw when chunks carry no messageId (e.g. Claude Code)', async () => {
    const client = await streamTurn('$mid5', [
      ['Hello', undefined],
      [' there', undefined],
    ])
    expect(sentBody(client)).toBe('Hello there')
  })
})
1091
+
998
1092
  describe('eco.zoon.interrupt handling', () => {
999
1093
  it('dispatches cancelSession(agent.name, sessionId) for an interrupt that targets a tracked session', async () => {
1000
1094
  const { transport, agents, finishPrompt } = makeTransport()
@@ -1218,3 +1312,245 @@ describe('full loop integration', () => {
1218
1312
  )
1219
1313
  })
1220
1314
  })
1315
+
1316
+ // ─── Media pipeline tests ────────────────────────────────────────────────────
1317
+
1318
/** Base64 text of the tiny (1×1) PNG that the media fixtures serve and the tests expect back. */
const TINY_PNG_B64 =
  'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='

/** The same image as raw bytes. */
const TINY_PNG = Buffer.from(TINY_PNG_B64, 'base64')
1321
+
1322
/**
 * Builds a stub media client whose two methods are vitest spies:
 * `download` always resolves to a fresh copy of the tiny PNG typed as
 * `image/png`, and `upload` always resolves to the fixed URI
 * `mxc://localhost/up1`.
 */
function fakeMedia() {
  const download = vi.fn(async () => ({
    data: new Uint8Array(TINY_PNG),
    contentType: 'image/png',
  }))
  const upload = vi.fn(async () => ({ content_uri: 'mxc://localhost/up1' }))
  return { download, upload }
}
1328
+
1329
/**
 * The first base agent binding, extended with a host workspace directory and
 * the path at which the agent sees that workspace.
 */
const workspaceBinding = {
  ...baseAgents[0],
  workspaceDir: '/tmp/ws',
  agentWorkspacePath: '/workspace',
}
1334
+
1335
/**
 * Creates a Matrix transport wired to fakes (registry, approvals, client) and
 * bound to `workspaceBinding`, with the post-turn quiet window set to 0.
 *
 * @param opts.media             Optional stub media client handed to the transport.
 * @param opts.writeAttachmentFn Optional stub attachment writer handed to the transport.
 * @returns The transport plus the fakes a test needs to drive and inspect it.
 */
function makeMediaTransport(
  opts: {
    media?: ReturnType<typeof fakeMedia>
    writeAttachmentFn?: unknown
  } = {},
) {
  const { media, writeAttachmentFn } = opts
  const { reg, finishPrompt } = fakeRegistry()
  const approvals = fakeApprovals()
  const client = fakeClient()

  const transport = createMatrixTransport({
    agents: reg as never,
    approvals: approvals as never,
    client: client as never,
    bindings: [workspaceBinding],
    hsToken: 'hs-secret',
    drainQuietMs: 0,
    media: media as never,
    writeAttachmentFn: writeAttachmentFn as never,
  })

  return { transport, agents: reg, client, finishPrompt }
}
1354
+
1355
/**
 * Builds an inbound `m.room.message` media event from `@alice:example.com`
 * in room `!r:example.com`, pointing at `mxc://localhost/abc`.
 *
 * Every field of `over` is optional; omitted fields default to a 67-byte
 * `image/png` `m.image` named `dog.png` with event id `$media1`.
 */
function imageEvent(
  over: {
    size?: number
    mimetype?: string
    msgtype?: string
    body?: string
    eventId?: string
  } = {},
) {
  const info = {
    mimetype: over.mimetype ?? 'image/png',
    size: over.size ?? 67,
  }
  const content = {
    msgtype: over.msgtype ?? 'm.image',
    body: over.body ?? 'dog.png',
    url: 'mxc://localhost/abc',
    info,
  }
  return {
    type: 'm.room.message',
    event_id: over.eventId ?? '$media1',
    room_id: '!r:example.com',
    sender: '@alice:example.com',
    content,
  }
}
1375
+
1376
/**
 * Builds an inbound `m.text` message from `@alice:example.com` that mentions
 * the architect agent, both in the body prefix and in `m.mentions`.
 *
 * @param body    Text placed after the `@architect ` prefix.
 * @param eventId Event id to use; defaults to `$text1`.
 */
function mentionMsg(body: string, eventId = '$text1') {
  const content = {
    msgtype: 'm.text',
    body: `@architect ${body}`,
    'm.mentions': { user_ids: ['@architect:example.com'] },
  }
  return {
    type: 'm.room.message',
    event_id: eventId,
    room_id: '!r:example.com',
    sender: '@alice:example.com',
    content,
  }
}
1389
+
1390
describe('inbound media', () => {
  type Agents = ReturnType<typeof makeMediaTransport>['agents']

  /** Content blocks handed to the first `agents.prompt` call. */
  const firstPromptContent = (agents: Agents) =>
    (agents.prompt.mock.calls[0][1] as { content: unknown[] }).content

  /** Makes the fake agent answer a prompt with one text chunk and then end the turn. */
  const replyWithText = (agents: Agents, text: string) => {
    agents.prompt.mockImplementation(async () => {
      agents.onEvent('architect', {
        type: 'agent_message_chunk',
        sessionId: 'sess-$text1',
        content: { type: 'text', text },
      })
      return { stopReason: 'end_turn' as const }
    })
  }

  /** Stub attachment writer that reports fixed host/agent paths for `filename`. */
  const stubAttachmentWriter = (filename: string) =>
    vi.fn(() => ({
      hostPath: `/tmp/ws/.zooid/attachments/media1/${filename}`,
      agentPath: `/workspace/.zooid/attachments/media1/${filename}`,
    }))

  it('media events do not trigger a turn; m.text from the same sender drains them inline', async () => {
    const media = fakeMedia()
    const { transport, agents } = makeMediaTransport({ media })

    // The image on its own must not start a turn.
    await postTxn(transport.app, { events: [imageEvent()] })
    await settleTurn()
    expect(agents.prompt).not.toHaveBeenCalled()

    // A mention from the same sender starts the turn, with the image block first.
    replyWithText(agents, 'got it')
    await postTxn(transport.app, { events: [mentionMsg('look at this')] })
    await settleTurn()

    expect(agents.prompt).toHaveBeenCalledOnce()
    const [imageBlock, textBlock] = firstPromptContent(agents)
    expect(imageBlock).toMatchObject({ type: 'image', data: TINY_PNG_B64, mimeType: 'image/png' })
    expect((textBlock as { type: string; text: string }).type).toBe('text')
    expect(media.download).toHaveBeenCalledOnce()
  })

  it('routes an oversized image to the file path with a resource_link block and prose line', async () => {
    const media = fakeMedia()
    const writeAttachmentFn = stubAttachmentWriter('dog.png')
    const { transport, agents } = makeMediaTransport({ media, writeAttachmentFn })

    agents.prompt.mockResolvedValue({ stopReason: 'end_turn' as const })
    await postTxn(transport.app, { events: [imageEvent({ size: 600_000 })] }) // > MAX_INLINE_IMAGE_BYTES
    await postTxn(transport.app, { events: [mentionMsg('summarize')] })
    await settleTurn()

    const [linkBlock, textBlock] = firstPromptContent(agents)
    expect(linkBlock).toMatchObject({
      type: 'resource_link',
      uri: 'file:///workspace/.zooid/attachments/media1/dog.png',
      name: 'dog.png',
    })
    expect((textBlock as { text: string }).text).toContain(
      '/workspace/.zooid/attachments/media1/dog.png',
    )
  })

  it('routes m.file to the workspace regardless of size', async () => {
    const media = fakeMedia()
    const writeAttachmentFn = stubAttachmentWriter('report.pdf')
    const { transport, agents } = makeMediaTransport({ media, writeAttachmentFn })

    agents.prompt.mockResolvedValue({ stopReason: 'end_turn' as const })
    const pdfEvent = imageEvent({ msgtype: 'm.file', body: 'report.pdf', mimetype: 'application/pdf' })
    await postTxn(transport.app, { events: [pdfEvent] })
    await postTxn(transport.app, { events: [mentionMsg('read it')] })
    await settleTurn()

    expect(writeAttachmentFn).toHaveBeenCalledOnce()
    const [firstBlock] = firstPromptContent(agents)
    expect((firstBlock as { type: string }).type).toBe('resource_link')
  })

  it('emits eco.zoon.error (code media_failed) when download fails, still runs the turn text-only', async () => {
    const media = fakeMedia()
    media.download.mockRejectedValueOnce(new Error('download boom'))
    const { transport, agents, client } = makeMediaTransport({ media })

    replyWithText(agents, 'ok')
    await postTxn(transport.app, { events: [imageEvent()] })
    await postTxn(transport.app, { events: [mentionMsg('look')] })
    await settleTurn()

    expect(client.sendCustomEvent).toHaveBeenCalledWith(
      expect.objectContaining({
        eventType: 'eco.zoon.error',
        content: expect.objectContaining({ code: 'media_failed' }),
      }),
    )
    // The failed attachment contributes no block: only the text block remains.
    const content = firstPromptContent(agents)
    expect(content).toHaveLength(1)
    expect((content[0] as { type: string }).type).toBe('text')
  })
})
1491
+
1492
describe('outbound agent images', () => {
  it('uploads an image chunk and sends a threaded m.image as the agent user', async () => {
    const media = fakeMedia()
    const { transport, agents, client } = makeMediaTransport({ media })

    agents.prompt.mockImplementation(async () => {
      // Image block emitted while the prompt is running.
      agents.onEvent('architect', {
        type: 'agent_message_chunk',
        sessionId: 'sess-$text1',
        content: { type: 'image', data: TINY_PNG_B64, mimeType: 'image/png' },
      })
      // A text block as well, so the turn is not empty.
      agents.onEvent('architect', {
        type: 'agent_message_chunk',
        sessionId: 'sess-$text1',
        content: { type: 'text', text: 'here is the image' },
      })
      return { stopReason: 'end_turn' as const }
    })

    await postTxn(transport.app, { events: [mentionMsg('show me an image')] })
    await settleTurn()

    // The transport fires upload → sendMessage without awaiting it, so poll
    // until the chain has completed instead of sleeping a fixed interval.
    await vi.waitFor(() => {
      expect(media.upload).toHaveBeenCalledWith(
        expect.objectContaining({ contentType: 'image/png', asUserId: '@architect:example.com' }),
      )
      expect(client.sendMessage).toHaveBeenCalledWith(
        expect.objectContaining({
          asUserId: '@architect:example.com',
          // "Threaded": the image must be sent with a thread root.
          threadRoot: expect.any(String),
          content: expect.objectContaining({
            msgtype: 'm.image',
            url: 'mxc://localhost/up1',
            info: expect.objectContaining({ mimetype: 'image/png', size: TINY_PNG.length }),
          }),
        }),
      )
    })
  })

  it('does not throw when an audio block arrives (non-goal — warn and drop)', async () => {
    const media = fakeMedia()
    const { transport, agents, client } = makeMediaTransport({ media })

    agents.prompt.mockImplementation(async () => {
      agents.onEvent('architect', {
        type: 'agent_message_chunk',
        sessionId: 'sess-$text1',
        content: { type: 'audio', data: 'AAAA', mimeType: 'audio/wav' },
      })
      agents.onEvent('architect', {
        type: 'agent_message_chunk',
        sessionId: 'sess-$text1',
        content: { type: 'text', text: 'ok' },
      })
      return { stopReason: 'end_turn' as const }
    })

    await expect(
      postTxn(transport.app, { events: [mentionMsg('test audio')] }),
    ).resolves.not.toThrow()
    await settleTurn()

    // "Drop" means the audio never reaches the media pipeline: nothing is
    // uploaded and no media message is sent on its behalf.
    expect(media.upload).not.toHaveBeenCalled()
    expect(client.sendMessage).not.toHaveBeenCalledWith(
      expect.objectContaining({ content: expect.objectContaining({ msgtype: 'm.image' }) }),
    )
  })
})
package/src/transport.ts CHANGED
@@ -1,14 +1,38 @@
1
1
  import { Hono } from 'hono'
2
2
  import { timingSafeEqual } from 'node:crypto'
3
3
  import type { AcpRegistry, ApprovalCorrelator, RegisteredApproval } from '@zooid/core'
4
- import type { AgentEvent } from '@zooid/acp-client'
4
+ import type { AgentEvent, ContentBlock } from '@zooid/acp-client'
5
5
  import { MatrixClient } from './matrix-client.js'
6
6
  import { BotPool } from './bot-pool.js'
7
- import { route, type AgentBinding, type ThreadState } from './router.js'
7
+ import { route, isMediaMsgtype, type AgentBinding, type ThreadState } from './router.js'
8
8
  import { stripMention, extractMentions } from './mentions.js'
9
9
  import { toToolCallBody, toUpdateBody, toPlanBody, toErrorBody } from './event-encoders.js'
10
10
  import { classify } from '@zooid/acp-client'
11
11
  import { toMatrixHtml } from './markdown-to-matrix-html.js'
12
+ import {
13
+ PendingMediaStore,
14
+ type PendingMediaItem,
15
+ } from './pending-media.js'
16
+ import {
17
+ MediaClient,
18
+ MAX_INLINE_IMAGE_BYTES,
19
+ INLINE_IMAGE_MIMES,
20
+ } from './media-client.js'
21
+ import { writeAttachment } from './attachments.js'
22
+
23
/**
 * The slice of a media client that the transport depends on. Declared as an
 * interface so an alternative implementation can be injected through
 * `CreateMatrixTransportOptions.media`.
 */
export interface MediaClientLike {
  /**
   * Fetches the media behind `mxcUri`, acting as `asUserId`.
   * `maxBytes` is an optional size limit for the implementation to apply.
   * Resolves to the raw bytes and their content type.
   */
  download(input: { mxcUri: string; asUserId: string; maxBytes?: number }): Promise<{
    data: Uint8Array
    contentType: string
  }>

  /**
   * Uploads `data` with the given content type (and optional filename),
   * acting as `asUserId`. Resolves to the content URI of the stored media.
   */
  upload(input: {
    data: Uint8Array
    contentType: string
    filename?: string
    asUserId: string
  }): Promise<{ content_uri: string }>
}
12
36
 
13
37
  export interface CreateMatrixTransportOptions {
14
38
  agents: AcpRegistry
@@ -24,6 +48,10 @@ export interface CreateMatrixTransportOptions {
24
48
  drainQuietMs?: number
25
49
  /** Hard cap on the post-turn drain. Defaults to `DRAIN_MAX_MS`. */
26
50
  drainMaxMs?: number
51
+ /** Injected media client for downloading/uploading Matrix media. */
52
+ media?: MediaClientLike
53
+ /** Injected attachment writer (defaults to the real writeAttachment). */
54
+ writeAttachmentFn?: typeof writeAttachment
27
55
  }
28
56
 
29
57
  interface SessionContext {
@@ -47,6 +75,120 @@ interface MatrixEvent {
47
75
  }
48
76
 
49
77
  const STARTUP_GRACE_MS = 5_000
78
+
79
/** What `buildMediaBlocks` produces for one batch of pending media items. */
interface MediaBlocksResult {
  /** Content blocks built from the media items. */
  blocks: ContentBlock[]
  /** One `Attached file: <path>` line per item that was written to the workspace. */
  pathLines: string[]
}
83
+
84
/**
 * Turns an absolute agent-side path into a `file://` URI, percent-encoding
 * characters that are not legal in a URI path. Filenames come from user
 * messages, so spaces, `#`, `?` and `%` must not be passed through raw.
 */
function mediaFileUri(absolutePath: string): string {
  try {
    // encodeURI leaves `/` (and the other reserved characters) alone; `?` and
    // `#` would otherwise start a query/fragment, so escape them explicitly.
    const encoded = encodeURI(absolutePath).replace(/[?#]/g, (ch) => encodeURIComponent(ch))
    return `file://${encoded}`
  } catch {
    // encodeURI throws URIError on lone surrogates; fall back to the raw path
    // rather than losing an attachment that has already been written.
    return `file://${absolutePath}`
  }
}

/**
 * Writes already-downloaded bytes into the agent workspace and records the
 * matching `resource_link` block and prose path line in `out`.
 * Does nothing when the agent has no `workspaceDir`.
 */
function writeMediaAttachment(
  item: PendingMediaItem,
  data: Uint8Array,
  contentType: string,
  opts: { agent: AgentBinding; writeAttachmentFn: typeof writeAttachment },
  out: MediaBlocksResult,
): void {
  const { workspaceDir } = opts.agent
  if (!workspaceDir) return

  const name = item.filename ?? item.body
  const paths = opts.writeAttachmentFn({
    workspaceDir,
    agentWorkspacePath: opts.agent.agentWorkspacePath ?? workspaceDir,
    eventId: item.eventId,
    filename: name,
    data,
  })
  out.blocks.push({
    type: 'resource_link',
    uri: mediaFileUri(paths.agentPath),
    name,
    mimeType: item.info?.mimetype ?? contentType,
    // Report the size of the bytes actually written; the sender-supplied
    // `info.size` is not trustworthy.
    size: data.byteLength,
  })
  out.pathLines.push(`Attached file: ${paths.agentPath}`)
}

/**
 * Converts pending media items into content blocks for a prompt.
 *
 * Routing per item:
 *  - `m.image` with a MIME type in `INLINE_IMAGE_MIMES`, whose claimed size
 *    (if any) and actual downloaded size are both within
 *    `MAX_INLINE_IMAGE_BYTES`, becomes an inline base64 `image` block.
 *  - Everything else — including an image whose downloaded bytes exceed the
 *    cap despite its claimed size — is written to the agent workspace and
 *    becomes a `resource_link` block plus an `Attached file: …` path line.
 *    These items are skipped when the agent has no `workspaceDir`.
 *
 * Items are processed sequentially, in order. A failure on one item is
 * reported through `opts.onError` and does not stop the remaining items.
 *
 * @param items Pending media items to convert.
 * @param opts.agent             Binding of the agent the prompt is for.
 * @param opts.media             Media client; when undefined nothing is built.
 * @param opts.writeAttachmentFn Writer used for the workspace route.
 * @param opts.onError           Called with the item and error when an item fails.
 * @returns The blocks and path lines, both empty if there is nothing to do.
 */
async function buildMediaBlocks(
  items: PendingMediaItem[],
  opts: {
    agent: AgentBinding
    media: MediaClientLike | undefined
    writeAttachmentFn: typeof writeAttachment
    onError: (item: PendingMediaItem, err: unknown) => void
  },
): Promise<MediaBlocksResult> {
  const result: MediaBlocksResult = { blocks: [], pathLines: [] }
  const { agent, media } = opts

  if (!media || items.length === 0) return result

  for (const item of items) {
    try {
      const claimedSize = item.info?.size
      const isInlineCandidate =
        item.msgtype === 'm.image' &&
        INLINE_IMAGE_MIMES.includes(item.info?.mimetype ?? '') &&
        (claimedSize === undefined || claimedSize <= MAX_INLINE_IMAGE_BYTES)

      // The file route needs a workspace; without one there is no point in
      // downloading at all.
      if (!isInlineCandidate && !agent.workspaceDir) continue

      const { data, contentType } = await media.download({
        mxcUri: item.url,
        asUserId: agent.userId,
      })

      // Re-check against the real byte count: the claimed size can be wrong.
      if (isInlineCandidate && data.byteLength <= MAX_INLINE_IMAGE_BYTES) {
        result.blocks.push({
          type: 'image',
          data: Buffer.from(data).toString('base64'),
          mimeType: contentType,
        })
        continue
      }

      // File route (m.file, m.video, m.audio, or an image over the inline
      // cap), reusing the bytes that were just downloaded.
      writeMediaAttachment(item, data, contentType, opts, result)
    } catch (err) {
      opts.onError(item, err)
    }
  }

  return result
}
165
+
166
/**
 * Posts an `eco.zoon.error` custom event with code `media_failed` into the
 * given room/thread, acting as the agent's user.
 *
 * Best-effort: a failure to send is logged with `console.warn` and not
 * rethrown.
 *
 * @param ctx     Agent, room and thread root the error belongs to.
 * @param _err    The underlying error; currently unused by this function.
 * @param message Human-readable text; only the first 250 characters are sent.
 * @param client  Matrix client used to send the event.
 */
async function sendMediaError(
  ctx: { agent: AgentBinding; roomId: string; threadRoot: string },
  _err: unknown,
  message: string,
  client: MatrixClient,
): Promise<void> {
  const content = toErrorBody(
    {
      kind: 'error' as const,
      agentId: ctx.agent.name,
      sessionId: null,
      turnId: null,
      code: 'media_failed',
      message: message.slice(0, 250),
      transient: false,
    },
    ctx.threadRoot,
  )

  const sending = client.sendCustomEvent({
    roomId: ctx.roomId,
    asUserId: ctx.agent.userId,
    eventType: 'eco.zoon.error',
    content,
  })

  await sending.catch((e) =>
    console.warn(`[matrix:${ctx.agent.name}] eco.zoon.error send failed:`, e),
  )
}
50
192
  const SEEN_EVENT_CAP = 5_000
51
193
 
52
194
  // ACP only guarantees that an agent flushes pending `session/update`
@@ -77,9 +219,16 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
77
219
  const { agents, approvals, client, bindings, hsToken, adminUserId } = opts
78
220
  const drainQuietMs = opts.drainQuietMs ?? DRAIN_QUIET_MS
79
221
  const drainMaxMs = opts.drainMaxMs ?? DRAIN_MAX_MS
222
+ const mediaClient = opts.media
223
+ const writeAttachmentFn = opts.writeAttachmentFn ?? writeAttachment
224
+ const pendingMedia = new PendingMediaStore()
80
225
  const pool = new BotPool(client, bindings)
81
226
  const sessions = new Map<string, SessionContext>()
82
227
  const buffers = new Map<string, string>()
228
+ // Last messageId seen per session's buffer. opencode streams each assistant
229
+ // message under its own id with no delimiter chunk between them, so a change
230
+ // here marks a message boundary we must break on.
231
+ const bufferMessageIds = new Map<string, string>()
83
232
  // Per-session promise tail so out-of-band events (tool_call, plan, etc.)
84
233
  // serialize on the wire even though the ACP producer doesn't await us.
85
234
  const sendQueue = new Map<string, Promise<void>>()
@@ -100,13 +249,61 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
100
249
  }
101
250
 
102
251
  if (event.type === 'agent_message_chunk') {
103
- const block = event.content as { type?: string; text?: string }
252
+ const block = event.content as { type?: string; text?: string; data?: string; mimeType?: string }
104
253
  if (block.type === 'text' && typeof block.text === 'string') {
105
254
  const current = buffers.get(event.sessionId) ?? ''
106
- // An empty chunk signals a new text block starting (e.g. after a tool call).
107
- // Insert a paragraph break so consecutive blocks don't run together.
108
- const prefix = block.text === '' && current.length > 0 ? '\n\n' : ''
255
+ // Within a message, tokens carry their own leading spaces, so we
256
+ // concatenate raw. Two signals start a *new* message block that must not
257
+ // run together with the previous text:
258
+ // - an empty chunk (some agents emit one between blocks, e.g. after a
259
+ // tool call), or
260
+ // - a change in messageId — opencode streams each assistant message
261
+ // under its own id and emits no delimiter chunk between them, and the
262
+ // first token of the new message has no leading space, so without
263
+ // this they weld together ("…one.🅿️").
264
+ const prevMessageId = bufferMessageIds.get(event.sessionId)
265
+ const messageChanged =
266
+ event.messageId !== undefined &&
267
+ prevMessageId !== undefined &&
268
+ event.messageId !== prevMessageId
269
+ const needsBreak =
270
+ current.length > 0 && (block.text === '' || messageChanged)
271
+ const prefix = needsBreak ? '\n\n' : ''
109
272
  buffers.set(event.sessionId, current + prefix + block.text)
273
+ if (event.messageId !== undefined)
274
+ bufferMessageIds.set(event.sessionId, event.messageId)
275
+ } else if (
276
+ block.type === 'image' &&
277
+ typeof block.data === 'string' &&
278
+ typeof block.mimeType === 'string' &&
279
+ mediaClient
280
+ ) {
281
+ // Outbound agent image: upload immediately and send as a threaded m.image.
282
+ const ctx = sessions.get(event.sessionId)
283
+ if (ctx) {
284
+ const bytes = Buffer.from(block.data, 'base64')
285
+ const ext = (block.mimeType.split('/')[1] ?? 'png').replace(/[^a-z0-9]/gi, '')
286
+ const filename = `image.${ext}`
287
+ void mediaClient
288
+ .upload({ data: bytes, contentType: block.mimeType, filename, asUserId: ctx.agent.userId })
289
+ .then(({ content_uri }) =>
290
+ client.sendMessage({
291
+ roomId: ctx.roomId,
292
+ asUserId: ctx.agent.userId,
293
+ threadRoot: ctx.threadRoot,
294
+ content: {
295
+ msgtype: 'm.image',
296
+ body: filename,
297
+ url: content_uri,
298
+ info: { mimetype: block.mimeType, size: bytes.length },
299
+ },
300
+ }),
301
+ )
302
+ .catch((err) => {
303
+ console.warn(`[matrix:${name}] outbound image upload failed:`, err)
304
+ void sendMediaError(ctx, err, 'agent image upload failed', client)
305
+ })
306
+ }
110
307
  } else {
111
308
  console.warn(`[matrix:${name}] dropped chunk block type=${block.type}`, block)
112
309
  }
@@ -295,6 +492,29 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
295
492
  continue
296
493
  }
297
494
  logInbound(evt)
495
+
496
+ // Capture media events in the pending store; never route them to agents.
497
+ if (
498
+ evt.type === 'm.room.message' &&
499
+ isMediaMsgtype(evt.content?.msgtype) &&
500
+ evt.room_id &&
501
+ evt.event_id &&
502
+ evt.sender &&
503
+ evt.content?.url &&
504
+ !bindings.some((b) => b.userId === evt.sender)
505
+ ) {
506
+ pendingMedia.add(evt.room_id, inboundThreadRoot(evt), {
507
+ eventId: evt.event_id,
508
+ sender: evt.sender,
509
+ msgtype: evt.content.msgtype as string,
510
+ body: (evt.content.body as string | undefined) ?? '',
511
+ filename: evt.content.filename as string | undefined,
512
+ url: evt.content.url as string,
513
+ info: evt.content.info as PendingMediaItem['info'],
514
+ })
515
+ continue
516
+ }
517
+
298
518
  // Agent-promotion: top-level inbound event becomes the thread root.
299
519
  // For in-thread messages the existing root is preserved.
300
520
  const promotedRoot = inboundThreadRoot(evt) ?? evt.event_id
@@ -416,6 +636,7 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
416
636
  const sessionId = await agents.ensureSession(agent.name, sessionKey, evt.room_id)
417
637
  sessions.set(sessionId, { agent, roomId: evt.room_id, threadRoot })
418
638
  buffers.set(sessionId, '')
639
+ bufferMessageIds.delete(sessionId)
419
640
 
420
641
  const roomId = evt.room_id
421
642
  const TYPING_TTL_MS = 30_000
@@ -440,10 +661,33 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
440
661
  try {
441
662
  const rawBody = evt.content?.body ?? ''
442
663
  const promptText = stripMention(rawBody, agent.userId)
664
+
665
+ // Drain pending media for this sender+thread and prepend as ACP content blocks.
666
+ const pendingItems = pendingMedia.drain(
667
+ evt.room_id,
668
+ inboundThreadRoot(evt),
669
+ evt.sender ?? '',
670
+ )
671
+ const { blocks, pathLines } = await buildMediaBlocks(pendingItems, {
672
+ agent,
673
+ media: mediaClient,
674
+ writeAttachmentFn,
675
+ onError: (item, err) => {
676
+ console.warn(`[matrix:${agent.name}] media_failed for ${item.body}:`, err)
677
+ void sendMediaError(
678
+ { agent, roomId: evt.room_id!, threadRoot },
679
+ err,
680
+ `Could not process attachment: ${item.body}`,
681
+ client,
682
+ )
683
+ },
684
+ })
685
+
686
+ const fullPromptText = [promptText, ...pathLines].filter(Boolean).join('\n')
443
687
  await agents.prompt(agent.name, {
444
688
  threadId: sessionKey,
445
689
  channelId: evt.room_id,
446
- content: [{ type: 'text', text: promptText }],
690
+ content: [...blocks, { type: 'text', text: fullPromptText }],
447
691
  })
448
692
  // Drain: the prompt promise resolves on the stopReason response, but
449
693
  // trailing chunks may still arrive (see DRAIN_* above). Wait until the
@@ -507,6 +751,7 @@ export function createMatrixTransport(opts: CreateMatrixTransportOptions) {
507
751
  await safeTyping(false)
508
752
  await safePresence('online')
509
753
  buffers.delete(sessionId)
754
+ bufferMessageIds.delete(sessionId)
510
755
  }
511
756
  }
512
757