@obtoai/agent-bridge 0.1.0-beta.21 → 0.1.0-beta.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@obtoai/agent-bridge",
3
- "version": "0.1.0-beta.21",
3
+ "version": "0.1.0-beta.22",
4
4
  "description": "Local consumer for the OBTO Agent Bridge. Receives bridge events over SSE and drives a coding agent (Claude Code or OpenAI Codex) on your machine.",
5
5
  "license": "Apache-2.0",
6
6
  "author": "OBTO Inc.",
@@ -92,6 +92,28 @@ const claimThread = (threadId, agentId) =>
92
92
  const postExternalSync = (agentId, sessions) =>
93
93
  postJson('/api/bridge/external/sync', { agentId, sessions });
94
94
 
95
+ // Phase 6.4 — download an attachment's raw bytes for use as a Claude SDK
96
+ // image content block. The serve route streams the file with its stored
97
+ // Content-Type; we read it into a Buffer and base64-encode for the SDK.
98
+ // Returns { ok, status, mimeType, base64 } or { ok: false, status }.
99
/**
 * Download one attachment from the bridge and return it base64-encoded.
 *
 * The request is authenticated with the configured API token and origin host.
 * The reported `mimeType` is the bare, lower-cased media type taken from the
 * response's Content-Type header: any parameters (`; charset=…`) are dropped,
 * because consumers use the value verbatim as an image block `media_type`.
 *
 * @param {string|number} attachmentId - Attachment identifier; stringified
 *   and URL-encoded into the request path.
 * @returns {Promise<{ok: true, status: number, mimeType: string, base64: string}
 *   | {ok: false, status: number}>} Result object; on a non-OK response only
 *   the HTTP status is reported. Errors thrown by `fetch` itself propagate.
 */
const getAttachmentBytes = async (attachmentId) => {
  const c = getCfg();
  // Strip a single trailing slash so the joined URL has no `//`.
  const url = c.baseUrl.replace(/\/$/, '') +
    '/api/bridge/attachment/' + encodeURIComponent(String(attachmentId));
  const res = await fetch(url, {
    method: 'GET',
    headers: {
      'OBTO-ORIGIN-HOST': c.originHost,
      Authorization: 'Bearer ' + c.apiToken,
    },
    cache: 'no-store',
  });
  if (!res.ok) {
    // We never read the error body; cancel it so the underlying connection is
    // released promptly instead of waiting for garbage collection.
    if (res.body && typeof res.body.cancel === 'function') {
      try {
        await res.body.cancel();
      } catch (e) {
        // Best-effort cleanup only — the caller still gets the HTTP status.
      }
    }
    return { ok: false, status: res.status };
  }
  // "image/png; charset=binary" -> "image/png". Media types are
  // case-insensitive, so normalise to lower case as well.
  const rawType = res.headers.get('content-type') || '';
  const mimeType = rawType.split(';')[0].trim().toLowerCase() ||
    'application/octet-stream';
  const buf = Buffer.from(await res.arrayBuffer());
  return { ok: true, status: res.status, mimeType, base64: buf.toString('base64') };
};
116
+
95
117
  module.exports = {
96
118
  getCfg,
97
119
  buildHeaders,
@@ -100,4 +122,5 @@ module.exports = {
100
122
  postAgentActivity,
101
123
  claimThread,
102
124
  postExternalSync,
125
+ getAttachmentBytes,
103
126
  };
@@ -228,6 +228,52 @@ const buildEnvelope = (payload) => {
228
228
  return head + '\n\n' + body;
229
229
  };
230
230
 
231
+ // Phase 6.4 — image attachments. When payload.attachmentIds is non-empty,
232
+ // download each via the bridge HTTP API and assemble a multimodal user
233
+ // message (image blocks + text envelope) as an async iterable, which the
234
+ // Claude Agent SDK accepts in lieu of a plain prompt string. With no
235
+ // attachments, returns the envelope text as-is — zero overhead on the
236
+ // hot text-only path.
237
/**
 * Build the prompt handed to the Claude Agent SDK for one bridge message.
 *
 * With no attachment ids the envelope text is returned unchanged. Otherwise
 * each attachment is fetched through `bridgeHttp.getAttachmentBytes`, one at
 * a time and in order, and turned into a base64 image block. Attachments that
 * fail to download, throw, arrive empty, or carry a media type outside the
 * accepted image set are skipped with a warning, so one bad attachment cannot
 * invalidate the whole message.
 *
 * @param {object} payload - Bridge payload; only `attachmentIds` is read here.
 * @param {string} envelopeText - Text prompt to send (alone, or as the final
 *   text block after the images).
 * @param {Function} [log] - Optional logger, called as `log(level, msg, meta)`.
 * @returns {Promise<string|AsyncGenerator>} The envelope text when no image
 *   survived, else an async iterable yielding one multimodal user message.
 */
const buildPromptForSdk = async (payload, envelopeText, log) => {
  const ids = Array.isArray(payload && payload.attachmentIds)
    ? payload.attachmentIds.filter(Boolean)
    : [];
  if (ids.length === 0) return envelopeText;

  // Media types accepted for base64 image blocks. Anything else would be
  // rejected downstream, so it is dropped here instead.
  const supportedTypes = new Set(['image/jpeg', 'image/png', 'image/gif', 'image/webp']);

  const blocks = [];
  for (const id of ids) {
    try {
      const r = await bridgeHttp.getAttachmentBytes(id);
      if (!r || !r.ok) {
        if (log) log('warn', 'attachment fetch failed', { id, status: r && r.status });
        continue;
      }
      // Drop Content-Type parameters ("image/png; charset=binary") and
      // normalise case; map the common non-standard alias to its real name.
      let mediaType = String(r.mimeType || 'image/png').split(';')[0].trim().toLowerCase();
      if (mediaType === 'image/jpg') mediaType = 'image/jpeg';
      if (!supportedTypes.has(mediaType) || !r.base64) {
        if (log) log('warn', 'attachment skipped', { id, mediaType, empty: !r.base64 });
        continue;
      }
      blocks.push({
        type: 'image',
        source: {
          type: 'base64',
          media_type: mediaType,
          data: r.base64,
        },
      });
    } catch (e) {
      if (log) log('warn', 'attachment fetch threw', {
        id,
        error: e && e.message ? e.message : String(e),
      });
    }
  }
  // No images survived — fall back to text-only so the turn still runs
  // (with degraded context).
  if (blocks.length === 0) return envelopeText;

  // Images first, envelope text last, all inside a single user message.
  blocks.push({ type: 'text', text: envelopeText });
  return (async function* () {
    yield { type: 'user', message: { role: 'user', content: blocks } };
  })();
};
276
+
231
277
  const buildBootstrapPrompt = (payload) =>
232
278
  buildEnvelope(payload) +
233
279
  '\n\n---\n' +
@@ -290,7 +336,7 @@ const consumeQuery = async (q) => {
290
336
  const driveFirstTouch = async ({ threadId, projectDir, payload, log }) => {
291
337
  const sdk = await import('@anthropic-ai/claude-agent-sdk');
292
338
  const bridgeServer = await buildBridgeMcpServer({ log });
293
- const prompt = buildBootstrapPrompt(payload);
339
+ const prompt = await buildPromptForSdk(payload, buildBootstrapPrompt(payload), log);
294
340
  const options = Object.assign(
295
341
  {
296
342
  cwd: projectDir,
@@ -303,6 +349,7 @@ const driveFirstTouch = async ({ threadId, projectDir, payload, log }) => {
303
349
  threadId,
304
350
  projectDir,
305
351
  messageId: payload.messageId,
352
+ attachments: (payload.attachmentIds || []).length,
306
353
  });
307
354
 
308
355
  const startedAt = Date.now();
@@ -357,7 +404,7 @@ const driveResume = async ({ threadId, sessionId, projectDir, jsonlPath, lastJso
357
404
 
358
405
  const sdk = await import('@anthropic-ai/claude-agent-sdk');
359
406
  const bridgeServer = await buildBridgeMcpServer({ log });
360
- const prompt = buildEnvelope(payload);
407
+ const prompt = await buildPromptForSdk(payload, buildEnvelope(payload), log);
361
408
  const options = Object.assign(
362
409
  {
363
410
  resume: sessionId,
@@ -371,6 +418,7 @@ const driveResume = async ({ threadId, sessionId, projectDir, jsonlPath, lastJso
371
418
  threadId,
372
419
  sessionId,
373
420
  messageId: payload.messageId,
421
+ attachments: (payload.attachmentIds || []).length,
374
422
  });
375
423
 
376
424
  const startedAt = Date.now();
@@ -32,8 +32,24 @@ const queues = new Map();
32
32
 
33
33
  const ALLOW_ALL = process.env.BRIDGE_ALLOW_ALL === '1';
34
34
 
35
+ // Phase 6.4 — Codex SDK doesn't accept image inputs yet. When the bridge
36
+ // payload carries attachmentIds, we prepend an honest note so the agent
37
+ // knows images existed (the human will see them in their own bubble on the
38
+ // bridge UI). When the SDK gains multimodal support, this can be replaced
39
+ // with a real image-in path.
40
/**
 * Build the note prepended to a Codex prompt when the bridge payload carried
 * attachments that this driver cannot forward.
 *
 * @param {object} payload - Bridge payload; only `attachmentIds` is read.
 * @returns {string} Empty string when there are no (truthy) attachment ids,
 *   otherwise a bracketed note ending in a blank line. The wording agrees in
 *   number with the attachment count.
 */
const attachmentDropNote = (payload) => {
  const n = Array.isArray(payload && payload.attachmentIds)
    ? payload.attachmentIds.filter(Boolean).length
    : 0;
  if (!n) return '';
  const single = n === 1;
  return '[OBTO bridge note: ' + n + ' image attachment' + (single ? '' : 's') +
    ' came with this message, but the Codex driver does not support image ' +
    'input yet — proceeding with text only. Ask the human to describe the ' +
    // Keep the closing sentence consistent with the count stated above.
    (single
      ? 'image in words if you need its content'
      : 'images in words if you need their content') +
    '.]\n\n';
};
50
+
35
51
  const buildCodexPrompt = (payload, isFirst) => {
36
- const head = buildEnvelope(payload);
52
+ const head = attachmentDropNote(payload) + buildEnvelope(payload);
37
53
  if (!isFirst) return head;
38
54
  return head +
39
55
  '\n\n---\n' +
@@ -29,8 +29,24 @@ const queues = new Map();
29
29
  const DEFAULT_PROVIDER = process.env.BRIDGE_OPENCODE_PROVIDER || 'anthropic';
30
30
  const DEFAULT_MODEL = process.env.BRIDGE_OPENCODE_MODEL || 'claude-sonnet-4-5';
31
31
 
32
+ // Phase 6.4 — opencode's SDK accepts only `parts:[{type:'text',text}]`. When
33
+ // the bridge payload carries attachmentIds, we prepend an honest note so the
34
+ // agent knows images existed (the human will see them in their own bubble on
35
+ // the bridge UI). Upgrade to real image parts when opencode-ai/sdk grows
36
+ // support for file/image parts.
37
/**
 * Build the note prepended to an opencode prompt when the bridge payload
 * carried attachments that this driver cannot forward.
 *
 * @param {object} payload - Bridge payload; only `attachmentIds` is read.
 * @returns {string} Empty string when there are no (truthy) attachment ids,
 *   otherwise a bracketed note ending in a blank line. The wording agrees in
 *   number with the attachment count.
 */
const attachmentDropNote = (payload) => {
  const n = Array.isArray(payload && payload.attachmentIds)
    ? payload.attachmentIds.filter(Boolean).length
    : 0;
  if (!n) return '';
  const single = n === 1;
  return '[OBTO bridge note: ' + n + ' image attachment' + (single ? '' : 's') +
    ' came with this message, but the opencode driver does not support image ' +
    'input yet — proceeding with text only. Ask the human to describe the ' +
    // Keep the closing sentence consistent with the count stated above.
    (single
      ? 'image in words if you need its content'
      : 'images in words if you need their content') +
    '.]\n\n';
};
47
+
32
48
  const buildOpencodePrompt = (payload, isFirst) => {
33
- const head = buildEnvelope(payload);
49
+ const head = attachmentDropNote(payload) + buildEnvelope(payload);
34
50
  if (!isFirst) return head;
35
51
  return head +
36
52
  '\n\n---\n' +