ethagent 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import { ProviderError } from './contracts.js'
4
4
  import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseFrames } from './sse.js'
7
+ import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
7
8
 
8
9
  export type GeminiToolDefinition = {
9
10
  name: string
@@ -41,6 +42,7 @@ type GeminiChunk = {
41
42
 
42
43
  type GeminiContentPart =
43
44
  | { text: string }
45
+ | { inlineData: { mimeType: string; data: string } }
44
46
  | { functionCall: { name: string; args: Record<string, unknown> } }
45
47
  | { functionResponse: { name: string; response: Record<string, unknown> } }
46
48
 
@@ -92,8 +94,21 @@ export class GeminiProvider implements Provider {
92
94
  yield { type: 'error', message: error.message }
93
95
  return
94
96
  }
97
+ if (hasImageBlocks(messages) && !supportsGeminiImages(this.model)) {
98
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
99
+ return
100
+ }
95
101
 
96
- const payload = buildGeminiPayload(messages, this.tools, options)
102
+ let payload: GeminiPayload
103
+ try {
104
+ payload = await buildGeminiPayload(messages, this.tools, options)
105
+ } catch (err: unknown) {
106
+ if (err instanceof ImageLoadError) {
107
+ yield { type: 'error', message: err.message }
108
+ return
109
+ }
110
+ throw err
111
+ }
97
112
  const modelName = this.model.replace(/^models\//, '')
98
113
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelName)}:streamGenerateContent?alt=sse`
99
114
 
@@ -181,11 +196,11 @@ export class GeminiProvider implements Provider {
181
196
  }
182
197
  }
183
198
 
184
- export function buildGeminiPayload(
199
+ export async function buildGeminiPayload(
185
200
  messages: Message[],
186
201
  tools: GeminiToolDefinition[] = [],
187
202
  options: ProviderCompleteOptions = {},
188
- ): GeminiPayload {
203
+ ): Promise<GeminiPayload> {
189
204
  const systemParts: string[] = []
190
205
  const contents: GeminiContent[] = []
191
206
  const toolUseNamesById = new Map<string, string>()
@@ -222,6 +237,10 @@ export function buildGeminiPayload(
222
237
  for (const block of blocks) {
223
238
  if (block.type === 'text') {
224
239
  parts.push({ text: block.text })
240
+ } else if (block.type === 'image') {
241
+ const loaded = await loadImageBlock(block)
242
+ if (!loaded.dataBase64 || !loaded.mimeType) throw new Error(`could not load image: ${block.path}`)
243
+ parts.push({ inlineData: { mimeType: loaded.mimeType, data: loaded.dataBase64 } })
225
244
  } else if (block.type === 'tool_result') {
226
245
  const name = toolUseNamesById.get(block.toolUseId) ?? 'unknown'
227
246
  const response: Record<string, unknown> = block.isError
@@ -258,6 +277,13 @@ function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
258
277
  })
259
278
  }
260
279
 
280
/**
 * Reports whether a Gemini model id belongs to a family treated as image-capable.
 *
 * The check is a case-insensitive substring match, so prefixed ids such as
 * `models/gemini-1.5-pro` are recognised as well.
 *
 * @param model - Model identifier to inspect.
 * @returns `true` when the id contains `gemini-1.5`, `gemini-2.0` or `gemini-2.5`.
 */
export function supportsGeminiImages(model: string): boolean {
  const imageCapableFamilies = ['gemini-1.5', 'gemini-2.0', 'gemini-2.5']
  const lowered = model.toLowerCase()
  return imageCapableFamilies.some(family => lowered.includes(family))
}
286
+
261
287
  function normalizeFinishReason(reason: string, sawToolCall: boolean): DoneStopReason {
262
288
  if (sawToolCall) return 'tool_use'
263
289
  switch (reason) {
@@ -5,6 +5,7 @@ import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseFrames } from './sse.js'
7
7
  import { messageTextContent } from '../utils/messages.js'
8
+ import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
8
9
  import { providerDisplayName } from '../models/providerDisplay.js'
9
10
 
10
11
  export type OpenAIToolDefinition = {
@@ -28,6 +29,7 @@ type Options = {
28
29
  loadApiKey?: () => Promise<string | null>
29
30
  tools?: OpenAIToolDefinition[]
30
31
  maxRetries?: number
32
+ hasVisionProjector?: boolean
31
33
  }
32
34
 
33
35
  type ChatChunk = {
@@ -76,6 +78,7 @@ export class OpenAIChatProvider implements Provider {
76
78
  private readonly loadApiKey?: () => Promise<string | null>
77
79
  private readonly tools: OpenAIToolDefinition[]
78
80
  private readonly maxRetries?: number
81
+ private readonly hasVisionProjector: boolean
79
82
 
80
83
  constructor(opts: Options) {
81
84
  this.id = opts.id
@@ -86,6 +89,7 @@ export class OpenAIChatProvider implements Provider {
86
89
  this.tools = opts.tools ?? []
87
90
  this.maxRetries = opts.maxRetries
88
91
  this.supportsTools = this.tools.length > 0
92
+ this.hasVisionProjector = opts.hasVisionProjector ?? false
89
93
  }
90
94
 
91
95
  async *complete(
@@ -99,6 +103,19 @@ export class OpenAIChatProvider implements Provider {
99
103
  yield { type: 'error', message: error.message }
100
104
  return
101
105
  }
106
+ if (hasImageBlocks(messages)) {
107
+ if (this.id === 'llamacpp' && !this.hasVisionProjector) {
108
+ const hint = localModelNameHintsVision(this.model)
109
+ ? '; open alt+p and run "Add Vision Encoder" on this model to enable image input'
110
+ : ''
111
+ yield { type: 'error', message: `image input is not enabled for local model "${this.model}" (no vision projector loaded)${hint}` }
112
+ return
113
+ }
114
+ if (this.id === 'openai' && !supportsOpenAIImages(this.model)) {
115
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
116
+ return
117
+ }
118
+ }
102
119
 
103
120
  const headers: Record<string, string> = {
104
121
  'Content-Type': 'application/json',
@@ -106,6 +123,17 @@ export class OpenAIChatProvider implements Provider {
106
123
  }
107
124
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`
108
125
 
126
+ let wireMessages: Array<Record<string, unknown>>
127
+ try {
128
+ wireMessages = await toWireMessages(messages)
129
+ } catch (err: unknown) {
130
+ if (err instanceof ImageLoadError) {
131
+ yield { type: 'error', message: err.message }
132
+ return
133
+ }
134
+ throw err
135
+ }
136
+
109
137
  let response: Response
110
138
  try {
111
139
  response = yield* fetchWithRetryStreamEvents(`${this.baseUrl}/chat/completions`, {
@@ -113,7 +141,7 @@ export class OpenAIChatProvider implements Provider {
113
141
  headers,
114
142
  body: JSON.stringify({
115
143
  model: this.model,
116
- messages: toWireMessages(messages),
144
+ messages: wireMessages,
117
145
  tools: this.tools.length > 0 ? this.tools : undefined,
118
146
  tool_choice: this.tools.length > 0 ? 'auto' : undefined,
119
147
  stream: true,
@@ -221,7 +249,7 @@ export class OpenAIChatProvider implements Provider {
221
249
 
222
250
  }
223
251
 
224
- export function toWireMessages(messages: Message[]): Array<Record<string, unknown>> {
252
+ export async function toWireMessages(messages: Message[]): Promise<Array<Record<string, unknown>>> {
225
253
  const out: Array<Record<string, unknown>> = []
226
254
 
227
255
  for (const message of messages) {
@@ -230,6 +258,26 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
230
258
  continue
231
259
  }
232
260
 
261
+ if (message.role === 'user') {
262
+ const toolResults = message.content.filter(isToolResultBlock)
263
+ if (toolResults.length > 0) {
264
+ for (const block of toolResults) {
265
+ out.push({
266
+ role: 'tool',
267
+ tool_call_id: block.toolUseId,
268
+ content: block.content,
269
+ })
270
+ }
271
+ const nonToolBlocks = message.content.filter(block => block.type !== 'tool_result')
272
+ if (nonToolBlocks.length > 0) {
273
+ out.push({ role: 'user', content: await toOpenAIUserContent(nonToolBlocks) })
274
+ }
275
+ continue
276
+ }
277
+ out.push({ role: 'user', content: await toOpenAIUserContent(message.content) })
278
+ continue
279
+ }
280
+
233
281
  if (message.role === 'assistant') {
234
282
  const textParts = message.content.filter(isTextBlock).map(block => block.text)
235
283
  const toolCalls = message.content.filter(isToolUseBlock).map(block => ({
@@ -266,6 +314,37 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
266
314
  return normalizeSystemMessages(out)
267
315
  }
268
316
 
317
/**
 * Converts user content blocks into Chat Completions content parts.
 *
 * - Text blocks become `{ type: 'text' }` parts; empty strings are skipped.
 * - Image blocks are resolved through `loadImageBlock` and become `image_url`
 *   parts: the block's URL when it has one, otherwise a base64 `data:` URL.
 * - Blocks of any other type are ignored.
 *
 * Images are loaded one at a time, in block order. Errors thrown by
 * `loadImageBlock` propagate to the caller.
 *
 * @returns The produced parts, or a single empty text part when nothing was produced.
 */
async function toOpenAIUserContent(blocks: MessageContentBlock[]): Promise<Array<Record<string, unknown>>> {
  const wireParts: Array<Record<string, unknown>> = []

  for (const item of blocks) {
    switch (item.type) {
      case 'text': {
        if (item.text.length === 0) break
        wireParts.push({ type: 'text', text: item.text })
        break
      }
      case 'image': {
        const image = await loadImageBlock(item)
        const url = image.url
          ? image.url
          : image.dataBase64 && image.mimeType
            ? `data:${image.mimeType};base64,${image.dataBase64}`
            : undefined
        if (url) wireParts.push({ type: 'image_url', image_url: { url } })
        break
      }
      default:
        break
    }
  }

  if (wireParts.length === 0) return [{ type: 'text', text: '' }]
  return wireParts
}
336
+
337
/**
 * Reports whether an OpenAI model id is treated as accepting image input.
 *
 * Any id containing `gpt-3.5` is rejected outright; otherwise the lower-cased
 * id must contain one of the known image-capable family names.
 *
 * @param model - Model identifier to inspect (matched case-insensitively).
 */
export function supportsOpenAIImages(model: string): boolean {
  const lowered = model.toLowerCase()
  const isTextOnlyFamily = lowered.includes('gpt-3.5')
  const imageCapableFamilies = /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/
  return !isTextOnlyFamily && imageCapableFamilies.test(lowered)
}
342
+
343
/**
 * Heuristic: does a local model's name suggest it is a vision-language model?
 *
 * The result is only used to decide whether to show a hint; it does not prove
 * that image input works. Matching is case-insensitive.
 *
 * The Qwen-VL pattern accepts any optional version number (`qwen-vl`,
 * `qwen2-vl`, `qwen2.5-vl`, `qwen3-vl`, ...) so new releases are recognised
 * without editing the list. The phi-3 / phi-3.5 vision names share one pattern.
 *
 * @param model - Local model name or file name.
 * @returns `true` when the name matches a known vision model family.
 */
export function localModelNameHintsVision(model: string): boolean {
  const normalized = model.toLowerCase()
  return /llava|bakllava|qwen(?:\d+(?:\.\d+)?)?[-_.]?vl|minicpm-?v|llama-3\.2.*vision|mllama|cogvlm|internvl|moondream|pixtral|phi-?3(?:\.5)?[\.-]?vision|smolvlm/.test(normalized)
}
347
+
269
348
  function normalizeSystemMessages(messages: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
270
349
  const systemContents: string[] = []
271
350
  const nonSystem: Array<Record<string, unknown>> = []
@@ -1,9 +1,11 @@
1
1
  import type { Message, MessageContentBlock } from './contracts.js'
2
2
  import { messageTextContent } from '../utils/messages.js'
3
3
  import type { OpenAIToolDefinition } from './openai-chat.js'
4
+ import { loadImageBlock } from '../utils/images.js'
4
5
 
5
6
  export type ResponsesInputContent =
6
7
  | { type: 'input_text'; text: string }
8
+ | { type: 'input_image'; image_url: string }
7
9
  | { type: 'output_text'; text: string }
8
10
 
9
11
  export type ResponsesInputItem =
@@ -30,13 +32,13 @@ export type ResponsesRequestBody = {
30
32
  max_output_tokens?: number
31
33
  }
32
34
 
33
- export function buildResponsesBody(args: {
35
+ export async function buildResponsesBody(args: {
34
36
  model: string
35
37
  messages: Message[]
36
38
  tools: OpenAIToolDefinition[]
37
39
  maxOutputTokens?: number
38
- }): ResponsesRequestBody {
39
- const { instructions, items } = splitMessages(args.messages)
40
+ }): Promise<ResponsesRequestBody> {
41
+ const { instructions, items } = await splitMessages(args.messages)
40
42
  const body: ResponsesRequestBody = {
41
43
  model: args.model,
42
44
  input: items,
@@ -60,10 +62,10 @@ export function buildResponsesBody(args: {
60
62
  return body
61
63
  }
62
64
 
63
- function splitMessages(messages: Message[]): {
65
+ async function splitMessages(messages: Message[]): Promise<{
64
66
  instructions?: string
65
67
  items: ResponsesInputItem[]
66
- } {
68
+ }> {
67
69
  const instructions: string[] = []
68
70
  const items: ResponsesInputItem[] = []
69
71
 
@@ -100,12 +102,12 @@ function splitMessages(messages: Message[]): {
100
102
  }
101
103
  continue
102
104
  }
103
- const text = blocks.filter(isTextBlock).map(block => block.text).join('')
104
- if (text) {
105
+ const content = await toOpenAIResponsesUserContent(blocks)
106
+ if (content.length > 0) {
105
107
  items.push({
106
108
  type: 'message',
107
109
  role: 'user',
108
- content: [{ type: 'input_text', text }],
110
+ content,
109
111
  })
110
112
  }
111
113
  continue
@@ -136,6 +138,25 @@ function splitMessages(messages: Message[]): {
136
138
  }
137
139
  }
138
140
 
141
/**
 * Converts user content blocks into Responses API input content.
 *
 * - Non-empty text blocks become `input_text` entries.
 * - Image blocks are resolved through `loadImageBlock` and become `input_image`
 *   entries: the block's URL when it has one, otherwise a base64 `data:` URL.
 * - Blocks of any other type are ignored.
 *
 * Images are loaded one at a time, in block order. Errors thrown by
 * `loadImageBlock` propagate to the caller.
 *
 * @returns The produced entries; may be empty.
 */
async function toOpenAIResponsesUserContent(blocks: MessageContentBlock[]): Promise<ResponsesInputContent[]> {
  const inputParts: ResponsesInputContent[] = []

  for (const item of blocks) {
    if (item.type === 'image') {
      const image = await loadImageBlock(item)
      const imageUrl = image.url
        ? image.url
        : image.dataBase64 && image.mimeType
          ? `data:${image.mimeType};base64,${image.dataBase64}`
          : undefined
      if (imageUrl) inputParts.push({ type: 'input_image', image_url: imageUrl })
    } else if (item.type === 'text' && item.text) {
      inputParts.push({ type: 'input_text', text: item.text })
    }
  }

  return inputParts
}
159
+
139
160
  function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
140
161
  if (typeof content === 'string') {
141
162
  return content ? [{ type: 'text', text: content }] : []
@@ -5,7 +5,8 @@ import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseEvents } from './sse.js'
7
7
  import { buildResponsesBody } from './openai-responses-format.js'
8
- import type { OpenAIToolDefinition } from './openai-chat.js'
8
+ import { supportsOpenAIImages, type OpenAIToolDefinition } from './openai-chat.js'
9
+ import { hasImageBlocks, ImageLoadError } from '../utils/images.js'
9
10
 
10
11
  const READ_TIMEOUT_MS = 45_000
11
12
 
@@ -64,15 +65,29 @@ export class OpenAIResponsesProvider implements Provider {
64
65
  return
65
66
  }
66
67
 
68
+ if (hasImageBlocks(messages) && !supportsOpenAIImages(this.model)) {
69
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
70
+ return
71
+ }
72
+
67
73
  let attempt = 0
68
74
  while (true) {
69
75
  attempt += 1
70
- const body = JSON.stringify(buildResponsesBody({
71
- model: this.model,
72
- messages,
73
- tools: this.tools,
74
- maxOutputTokens: options.maxTokens,
75
- }))
76
+ let body: string
77
+ try {
78
+ body = JSON.stringify(await buildResponsesBody({
79
+ model: this.model,
80
+ messages,
81
+ tools: this.tools,
82
+ maxOutputTokens: options.maxTokens,
83
+ }))
84
+ } catch (err: unknown) {
85
+ if (err instanceof ImageLoadError) {
86
+ yield { type: 'error', message: err.message }
87
+ return
88
+ }
89
+ throw err
90
+ }
76
91
 
77
92
  let response: Response
78
93
  try {
@@ -34,6 +34,7 @@ export function createProvider(config: EthagentConfig, options: { mode?: Session
34
34
  baseUrl: localProviderBaseUrlFor('llamacpp', config.baseUrl),
35
35
  apiKey: 'llamacpp',
36
36
  tools: openAITools(mode, toolContext),
37
+ hasVisionProjector: Boolean(config.localMmprojPath),
37
38
  })
38
39
  case 'openai':
39
40
  return createOpenAIProvider(config, openAITools(mode, toolContext))
@@ -80,6 +80,7 @@ const ConfigSchema = z.object({
80
80
  provider: z.enum(PROVIDERS),
81
81
  model: z.string().min(1),
82
82
  baseUrl: z.string().url().optional(),
83
+ localMmprojPath: z.string().min(1).optional(),
83
84
  firstRunAt: z.string(),
84
85
  identity: IdentitySchema.optional(),
85
86
  erc8004: z.object({
@@ -11,9 +11,10 @@ import {
11
11
  isUserCorrectionOfToolState,
12
12
  looksLikeToolStateClaim,
13
13
  } from '../runtime/toolClaimGuards.js'
14
+ import { userTextToContentBlocks } from '../utils/images.js'
14
15
 
15
16
  export type SessionMessage =
16
- | { version?: 2; role: 'user'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
17
+ | { version?: 2; role: 'user'; content: string; providerContent?: Message['content']; createdAt: string; turnId?: string; synthetic?: boolean }
17
18
  | { version?: 2; role: 'assistant'; content: string; createdAt: string; model?: string; usage?: { in?: number; out?: number }; turnId?: string; synthetic?: boolean }
18
19
  | { version?: 2; role: 'system'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
19
20
  | { version: 2; role: 'tool_use'; toolUseId: string; name: string; input: Record<string, unknown>; createdAt: string; turnId?: string }
@@ -244,6 +245,17 @@ export type ProviderMessageProjectionOptions = {
244
245
  export const TOOL_CORRECTION_CONTEXT_MESSAGE =
245
246
  'The latest user message corrects a prior assistant claim about tool or filesystem state. Treat user correction and tool_result messages as authoritative. Ignore any recent assistant claim about files, directories, cwd, or tool execution unless it is backed by a tool_result, and retry with the appropriate tool.'
246
247
 
248
/**
 * Chooses the content sent to the provider for a system, user or assistant
 * session message.
 *
 * - Non-user messages are passed through unchanged.
 * - A user message with `providerContent` uses that value as-is.
 * - Otherwise, user text that appears to contain an `[image: ...]` marker is
 *   parsed by `userTextToContentBlocks`; all other text is returned as a string.
 *
 * @param message - Session message whose role is system, user or assistant.
 * @returns The string or block array to use as the provider message content.
 */
function resolveUserContent(
  message: Extract<SessionMessage, { role: 'system' | 'user' | 'assistant' }>,
): Message['content'] {
  if (message.role !== 'user') return message.content
  if (message.providerContent) return message.providerContent
  // The marker pattern in ../utils/images.js is case-insensitive, so this cheap
  // prefilter must be too; otherwise "[Image: a.png]" is never parsed.
  if (message.content.toLowerCase().includes('[image:')) {
    return userTextToContentBlocks(message.content)
  }
  return message.content
}
258
+
247
259
  export function sessionMessagesToProviderMessages(
248
260
  messages: SessionMessage[],
249
261
  options: ProviderMessageProjectionOptions = {},
@@ -255,7 +267,7 @@ export function sessionMessagesToProviderMessages(
255
267
  for (const [index, message] of messages.entries()) {
256
268
  if (message.role === 'system' || message.role === 'user' || message.role === 'assistant') {
257
269
  if (message.role === 'assistant' && invalidatedAssistantMessages.has(index)) continue
258
- out.push({ role: message.role, content: message.content })
270
+ out.push({ role: message.role, content: resolveUserContent(message) })
259
271
  continue
260
272
  }
261
273
  if (message.role === 'tool_use') {
@@ -295,8 +295,20 @@ export const Spinner: React.FC<SpinnerProps> = ({
295
295
  function formatElapsedSeconds(milliseconds: number): string {
296
296
  const seconds = Math.max(0, Math.floor(milliseconds / 1000))
297
297
  if (seconds < 60) return `${seconds}s`
298
- const minutes = Math.floor(seconds / 60)
299
- return `${minutes}:${(seconds % 60).toString().padStart(2, '0')}`
298
+
299
+ const hours = Math.floor(seconds / 3600)
300
+ const minutes = Math.floor((seconds % 3600) / 60)
301
+ const remainingSeconds = seconds % 60
302
+
303
+ if (hours > 0) {
304
+ return remainingSeconds > 0
305
+ ? `${hours}h ${minutes}min ${remainingSeconds}s`
306
+ : `${hours}h ${minutes}min`
307
+ }
308
+
309
+ return remainingSeconds > 0
310
+ ? `${minutes}min ${remainingSeconds}s`
311
+ : `${minutes}min`
300
312
  }
301
313
 
302
314
  function restoreSpinnerTerms(value: string): string {
package/src/ui/theme.ts CHANGED
@@ -11,6 +11,8 @@ export const theme = {
11
11
  accentBlue: '#e8eefd',
12
12
  accentWhite: '#f5f8ff',
13
13
  accentError: '#d99898',
14
+ modePlan: '#f0c7a8',
15
+ modeAcceptEdits: '#c7b6f2',
14
16
  diffAdded: '#8fd49d',
15
17
  diffRemoved: '#d99898',
16
18
  diffAddedBackground: '#16351f',
@@ -0,0 +1,140 @@
1
+ import fs from 'node:fs/promises'
2
+ import path from 'node:path'
3
+ import type { ImageBlock, Message, MessageContentBlock } from '../providers/contracts.js'
4
+
5
+ const IMAGE_MARKER_RE = /\[image:\s*([^\]]+?)\]/gi
6
+ const PLACEHOLDER_RE = /^([<{[].*[>}\]]|#\d+)$/
7
+
8
/**
 * Error raised when an image block cannot be turned into loadable image data.
 *
 * Callers can test for it with `instanceof ImageLoadError` to tell image
 * loading failures apart from other errors.
 */
export class ImageLoadError extends Error {
  /** The (trimmed) path that was being loaded; may be empty. */
  readonly imagePath: string

  /**
   * @param imagePath - Path of the image that failed to load.
   * @param message - Human-readable description of the failure.
   */
  constructor(imagePath: string, message: string) {
    super(message)
    this.imagePath = imagePath
    this.name = 'ImageLoadError'
  }
}
16
+
17
/**
 * Replaces each `[image: <path>]` marker with a numbered reference
 * (`[Image #1]`, `[Image #2]`, ...), counting in order of appearance.
 *
 * Markers whose captured value is empty after trimming, or matches the
 * placeholder pattern, are left untouched and do not consume a number.
 *
 * @param text - Text that may contain image markers.
 * @returns The text with real image markers collapsed to numbered references.
 */
export function collapseImagePathsToRefs(text: string): string {
  let refsIssued = 0

  const toRef = (marker: string, captured: string): string => {
    const candidate = captured.trim()
    const isRealPath = candidate !== '' && !PLACEHOLDER_RE.test(candidate)
    if (!isRealPath) return marker
    refsIssued += 1
    return `[Image #${refsIssued}]`
  }

  return text.replace(IMAGE_MARKER_RE, toRef)
}
26
+
27
/**
 * Reports whether a provider/model combination is treated as image-capable.
 *
 * - `anthropic`, `gemini`, `openai`: decided from the lower-cased model id
 *   using per-provider family patterns (OpenAI ids containing `gpt-3.5` are
 *   always rejected).
 * - `llamacpp`: decided only by whether `extra.mmprojPath` is set.
 * - Any other provider: `false`.
 *
 * @param provider - Provider identifier.
 * @param model - Model identifier (matched case-insensitively).
 * @param extra - Optional local-model details; only `mmprojPath` is read.
 */
export function modelSupportsImages(
  provider: string,
  model: string,
  extra?: { mmprojPath?: string },
): boolean {
  const lowered = model.toLowerCase()

  if (provider === 'anthropic') {
    return /claude-3|claude-sonnet-4|claude-opus-4|claude-haiku-4/.test(lowered)
  }
  if (provider === 'gemini') {
    return /gemini-1\.5|gemini-2\.0|gemini-2\.5/.test(lowered)
  }
  if (provider === 'openai') {
    return !lowered.includes('gpt-3.5')
      && /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(lowered)
  }
  if (provider === 'llamacpp') {
    return Boolean(extra?.mmprojPath)
  }
  return false
}
47
+
48
/**
 * Reports whether any message carries at least one image block.
 *
 * Messages whose content is not an array (plain strings) never count.
 *
 * @param messages - Messages to scan.
 * @returns `true` as soon as one block with `type === 'image'` is found.
 */
export function hasImageBlocks(messages: Message[]): boolean {
  const carriesImage = (content: Message['content']): boolean => {
    if (!Array.isArray(content)) return false
    return content.some(block => block.type === 'image')
  }
  return messages.some(message => carriesImage(message.content))
}
51
+
52
/**
 * Parses user text into provider content.
 *
 * @param text - Raw user text, possibly containing `[image: ...]` markers.
 * @returns The plain string when parsing yields exactly one text block;
 *   otherwise the parsed block array (which may be empty).
 */
export function userTextToContentBlocks(text: string): string | MessageContentBlock[] {
  const parsed = parseImageMarkers(text)
  const [only] = parsed
  if (parsed.length === 1 && only?.type === 'text') return only.text
  return parsed
}
56
+
57
/**
 * Splits text into an ordered list of text and image blocks.
 *
 * Each `[image: <path>]` marker with a real-looking path becomes an image
 * block. Markers whose captured value is empty or matches the placeholder
 * pattern are kept verbatim as text. The text between markers is preserved,
 * and neighbouring text blocks are merged before returning.
 *
 * @param text - Text to parse.
 * @returns The blocks in source order; an empty array for empty text.
 */
export function parseImageMarkers(text: string): MessageContentBlock[] {
  const blocks: MessageContentBlock[] = []
  let cursor = 0

  // IMAGE_MARKER_RE is global: each exec() resumes from its lastIndex, and the
  // final null result resets lastIndex to 0 for the next caller.
  for (let hit = IMAGE_MARKER_RE.exec(text); hit !== null; hit = IMAGE_MARKER_RE.exec(text)) {
    const marker = hit[0]
    const candidate = (hit[1] ?? '').trim()

    // Text between the previous marker (or the start) and this one.
    if (hit.index > cursor) {
      blocks.push({ type: 'text', text: text.slice(cursor, hit.index) })
    }

    const isRealPath = candidate !== '' && !PLACEHOLDER_RE.test(candidate)
    if (isRealPath) {
      blocks.push({ type: 'image', path: candidate })
    } else {
      blocks.push({ type: 'text', text: marker })
    }

    cursor = hit.index + marker.length
  }

  // Trailing text after the last marker (or the whole text when none matched).
  if (cursor < text.length) {
    blocks.push({ type: 'text', text: text.slice(cursor) })
  }

  if (blocks.length === 0) return text ? [{ type: 'text', text }] : []
  return mergeAdjacentTextBlocks(blocks)
}
85
+
86
/**
 * Ensures an image block carries data a provider can send.
 *
 * Blocks that already have both `dataBase64` and `mimeType`, or that have a
 * `url`, are returned unchanged. Otherwise the file at `block.path` is read
 * and a copy of the block is returned with the trimmed path, a MIME type (the
 * block's own, else one derived from the file extension) and the
 * base64-encoded file contents.
 *
 * @param block - Image block to resolve.
 * @returns The original block, or a copy populated from disk.
 * @throws ImageLoadError when the path is empty, looks like a placeholder,
 *   does not exist, or cannot be read.
 */
export async function loadImageBlock(block: ImageBlock): Promise<ImageBlock> {
  const alreadyInline = Boolean(block.dataBase64 && block.mimeType)
  if (alreadyInline || block.url) return block

  const filePath = block.path?.trim() ?? ''
  if (filePath === '') {
    throw new ImageLoadError(filePath, 'image path is empty')
  }
  if (PLACEHOLDER_RE.test(filePath)) {
    throw new ImageLoadError(filePath, `image path looks like a placeholder, not a real file: ${filePath}`)
  }

  let bytes: Buffer
  try {
    bytes = await fs.readFile(filePath)
  } catch (cause: unknown) {
    const { code, message } = cause as NodeJS.ErrnoException
    // A missing file gets its own message; everything else reports the OS error text.
    const reason = code === 'ENOENT'
      ? `image file not found: ${filePath}`
      : `could not read image at ${filePath}: ${message}`
    throw new ImageLoadError(filePath, reason)
  }

  return {
    ...block,
    path: filePath,
    mimeType: block.mimeType ?? mimeTypeForPath(filePath),
    dataBase64: bytes.toString('base64'),
  }
}
112
+
113
/**
 * Builds the textual marker for an image, using only the file name.
 *
 * @param pathValue - Path to the image file.
 * @returns `[image: <basename>]`.
 */
export function imagePlaceholder(pathValue: string): string {
  const fileName = path.basename(pathValue)
  return `[image: ${fileName}]`
}
116
+
117
/**
 * Collapses runs of consecutive text blocks into a single text block.
 *
 * Non-text blocks are passed through by reference and act as separators.
 * Text blocks are copied before any text is appended to them, so the objects
 * in `blocks` are never modified.
 *
 * @param blocks - Blocks in source order.
 * @returns A new array with neighbouring text blocks concatenated.
 */
function mergeAdjacentTextBlocks(blocks: MessageContentBlock[]): MessageContentBlock[] {
  const merged: MessageContentBlock[] = []
  for (const block of blocks) {
    if (block.type !== 'text') {
      merged.push(block)
      continue
    }
    const last = merged[merged.length - 1]
    if (last?.type === 'text') {
      // `last` is always a copy created below, so appending is safe.
      last.text += block.text
    } else {
      merged.push({ ...block })
    }
  }
  return merged
}
129
+
130
/** MIME types for the recognised image file extensions (lower-case, with dot). */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.webp', 'image/webp'],
  ['.gif', 'image/gif'],
  ['.bmp', 'image/bmp'],
])

/**
 * Derives a MIME type from a file path's extension (case-insensitive).
 *
 * @param filePath - Path whose extension is inspected.
 * @returns The matching image MIME type, or `application/octet-stream` when
 *   the extension is missing or not recognised.
 */
function mimeTypeForPath(filePath: string): string {
  const extension = path.extname(filePath).toLowerCase()
  return MIME_TYPE_BY_EXTENSION.get(extension) ?? 'application/octet-stream'
}
@@ -1,3 +1,4 @@
1
+ import path from 'node:path'
1
2
  import type { Message, MessageContentBlock } from '../providers/contracts.js'
2
3
 
3
4
  export function systemMessage(content: string): Message {
@@ -20,6 +21,7 @@ export function blocksToText(blocks: MessageContentBlock[]): string {
20
21
  return blocks
21
22
  .map(block => {
22
23
  if (block.type === 'text') return block.text
24
+ if (block.type === 'image') return `[image attached: ${path.basename(block.path)}]`
23
25
  if (block.type === 'tool_use') return `[tool use: ${block.name}]`
24
26
  return block.isError
25
27
  ? `[tool error: ${block.content}]`