npm - ethagent - Versions diffs - 2.3.0 → 2.4.0 - Mend

ethagent 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +11 -0
package/package.json +1 -1
package/src/app/FirstRun.tsx +2 -0
package/src/chat/ChatBottomPane.tsx +9 -0
package/src/chat/ChatScreen.tsx +10 -4
package/src/chat/chatSessionState.ts +4 -1
package/src/chat/chatTurnOrchestrator.ts +6 -2
package/src/chat/input/ChatInput.tsx +25 -2
package/src/chat/input/imageRefs.ts +30 -0
package/src/chat/views/ResumeView.tsx +16 -7
package/src/models/ModelPicker.tsx +138 -6
package/src/models/huggingface.ts +180 -2
package/src/models/llamacpp.ts +110 -15
package/src/models/llamacppPreflight.ts +30 -11
package/src/models/modelPickerOptions.ts +14 -1
package/src/providers/anthropic.ts +36 -5
package/src/providers/contracts.ts +9 -1
package/src/providers/gemini.ts +29 -3
package/src/providers/openai-chat.ts +81 -2
package/src/providers/openai-responses-format.ts +29 -8
package/src/providers/openai-responses.ts +22 -7
package/src/providers/registry.ts +1 -0
package/src/storage/config.ts +1 -0
package/src/storage/sessions.ts +14 -2
package/src/ui/Spinner.tsx +14 -2
package/src/ui/theme.ts +2 -0
package/src/utils/images.ts +140 -0
package/src/utils/messages.ts +2 -0

package/src/providers/gemini.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { ProviderError } from './contracts.js'
 import { providerErrorFromResponse } from './errors.js'
 import { fetchWithRetryStreamEvents } from './retry.js'
 import { iterSseFrames } from './sse.js'
+import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
 export type GeminiToolDefinition = {
   name: string
@@ -41,6 +42,7 @@ type GeminiChunk = {
 type GeminiContentPart =
   | { text: string }
+  | { inlineData: { mimeType: string; data: string } }
   | { functionCall: { name: string; args: Record<string, unknown> } }
   | { functionResponse: { name: string; response: Record<string, unknown> } }
@@ -92,8 +94,21 @@ export class GeminiProvider implements Provider {
       yield { type: 'error', message: error.message }
       return
     }
+    if (hasImageBlocks(messages) && !supportsGeminiImages(this.model)) {
+      yield { type: 'error', message: `image input is not enabled for ${this.model}` }
+      return
+    }
-    const payload = buildGeminiPayload(messages, this.tools, options)
+    let payload: GeminiPayload
+    try {
+      payload = await buildGeminiPayload(messages, this.tools, options)
+    } catch (err: unknown) {
+      if (err instanceof ImageLoadError) {
+        yield { type: 'error', message: err.message }
+        return
+      }
+      throw err
+    }
     const modelName = this.model.replace(/^models\//, '')
     const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelName)}:streamGenerateContent?alt=sse`
@@ -181,11 +196,11 @@ export class GeminiProvider implements Provider {
   }
 }
-export function buildGeminiPayload(
+export async function buildGeminiPayload(
   messages: Message[],
   tools: GeminiToolDefinition[] = [],
   options: ProviderCompleteOptions = {},
-): GeminiPayload {
+): Promise<GeminiPayload> {
   const systemParts: string[] = []
   const contents: GeminiContent[] = []
   const toolUseNamesById = new Map<string, string>()
@@ -222,6 +237,10 @@ export function buildGeminiPayload(
     for (const block of blocks) {
       if (block.type === 'text') {
         parts.push({ text: block.text })
+      } else if (block.type === 'image') {
+        const loaded = await loadImageBlock(block)
+        if (!loaded.dataBase64 || !loaded.mimeType) throw new Error(`could not load image: ${block.path}`)
+        parts.push({ inlineData: { mimeType: loaded.mimeType, data: loaded.dataBase64 } })
       } else if (block.type === 'tool_result') {
         const name = toolUseNamesById.get(block.toolUseId) ?? 'unknown'
         const response: Record<string, unknown> = block.isError
@@ -258,6 +277,13 @@ function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
   })
 }
/**
 * Reports whether a Gemini model id belongs to a family this provider treats
 * as image-capable.
 *
 * The check is a case-insensitive substring match, so prefixed ids such as
 * `models/gemini-1.5-pro` are accepted as well.
 *
 * @param model - Model id as configured for the provider.
 * @returns `true` when the id contains one of the known vision family names.
 */
export function supportsGeminiImages(model: string): boolean {
  const lowered = model.toLowerCase()
  const visionFamilies = ['gemini-1.5', 'gemini-2.0', 'gemini-2.5']
  return visionFamilies.some(family => lowered.includes(family))
}
 function normalizeFinishReason(reason: string, sawToolCall: boolean): DoneStopReason {
   if (sawToolCall) return 'tool_use'
   switch (reason) {

package/src/providers/openai-chat.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { providerErrorFromResponse } from './errors.js'
 import { fetchWithRetryStreamEvents } from './retry.js'
 import { iterSseFrames } from './sse.js'
 import { messageTextContent } from '../utils/messages.js'
+import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
 import { providerDisplayName } from '../models/providerDisplay.js'
 export type OpenAIToolDefinition = {
@@ -28,6 +29,7 @@ type Options = {
   loadApiKey?: () => Promise<string | null>
   tools?: OpenAIToolDefinition[]
   maxRetries?: number
+  hasVisionProjector?: boolean
 }
 type ChatChunk = {
@@ -76,6 +78,7 @@ export class OpenAIChatProvider implements Provider {
   private readonly loadApiKey?: () => Promise<string | null>
   private readonly tools: OpenAIToolDefinition[]
   private readonly maxRetries?: number
+  private readonly hasVisionProjector: boolean
   constructor(opts: Options) {
     this.id = opts.id
@@ -86,6 +89,7 @@ export class OpenAIChatProvider implements Provider {
     this.tools = opts.tools ?? []
     this.maxRetries = opts.maxRetries
     this.supportsTools = this.tools.length > 0
+    this.hasVisionProjector = opts.hasVisionProjector ?? false
   }
   async *complete(
@@ -99,6 +103,19 @@ export class OpenAIChatProvider implements Provider {
       yield { type: 'error', message: error.message }
       return
     }
+    if (hasImageBlocks(messages)) {
+      if (this.id === 'llamacpp' && !this.hasVisionProjector) {
+        const hint = localModelNameHintsVision(this.model)
+          ? '; open alt+p and run "Add Vision Encoder" on this model to enable image input'
+          : ''
+        yield { type: 'error', message: `image input is not enabled for local model "${this.model}" (no vision projector loaded)${hint}` }
+        return
+      }
+      if (this.id === 'openai' && !supportsOpenAIImages(this.model)) {
+        yield { type: 'error', message: `image input is not enabled for ${this.model}` }
+        return
+      }
+    }
     const headers: Record<string, string> = {
       'Content-Type': 'application/json',
@@ -106,6 +123,17 @@ export class OpenAIChatProvider implements Provider {
     }
     if (apiKey) headers.Authorization = `Bearer ${apiKey}`
+    let wireMessages: Array<Record<string, unknown>>
+    try {
+      wireMessages = await toWireMessages(messages)
+    } catch (err: unknown) {
+      if (err instanceof ImageLoadError) {
+        yield { type: 'error', message: err.message }
+        return
+      }
+      throw err
+    }
     let response: Response
     try {
       response = yield* fetchWithRetryStreamEvents(`${this.baseUrl}/chat/completions`, {
@@ -113,7 +141,7 @@ export class OpenAIChatProvider implements Provider {
         headers,
         body: JSON.stringify({
           model: this.model,
-          messages: toWireMessages(messages),
+          messages: wireMessages,
           tools: this.tools.length > 0 ? this.tools : undefined,
           tool_choice: this.tools.length > 0 ? 'auto' : undefined,
           stream: true,
@@ -221,7 +249,7 @@ export class OpenAIChatProvider implements Provider {
 }
-export function toWireMessages(messages: Message[]): Array<Record<string, unknown>> {
+export async function toWireMessages(messages: Message[]): Promise<Array<Record<string, unknown>>> {
   const out: Array<Record<string, unknown>> = []
   for (const message of messages) {
@@ -230,6 +258,26 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
       continue
     }
+    if (message.role === 'user') {
+      const toolResults = message.content.filter(isToolResultBlock)
+      if (toolResults.length > 0) {
+        for (const block of toolResults) {
+          out.push({
+            role: 'tool',
+            tool_call_id: block.toolUseId,
+            content: block.content,
+          })
+        }
+        const nonToolBlocks = message.content.filter(block => block.type !== 'tool_result')
+        if (nonToolBlocks.length > 0) {
+          out.push({ role: 'user', content: await toOpenAIUserContent(nonToolBlocks) })
+        }
+        continue
+      }
+      out.push({ role: 'user', content: await toOpenAIUserContent(message.content) })
+      continue
+    }
     if (message.role === 'assistant') {
       const textParts = message.content.filter(isTextBlock).map(block => block.text)
       const toolCalls = message.content.filter(isToolUseBlock).map(block => ({
@@ -266,6 +314,37 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
   return normalizeSystemMessages(out)
 }
/**
 * Converts user content blocks into Chat Completions content parts.
 *
 * Non-empty text blocks become `text` parts. Image blocks are resolved through
 * `loadImageBlock` and become `image_url` parts, using the remote URL when the
 * loaded block has one and a base64 `data:` URL otherwise. Any other block
 * type is ignored.
 *
 * @param blocks - Content blocks of a single user message.
 * @returns The content parts, or a single empty text part when nothing was produced.
 */
async function toOpenAIUserContent(blocks: MessageContentBlock[]): Promise<Array<Record<string, unknown>>> {
  const wireParts: Array<Record<string, unknown>> = []

  for (const item of blocks) {
    switch (item.type) {
      case 'text': {
        if (item.text.length > 0) wireParts.push({ type: 'text', text: item.text })
        break
      }
      case 'image': {
        const image = await loadImageBlock(item)
        let url: string | undefined
        if (image.url) {
          url = image.url
        } else if (image.dataBase64 && image.mimeType) {
          url = `data:${image.mimeType};base64,${image.dataBase64}`
        }
        if (url) wireParts.push({ type: 'image_url', image_url: { url } })
        break
      }
      default:
        break
    }
  }

  if (wireParts.length === 0) return [{ type: 'text', text: '' }]
  return wireParts
}
/**
 * Reports whether an OpenAI model id is treated as accepting image input.
 *
 * Matching is case-insensitive and substring-based. Text-only models are
 * rejected first: the `gpt-3.5` family, and the reasoning models `o1-mini`,
 * `o1-preview` and `o3-mini`, which would otherwise be caught by the broad
 * `o1` / `o3` alternatives in the allow-list even though OpenAI documents
 * them as text-only.
 *
 * @param model - Model id as configured for the provider.
 * @returns `true` when the id matches a known vision-capable family.
 */
export function supportsOpenAIImages(model: string): boolean {
  const normalized = model.toLowerCase()
  // Deny-list runs first so that text-only variants never reach the allow-list.
  if (/gpt-3\.5|o1-mini|o1-preview|o3-mini/.test(normalized)) return false
  return /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(normalized)
}
/**
 * Guesses from a local model's name whether it belongs to a known
 * vision-language family.
 *
 * This is only a naming heuristic; it does not inspect the model file. The
 * Qwen-VL alternative accepts any version number between `qwen` and `vl`
 * (`qwen-vl`, `qwen2-vl`, `qwen2.5-vl`, `qwen3-vl`, ...), so newer releases are
 * recognised without listing each version separately.
 *
 * @param model - Local model name or file name.
 * @returns `true` when the name matches a known vision model family.
 */
export function localModelNameHintsVision(model: string): boolean {
  const normalized = model.toLowerCase()
  return /llava|bakllava|qwen[\d.]*[-_.]?vl|minicpm-?v|llama-3\.2.*vision|mllama|cogvlm|internvl|moondream|pixtral|phi-?3[\.-]?vision|phi-?3\.5[\.-]?vision|smolvlm/.test(normalized)
}
 function normalizeSystemMessages(messages: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
   const systemContents: string[] = []
   const nonSystem: Array<Record<string, unknown>> = []

package/src/providers/openai-responses-format.ts CHANGED Viewed

@@ -1,9 +1,11 @@
 import type { Message, MessageContentBlock } from './contracts.js'
 import { messageTextContent } from '../utils/messages.js'
 import type { OpenAIToolDefinition } from './openai-chat.js'
+import { loadImageBlock } from '../utils/images.js'
 export type ResponsesInputContent =
   | { type: 'input_text'; text: string }
+  | { type: 'input_image'; image_url: string }
   | { type: 'output_text'; text: string }
 export type ResponsesInputItem =
@@ -30,13 +32,13 @@ export type ResponsesRequestBody = {
   max_output_tokens?: number
 }
-export function buildResponsesBody(args: {
+export async function buildResponsesBody(args: {
   model: string
   messages: Message[]
   tools: OpenAIToolDefinition[]
   maxOutputTokens?: number
-}): ResponsesRequestBody {
-  const { instructions, items } = splitMessages(args.messages)
+}): Promise<ResponsesRequestBody> {
+  const { instructions, items } = await splitMessages(args.messages)
   const body: ResponsesRequestBody = {
     model: args.model,
     input: items,
@@ -60,10 +62,10 @@ export function buildResponsesBody(args: {
   return body
 }
-function splitMessages(messages: Message[]): {
+async function splitMessages(messages: Message[]): Promise<{
   instructions?: string
   items: ResponsesInputItem[]
-} {
+}> {
   const instructions: string[] = []
   const items: ResponsesInputItem[] = []
@@ -100,12 +102,12 @@ function splitMessages(messages: Message[]): {
         }
         continue
       }
-      const text = blocks.filter(isTextBlock).map(block => block.text).join('')
-      if (text) {
+      const content = await toOpenAIResponsesUserContent(blocks)
+      if (content.length > 0) {
         items.push({
           type: 'message',
           role: 'user',
-          content: [{ type: 'input_text', text }],
+          content,
         })
       }
       continue
@@ -136,6 +138,25 @@ function splitMessages(messages: Message[]): {
   }
 }
/**
 * Converts user content blocks into Responses API input content.
 *
 * Non-empty text blocks become `input_text` entries. Image blocks are resolved
 * through `loadImageBlock` and become `input_image` entries, using the remote
 * URL when present and a base64 `data:` URL otherwise. Other block types are
 * ignored.
 *
 * @param blocks - Content blocks of a single user message.
 * @returns The converted entries; may be empty.
 */
async function toOpenAIResponsesUserContent(blocks: MessageContentBlock[]): Promise<ResponsesInputContent[]> {
  const parts: ResponsesInputContent[] = []

  for (const item of blocks) {
    if (item.type === 'text' && item.text) {
      parts.push({ type: 'input_text', text: item.text })
    } else if (item.type === 'image') {
      const image = await loadImageBlock(item)
      const inlineUrl = image.dataBase64 && image.mimeType
        ? `data:${image.mimeType};base64,${image.dataBase64}`
        : undefined
      // A remote URL takes precedence over inline data.
      const imageUrl = image.url || inlineUrl
      if (imageUrl) parts.push({ type: 'input_image', image_url: imageUrl })
    }
  }

  return parts
}
 function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
   if (typeof content === 'string') {
     return content ? [{ type: 'text', text: content }] : []

package/src/providers/openai-responses.ts CHANGED Viewed

@@ -5,7 +5,8 @@ import { providerErrorFromResponse } from './errors.js'
 import { fetchWithRetryStreamEvents } from './retry.js'
 import { iterSseEvents } from './sse.js'
 import { buildResponsesBody } from './openai-responses-format.js'
-import type { OpenAIToolDefinition } from './openai-chat.js'
+import { supportsOpenAIImages, type OpenAIToolDefinition } from './openai-chat.js'
+import { hasImageBlocks, ImageLoadError } from '../utils/images.js'
 const READ_TIMEOUT_MS = 45_000
@@ -64,15 +65,29 @@ export class OpenAIResponsesProvider implements Provider {
       return
     }
+    if (hasImageBlocks(messages) && !supportsOpenAIImages(this.model)) {
+      yield { type: 'error', message: `image input is not enabled for ${this.model}` }
+      return
+    }
     let attempt = 0
     while (true) {
       attempt += 1
-      const body = JSON.stringify(buildResponsesBody({
-        model: this.model,
-        messages,
-        tools: this.tools,
-        maxOutputTokens: options.maxTokens,
-      }))
+      let body: string
+      try {
+        body = JSON.stringify(await buildResponsesBody({
+          model: this.model,
+          messages,
+          tools: this.tools,
+          maxOutputTokens: options.maxTokens,
+        }))
+      } catch (err: unknown) {
+        if (err instanceof ImageLoadError) {
+          yield { type: 'error', message: err.message }
+          return
+        }
+        throw err
+      }
       let response: Response
       try {

package/src/providers/registry.ts CHANGED Viewed

@@ -34,6 +34,7 @@ export function createProvider(config: EthagentConfig, options: { mode?: Session
         baseUrl: localProviderBaseUrlFor('llamacpp', config.baseUrl),
         apiKey: 'llamacpp',
         tools: openAITools(mode, toolContext),
+        hasVisionProjector: Boolean(config.localMmprojPath),
       })
     case 'openai':
       return createOpenAIProvider(config, openAITools(mode, toolContext))

package/src/storage/config.ts CHANGED Viewed

@@ -80,6 +80,7 @@ const ConfigSchema = z.object({
   provider: z.enum(PROVIDERS),
   model: z.string().min(1),
   baseUrl: z.string().url().optional(),
+  localMmprojPath: z.string().min(1).optional(),
   firstRunAt: z.string(),
   identity: IdentitySchema.optional(),
   erc8004: z.object({

package/src/storage/sessions.ts CHANGED Viewed

@@ -11,9 +11,10 @@ import {
   isUserCorrectionOfToolState,
   looksLikeToolStateClaim,
 } from '../runtime/toolClaimGuards.js'
+import { userTextToContentBlocks } from '../utils/images.js'
 export type SessionMessage =
-  | { version?: 2; role: 'user'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
+  | { version?: 2; role: 'user'; content: string; providerContent?: Message['content']; createdAt: string; turnId?: string; synthetic?: boolean }
   | { version?: 2; role: 'assistant'; content: string; createdAt: string; model?: string; usage?: { in?: number; out?: number }; turnId?: string; synthetic?: boolean }
   | { version?: 2; role: 'system'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
   | { version: 2; role: 'tool_use'; toolUseId: string; name: string; input: Record<string, unknown>; createdAt: string; turnId?: string }
@@ -244,6 +245,17 @@ export type ProviderMessageProjectionOptions = {
 export const TOOL_CORRECTION_CONTEXT_MESSAGE =
   'The latest user message corrects a prior assistant claim about tool or filesystem state. Treat user correction and tool_result messages as authoritative. Ignore any recent assistant claim about files, directories, cwd, or tool execution unless it is backed by a tool_result, and retry with the appropriate tool.'
/**
 * Picks the content to send to the provider for a stored chat message.
 *
 * System and assistant messages pass through unchanged. For user messages the
 * stored `providerContent` wins when present; otherwise text containing an
 * `[image:` marker is converted with `userTextToContentBlocks`, and plain text
 * is returned as-is.
 *
 * @param message - A stored system, user, or assistant message.
 * @returns The content for the provider message.
 */
function resolveUserContent(
  message: Extract<SessionMessage, { role: 'system' | 'user' | 'assistant' }>,
): Message['content'] {
  if (message.role === 'user') {
    if (message.providerContent) return message.providerContent
    const hasImageMarker = message.content.includes('[image:')
    if (hasImageMarker) return userTextToContentBlocks(message.content)
  }
  return message.content
}
 export function sessionMessagesToProviderMessages(
   messages: SessionMessage[],
   options: ProviderMessageProjectionOptions = {},
@@ -255,7 +267,7 @@ export function sessionMessagesToProviderMessages(
   for (const [index, message] of messages.entries()) {
     if (message.role === 'system' || message.role === 'user' || message.role === 'assistant') {
       if (message.role === 'assistant' && invalidatedAssistantMessages.has(index)) continue
-      out.push({ role: message.role, content: message.content })
+      out.push({ role: message.role, content: resolveUserContent(message) })
       continue
     }
     if (message.role === 'tool_use') {

package/src/ui/Spinner.tsx CHANGED Viewed

@@ -295,8 +295,20 @@ export const Spinner: React.FC<SpinnerProps> = ({
 function formatElapsedSeconds(milliseconds: number): string {
   const seconds = Math.max(0, Math.floor(milliseconds / 1000))
   if (seconds < 60) return `${seconds}s`
-  const minutes = Math.floor(seconds / 60)
-  return `${minutes}:${(seconds % 60).toString().padStart(2, '0')}`
+  const hours = Math.floor(seconds / 3600)
+  const minutes = Math.floor((seconds % 3600) / 60)
+  const remainingSeconds = seconds % 60
+  if (hours > 0) {
+    return remainingSeconds > 0
+      ? `${hours}h ${minutes}min ${remainingSeconds}s`
+      : `${hours}h ${minutes}min`
+  }
+  return remainingSeconds > 0
+    ? `${minutes}min ${remainingSeconds}s`
+    : `${minutes}min`
 }
 function restoreSpinnerTerms(value: string): string {

package/src/ui/theme.ts CHANGED Viewed

@@ -11,6 +11,8 @@ export const theme = {
   accentBlue: '#e8eefd',
   accentWhite: '#f5f8ff',
   accentError: '#d99898',
+  modePlan: '#f0c7a8',
+  modeAcceptEdits: '#c7b6f2',
   diffAdded: '#8fd49d',
   diffRemoved: '#d99898',
   diffAddedBackground: '#16351f',

package/src/utils/images.ts ADDED Viewed

@@ -0,0 +1,140 @@
+import fs from 'node:fs/promises'
+import path from 'node:path'
+import type { ImageBlock, Message, MessageContentBlock } from '../providers/contracts.js'
/**
 * Matches an inline `[image: <path>]` marker (case-insensitive) and captures
 * the text between the colon and the closing bracket.
 *
 * The pattern carries the global flag, so it keeps `lastIndex` state between
 * `exec` calls.
 */
const IMAGE_MARKER_RE = /\[image:\s*([^\]]+?)\]/gi

/**
 * Matches marker contents that are not real file paths: text wrapped in
 * `<...>`, `{...}` or `[...]`-style delimiters, or a `#<digits>` reference.
 */
const PLACEHOLDER_RE = /^([<{[].*[>}\]]|#\d+)$/
/**
 * Error thrown when an image attachment cannot be turned into inline data.
 *
 * Carries the offending path in `imagePath` so callers can report it alongside
 * the human-readable message.
 */
export class ImageLoadError extends Error {
  /** Path of the image that could not be loaded (may be empty). */
  readonly imagePath: string

  /**
   * @param imagePath - Path of the image that failed to load.
   * @param message - Human-readable description of the failure.
   */
  constructor(imagePath: string, message: string) {
    super(message)
    // Set explicitly so the name does not fall back to the base class's "Error".
    this.name = 'ImageLoadError'
    this.imagePath = imagePath
  }
}
/**
 * Replaces each `[image: <path>]` marker that names a real path with a short
 * numbered reference (`[Image #1]`, `[Image #2]`, ...).
 *
 * Markers whose contents are blank or look like a placeholder are left
 * untouched and do not consume a number.
 *
 * @param text - Text that may contain image markers.
 * @returns The text with real-path markers collapsed to numbered references.
 */
export function collapseImagePathsToRefs(text: string): string {
  let nextRef = 1

  const toRef = (marker: string, captured: string): string => {
    const candidate = captured.trim()
    const isRealPath = candidate.length > 0 && !PLACEHOLDER_RE.test(candidate)
    if (!isRealPath) return marker
    return `[Image #${nextRef++}]`
  }

  return text.replace(IMAGE_MARKER_RE, toRef)
}
/**
 * Reports whether a provider/model pair is treated as accepting image input.
 *
 * Model matching is case-insensitive and substring-based:
 * - `anthropic`: Claude 3 and Claude 4 family names.
 * - `gemini`: Gemini 1.5, 2.0 and 2.5 family names.
 * - `openai`: GPT-4o / 4.1 / 4-turbo / 4-vision / 5, ChatGPT-4 and the o-series,
 *   excluding the text-only `gpt-3.5`, `o1-mini`, `o1-preview` and `o3-mini`
 *   models, which the broad `o1` / `o3` alternatives would otherwise accept.
 * - `llamacpp`: only when a multimodal projector path is supplied.
 * - any other provider: never.
 *
 * @param provider - Provider id.
 * @param model - Model id as configured for the provider.
 * @param extra - Optional provider-specific data; `mmprojPath` enables llama.cpp image input.
 * @returns `true` when image input should be offered for this model.
 */
export function modelSupportsImages(
  provider: string,
  model: string,
  extra?: { mmprojPath?: string },
): boolean {
  const normalized = model.toLowerCase()
  switch (provider) {
    case 'anthropic':
      return /claude-3|claude-sonnet-4|claude-opus-4|claude-haiku-4/.test(normalized)
    case 'gemini':
      return /gemini-1\.5|gemini-2\.0|gemini-2\.5/.test(normalized)
    case 'openai':
      // Deny-list runs first so that text-only variants never reach the allow-list.
      if (/gpt-3\.5|o1-mini|o1-preview|o3-mini/.test(normalized)) return false
      return /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(normalized)
    case 'llamacpp':
      return Boolean(extra?.mmprojPath)
    default:
      return false
  }
}
/**
 * Reports whether any message carries at least one image content block.
 *
 * Messages whose content is not an array (plain strings) never count.
 *
 * @param messages - Messages to inspect.
 * @returns `true` as soon as an image block is found.
 */
export function hasImageBlocks(messages: Message[]): boolean {
  for (const message of messages) {
    const { content } = message
    if (!Array.isArray(content)) continue
    for (const block of content) {
      if (block.type === 'image') return true
    }
  }
  return false
}
/**
 * Converts raw user text into provider message content.
 *
 * The text is parsed with `parseImageMarkers`. When the result is exactly one
 * text block, its text is returned as a plain string; otherwise the block
 * array is returned.
 *
 * @param text - Raw user input, possibly containing image markers.
 * @returns A plain string or an array of content blocks.
 */
export function userTextToContentBlocks(text: string): string | MessageContentBlock[] {
  const parsed = parseImageMarkers(text)
  const [only] = parsed
  if (parsed.length === 1 && only?.type === 'text') return only.text
  return parsed
}
/**
 * Splits text into text and image content blocks at `[image: <path>]` markers.
 *
 * Markers naming a real path become image blocks. Markers whose contents are
 * blank or look like a placeholder are kept verbatim as text. Neighbouring
 * text blocks are merged before returning.
 *
 * @param text - Text to parse.
 * @returns The content blocks in source order; empty for empty input.
 */
export function parseImageMarkers(text: string): MessageContentBlock[] {
  const blocks: MessageContentBlock[] = []
  let cursor = 0

  for (let hit = IMAGE_MARKER_RE.exec(text); hit !== null; hit = IMAGE_MARKER_RE.exec(text)) {
    const marker = hit[0]
    const candidatePath = hit[1]?.trim() ?? ''

    // Text between the previous marker (or the start) and this marker.
    if (hit.index > cursor) {
      blocks.push({ type: 'text', text: text.slice(cursor, hit.index) })
    }

    const isPlaceholder = candidatePath === '' || PLACEHOLDER_RE.test(candidatePath)
    if (isPlaceholder) {
      blocks.push({ type: 'text', text: marker })
    } else {
      blocks.push({ type: 'image', path: candidatePath })
    }

    cursor = hit.index + marker.length
  }

  // Whatever follows the last marker.
  const tail = text.slice(cursor)
  if (tail) blocks.push({ type: 'text', text: tail })

  if (blocks.length === 0) return text ? [{ type: 'text', text }] : []
  return mergeAdjacentTextBlocks(blocks)
}
/**
 * Ensures an image block can be sent to a provider.
 *
 * Blocks that already carry base64 data with a MIME type, or a URL, are
 * returned unchanged. Otherwise the file at `block.path` is read and a copy of
 * the block is returned with the trimmed path, a MIME type (the block's own, or
 * one derived from the file extension) and the base64-encoded contents.
 *
 * @param block - Image block to resolve.
 * @returns The original block, or a copy with inline data filled in.
 * @throws ImageLoadError when the path is empty, looks like a placeholder, or
 *   the file cannot be read.
 */
export async function loadImageBlock(block: ImageBlock): Promise<ImageBlock> {
  const alreadyInline = Boolean(block.dataBase64 && block.mimeType)
  if (alreadyInline || block.url) return block

  const filePath = block.path?.trim() ?? ''
  if (filePath === '') {
    throw new ImageLoadError(filePath, 'image path is empty')
  }
  if (PLACEHOLDER_RE.test(filePath)) {
    throw new ImageLoadError(filePath, `image path looks like a placeholder, not a real file: ${filePath}`)
  }

  let bytes: Buffer
  try {
    bytes = await fs.readFile(filePath)
  } catch (err: unknown) {
    // A missing file gets its own message; every other failure reports the underlying reason.
    const isMissing = (err as NodeJS.ErrnoException).code === 'ENOENT'
    const reason = isMissing
      ? `image file not found: ${filePath}`
      : `could not read image at ${filePath}: ${(err as Error).message}`
    throw new ImageLoadError(filePath, reason)
  }

  return {
    ...block,
    path: filePath,
    mimeType: block.mimeType ?? mimeTypeForPath(filePath),
    dataBase64: bytes.toString('base64'),
  }
}
/**
 * Builds the `[image: <file name>]` placeholder for an image path, keeping
 * only the final path segment.
 *
 * @param pathValue - Path to the image.
 * @returns The placeholder text.
 */
export function imagePlaceholder(pathValue: string): string {
  const fileName = path.basename(pathValue)
  return `[image: ${fileName}]`
}
/**
 * Collapses runs of consecutive text blocks into a single text block.
 *
 * Non-text blocks are passed through in order. The input array and its block
 * objects are never modified: a merged run is written to a fresh object, so
 * callers can safely reuse the blocks they passed in.
 *
 * @param blocks - Content blocks in source order.
 * @returns A new array with adjacent text blocks merged.
 */
function mergeAdjacentTextBlocks(blocks: MessageContentBlock[]): MessageContentBlock[] {
  const out: MessageContentBlock[] = []
  for (const block of blocks) {
    const prev = out[out.length - 1]
    if (block.type === 'text' && prev?.type === 'text') {
      // Replace the tail with a merged copy instead of appending to the caller's object.
      out[out.length - 1] = { ...prev, text: prev.text + block.text }
      continue
    }
    out.push(block)
  }
  return out
}
/**
 * Derives a MIME type from a file path's extension (case-insensitive).
 *
 * Recognises PNG, JPEG (`.jpg` / `.jpeg`), WebP, GIF and BMP; anything else
 * maps to `application/octet-stream`.
 *
 * @param filePath - Path whose extension is inspected.
 * @returns The MIME type string.
 */
function mimeTypeForPath(filePath: string): string {
  const byExtension = new Map<string, string>([
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
    ['.bmp', 'image/bmp'],
  ])
  const extension = path.extname(filePath).toLowerCase()
  return byExtension.get(extension) ?? 'application/octet-stream'
}

package/src/utils/messages.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import path from 'node:path'
 import type { Message, MessageContentBlock } from '../providers/contracts.js'
 export function systemMessage(content: string): Message {
@@ -20,6 +21,7 @@ export function blocksToText(blocks: MessageContentBlock[]): string {
   return blocks
     .map(block => {
       if (block.type === 'text') return block.text
+      if (block.type === 'image') return `[image attached: ${path.basename(block.path)}]`
       if (block.type === 'tool_use') return `[tool use: ${block.name}]`
       return block.isError
         ? `[tool error: ${block.content}]`