ethagent 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/package.json +1 -1
- package/src/app/FirstRun.tsx +2 -0
- package/src/chat/ChatBottomPane.tsx +9 -0
- package/src/chat/ChatScreen.tsx +10 -4
- package/src/chat/chatSessionState.ts +4 -1
- package/src/chat/chatTurnOrchestrator.ts +6 -2
- package/src/chat/input/ChatInput.tsx +25 -2
- package/src/chat/input/imageRefs.ts +30 -0
- package/src/chat/views/ResumeView.tsx +16 -7
- package/src/models/ModelPicker.tsx +138 -6
- package/src/models/huggingface.ts +180 -2
- package/src/models/llamacpp.ts +110 -15
- package/src/models/llamacppPreflight.ts +30 -11
- package/src/models/modelPickerOptions.ts +14 -1
- package/src/providers/anthropic.ts +36 -5
- package/src/providers/contracts.ts +9 -1
- package/src/providers/gemini.ts +29 -3
- package/src/providers/openai-chat.ts +81 -2
- package/src/providers/openai-responses-format.ts +29 -8
- package/src/providers/openai-responses.ts +22 -7
- package/src/providers/registry.ts +1 -0
- package/src/storage/config.ts +1 -0
- package/src/storage/sessions.ts +14 -2
- package/src/ui/Spinner.tsx +14 -2
- package/src/ui/theme.ts +2 -0
- package/src/utils/images.ts +140 -0
- package/src/utils/messages.ts +2 -0
package/src/providers/gemini.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { ProviderError } from './contracts.js'
|
|
|
4
4
|
import { providerErrorFromResponse } from './errors.js'
|
|
5
5
|
import { fetchWithRetryStreamEvents } from './retry.js'
|
|
6
6
|
import { iterSseFrames } from './sse.js'
|
|
7
|
+
import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
|
|
7
8
|
|
|
8
9
|
export type GeminiToolDefinition = {
|
|
9
10
|
name: string
|
|
@@ -41,6 +42,7 @@ type GeminiChunk = {
|
|
|
41
42
|
|
|
42
43
|
type GeminiContentPart =
|
|
43
44
|
| { text: string }
|
|
45
|
+
| { inlineData: { mimeType: string; data: string } }
|
|
44
46
|
| { functionCall: { name: string; args: Record<string, unknown> } }
|
|
45
47
|
| { functionResponse: { name: string; response: Record<string, unknown> } }
|
|
46
48
|
|
|
@@ -92,8 +94,21 @@ export class GeminiProvider implements Provider {
|
|
|
92
94
|
yield { type: 'error', message: error.message }
|
|
93
95
|
return
|
|
94
96
|
}
|
|
97
|
+
if (hasImageBlocks(messages) && !supportsGeminiImages(this.model)) {
|
|
98
|
+
yield { type: 'error', message: `image input is not enabled for ${this.model}` }
|
|
99
|
+
return
|
|
100
|
+
}
|
|
95
101
|
|
|
96
|
-
|
|
102
|
+
let payload: GeminiPayload
|
|
103
|
+
try {
|
|
104
|
+
payload = await buildGeminiPayload(messages, this.tools, options)
|
|
105
|
+
} catch (err: unknown) {
|
|
106
|
+
if (err instanceof ImageLoadError) {
|
|
107
|
+
yield { type: 'error', message: err.message }
|
|
108
|
+
return
|
|
109
|
+
}
|
|
110
|
+
throw err
|
|
111
|
+
}
|
|
97
112
|
const modelName = this.model.replace(/^models\//, '')
|
|
98
113
|
const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelName)}:streamGenerateContent?alt=sse`
|
|
99
114
|
|
|
@@ -181,11 +196,11 @@ export class GeminiProvider implements Provider {
|
|
|
181
196
|
}
|
|
182
197
|
}
|
|
183
198
|
|
|
184
|
-
export function buildGeminiPayload(
|
|
199
|
+
export async function buildGeminiPayload(
|
|
185
200
|
messages: Message[],
|
|
186
201
|
tools: GeminiToolDefinition[] = [],
|
|
187
202
|
options: ProviderCompleteOptions = {},
|
|
188
|
-
): GeminiPayload {
|
|
203
|
+
): Promise<GeminiPayload> {
|
|
189
204
|
const systemParts: string[] = []
|
|
190
205
|
const contents: GeminiContent[] = []
|
|
191
206
|
const toolUseNamesById = new Map<string, string>()
|
|
@@ -222,6 +237,10 @@ export function buildGeminiPayload(
|
|
|
222
237
|
for (const block of blocks) {
|
|
223
238
|
if (block.type === 'text') {
|
|
224
239
|
parts.push({ text: block.text })
|
|
240
|
+
} else if (block.type === 'image') {
|
|
241
|
+
const loaded = await loadImageBlock(block)
|
|
242
|
+
if (!loaded.dataBase64 || !loaded.mimeType) throw new Error(`could not load image: ${block.path}`)
|
|
243
|
+
parts.push({ inlineData: { mimeType: loaded.mimeType, data: loaded.dataBase64 } })
|
|
225
244
|
} else if (block.type === 'tool_result') {
|
|
226
245
|
const name = toolUseNamesById.get(block.toolUseId) ?? 'unknown'
|
|
227
246
|
const response: Record<string, unknown> = block.isError
|
|
@@ -258,6 +277,13 @@ function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
|
|
|
258
277
|
})
|
|
259
278
|
}
|
|
260
279
|
|
|
280
|
+
export function supportsGeminiImages(model: string): boolean {
|
|
281
|
+
const normalized = model.toLowerCase()
|
|
282
|
+
return normalized.includes('gemini-1.5')
|
|
283
|
+
|| normalized.includes('gemini-2.0')
|
|
284
|
+
|| normalized.includes('gemini-2.5')
|
|
285
|
+
}
|
|
286
|
+
|
|
261
287
|
function normalizeFinishReason(reason: string, sawToolCall: boolean): DoneStopReason {
|
|
262
288
|
if (sawToolCall) return 'tool_use'
|
|
263
289
|
switch (reason) {
|
|
package/src/providers/openai-chat.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { providerErrorFromResponse } from './errors.js'
|
|
|
5
5
|
import { fetchWithRetryStreamEvents } from './retry.js'
|
|
6
6
|
import { iterSseFrames } from './sse.js'
|
|
7
7
|
import { messageTextContent } from '../utils/messages.js'
|
|
8
|
+
import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
|
|
8
9
|
import { providerDisplayName } from '../models/providerDisplay.js'
|
|
9
10
|
|
|
10
11
|
export type OpenAIToolDefinition = {
|
|
@@ -28,6 +29,7 @@ type Options = {
|
|
|
28
29
|
loadApiKey?: () => Promise<string | null>
|
|
29
30
|
tools?: OpenAIToolDefinition[]
|
|
30
31
|
maxRetries?: number
|
|
32
|
+
hasVisionProjector?: boolean
|
|
31
33
|
}
|
|
32
34
|
|
|
33
35
|
type ChatChunk = {
|
|
@@ -76,6 +78,7 @@ export class OpenAIChatProvider implements Provider {
|
|
|
76
78
|
private readonly loadApiKey?: () => Promise<string | null>
|
|
77
79
|
private readonly tools: OpenAIToolDefinition[]
|
|
78
80
|
private readonly maxRetries?: number
|
|
81
|
+
private readonly hasVisionProjector: boolean
|
|
79
82
|
|
|
80
83
|
constructor(opts: Options) {
|
|
81
84
|
this.id = opts.id
|
|
@@ -86,6 +89,7 @@ export class OpenAIChatProvider implements Provider {
|
|
|
86
89
|
this.tools = opts.tools ?? []
|
|
87
90
|
this.maxRetries = opts.maxRetries
|
|
88
91
|
this.supportsTools = this.tools.length > 0
|
|
92
|
+
this.hasVisionProjector = opts.hasVisionProjector ?? false
|
|
89
93
|
}
|
|
90
94
|
|
|
91
95
|
async *complete(
|
|
@@ -99,6 +103,19 @@ export class OpenAIChatProvider implements Provider {
|
|
|
99
103
|
yield { type: 'error', message: error.message }
|
|
100
104
|
return
|
|
101
105
|
}
|
|
106
|
+
if (hasImageBlocks(messages)) {
|
|
107
|
+
if (this.id === 'llamacpp' && !this.hasVisionProjector) {
|
|
108
|
+
const hint = localModelNameHintsVision(this.model)
|
|
109
|
+
? '; open alt+p and run "Add Vision Encoder" on this model to enable image input'
|
|
110
|
+
: ''
|
|
111
|
+
yield { type: 'error', message: `image input is not enabled for local model "${this.model}" (no vision projector loaded)${hint}` }
|
|
112
|
+
return
|
|
113
|
+
}
|
|
114
|
+
if (this.id === 'openai' && !supportsOpenAIImages(this.model)) {
|
|
115
|
+
yield { type: 'error', message: `image input is not enabled for ${this.model}` }
|
|
116
|
+
return
|
|
117
|
+
}
|
|
118
|
+
}
|
|
102
119
|
|
|
103
120
|
const headers: Record<string, string> = {
|
|
104
121
|
'Content-Type': 'application/json',
|
|
@@ -106,6 +123,17 @@ export class OpenAIChatProvider implements Provider {
|
|
|
106
123
|
}
|
|
107
124
|
if (apiKey) headers.Authorization = `Bearer ${apiKey}`
|
|
108
125
|
|
|
126
|
+
let wireMessages: Array<Record<string, unknown>>
|
|
127
|
+
try {
|
|
128
|
+
wireMessages = await toWireMessages(messages)
|
|
129
|
+
} catch (err: unknown) {
|
|
130
|
+
if (err instanceof ImageLoadError) {
|
|
131
|
+
yield { type: 'error', message: err.message }
|
|
132
|
+
return
|
|
133
|
+
}
|
|
134
|
+
throw err
|
|
135
|
+
}
|
|
136
|
+
|
|
109
137
|
let response: Response
|
|
110
138
|
try {
|
|
111
139
|
response = yield* fetchWithRetryStreamEvents(`${this.baseUrl}/chat/completions`, {
|
|
@@ -113,7 +141,7 @@ export class OpenAIChatProvider implements Provider {
|
|
|
113
141
|
headers,
|
|
114
142
|
body: JSON.stringify({
|
|
115
143
|
model: this.model,
|
|
116
|
-
messages:
|
|
144
|
+
messages: wireMessages,
|
|
117
145
|
tools: this.tools.length > 0 ? this.tools : undefined,
|
|
118
146
|
tool_choice: this.tools.length > 0 ? 'auto' : undefined,
|
|
119
147
|
stream: true,
|
|
@@ -221,7 +249,7 @@ export class OpenAIChatProvider implements Provider {
|
|
|
221
249
|
|
|
222
250
|
}
|
|
223
251
|
|
|
224
|
-
export function toWireMessages(messages: Message[]): Array<Record<string, unknown>> {
|
|
252
|
+
export async function toWireMessages(messages: Message[]): Promise<Array<Record<string, unknown>>> {
|
|
225
253
|
const out: Array<Record<string, unknown>> = []
|
|
226
254
|
|
|
227
255
|
for (const message of messages) {
|
|
@@ -230,6 +258,26 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
|
|
|
230
258
|
continue
|
|
231
259
|
}
|
|
232
260
|
|
|
261
|
+
if (message.role === 'user') {
|
|
262
|
+
const toolResults = message.content.filter(isToolResultBlock)
|
|
263
|
+
if (toolResults.length > 0) {
|
|
264
|
+
for (const block of toolResults) {
|
|
265
|
+
out.push({
|
|
266
|
+
role: 'tool',
|
|
267
|
+
tool_call_id: block.toolUseId,
|
|
268
|
+
content: block.content,
|
|
269
|
+
})
|
|
270
|
+
}
|
|
271
|
+
const nonToolBlocks = message.content.filter(block => block.type !== 'tool_result')
|
|
272
|
+
if (nonToolBlocks.length > 0) {
|
|
273
|
+
out.push({ role: 'user', content: await toOpenAIUserContent(nonToolBlocks) })
|
|
274
|
+
}
|
|
275
|
+
continue
|
|
276
|
+
}
|
|
277
|
+
out.push({ role: 'user', content: await toOpenAIUserContent(message.content) })
|
|
278
|
+
continue
|
|
279
|
+
}
|
|
280
|
+
|
|
233
281
|
if (message.role === 'assistant') {
|
|
234
282
|
const textParts = message.content.filter(isTextBlock).map(block => block.text)
|
|
235
283
|
const toolCalls = message.content.filter(isToolUseBlock).map(block => ({
|
|
@@ -266,6 +314,37 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
|
|
|
266
314
|
return normalizeSystemMessages(out)
|
|
267
315
|
}
|
|
268
316
|
|
|
317
|
+
async function toOpenAIUserContent(blocks: MessageContentBlock[]): Promise<Array<Record<string, unknown>>> {
|
|
318
|
+
const parts: Array<Record<string, unknown>> = []
|
|
319
|
+
for (const block of blocks) {
|
|
320
|
+
if (block.type === 'text') {
|
|
321
|
+
if (block.text.length > 0) parts.push({ type: 'text', text: block.text })
|
|
322
|
+
continue
|
|
323
|
+
}
|
|
324
|
+
if (block.type === 'image') {
|
|
325
|
+
const loaded = await loadImageBlock(block)
|
|
326
|
+
if (loaded.url) {
|
|
327
|
+
parts.push({ type: 'image_url', image_url: { url: loaded.url } })
|
|
328
|
+
} else if (loaded.dataBase64 && loaded.mimeType) {
|
|
329
|
+
parts.push({ type: 'image_url', image_url: { url: `data:${loaded.mimeType};base64,${loaded.dataBase64}` } })
|
|
330
|
+
}
|
|
331
|
+
continue
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
return parts.length > 0 ? parts : [{ type: 'text', text: '' }]
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
export function supportsOpenAIImages(model: string): boolean {
|
|
338
|
+
const normalized = model.toLowerCase()
|
|
339
|
+
if (normalized.includes('gpt-3.5')) return false
|
|
340
|
+
return /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(normalized)
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
export function localModelNameHintsVision(model: string): boolean {
|
|
344
|
+
const normalized = model.toLowerCase()
|
|
345
|
+
return /llava|bakllava|qwen[-_.]?vl|qwen2[-_.]?vl|qwen2\.5[-_.]?vl|minicpm-?v|llama-3\.2.*vision|mllama|cogvlm|internvl|moondream|pixtral|phi-?3[\.-]?vision|phi-?3\.5[\.-]?vision|smolvlm/.test(normalized)
|
|
346
|
+
}
|
|
347
|
+
|
|
269
348
|
function normalizeSystemMessages(messages: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
|
|
270
349
|
const systemContents: string[] = []
|
|
271
350
|
const nonSystem: Array<Record<string, unknown>> = []
|
|
package/src/providers/openai-responses-format.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import type { Message, MessageContentBlock } from './contracts.js'
|
|
2
2
|
import { messageTextContent } from '../utils/messages.js'
|
|
3
3
|
import type { OpenAIToolDefinition } from './openai-chat.js'
|
|
4
|
+
import { loadImageBlock } from '../utils/images.js'
|
|
4
5
|
|
|
5
6
|
export type ResponsesInputContent =
|
|
6
7
|
| { type: 'input_text'; text: string }
|
|
8
|
+
| { type: 'input_image'; image_url: string }
|
|
7
9
|
| { type: 'output_text'; text: string }
|
|
8
10
|
|
|
9
11
|
export type ResponsesInputItem =
|
|
@@ -30,13 +32,13 @@ export type ResponsesRequestBody = {
|
|
|
30
32
|
max_output_tokens?: number
|
|
31
33
|
}
|
|
32
34
|
|
|
33
|
-
export function buildResponsesBody(args: {
|
|
35
|
+
export async function buildResponsesBody(args: {
|
|
34
36
|
model: string
|
|
35
37
|
messages: Message[]
|
|
36
38
|
tools: OpenAIToolDefinition[]
|
|
37
39
|
maxOutputTokens?: number
|
|
38
|
-
}): ResponsesRequestBody {
|
|
39
|
-
const { instructions, items } = splitMessages(args.messages)
|
|
40
|
+
}): Promise<ResponsesRequestBody> {
|
|
41
|
+
const { instructions, items } = await splitMessages(args.messages)
|
|
40
42
|
const body: ResponsesRequestBody = {
|
|
41
43
|
model: args.model,
|
|
42
44
|
input: items,
|
|
@@ -60,10 +62,10 @@ export function buildResponsesBody(args: {
|
|
|
60
62
|
return body
|
|
61
63
|
}
|
|
62
64
|
|
|
63
|
-
function splitMessages(messages: Message[]): {
|
|
65
|
+
async function splitMessages(messages: Message[]): Promise<{
|
|
64
66
|
instructions?: string
|
|
65
67
|
items: ResponsesInputItem[]
|
|
66
|
-
} {
|
|
68
|
+
}> {
|
|
67
69
|
const instructions: string[] = []
|
|
68
70
|
const items: ResponsesInputItem[] = []
|
|
69
71
|
|
|
@@ -100,12 +102,12 @@ function splitMessages(messages: Message[]): {
|
|
|
100
102
|
}
|
|
101
103
|
continue
|
|
102
104
|
}
|
|
103
|
-
const
|
|
104
|
-
if (
|
|
105
|
+
const content = await toOpenAIResponsesUserContent(blocks)
|
|
106
|
+
if (content.length > 0) {
|
|
105
107
|
items.push({
|
|
106
108
|
type: 'message',
|
|
107
109
|
role: 'user',
|
|
108
|
-
content
|
|
110
|
+
content,
|
|
109
111
|
})
|
|
110
112
|
}
|
|
111
113
|
continue
|
|
@@ -136,6 +138,25 @@ function splitMessages(messages: Message[]): {
|
|
|
136
138
|
}
|
|
137
139
|
}
|
|
138
140
|
|
|
141
|
+
async function toOpenAIResponsesUserContent(blocks: MessageContentBlock[]): Promise<ResponsesInputContent[]> {
|
|
142
|
+
const content: ResponsesInputContent[] = []
|
|
143
|
+
for (const block of blocks) {
|
|
144
|
+
if (block.type === 'text') {
|
|
145
|
+
if (block.text) content.push({ type: 'input_text', text: block.text })
|
|
146
|
+
continue
|
|
147
|
+
}
|
|
148
|
+
if (block.type === 'image') {
|
|
149
|
+
const loaded = await loadImageBlock(block)
|
|
150
|
+
if (loaded.url) {
|
|
151
|
+
content.push({ type: 'input_image', image_url: loaded.url })
|
|
152
|
+
} else if (loaded.dataBase64 && loaded.mimeType) {
|
|
153
|
+
content.push({ type: 'input_image', image_url: `data:${loaded.mimeType};base64,${loaded.dataBase64}` })
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return content
|
|
158
|
+
}
|
|
159
|
+
|
|
139
160
|
function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
|
|
140
161
|
if (typeof content === 'string') {
|
|
141
162
|
return content ? [{ type: 'text', text: content }] : []
|
|
package/src/providers/openai-responses.ts
CHANGED
|
@@ -5,7 +5,8 @@ import { providerErrorFromResponse } from './errors.js'
|
|
|
5
5
|
import { fetchWithRetryStreamEvents } from './retry.js'
|
|
6
6
|
import { iterSseEvents } from './sse.js'
|
|
7
7
|
import { buildResponsesBody } from './openai-responses-format.js'
|
|
8
|
-
import type
|
|
8
|
+
import { supportsOpenAIImages, type OpenAIToolDefinition } from './openai-chat.js'
|
|
9
|
+
import { hasImageBlocks, ImageLoadError } from '../utils/images.js'
|
|
9
10
|
|
|
10
11
|
const READ_TIMEOUT_MS = 45_000
|
|
11
12
|
|
|
@@ -64,15 +65,29 @@ export class OpenAIResponsesProvider implements Provider {
|
|
|
64
65
|
return
|
|
65
66
|
}
|
|
66
67
|
|
|
68
|
+
if (hasImageBlocks(messages) && !supportsOpenAIImages(this.model)) {
|
|
69
|
+
yield { type: 'error', message: `image input is not enabled for ${this.model}` }
|
|
70
|
+
return
|
|
71
|
+
}
|
|
72
|
+
|
|
67
73
|
let attempt = 0
|
|
68
74
|
while (true) {
|
|
69
75
|
attempt += 1
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
+
let body: string
|
|
77
|
+
try {
|
|
78
|
+
body = JSON.stringify(await buildResponsesBody({
|
|
79
|
+
model: this.model,
|
|
80
|
+
messages,
|
|
81
|
+
tools: this.tools,
|
|
82
|
+
maxOutputTokens: options.maxTokens,
|
|
83
|
+
}))
|
|
84
|
+
} catch (err: unknown) {
|
|
85
|
+
if (err instanceof ImageLoadError) {
|
|
86
|
+
yield { type: 'error', message: err.message }
|
|
87
|
+
return
|
|
88
|
+
}
|
|
89
|
+
throw err
|
|
90
|
+
}
|
|
76
91
|
|
|
77
92
|
let response: Response
|
|
78
93
|
try {
|
|
package/src/providers/registry.ts
CHANGED
|
@@ -34,6 +34,7 @@ export function createProvider(config: EthagentConfig, options: { mode?: Session
|
|
|
34
34
|
baseUrl: localProviderBaseUrlFor('llamacpp', config.baseUrl),
|
|
35
35
|
apiKey: 'llamacpp',
|
|
36
36
|
tools: openAITools(mode, toolContext),
|
|
37
|
+
hasVisionProjector: Boolean(config.localMmprojPath),
|
|
37
38
|
})
|
|
38
39
|
case 'openai':
|
|
39
40
|
return createOpenAIProvider(config, openAITools(mode, toolContext))
|
package/src/storage/config.ts
CHANGED
|
@@ -80,6 +80,7 @@ const ConfigSchema = z.object({
|
|
|
80
80
|
provider: z.enum(PROVIDERS),
|
|
81
81
|
model: z.string().min(1),
|
|
82
82
|
baseUrl: z.string().url().optional(),
|
|
83
|
+
localMmprojPath: z.string().min(1).optional(),
|
|
83
84
|
firstRunAt: z.string(),
|
|
84
85
|
identity: IdentitySchema.optional(),
|
|
85
86
|
erc8004: z.object({
|
package/src/storage/sessions.ts
CHANGED
|
@@ -11,9 +11,10 @@ import {
|
|
|
11
11
|
isUserCorrectionOfToolState,
|
|
12
12
|
looksLikeToolStateClaim,
|
|
13
13
|
} from '../runtime/toolClaimGuards.js'
|
|
14
|
+
import { userTextToContentBlocks } from '../utils/images.js'
|
|
14
15
|
|
|
15
16
|
export type SessionMessage =
|
|
16
|
-
| { version?: 2; role: 'user'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
|
|
17
|
+
| { version?: 2; role: 'user'; content: string; providerContent?: Message['content']; createdAt: string; turnId?: string; synthetic?: boolean }
|
|
17
18
|
| { version?: 2; role: 'assistant'; content: string; createdAt: string; model?: string; usage?: { in?: number; out?: number }; turnId?: string; synthetic?: boolean }
|
|
18
19
|
| { version?: 2; role: 'system'; content: string; createdAt: string; turnId?: string; synthetic?: boolean }
|
|
19
20
|
| { version: 2; role: 'tool_use'; toolUseId: string; name: string; input: Record<string, unknown>; createdAt: string; turnId?: string }
|
|
@@ -244,6 +245,17 @@ export type ProviderMessageProjectionOptions = {
|
|
|
244
245
|
export const TOOL_CORRECTION_CONTEXT_MESSAGE =
|
|
245
246
|
'The latest user message corrects a prior assistant claim about tool or filesystem state. Treat user correction and tool_result messages as authoritative. Ignore any recent assistant claim about files, directories, cwd, or tool execution unless it is backed by a tool_result, and retry with the appropriate tool.'
|
|
246
247
|
|
|
248
|
+
function resolveUserContent(
|
|
249
|
+
message: Extract<SessionMessage, { role: 'system' | 'user' | 'assistant' }>,
|
|
250
|
+
): Message['content'] {
|
|
251
|
+
if (message.role !== 'user') return message.content
|
|
252
|
+
if (message.providerContent) return message.providerContent
|
|
253
|
+
if (message.content.includes('[image:')) {
|
|
254
|
+
return userTextToContentBlocks(message.content)
|
|
255
|
+
}
|
|
256
|
+
return message.content
|
|
257
|
+
}
|
|
258
|
+
|
|
247
259
|
export function sessionMessagesToProviderMessages(
|
|
248
260
|
messages: SessionMessage[],
|
|
249
261
|
options: ProviderMessageProjectionOptions = {},
|
|
@@ -255,7 +267,7 @@ export function sessionMessagesToProviderMessages(
|
|
|
255
267
|
for (const [index, message] of messages.entries()) {
|
|
256
268
|
if (message.role === 'system' || message.role === 'user' || message.role === 'assistant') {
|
|
257
269
|
if (message.role === 'assistant' && invalidatedAssistantMessages.has(index)) continue
|
|
258
|
-
out.push({ role: message.role, content: message.content })
|
|
270
|
+
out.push({ role: message.role, content: resolveUserContent(message) })
|
|
259
271
|
continue
|
|
260
272
|
}
|
|
261
273
|
if (message.role === 'tool_use') {
|
package/src/ui/Spinner.tsx
CHANGED
|
@@ -295,8 +295,20 @@ export const Spinner: React.FC<SpinnerProps> = ({
|
|
|
295
295
|
function formatElapsedSeconds(milliseconds: number): string {
|
|
296
296
|
const seconds = Math.max(0, Math.floor(milliseconds / 1000))
|
|
297
297
|
if (seconds < 60) return `${seconds}s`
|
|
298
|
-
|
|
299
|
-
|
|
298
|
+
|
|
299
|
+
const hours = Math.floor(seconds / 3600)
|
|
300
|
+
const minutes = Math.floor((seconds % 3600) / 60)
|
|
301
|
+
const remainingSeconds = seconds % 60
|
|
302
|
+
|
|
303
|
+
if (hours > 0) {
|
|
304
|
+
return remainingSeconds > 0
|
|
305
|
+
? `${hours}h ${minutes}min ${remainingSeconds}s`
|
|
306
|
+
: `${hours}h ${minutes}min`
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
return remainingSeconds > 0
|
|
310
|
+
? `${minutes}min ${remainingSeconds}s`
|
|
311
|
+
: `${minutes}min`
|
|
300
312
|
}
|
|
301
313
|
|
|
302
314
|
function restoreSpinnerTerms(value: string): string {
|
package/src/ui/theme.ts
CHANGED
|
package/src/utils/images.ts
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import type { ImageBlock, Message, MessageContentBlock } from '../providers/contracts.js'
|
|
4
|
+
|
|
5
|
+
const IMAGE_MARKER_RE = /\[image:\s*([^\]]+?)\]/gi
|
|
6
|
+
const PLACEHOLDER_RE = /^([<{[].*[>}\]]|#\d+)$/
|
|
7
|
+
|
|
8
|
+
export class ImageLoadError extends Error {
|
|
9
|
+
readonly imagePath: string
|
|
10
|
+
constructor(imagePath: string, message: string) {
|
|
11
|
+
super(message)
|
|
12
|
+
this.name = 'ImageLoadError'
|
|
13
|
+
this.imagePath = imagePath
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export function collapseImagePathsToRefs(text: string): string {
|
|
18
|
+
let counter = 0
|
|
19
|
+
return text.replace(IMAGE_MARKER_RE, (full, raw: string) => {
|
|
20
|
+
const trimmed = raw.trim()
|
|
21
|
+
if (!trimmed || PLACEHOLDER_RE.test(trimmed)) return full
|
|
22
|
+
counter += 1
|
|
23
|
+
return `[Image #${counter}]`
|
|
24
|
+
})
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function modelSupportsImages(
|
|
28
|
+
provider: string,
|
|
29
|
+
model: string,
|
|
30
|
+
extra?: { mmprojPath?: string },
|
|
31
|
+
): boolean {
|
|
32
|
+
const normalized = model.toLowerCase()
|
|
33
|
+
switch (provider) {
|
|
34
|
+
case 'anthropic':
|
|
35
|
+
return /claude-3|claude-sonnet-4|claude-opus-4|claude-haiku-4/.test(normalized)
|
|
36
|
+
case 'gemini':
|
|
37
|
+
return /gemini-1\.5|gemini-2\.0|gemini-2\.5/.test(normalized)
|
|
38
|
+
case 'openai':
|
|
39
|
+
if (normalized.includes('gpt-3.5')) return false
|
|
40
|
+
return /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(normalized)
|
|
41
|
+
case 'llamacpp':
|
|
42
|
+
return Boolean(extra?.mmprojPath)
|
|
43
|
+
default:
|
|
44
|
+
return false
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export function hasImageBlocks(messages: Message[]): boolean {
|
|
49
|
+
return messages.some(message => Array.isArray(message.content) && message.content.some(block => block.type === 'image'))
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export function userTextToContentBlocks(text: string): string | MessageContentBlock[] {
|
|
53
|
+
const blocks = parseImageMarkers(text)
|
|
54
|
+
return blocks.length === 1 && blocks[0]?.type === 'text' ? blocks[0].text : blocks
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export function parseImageMarkers(text: string): MessageContentBlock[] {
|
|
58
|
+
const out: MessageContentBlock[] = []
|
|
59
|
+
let lastIndex = 0
|
|
60
|
+
let match: RegExpExecArray | null
|
|
61
|
+
|
|
62
|
+
while ((match = IMAGE_MARKER_RE.exec(text)) !== null) {
|
|
63
|
+
const full = match[0]
|
|
64
|
+
const rawPath = match[1]?.trim() ?? ''
|
|
65
|
+
if (match.index > lastIndex) {
|
|
66
|
+
const prefix = text.slice(lastIndex, match.index)
|
|
67
|
+
if (prefix) out.push({ type: 'text', text: prefix })
|
|
68
|
+
}
|
|
69
|
+
if (rawPath && !PLACEHOLDER_RE.test(rawPath)) {
|
|
70
|
+
out.push({ type: 'image', path: rawPath })
|
|
71
|
+
} else {
|
|
72
|
+
out.push({ type: 'text', text: full })
|
|
73
|
+
}
|
|
74
|
+
lastIndex = match.index + full.length
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (lastIndex < text.length) {
|
|
78
|
+
const suffix = text.slice(lastIndex)
|
|
79
|
+
if (suffix) out.push({ type: 'text', text: suffix })
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (out.length === 0) return text ? [{ type: 'text', text }] : []
|
|
83
|
+
return mergeAdjacentTextBlocks(out)
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
export async function loadImageBlock(block: ImageBlock): Promise<ImageBlock> {
|
|
87
|
+
if (block.dataBase64 && block.mimeType) return block
|
|
88
|
+
if (block.url) return block
|
|
89
|
+
const rawPath = block.path?.trim() ?? ''
|
|
90
|
+
if (!rawPath) throw new ImageLoadError(rawPath, 'image path is empty')
|
|
91
|
+
if (PLACEHOLDER_RE.test(rawPath)) {
|
|
92
|
+
throw new ImageLoadError(rawPath, `image path looks like a placeholder, not a real file: ${rawPath}`)
|
|
93
|
+
}
|
|
94
|
+
let file: Buffer
|
|
95
|
+
try {
|
|
96
|
+
file = await fs.readFile(rawPath)
|
|
97
|
+
} catch (err: unknown) {
|
|
98
|
+
const code = (err as NodeJS.ErrnoException).code
|
|
99
|
+
if (code === 'ENOENT') {
|
|
100
|
+
throw new ImageLoadError(rawPath, `image file not found: ${rawPath}`)
|
|
101
|
+
}
|
|
102
|
+
throw new ImageLoadError(rawPath, `could not read image at ${rawPath}: ${(err as Error).message}`)
|
|
103
|
+
}
|
|
104
|
+
const mimeType = block.mimeType ?? mimeTypeForPath(rawPath)
|
|
105
|
+
return {
|
|
106
|
+
...block,
|
|
107
|
+
path: rawPath,
|
|
108
|
+
mimeType,
|
|
109
|
+
dataBase64: file.toString('base64'),
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export function imagePlaceholder(pathValue: string): string {
|
|
114
|
+
return `[image: ${path.basename(pathValue)}]`
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
function mergeAdjacentTextBlocks(blocks: MessageContentBlock[]): MessageContentBlock[] {
|
|
118
|
+
const out: MessageContentBlock[] = []
|
|
119
|
+
for (const block of blocks) {
|
|
120
|
+
const prev = out[out.length - 1]
|
|
121
|
+
if (block.type === 'text' && prev?.type === 'text') {
|
|
122
|
+
prev.text += block.text
|
|
123
|
+
continue
|
|
124
|
+
}
|
|
125
|
+
out.push(block)
|
|
126
|
+
}
|
|
127
|
+
return out
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
function mimeTypeForPath(filePath: string): string {
|
|
131
|
+
switch (path.extname(filePath).toLowerCase()) {
|
|
132
|
+
case '.png': return 'image/png'
|
|
133
|
+
case '.jpg':
|
|
134
|
+
case '.jpeg': return 'image/jpeg'
|
|
135
|
+
case '.webp': return 'image/webp'
|
|
136
|
+
case '.gif': return 'image/gif'
|
|
137
|
+
case '.bmp': return 'image/bmp'
|
|
138
|
+
default: return 'application/octet-stream'
|
|
139
|
+
}
|
|
140
|
+
}
|
package/src/utils/messages.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import path from 'node:path'
|
|
1
2
|
import type { Message, MessageContentBlock } from '../providers/contracts.js'
|
|
2
3
|
|
|
3
4
|
export function systemMessage(content: string): Message {
|
|
@@ -20,6 +21,7 @@ export function blocksToText(blocks: MessageContentBlock[]): string {
|
|
|
20
21
|
return blocks
|
|
21
22
|
.map(block => {
|
|
22
23
|
if (block.type === 'text') return block.text
|
|
24
|
+
if (block.type === 'image') return `[image attached: ${path.basename(block.path)}]`
|
|
23
25
|
if (block.type === 'tool_use') return `[tool use: ${block.name}]`
|
|
24
26
|
return block.isError
|
|
25
27
|
? `[tool error: ${block.content}]`
|