ethagent 2.3.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +18 -4
  2. package/package.json +2 -1
  3. package/src/app/FirstRun.tsx +157 -15
  4. package/src/app/FirstRunTimeline.tsx +4 -0
  5. package/src/app/input/AppInputProvider.tsx +19 -0
  6. package/src/app/input/appInputParser.ts +19 -4
  7. package/src/chat/ChatBottomPane.tsx +12 -1
  8. package/src/chat/ChatScreen.tsx +17 -5
  9. package/src/chat/ConversationStack.tsx +25 -19
  10. package/src/chat/MessageList.tsx +194 -53
  11. package/src/chat/chatSessionState.ts +4 -1
  12. package/src/chat/chatTurnOrchestrator.ts +65 -2
  13. package/src/chat/input/ChatInput.tsx +28 -2
  14. package/src/chat/input/imageRefs.ts +30 -0
  15. package/src/chat/input/textCursor.ts +13 -3
  16. package/src/chat/transcript/TranscriptView.tsx +7 -5
  17. package/src/chat/transcript/transcriptViewport.ts +88 -17
  18. package/src/chat/views/PermissionPrompt.tsx +26 -26
  19. package/src/chat/views/PermissionsView.tsx +18 -12
  20. package/src/chat/views/ResumeView.tsx +16 -7
  21. package/src/chat/views/RewindView.tsx +3 -1
  22. package/src/cli/ResetConfirmView.tsx +24 -9
  23. package/src/identity/continuity/editor.ts +27 -2
  24. package/src/identity/continuity/envelope.ts +125 -0
  25. package/src/identity/continuity/publicSkills.ts +37 -1
  26. package/src/identity/continuity/skills/frontmatter.ts +183 -0
  27. package/src/identity/continuity/skills/loadSkills.ts +609 -0
  28. package/src/identity/continuity/skills/publicSkillsSync.ts +32 -0
  29. package/src/identity/continuity/skills/scaffold.ts +52 -0
  30. package/src/identity/continuity/skills/types.ts +30 -0
  31. package/src/identity/continuity/storage/defaults.ts +28 -47
  32. package/src/identity/continuity/storage/files.ts +1 -0
  33. package/src/identity/continuity/storage/paths.ts +1 -0
  34. package/src/identity/continuity/storage/scaffold.ts +25 -23
  35. package/src/identity/continuity/storage/status.ts +34 -5
  36. package/src/identity/continuity/storage/types.ts +3 -2
  37. package/src/identity/continuity/storage.ts +3 -0
  38. package/src/identity/hub/OperationalRoutes.tsx +105 -3
  39. package/src/identity/hub/Routes.tsx +5 -3
  40. package/src/identity/hub/continuity/ContinuityDashboardScreen.tsx +5 -51
  41. package/src/identity/hub/continuity/RecoveryConfirmScreen.tsx +1 -1
  42. package/src/identity/hub/continuity/SavePromptScreen.tsx +1 -0
  43. package/src/identity/hub/continuity/effects.ts +36 -5
  44. package/src/identity/hub/continuity/skills/DeleteSkillConfirmScreen.tsx +112 -0
  45. package/src/identity/hub/continuity/skills/DeleteSkillScreen.tsx +123 -0
  46. package/src/identity/hub/continuity/skills/NewSkillScreen.tsx +57 -0
  47. package/src/identity/hub/continuity/skills/NewSkillVisibilityScreen.tsx +52 -0
  48. package/src/identity/hub/continuity/skills/SkillVisibilityScreen.tsx +171 -0
  49. package/src/identity/hub/continuity/skills/SkillsTreeScreen.tsx +213 -0
  50. package/src/identity/hub/continuity/snapshot.ts +3 -0
  51. package/src/identity/hub/continuity/state.ts +3 -2
  52. package/src/identity/hub/continuity/vault.ts +42 -10
  53. package/src/identity/hub/custody/CustodyEditFlow.tsx +3 -3
  54. package/src/identity/hub/identityHubReducer.ts +21 -0
  55. package/src/identity/hub/profile/effects.ts +16 -3
  56. package/src/identity/hub/restore/RestoreFlow.tsx +43 -6
  57. package/src/identity/hub/restore/apply.ts +12 -1
  58. package/src/identity/hub/restore/recovery.ts +11 -1
  59. package/src/identity/hub/restore/resolve.ts +1 -1
  60. package/src/identity/hub/restore/useRestoreEffects.ts +4 -6
  61. package/src/identity/hub/shared/components/DetailsScreen.tsx +4 -1
  62. package/src/identity/hub/shared/components/IdentitySummary.tsx +97 -53
  63. package/src/identity/hub/shared/components/MenuScreen.tsx +18 -15
  64. package/src/identity/hub/shared/components/UnlinkedIdentityScreen.tsx +1 -1
  65. package/src/identity/hub/shared/components/menuFlagsFromReconciliation.ts +8 -12
  66. package/src/identity/hub/shared/effects/sync.ts +16 -3
  67. package/src/identity/hub/shared/model/copy.ts +2 -4
  68. package/src/identity/hub/transfer/effects.ts +15 -2
  69. package/src/identity/hub/useIdentityHubContinuity.ts +145 -23
  70. package/src/identity/hub/useIdentityHubController.ts +5 -1
  71. package/src/identity/hub/useIdentityHubSideEffects.ts +2 -4
  72. package/src/mcp/manager.ts +1 -1
  73. package/src/models/ModelPicker.tsx +211 -74
  74. package/src/models/huggingface.ts +180 -2
  75. package/src/models/llamacpp.ts +261 -17
  76. package/src/models/llamacppPreflight.ts +16 -12
  77. package/src/models/modelPickerOptions.ts +57 -38
  78. package/src/providers/anthropic.ts +36 -5
  79. package/src/providers/contracts.ts +10 -1
  80. package/src/providers/gemini.ts +29 -3
  81. package/src/providers/openai-chat.ts +131 -11
  82. package/src/providers/openai-responses-format.ts +29 -8
  83. package/src/providers/openai-responses.ts +41 -11
  84. package/src/providers/registry.ts +1 -0
  85. package/src/runtime/toolExecution.ts +4 -3
  86. package/src/runtime/turn.ts +61 -30
  87. package/src/storage/config.ts +1 -0
  88. package/src/storage/sessions.ts +14 -2
  89. package/src/tools/changeDirectoryTool.ts +1 -1
  90. package/src/tools/contracts.ts +10 -0
  91. package/src/tools/deleteFileTool.ts +1 -1
  92. package/src/tools/editTool.ts +1 -1
  93. package/src/tools/listDirectoryTool.ts +1 -1
  94. package/src/tools/listSkillFilesTool.ts +77 -0
  95. package/src/tools/listSkillsTool.ts +68 -0
  96. package/src/tools/mcpResourceTools.ts +2 -2
  97. package/src/tools/privateContinuityReadTool.ts +1 -1
  98. package/src/tools/readSkillTool.ts +107 -0
  99. package/src/tools/readTool.ts +1 -1
  100. package/src/tools/registry.ts +6 -0
  101. package/src/tools/writeFileTool.ts +22 -2
  102. package/src/ui/Spinner.tsx +15 -3
  103. package/src/ui/theme.ts +2 -0
  104. package/src/utils/images.ts +140 -0
  105. package/src/utils/messages.ts +2 -0
  106. package/src/identity/continuity/localBackup.ts +0 -249
  107. package/src/identity/continuity/zipWriter.ts +0 -95
  108. package/src/identity/hub/continuity/index.ts +0 -7
  109. package/src/identity/hub/ens/index.ts +0 -11
  110. package/src/identity/hub/restore/index.ts +0 -22
@@ -23,6 +23,9 @@ export type LocalHfPickerModel = {
23
23
  risk: HfRisk
24
24
  task: HfTask
25
25
  status: 'ready' | 'incomplete'
26
+ mmprojPath?: string
27
+ mmprojAvailable?: boolean
28
+ mmprojSizeBytes?: number
26
29
  }
27
30
 
28
31
  export type CloudCredentialKind = 'apikey' | 'oauth'
@@ -58,7 +61,9 @@ const CHILD_INDENT = 4
58
61
  export function buildModelPickerOptions(
59
62
  data: ModelPickerOptionsData,
60
63
  context: ModelPickerOptionsContext,
64
+ options_: { localOnly?: boolean } = {},
61
65
  ): SelectOption<string>[] {
66
+ const localOnly = options_.localOnly === true
62
67
  const options: SelectOption<string>[] = []
63
68
 
64
69
  options.push(sectionOption('hdr:local', 'Local Models'))
@@ -69,49 +74,53 @@ export function buildModelPickerOptions(
69
74
  options.push(utilityOption('local:uninstall', 'Uninstall Downloaded GGUF'))
70
75
  }
71
76
 
72
- options.push(sectionOption('hdr:cloud', 'Cloud'))
73
- for (const provider of MODEL_PICKER_CLOUD_PROVIDERS) {
74
- options.push(groupOption(`hdr:cloud:${provider}`, cloudProviderDisplayName(provider)))
75
- const keySet = data.cloudKeys[provider] === true
76
- if (!keySet) {
77
- if (provider === 'openai') {
78
- options.push(utilityOption('oauth:openai', 'Sign in with ChatGPT', 'Use your ChatGPT subscription'))
77
+ if (!localOnly) {
78
+ options.push(sectionOption('hdr:cloud', 'Cloud'))
79
+ for (const provider of MODEL_PICKER_CLOUD_PROVIDERS) {
80
+ options.push(groupOption(`hdr:cloud:${provider}`, cloudProviderDisplayName(provider)))
81
+ const keySet = data.cloudKeys[provider] === true
82
+ if (!keySet) {
83
+ if (provider === 'openai') {
84
+ options.push(utilityOption('oauth:openai', 'Sign in with ChatGPT', 'Use your ChatGPT subscription'))
85
+ }
86
+ options.push(utilityOption(`key:set:${provider}`, 'Add API Key'))
87
+ continue
79
88
  }
80
- options.push(utilityOption(`key:set:${provider}`, 'Add API Key'))
81
- continue
82
- }
83
89
 
84
- const catalog = data.cloudCatalogs[provider]
85
- if (catalog?.status === 'fallback') {
86
- const reason = catalog.error ? ` · ${catalog.error}` : ''
87
- options.push(noticeOption(
88
- `hdr:cloud-fallback:${provider}`,
89
- `Catalog unavailable${reason} · showing configured model`,
90
- CHILD_INDENT,
91
- ))
92
- }
90
+ const catalog = data.cloudCatalogs[provider]
91
+ if (catalog?.status === 'fallback') {
92
+ const reason = catalog.error ? ` · ${catalog.error}` : ''
93
+ options.push(noticeOption(
94
+ `hdr:cloud-fallback:${provider}`,
95
+ `Catalog unavailable${reason} · showing configured model`,
96
+ CHILD_INDENT,
97
+ ))
98
+ }
93
99
 
94
- const models = orderModelsForContextFit(provider, cloudPickerModels(provider, catalog, context), context.contextFit)
95
- if (models.length === 0) {
96
- options.push(noticeOption(`hdr:cloud-empty:${provider}`, 'No selectable models', CHILD_INDENT))
97
- }
98
- for (const model of models) {
99
- const active = context.currentProvider === provider && context.currentModel === model
100
- const displayName = formatModelDisplayName(provider, model, { maxLength: 58 })
101
- options.push(rowOption(
102
- `c:${provider}:${model}`,
103
- contextFitLabel(provider, model, `${displayName}${active ? ' *' : ''}`, context.contextFit),
104
- ))
100
+ const models = orderModelsForContextFit(provider, cloudPickerModels(provider, catalog, context), context.contextFit)
101
+ if (models.length === 0) {
102
+ options.push(noticeOption(`hdr:cloud-empty:${provider}`, 'No selectable models', CHILD_INDENT))
103
+ }
104
+ for (const model of models) {
105
+ const active = context.currentProvider === provider && context.currentModel === model
106
+ const displayName = formatModelDisplayName(provider, model, { maxLength: 58 })
107
+ options.push(rowOption(
108
+ `c:${provider}:${model}`,
109
+ contextFitLabel(provider, model, `${displayName}${active ? ' *' : ''}`, context.contextFit),
110
+ ))
111
+ }
112
+ options.push(utilityOption(`catalog:${provider}`, 'Full Catalog'))
113
+ const manageLabel = provider === 'openai' && data.cloudCredentialKinds?.openai === 'oauth'
114
+ ? 'Manage ChatGPT Sign-in'
115
+ : 'Manage API Key'
116
+ options.push(utilityOption(`key:manage:${provider}`, manageLabel))
105
117
  }
106
- options.push(utilityOption(`catalog:${provider}`, 'Full Catalog'))
107
- const manageLabel = provider === 'openai' && data.cloudCredentialKinds?.openai === 'oauth'
108
- ? 'Manage ChatGPT Sign-in'
109
- : 'Manage API Key'
110
- options.push(utilityOption(`key:manage:${provider}`, manageLabel))
111
118
  }
112
119
 
113
- options.push(sectionOption('hdr:exit', 'Exit'))
114
- options.push(utilityOption('cancel', 'Close Model Picker', 'Return to chat without changing model'))
120
+ if (!localOnly) {
121
+ options.push(sectionOption('hdr:exit', 'Exit'))
122
+ options.push(utilityOption('cancel', 'Close Model Picker', 'Return to chat'))
123
+ }
115
124
 
116
125
  return options
117
126
  }
@@ -185,12 +194,22 @@ function appendHfModelOptions(
185
194
  displayName: model.displayName,
186
195
  maxLength,
187
196
  })
197
+ const tags = ['Installed']
198
+ if (model.mmprojPath) tags.push('Vision encoder loaded')
188
199
  options.push(rowOption(
189
200
  `hf:${id}`,
190
201
  contextFitLabel('llamacpp', id, `${active ? '* ' : ' '}${displayName}`, context.contextFit),
191
202
  undefined,
192
- modelMetadataSubtext(size, ['Installed']),
203
+ modelMetadataSubtext(size, tags),
193
204
  ))
205
+ if (model.mmprojAvailable && !model.mmprojPath) {
206
+ const projectorSize = model.mmprojSizeBytes ? ` (+${formatSize(model.mmprojSizeBytes)})` : ''
207
+ options.push(rowOption(
208
+ `hfmmproj:${id}`,
209
+ ` + Add Vision Encoder${projectorSize}`,
210
+ 'Enable image input on this local model',
211
+ ))
212
+ }
194
213
  }
195
214
  }
196
215
 
@@ -4,6 +4,7 @@ import { ProviderError } from './contracts.js'
4
4
  import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseEvents } from './sse.js'
7
+ import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
7
8
 
8
9
  export type AnthropicToolDefinition = {
9
10
  name: string
@@ -75,7 +76,22 @@ export class AnthropicProvider implements Provider {
75
76
  return
76
77
  }
77
78
 
78
- const { system, conversation } = splitMessages(messages)
79
+ if (hasImageBlocks(messages) && !supportsAnthropicImages(this.model)) {
80
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
81
+ return
82
+ }
83
+
84
+ let split: { system?: string; conversation: Awaited<ReturnType<typeof splitMessages>>['conversation'] }
85
+ try {
86
+ split = await splitMessages(messages)
87
+ } catch (err: unknown) {
88
+ if (err instanceof ImageLoadError) {
89
+ yield { type: 'error', message: err.message }
90
+ return
91
+ }
92
+ throw err
93
+ }
94
+ const { system, conversation } = split
79
95
 
80
96
  let response: Response
81
97
  try {
@@ -195,22 +211,24 @@ export class AnthropicProvider implements Provider {
195
211
  }
196
212
  }
197
213
 
198
- function splitMessages(messages: Message[]): {
214
+ async function splitMessages(messages: Message[]): Promise<{
199
215
  system?: string
200
216
  conversation: Array<{
201
217
  role: 'user' | 'assistant'
202
218
  content: Array<
203
219
  | { type: 'text'; text: string }
220
+ | { type: 'image'; source: { type: 'base64'; media_type: string; data: string } }
204
221
  | { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }
205
222
  | { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
206
223
  >
207
224
  }>
208
- } {
225
+ }> {
209
226
  const systemParts: string[] = []
210
227
  const conversation: Array<{
211
228
  role: 'user' | 'assistant'
212
229
  content: Array<
213
230
  | { type: 'text'; text: string }
231
+ | { type: 'image'; source: { type: 'base64'; media_type: string; data: string } }
214
232
  | { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }
215
233
  | { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
216
234
  >
@@ -226,11 +244,16 @@ function splitMessages(messages: Message[]): {
226
244
  }
227
245
  conversation.push({
228
246
  role: message.role,
229
- content: blocks.map(block => {
247
+ content: await Promise.all(blocks.map(async block => {
230
248
  if (block.type === 'text') return { type: 'text', text: block.text }
249
+ if (block.type === 'image') {
250
+ const loaded = await loadImageBlock(block)
251
+ if (!loaded.dataBase64 || !loaded.mimeType) throw new Error(`could not load image: ${block.path}`)
252
+ return { type: 'image', source: { type: 'base64', media_type: loaded.mimeType, data: loaded.dataBase64 } }
253
+ }
231
254
  if (block.type === 'tool_use') return { type: 'tool_use', id: block.id, name: block.name, input: block.input }
232
255
  return { type: 'tool_result', tool_use_id: block.toolUseId, content: block.content, is_error: block.isError }
233
- }),
256
+ })),
234
257
  })
235
258
  }
236
259
 
@@ -251,6 +274,14 @@ function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
251
274
  })
252
275
  }
253
276
 
277
+ export function supportsAnthropicImages(model: string): boolean {
278
+ const normalized = model.toLowerCase()
279
+ return normalized.includes('claude-3')
280
+ || normalized.includes('claude-sonnet-4')
281
+ || normalized.includes('claude-opus-4')
282
+ || normalized.includes('claude-haiku-4')
283
+ }
284
+
254
285
  function normalizeStopReason(value?: string): 'end_turn' | 'tool_use' | 'max_tokens' | 'stop_sequence' | 'unknown' {
255
286
  if (value === 'end_turn' || value === 'tool_use' || value === 'max_tokens' || value === 'stop_sequence') {
256
287
  return value
@@ -8,6 +8,14 @@ export type TextBlock = {
8
8
  text: string
9
9
  }
10
10
 
11
+ export type ImageBlock = {
12
+ type: 'image'
13
+ path: string
14
+ mimeType?: string
15
+ url?: string
16
+ dataBase64?: string
17
+ }
18
+
11
19
  export type ToolUseBlock = {
12
20
  type: 'tool_use'
13
21
  id: string
@@ -22,7 +30,7 @@ export type ToolResultBlock = {
22
30
  isError?: boolean
23
31
  }
24
32
 
25
- export type MessageContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
33
+ export type MessageContentBlock = TextBlock | ImageBlock | ToolUseBlock | ToolResultBlock
26
34
 
27
35
  export type Message = {
28
36
  role: Role
@@ -34,6 +42,7 @@ export type ProviderRetryStreamEvent = { type: 'retry' } & RetryEvent
34
42
  export type StreamEvent =
35
43
  | { type: 'text'; delta: string }
36
44
  | { type: 'thinking'; delta: string }
45
+ | { type: 'thinking_end' }
37
46
  | ProviderRetryStreamEvent
38
47
  | { type: 'tool_use_start'; id: string; name: string }
39
48
  | { type: 'tool_use_delta'; id: string; delta: string }
@@ -4,6 +4,7 @@ import { ProviderError } from './contracts.js'
4
4
  import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseFrames } from './sse.js'
7
+ import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
7
8
 
8
9
  export type GeminiToolDefinition = {
9
10
  name: string
@@ -41,6 +42,7 @@ type GeminiChunk = {
41
42
 
42
43
  type GeminiContentPart =
43
44
  | { text: string }
45
+ | { inlineData: { mimeType: string; data: string } }
44
46
  | { functionCall: { name: string; args: Record<string, unknown> } }
45
47
  | { functionResponse: { name: string; response: Record<string, unknown> } }
46
48
 
@@ -92,8 +94,21 @@ export class GeminiProvider implements Provider {
92
94
  yield { type: 'error', message: error.message }
93
95
  return
94
96
  }
97
+ if (hasImageBlocks(messages) && !supportsGeminiImages(this.model)) {
98
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
99
+ return
100
+ }
95
101
 
96
- const payload = buildGeminiPayload(messages, this.tools, options)
102
+ let payload: GeminiPayload
103
+ try {
104
+ payload = await buildGeminiPayload(messages, this.tools, options)
105
+ } catch (err: unknown) {
106
+ if (err instanceof ImageLoadError) {
107
+ yield { type: 'error', message: err.message }
108
+ return
109
+ }
110
+ throw err
111
+ }
97
112
  const modelName = this.model.replace(/^models\//, '')
98
113
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelName)}:streamGenerateContent?alt=sse`
99
114
 
@@ -181,11 +196,11 @@ export class GeminiProvider implements Provider {
181
196
  }
182
197
  }
183
198
 
184
- export function buildGeminiPayload(
199
+ export async function buildGeminiPayload(
185
200
  messages: Message[],
186
201
  tools: GeminiToolDefinition[] = [],
187
202
  options: ProviderCompleteOptions = {},
188
- ): GeminiPayload {
203
+ ): Promise<GeminiPayload> {
189
204
  const systemParts: string[] = []
190
205
  const contents: GeminiContent[] = []
191
206
  const toolUseNamesById = new Map<string, string>()
@@ -222,6 +237,10 @@ export function buildGeminiPayload(
222
237
  for (const block of blocks) {
223
238
  if (block.type === 'text') {
224
239
  parts.push({ text: block.text })
240
+ } else if (block.type === 'image') {
241
+ const loaded = await loadImageBlock(block)
242
+ if (!loaded.dataBase64 || !loaded.mimeType) throw new Error(`could not load image: ${block.path}`)
243
+ parts.push({ inlineData: { mimeType: loaded.mimeType, data: loaded.dataBase64 } })
225
244
  } else if (block.type === 'tool_result') {
226
245
  const name = toolUseNamesById.get(block.toolUseId) ?? 'unknown'
227
246
  const response: Record<string, unknown> = block.isError
@@ -258,6 +277,13 @@ function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
258
277
  })
259
278
  }
260
279
 
280
+ export function supportsGeminiImages(model: string): boolean {
281
+ const normalized = model.toLowerCase()
282
+ return normalized.includes('gemini-1.5')
283
+ || normalized.includes('gemini-2.0')
284
+ || normalized.includes('gemini-2.5')
285
+ }
286
+
261
287
  function normalizeFinishReason(reason: string, sawToolCall: boolean): DoneStopReason {
262
288
  if (sawToolCall) return 'tool_use'
263
289
  switch (reason) {
@@ -5,6 +5,7 @@ import { providerErrorFromResponse } from './errors.js'
5
5
  import { fetchWithRetryStreamEvents } from './retry.js'
6
6
  import { iterSseFrames } from './sse.js'
7
7
  import { messageTextContent } from '../utils/messages.js'
8
+ import { hasImageBlocks, ImageLoadError, loadImageBlock } from '../utils/images.js'
8
9
  import { providerDisplayName } from '../models/providerDisplay.js'
9
10
 
10
11
  export type OpenAIToolDefinition = {
@@ -28,6 +29,7 @@ type Options = {
28
29
  loadApiKey?: () => Promise<string | null>
29
30
  tools?: OpenAIToolDefinition[]
30
31
  maxRetries?: number
32
+ hasVisionProjector?: boolean
31
33
  }
32
34
 
33
35
  type ChatChunk = {
@@ -76,6 +78,7 @@ export class OpenAIChatProvider implements Provider {
76
78
  private readonly loadApiKey?: () => Promise<string | null>
77
79
  private readonly tools: OpenAIToolDefinition[]
78
80
  private readonly maxRetries?: number
81
+ private readonly hasVisionProjector: boolean
79
82
 
80
83
  constructor(opts: Options) {
81
84
  this.id = opts.id
@@ -86,6 +89,7 @@ export class OpenAIChatProvider implements Provider {
86
89
  this.tools = opts.tools ?? []
87
90
  this.maxRetries = opts.maxRetries
88
91
  this.supportsTools = this.tools.length > 0
92
+ this.hasVisionProjector = opts.hasVisionProjector ?? false
89
93
  }
90
94
 
91
95
  async *complete(
@@ -99,6 +103,19 @@ export class OpenAIChatProvider implements Provider {
99
103
  yield { type: 'error', message: error.message }
100
104
  return
101
105
  }
106
+ if (hasImageBlocks(messages)) {
107
+ if (this.id === 'llamacpp' && !this.hasVisionProjector) {
108
+ const hint = localModelNameHintsVision(this.model)
109
+ ? '; open alt+p and run "Add Vision Encoder" on this model to enable image input'
110
+ : ''
111
+ yield { type: 'error', message: `image input is not enabled for local model "${this.model}" (no vision projector loaded)${hint}` }
112
+ return
113
+ }
114
+ if (this.id === 'openai' && !supportsOpenAIImages(this.model)) {
115
+ yield { type: 'error', message: `image input is not enabled for ${this.model}` }
116
+ return
117
+ }
118
+ }
102
119
 
103
120
  const headers: Record<string, string> = {
104
121
  'Content-Type': 'application/json',
@@ -106,6 +123,17 @@ export class OpenAIChatProvider implements Provider {
106
123
  }
107
124
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`
108
125
 
126
+ let wireMessages: Array<Record<string, unknown>>
127
+ try {
128
+ wireMessages = await toWireMessages(messages)
129
+ } catch (err: unknown) {
130
+ if (err instanceof ImageLoadError) {
131
+ yield { type: 'error', message: err.message }
132
+ return
133
+ }
134
+ throw err
135
+ }
136
+
109
137
  let response: Response
110
138
  try {
111
139
  response = yield* fetchWithRetryStreamEvents(`${this.baseUrl}/chat/completions`, {
@@ -113,7 +141,7 @@ export class OpenAIChatProvider implements Provider {
113
141
  headers,
114
142
  body: JSON.stringify({
115
143
  model: this.model,
116
- messages: toWireMessages(messages),
144
+ messages: wireMessages,
117
145
  tools: this.tools.length > 0 ? this.tools : undefined,
118
146
  tool_choice: this.tools.length > 0 ? 'auto' : undefined,
119
147
  stream: true,
@@ -143,6 +171,7 @@ export class OpenAIChatProvider implements Provider {
143
171
  let stopReason: DoneStopReason = 'unknown'
144
172
  const toolCalls = new Map<number, StreamingToolCall>()
145
173
  const contentThinkingParser = new ContentThinkingParser(this.id)
174
+ let reasoningPending = false
146
175
 
147
176
  try {
148
177
  for await (const frame of iterSseFrames(response.body, signal, READ_TIMEOUT_MS)) {
@@ -166,18 +195,34 @@ export class OpenAIChatProvider implements Provider {
166
195
  ? delta.thinking
167
196
  : ''
168
197
 
169
- if (reasoning.length > 0) yield { type: 'thinking', delta: reasoning }
198
+ if (reasoning.length > 0) {
199
+ yield { type: 'thinking', delta: reasoning }
200
+ reasoningPending = true
201
+ }
170
202
  if (text.length > 0) {
203
+ if (reasoningPending) {
204
+ yield { type: 'thinking_end' }
205
+ reasoningPending = false
206
+ }
171
207
  for (const event of contentThinkingParser.push(text)) {
172
208
  yield event
173
209
  }
174
210
  }
175
211
 
176
- for (const event of applyStreamingToolCallDelta(toolCalls, delta?.tool_calls ?? [])) {
212
+ const toolCallDeltas = delta?.tool_calls ?? []
213
+ if (toolCallDeltas.length > 0 && reasoningPending) {
214
+ yield { type: 'thinking_end' }
215
+ reasoningPending = false
216
+ }
217
+ for (const event of applyStreamingToolCallDelta(toolCalls, toolCallDeltas)) {
177
218
  yield event
178
219
  }
179
220
 
180
221
  if (choice?.finish_reason) {
222
+ if (reasoningPending) {
223
+ yield { type: 'thinking_end' }
224
+ reasoningPending = false
225
+ }
181
226
  stopReason = normalizeFinishReason(choice.finish_reason)
182
227
  }
183
228
  if (parsed.usage) {
@@ -195,6 +240,10 @@ export class OpenAIChatProvider implements Provider {
195
240
  for (const event of contentThinkingParser.flush()) {
196
241
  yield event
197
242
  }
243
+ if (reasoningPending) {
244
+ yield { type: 'thinking_end' }
245
+ reasoningPending = false
246
+ }
198
247
 
199
248
  let streamEmittedToolUses = 0
200
249
  if (stopReason === 'tool_use' || toolCalls.size > 0) {
@@ -221,7 +270,7 @@ export class OpenAIChatProvider implements Provider {
221
270
 
222
271
  }
223
272
 
224
- export function toWireMessages(messages: Message[]): Array<Record<string, unknown>> {
273
+ export async function toWireMessages(messages: Message[]): Promise<Array<Record<string, unknown>>> {
225
274
  const out: Array<Record<string, unknown>> = []
226
275
 
227
276
  for (const message of messages) {
@@ -230,6 +279,26 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
230
279
  continue
231
280
  }
232
281
 
282
+ if (message.role === 'user') {
283
+ const toolResults = message.content.filter(isToolResultBlock)
284
+ if (toolResults.length > 0) {
285
+ for (const block of toolResults) {
286
+ out.push({
287
+ role: 'tool',
288
+ tool_call_id: block.toolUseId,
289
+ content: block.content,
290
+ })
291
+ }
292
+ const nonToolBlocks = message.content.filter(block => block.type !== 'tool_result')
293
+ if (nonToolBlocks.length > 0) {
294
+ out.push({ role: 'user', content: await toOpenAIUserContent(nonToolBlocks) })
295
+ }
296
+ continue
297
+ }
298
+ out.push({ role: 'user', content: await toOpenAIUserContent(message.content) })
299
+ continue
300
+ }
301
+
233
302
  if (message.role === 'assistant') {
234
303
  const textParts = message.content.filter(isTextBlock).map(block => block.text)
235
304
  const toolCalls = message.content.filter(isToolUseBlock).map(block => ({
@@ -266,6 +335,37 @@ export function toWireMessages(messages: Message[]): Array<Record<string, unknow
266
335
  return normalizeSystemMessages(out)
267
336
  }
268
337
 
338
+ async function toOpenAIUserContent(blocks: MessageContentBlock[]): Promise<Array<Record<string, unknown>>> {
339
+ const parts: Array<Record<string, unknown>> = []
340
+ for (const block of blocks) {
341
+ if (block.type === 'text') {
342
+ if (block.text.length > 0) parts.push({ type: 'text', text: block.text })
343
+ continue
344
+ }
345
+ if (block.type === 'image') {
346
+ const loaded = await loadImageBlock(block)
347
+ if (loaded.url) {
348
+ parts.push({ type: 'image_url', image_url: { url: loaded.url } })
349
+ } else if (loaded.dataBase64 && loaded.mimeType) {
350
+ parts.push({ type: 'image_url', image_url: { url: `data:${loaded.mimeType};base64,${loaded.dataBase64}` } })
351
+ }
352
+ continue
353
+ }
354
+ }
355
+ return parts.length > 0 ? parts : [{ type: 'text', text: '' }]
356
+ }
357
+
358
+ export function supportsOpenAIImages(model: string): boolean {
359
+ const normalized = model.toLowerCase()
360
+ if (normalized.includes('gpt-3.5')) return false
361
+ return /gpt-4o|gpt-4\.1|gpt-4-turbo|gpt-4-vision|gpt-5|o1|o3|o4|chatgpt-4/.test(normalized)
362
+ }
363
+
364
+ export function localModelNameHintsVision(model: string): boolean {
365
+ const normalized = model.toLowerCase()
366
+ return /llava|bakllava|qwen[-_.]?vl|qwen2[-_.]?vl|qwen2\.5[-_.]?vl|minicpm-?v|llama-3\.2.*vision|mllama|cogvlm|internvl|moondream|pixtral|phi-?3[\.-]?vision|phi-?3\.5[\.-]?vision|smolvlm/.test(normalized)
367
+ }
368
+
269
369
  function normalizeSystemMessages(messages: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
270
370
  const systemContents: string[] = []
271
371
  const nonSystem: Array<Record<string, unknown>> = []
@@ -304,17 +404,35 @@ function isToolResultBlock(block: MessageContentBlock): block is Extract<Message
304
404
 
305
405
  function parseToolArguments(inputJson: string): Record<string, unknown> {
306
406
  if (!inputJson.trim()) return {}
407
+ const direct = tryParseJsonOnce(inputJson)
408
+ if (direct !== undefined) return coerceToToolArguments(direct)
409
+ const repaired = repairJsonObject(inputJson)
410
+ if (!repaired) return {}
411
+ const parsedRepaired = tryParseJsonOnce(repaired)
412
+ return parsedRepaired === undefined ? {} : coerceToToolArguments(parsedRepaired)
413
+ }
414
+
415
+ function tryParseJsonOnce(value: string): unknown {
307
416
  try {
308
- return JSON.parse(inputJson) as Record<string, unknown>
417
+ return JSON.parse(value)
309
418
  } catch {
310
- const repaired = repairJsonObject(inputJson)
311
- if (!repaired) return {}
312
- try {
313
- return JSON.parse(repaired) as Record<string, unknown>
314
- } catch {
315
- return {}
419
+ return undefined
420
+ }
421
+ }
422
+
423
+ function coerceToToolArguments(value: unknown): Record<string, unknown> {
424
+ if (typeof value === 'string') {
425
+ const trimmed = value.trim()
426
+ if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
427
+ const inner = tryParseJsonOnce(trimmed)
428
+ if (inner !== undefined) return coerceToToolArguments(inner)
316
429
  }
430
+ return {}
431
+ }
432
+ if (value && typeof value === 'object' && !Array.isArray(value)) {
433
+ return value as Record<string, unknown>
317
434
  }
435
+ return {}
318
436
  }
319
437
 
320
438
  function* applyStreamingToolCallDelta(
@@ -407,7 +525,9 @@ class ContentThinkingParser {
407
525
  yield { type: this.state === 'thinking' ? 'thinking' : 'text', delta: before }
408
526
  }
409
527
  this.buffer = this.buffer.slice(tagIndex + tag.length)
528
+ const wasThinking = this.state === 'thinking'
410
529
  this.state = this.state === 'text' ? 'thinking' : 'text'
530
+ if (wasThinking) yield { type: 'thinking_end' }
411
531
  continue
412
532
  }
413
533
 
@@ -1,9 +1,11 @@
1
1
  import type { Message, MessageContentBlock } from './contracts.js'
2
2
  import { messageTextContent } from '../utils/messages.js'
3
3
  import type { OpenAIToolDefinition } from './openai-chat.js'
4
+ import { loadImageBlock } from '../utils/images.js'
4
5
 
5
6
  export type ResponsesInputContent =
6
7
  | { type: 'input_text'; text: string }
8
+ | { type: 'input_image'; image_url: string }
7
9
  | { type: 'output_text'; text: string }
8
10
 
9
11
  export type ResponsesInputItem =
@@ -30,13 +32,13 @@ export type ResponsesRequestBody = {
30
32
  max_output_tokens?: number
31
33
  }
32
34
 
33
- export function buildResponsesBody(args: {
35
+ export async function buildResponsesBody(args: {
34
36
  model: string
35
37
  messages: Message[]
36
38
  tools: OpenAIToolDefinition[]
37
39
  maxOutputTokens?: number
38
- }): ResponsesRequestBody {
39
- const { instructions, items } = splitMessages(args.messages)
40
+ }): Promise<ResponsesRequestBody> {
41
+ const { instructions, items } = await splitMessages(args.messages)
40
42
  const body: ResponsesRequestBody = {
41
43
  model: args.model,
42
44
  input: items,
@@ -60,10 +62,10 @@ export function buildResponsesBody(args: {
60
62
  return body
61
63
  }
62
64
 
63
- function splitMessages(messages: Message[]): {
65
+ async function splitMessages(messages: Message[]): Promise<{
64
66
  instructions?: string
65
67
  items: ResponsesInputItem[]
66
- } {
68
+ }> {
67
69
  const instructions: string[] = []
68
70
  const items: ResponsesInputItem[] = []
69
71
 
@@ -100,12 +102,12 @@ function splitMessages(messages: Message[]): {
100
102
  }
101
103
  continue
102
104
  }
103
- const text = blocks.filter(isTextBlock).map(block => block.text).join('')
104
- if (text) {
105
+ const content = await toOpenAIResponsesUserContent(blocks)
106
+ if (content.length > 0) {
105
107
  items.push({
106
108
  type: 'message',
107
109
  role: 'user',
108
- content: [{ type: 'input_text', text }],
110
+ content,
109
111
  })
110
112
  }
111
113
  continue
@@ -136,6 +138,25 @@ function splitMessages(messages: Message[]): {
136
138
  }
137
139
  }
138
140
 
141
+ async function toOpenAIResponsesUserContent(blocks: MessageContentBlock[]): Promise<ResponsesInputContent[]> {
142
+ const content: ResponsesInputContent[] = []
143
+ for (const block of blocks) {
144
+ if (block.type === 'text') {
145
+ if (block.text) content.push({ type: 'input_text', text: block.text })
146
+ continue
147
+ }
148
+ if (block.type === 'image') {
149
+ const loaded = await loadImageBlock(block)
150
+ if (loaded.url) {
151
+ content.push({ type: 'input_image', image_url: loaded.url })
152
+ } else if (loaded.dataBase64 && loaded.mimeType) {
153
+ content.push({ type: 'input_image', image_url: `data:${loaded.mimeType};base64,${loaded.dataBase64}` })
154
+ }
155
+ }
156
+ }
157
+ return content
158
+ }
159
+
139
160
  function normalizeBlocks(content: Message['content']): MessageContentBlock[] {
140
161
  if (typeof content === 'string') {
141
162
  return content ? [{ type: 'text', text: content }] : []