ethagent 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -112,6 +112,17 @@ ethagent works with OpenAI, Anthropic, Gemini, and local GGUF models served thro
112
112
  - The featured local model is [Qwen3.5-9B-Uncensored](https://huggingface.co/HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive); other Hugging Face GGUF models work by repo ID or direct URL.
113
113
  - Cloud API keys live in the OS keyring when one is available, with an encrypted local file under `~/.ethagent` as fallback.
114
114
 
115
+ ### Image Input
116
+
117
+ Press `Alt+V` to paste an image from the clipboard. A marker like `[Image #1]` appears in the prompt; delete it to drop the attachment.
118
+
119
+ Vision support is available on:
120
+
121
+ - **OpenAI** (Chat Completions and Responses API): `gpt-4o`, `gpt-4.1`, `gpt-4-turbo`, `gpt-4-vision`, `gpt-5`, `o1`, `o3`, `o4`, `chatgpt-4`.
122
+ - **Anthropic**: `claude-3`, `claude-sonnet-4`, `claude-opus-4`, `claude-haiku-4`.
123
+ - **Gemini**: `gemini-1.5`, `gemini-2.0`, `gemini-2.5`.
124
+ - **Local llama.cpp**: vision works when both the main GGUF and a `mmproj-*.gguf` projector are loaded. The picker recommends downloading both as a bundle during install; if you skipped the projector, open the model picker with `Alt+P` — any installed model that has a vision encoder available shows an `Add Vision Encoder` row directly beneath it.
125
+
115
126
  ## Tools and Sessions
116
127
 
117
128
  - File ops, shell, clipboard, and MCP tools all run through the same permission layer.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ethagent",
3
- "version": "2.3.0",
3
+ "version": "2.4.0",
4
4
  "description": "A privacy-first AI agent with a portable Ethereum identity",
5
5
  "type": "module",
6
6
  "main": "bin/ethagent.js",
@@ -435,6 +435,7 @@ function configFromModelPickerSelection(selection: ModelPickerSelection, base: E
435
435
  provider: 'llamacpp',
436
436
  model: selection.model,
437
437
  baseUrl: defaultBaseUrlFor('llamacpp'),
438
+ localMmprojPath: selection.mmprojPath,
438
439
  }
439
440
  }
440
441
  return {
@@ -442,6 +443,7 @@ function configFromModelPickerSelection(selection: ModelPickerSelection, base: E
442
443
  provider: selection.provider,
443
444
  model: selection.model,
444
445
  baseUrl: undefined,
446
+ localMmprojPath: undefined,
445
447
  }
446
448
  }
447
449
 
@@ -13,6 +13,7 @@ import { ChatInput } from './input/ChatInput.js'
13
13
  import { IdentityHub, type IdentityHubInitialAction, type IdentityHubResult } from '../identity/hub/IdentityHub.js'
14
14
  import type { CopyResult } from '../utils/clipboard.js'
15
15
  import { getSlashSuggestions } from './commands.js'
16
+ import { modelSupportsImages } from '../utils/images.js'
16
17
  import { Box, Text } from 'ink'
17
18
  import { theme } from '../ui/theme.js'
18
19
  import { Spinner } from '../ui/Spinner.js'
@@ -270,6 +271,14 @@ export function ChatBottomPane({
270
271
  cwd={cwd}
271
272
  seedText={pendingInputDraft}
272
273
  onSeedConsumed={onInputDraftConsumed}
274
+ onImagePaste={() => {
275
+ if (!modelSupportsImages(config.provider, config.model, { mmprojPath: config.localMmprojPath })) {
276
+ const hint = config.provider === 'llamacpp'
277
+ ? ' · run "Add Vision Encoder" in alt+p to enable image input on this model'
278
+ : ' · switch via alt+p'
279
+ pushNote(`current model "${config.model}" does not accept image input${hint}`, 'error')
280
+ }
281
+ }}
273
282
  />
274
283
  <Box marginLeft={2} marginTop={0} flexDirection="column">
275
284
  <Text>
@@ -1197,7 +1197,7 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
1197
1197
  clearTranscript()
1198
1198
  overlayRef.current = 'none'
1199
1199
  setOverlay('none')
1200
- pushNote('Cleared saved chat logs and resume context from this machine.', 'dim')
1200
+ pushNote('Cleared saved sessions and resume context from this machine.', 'dim')
1201
1201
  },
1202
1202
  [clearTranscript, pushNote],
1203
1203
  )
@@ -1517,17 +1517,23 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
1517
1517
 
1518
1518
  const exitHint = exitState.pending ? 'ctrl+c again to quit' : null
1519
1519
  const runtimeModeLabel = sessionModeLabel(mode)
1520
+ const runtimeModeColor =
1521
+ mode === 'plan'
1522
+ ? theme.modePlan
1523
+ : mode === 'accept-edits'
1524
+ ? theme.modeAcceptEdits
1525
+ : theme.text
1520
1526
  const footerRight = (
1521
1527
  <Box flexDirection="row">
1522
1528
  {exitHint ? (
1523
1529
  <>
1524
- <Text color={theme.text}>{exitHint}</Text>
1530
+ <Text color={theme.accentPeriwinkle}>{exitHint}</Text>
1525
1531
  <Text color={theme.dim}> · </Text>
1526
1532
  </>
1527
1533
  ) : null}
1528
1534
  {runtimeModeLabel ? (
1529
1535
  <>
1530
- <Text bold>{runtimeModeLabel}</Text>
1536
+ <Text color={runtimeModeColor} bold>{runtimeModeLabel}</Text>
1531
1537
  <Text color={theme.dim}> (</Text>
1532
1538
  <Text color={theme.accentPeriwinkle}>shift+tab to cycle</Text>
1533
1539
  <Text color={theme.dim}>) · </Text>
@@ -1613,7 +1619,7 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
1613
1619
  }
1614
1620
 
1615
1621
  export function chatFooterShortcutText(canScrollTranscript: boolean): string {
1616
- return `${canScrollTranscript ? 'pgup/pgdn scroll · ' : ''}alt+p model · alt+i identity`
1622
+ return 'alt+p model · alt+i identity'
1617
1623
  }
1618
1624
 
1619
1625
  function formatContextLabel(usage: ContextUsage): string {
@@ -39,6 +39,7 @@ export function resolveModelSelection(
39
39
  selection.model === currentConfig.model
40
40
  && currentConfig.provider === 'llamacpp'
41
41
  && currentConfig.baseUrl === baseUrl
42
+ && currentConfig.localMmprojPath === selection.mmprojPath
42
43
  ) {
43
44
  return { kind: 'noop' }
44
45
  }
@@ -49,8 +50,9 @@ export function resolveModelSelection(
49
50
  provider: 'llamacpp',
50
51
  model: selection.model,
51
52
  baseUrl,
53
+ localMmprojPath: selection.mmprojPath,
52
54
  },
53
- notice: `Local Hugging Face model ready. Now using ${formatModelDisplayName('llamacpp', selection.model, { maxLength: 64 })}.`,
55
+ notice: `Local Hugging Face model ready. Now using ${formatModelDisplayName('llamacpp', selection.model, { maxLength: 64 })}${selection.mmprojPath ? ' with vision encoder' : ''}.`,
54
56
  tone: 'info',
55
57
  }
56
58
  }
@@ -65,6 +67,7 @@ export function resolveModelSelection(
65
67
  provider: nextProvider,
66
68
  model: selection.model,
67
69
  baseUrl: nextBaseUrl,
70
+ localMmprojPath: undefined,
68
71
  }
69
72
 
70
73
  return {
@@ -15,6 +15,7 @@ import {
15
15
  createTurnCheckpoint,
16
16
  type TurnCheckpoint,
17
17
  } from './chatScreenUtils.js'
18
+ import { collapseImagePathsToRefs, userTextToContentBlocks } from '../utils/images.js'
18
19
 
19
20
  type MutableRef<T> = { current: T }
20
21
 
@@ -101,10 +102,13 @@ export async function runStreamingTurn(
101
102
  const activeCheckpoint = createTurnCheckpoint(sessionId, userText)
102
103
  setActiveCheckpoint(activeCheckpoint)
103
104
 
104
- updateRows(prev => [...prev, { role: 'user', id: nextRowId(), content: userText }])
105
+ const userContent = userTextToContentBlocks(userText)
106
+ const displayText = collapseImagePathsToRefs(userText)
107
+ updateRows(prev => [...prev, { role: 'user', id: nextRowId(), content: displayText }])
105
108
  await persistTurnMessage({
106
109
  role: 'user',
107
- content: userText,
110
+ content: displayText,
111
+ providerContent: typeof userContent === 'string' ? undefined : userContent,
108
112
  createdAt: nowIso(),
109
113
  turnId: activeCheckpoint.turnId,
110
114
  })
@@ -33,6 +33,12 @@ import {
33
33
  shouldCollapsePastedText,
34
34
  type PastedTextRef,
35
35
  } from './chatPaste.js'
36
+ import {
37
+ expandImageRefs,
38
+ formatImageRefMarker,
39
+ pruneImageRefs,
40
+ type ImageRef,
41
+ } from './imageRefs.js'
36
42
 
37
43
  type PromptInputProps = {
38
44
  onSubmit: (value: string) => void
@@ -48,6 +54,7 @@ type PromptInputProps = {
48
54
  cwd?: string
49
55
  seedText?: string | null
50
56
  onSeedConsumed?: () => void
57
+ onImagePaste?: (path: string) => void
51
58
  }
52
59
 
53
60
  const MAX_LENGTH = 32_768
@@ -76,6 +83,7 @@ export const ChatInput: React.FC<PromptInputProps> = ({
76
83
  cwd,
77
84
  seedText,
78
85
  onSeedConsumed,
86
+ onImagePaste,
79
87
  }) => {
80
88
  const { stdout } = useStdout()
81
89
  const [buffer, setBuffer] = useState<ChatBuffer>(emptyBuffer)
@@ -100,6 +108,8 @@ export const ChatInput: React.FC<PromptInputProps> = ({
100
108
  const pasteTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)
101
109
  const pastedTextRefsRef = useRef<Map<number, PastedTextRef>>(new Map())
102
110
  const nextPastedTextRefIdRef = useRef(1)
111
+ const imageRefsRef = useRef<Map<number, ImageRef>>(new Map())
112
+ const nextImageRefIdRef = useRef(1)
103
113
 
104
114
  useEffect(() => { bufferRef.current = buffer }, [buffer])
105
115
  useEffect(() => { historyIndexRef.current = historyIndex }, [historyIndex])
@@ -107,6 +117,11 @@ export const ChatInput: React.FC<PromptInputProps> = ({
107
117
  useEffect(() => { historyPreviewActiveRef.current = historyPreviewActive }, [historyPreviewActive])
108
118
  useEffect(() => { preferredColumnRef.current = preferredColumn }, [preferredColumn])
109
119
 
120
+ useEffect(() => {
121
+ pruneImageRefs(imageRefsRef.current, value)
122
+ if (imageRefsRef.current.size === 0) nextImageRefIdRef.current = 1
123
+ }, [value])
124
+
110
125
  useEffect(() => {
111
126
  const handleResize = () => {
112
127
  setColumns(stdout.columns ?? process.stdout.columns ?? 80)
@@ -217,6 +232,8 @@ export const ChatInput: React.FC<PromptInputProps> = ({
217
232
  })
218
233
  pastedTextRefsRef.current.clear()
219
234
  nextPastedTextRefIdRef.current = 1
235
+ imageRefsRef.current.clear()
236
+ nextImageRefIdRef.current = 1
220
237
  }, [applyBuffer, applyHistoryState])
221
238
 
222
239
  const handlePaste = useCallback((text: string) => {
@@ -257,7 +274,8 @@ export const ChatInput: React.FC<PromptInputProps> = ({
257
274
  const submit = useCallback(() => {
258
275
  const trimmed = value.trim()
259
276
  if (!trimmed) return
260
- onSubmit(expandPastedTextRefs(trimmed, pastedTextRefsRef.current))
277
+ const withText = expandPastedTextRefs(trimmed, pastedTextRefsRef.current)
278
+ onSubmit(expandImageRefs(withText, imageRefsRef.current))
261
279
  resetBuffer()
262
280
  }, [value, onSubmit, resetBuffer])
263
281
 
@@ -388,7 +406,12 @@ export const ChatInput: React.FC<PromptInputProps> = ({
388
406
  if (key.meta && inputText === 'v') {
389
407
  void (async () => {
390
408
  const image = await readClipboardImage()
391
- if (image.ok) insertText(`[image: ${image.path}]`)
409
+ if (image.ok) {
410
+ const id = nextImageRefIdRef.current++
411
+ imageRefsRef.current.set(id, { path: image.path })
412
+ insertText(formatImageRefMarker(id))
413
+ onImagePaste?.(image.path)
414
+ }
392
415
  })()
393
416
  return
394
417
  }
@@ -0,0 +1,30 @@
1
/** An image attached to the prompt; `path` is substituted for its marker on expansion. */
export type ImageRef = { path: string; mimeType?: string }

/**
 * Matches prompt markers of the form `[Image #<digits>]`, capturing the digits.
 * Kept module-private; it is only used through `String.prototype.replace` and
 * `String.prototype.matchAll` below.
 */
const IMAGE_REF_MARKER_RE = /\[Image\s+#(\d+)\]/g

/**
 * Replaces each `[Image #N]` marker in `text` with `[image: <path>]` using the
 * entry registered under id `N` in `refs`.
 *
 * Markers whose id has no entry in `refs` are left untouched.
 *
 * @param text - Prompt text that may contain image markers.
 * @param refs - Lookup from marker id to image reference; only read, never mutated.
 * @returns The text with every resolvable marker expanded.
 */
export function expandImageRefs(text: string, refs: ReadonlyMap<number, ImageRef>): string {
  return text.replace(IMAGE_REF_MARKER_RE, (full, raw: string) => {
    const ref = refs.get(Number(raw))
    return ref ? `[image: ${ref.path}]` : full
  })
}

/**
 * Collects the ids of all `[Image #N]` markers present in `text`.
 *
 * @param text - Prompt text to scan.
 * @returns The set of distinct marker ids found.
 */
export function referencedImageIds(text: string): Set<number> {
  const out = new Set<number>()
  for (const match of text.matchAll(IMAGE_REF_MARKER_RE)) {
    const id = Number(match[1])
    // Digit runs beyond Number.MAX_SAFE_INTEGER lose precision (distinct
    // markers would collapse onto one id), so they are not treated as ids.
    if (Number.isSafeInteger(id)) out.add(id)
  }
  return out
}

/**
 * Removes from `refs` (in place) every entry whose id no longer has a marker in `text`.
 *
 * @param refs - Mutable map of marker id to image reference.
 * @param text - Current prompt text.
 */
export function pruneImageRefs(refs: Map<number, ImageRef>, text: string): void {
  const referenced = referencedImageIds(text)
  // Iterate over a snapshot of the keys while deleting from the map.
  for (const id of [...refs.keys()]) {
    if (!referenced.has(id)) refs.delete(id)
  }
}

/**
 * Builds the prompt marker for an image id, in the form recognised by
 * `expandImageRefs` and `referencedImageIds`.
 *
 * @param id - Image reference id.
 * @returns The marker string, e.g. `[Image #1]`.
 */
export function formatImageRefMarker(id: number): string {
  return `[Image #${id}]`
}
@@ -48,8 +48,8 @@ export const ResumeView: React.FC<ResumeViewProps> = ({ currentSessionId, onResu
48
48
 
49
49
  if (state.kind === 'loading') {
50
50
  return (
51
- <Surface title="Resume Session" subtitle="Loading projects and directories...">
52
- <Spinner label="loading sessions..." />
51
+ <Surface title="Resume Session" subtitle="Recent chats and directories." footer="esc closes">
52
+ <Spinner label="loading..." />
53
53
  </Surface>
54
54
  )
55
55
  }
@@ -65,7 +65,7 @@ export const ResumeView: React.FC<ResumeViewProps> = ({ currentSessionId, onResu
65
65
  if (state.kind === 'confirmClear') {
66
66
  return (
67
67
  <Surface
68
- title="Clear All Chat Logs?"
68
+ title="Clear All Saved Sessions?"
69
69
  subtitle={`${state.sessions.length} saved session${state.sessions.length === 1 ? '' : 's'} will be removed.`}
70
70
  tone="error"
71
71
  footer="enter selects · esc returns to resume"
@@ -76,9 +76,10 @@ export const ResumeView: React.FC<ResumeViewProps> = ({ currentSessionId, onResu
76
76
  {state.error ? <Text color={theme.accentError}>{state.error}</Text> : null}
77
77
  </Box>
78
78
  <Select<'back' | 'clear'>
79
+ hintLayout="inline"
79
80
  options={[
80
- { value: 'back', label: 'back to sessions' },
81
- { value: 'clear', label: 'clear all chat logs', hint: 'cannot be undone' },
81
+ { value: 'back', label: 'Back to Sessions' },
82
+ { value: 'clear', label: 'Clear All Saved Sessions', hint: 'Cannot be undone' },
82
83
  ]}
83
84
  onSubmit={choice => {
84
85
  if (choice === 'back') {
@@ -155,11 +156,18 @@ export function buildResumeOptions(
155
156
  label: '',
156
157
  disabled: true,
157
158
  }
159
+ const manageHeader: SelectOption<string> = {
160
+ value: 'separator:manage',
161
+ label: 'Manage',
162
+ role: 'section',
163
+ bold: true,
164
+ disabled: true,
165
+ }
158
166
 
159
167
  const clearOption: SelectOption<string> = {
160
168
  value: CLEAR_ALL_SESSIONS_VALUE,
161
- label: 'Clear All Chat Logs',
162
- hint: 'removes saved chats and resume context',
169
+ label: 'Clear All Saved Sessions',
170
+ hint: 'Removes saved chats and resume context',
163
171
  role: 'utility',
164
172
  }
165
173
 
@@ -202,6 +210,7 @@ export function buildResumeOptions(
202
210
  }
203
211
 
204
212
  options.push(manageSpacer)
213
+ options.push(manageHeader)
205
214
  options.push(clearOption)
206
215
 
207
216
  return options
@@ -13,6 +13,7 @@ import {
13
13
  installLlamaCppRunner,
14
14
  setLlamaCppServerPath,
15
15
  startLlamaCppServer,
16
+ stopLlamaCppServer,
16
17
  type LlamaCppInstallProgress,
17
18
  type LlamaCppInstallResult,
18
19
  type LlamaCppStartResult,
@@ -32,6 +33,8 @@ import { defaultModelFor, type EthagentConfig, type ProviderId } from '../storag
32
33
  import { clearModelCatalogCache, discoverProviderModels, isOpenAIOAuthAllowedModel, OPENAI_OAUTH_DEFAULT_MODEL, type ModelCatalogResult } from './catalog.js'
33
34
  import { contextWindowInfo } from '../runtime/compaction.js'
34
35
  import {
36
+ addMmprojToInstalledModel,
37
+ backfillMmprojForModels,
35
38
  createHfDownloadPlan,
36
39
  downloadHfModel,
37
40
  fetchHuggingFaceRepoInfo,
@@ -68,7 +71,7 @@ import { formatLocalHfModelDisplayName, formatModelDisplayName } from './modelDi
68
71
  import { fetchUncensoredGgufCatalog, type UncensoredCatalogEntry } from './uncensoredCatalog.js'
69
72
 
70
73
  export type ModelPickerSelection =
71
- | { kind: 'llamacpp'; model: string }
74
+ | { kind: 'llamacpp'; model: string; mmprojPath?: string }
72
75
  | { kind: 'cloud'; provider: CloudProviderId; model: string; keyJustSet: boolean }
73
76
 
74
77
  type ModelPickerProps = {
@@ -113,6 +116,9 @@ type State =
113
116
  | { kind: 'localRunnerPathEntry'; data: LoadedData; model: LocalHfModel; submitting: boolean; error?: string }
114
117
  | { kind: 'localRunnerStarting'; data: LoadedData; model: LocalHfModel; startedAt: number }
115
118
  | { kind: 'localRunnerStartFail'; data: LoadedData; model: LocalHfModel; result: Extract<LlamaCppStartResult, { ok: false }> }
119
+ | { kind: 'mmprojOffer'; data: LoadedData; model: LocalHfModel }
120
+ | { kind: 'mmprojDownloading'; data: LoadedData; model: LocalHfModel; progress: HfDownloadProgress }
121
+ | { kind: 'mmprojError'; data: LoadedData; model: LocalHfModel; message: string }
116
122
 
117
123
  export const ModelPicker: React.FC<ModelPickerProps> = ({
118
124
  currentConfig,
@@ -244,6 +250,7 @@ export const ModelPicker: React.FC<ModelPickerProps> = ({
244
250
  const canDownload = plan.review.risk !== 'high' && plan.review.runtime === 'llama.cpp runnable'
245
251
  const fit = state.data.machineSpec ? estimateGgufMachineFit(plan.sizeBytes, state.data.machineSpec) : null
246
252
  const recommended = state.data.machineSpec ? recommendGgufFile(plan.repo, ggufFiles(plan.repo), state.data.machineSpec) : null
253
+ const mmproj = plan.mmprojCandidate
247
254
  return (
248
255
  <Surface
249
256
  title="Review Model Link"
@@ -260,15 +267,22 @@ export const ModelPicker: React.FC<ModelPickerProps> = ({
260
267
  <Text color={riskColor(plan.review.risk)}>safety: {safetyLabel(plan.review.risk)} · source: {credibilityLabel(plan.review.credibility)}</Text>
261
268
  <Text color={theme.dim}>signals: {formatSignals(plan.repo.downloads, plan.repo.likes)}</Text>
262
269
  <Text color={theme.dim}>notes: {friendlyReasons(plan.review.reasons).join('; ')}</Text>
270
+ {mmproj ? (
271
+ <Text color={theme.dim}>vision encoder available: {friendlyFileName(mmproj.filename)} (+{formatBytes(mmproj.sizeBytes)})</Text>
272
+ ) : null}
263
273
  </Box>
264
- <Select<'download' | 'pick' | 'cancel'>
274
+ <Select<'download' | 'downloadWithMmproj' | 'pick' | 'cancel'>
265
275
  options={[
266
- { value: 'download', label: 'Download This Model', disabled: !canDownload },
276
+ ...(mmproj ? [{ value: 'downloadWithMmproj' as const, label: `Download Model + Vision Encoder (+${formatBytes(mmproj.sizeBytes)}) · recommended`, disabled: !canDownload }] : []),
277
+ { value: 'download', label: mmproj ? 'Download Without Vision Encoder' : 'Download This Model', disabled: !canDownload },
267
278
  { value: 'pick', label: 'Pick Another File' },
268
279
  { value: 'cancel', label: 'Cancel' },
269
280
  ]}
270
281
  onSubmit={choice => {
271
282
  if (choice === 'download') void startHfDownload(state, setState, hfAbortRef, onPick)
283
+ else if (choice === 'downloadWithMmproj') {
284
+ void startHfDownload({ ...state, plan: { ...plan, includeMmproj: true } }, setState, hfAbortRef, onPick)
285
+ }
272
286
  else if (choice === 'pick') void inspectHfInput({ kind: 'hfInput', data: state.data }, plan.repoId, setState)
273
287
  else setState({ kind: 'list', data: state.data })
274
288
  }}
@@ -291,6 +305,68 @@ export const ModelPicker: React.FC<ModelPickerProps> = ({
291
305
  )
292
306
  }
293
307
 
308
+ if (state.kind === 'mmprojOffer') {
309
+ const sizeLabel = state.model.mmprojSizeBytes ? `+${formatBytes(state.model.mmprojSizeBytes)}` : 'additional download'
310
+ return (
311
+ <Surface
312
+ title="Add Image Support?"
313
+ subtitle={`${state.model.displayName} has a vision encoder available in its Hugging Face repo.`}
314
+ footer="enter select · esc back"
315
+ >
316
+ <Box flexDirection="column" marginBottom={1}>
317
+ <Text color={theme.dim}>Loading the vision encoder lets this model accept pasted images.</Text>
318
+ <Text color={theme.dim}>Without it, image paste is declined at submit time.</Text>
319
+ </Box>
320
+ <Select<'add' | 'skip' | 'cancel'>
321
+ options={[
322
+ { value: 'add', label: `Add Vision Encoder (${sizeLabel}) And Use` },
323
+ { value: 'skip', label: 'Use Without Image Support' },
324
+ { value: 'cancel', label: 'Cancel' },
325
+ ]}
326
+ onSubmit={choice => {
327
+ if (choice === 'add') void downloadMmprojAndContinue(state, setState, onPick)
328
+ else if (choice === 'skip') void startAndPickHfModel({ ...state.model, mmprojAvailable: false }, state, setState, onPick)
329
+ else setState({ kind: 'list', data: state.data })
330
+ }}
331
+ onCancel={() => setState({ kind: 'list', data: state.data })}
332
+ />
333
+ </Surface>
334
+ )
335
+ }
336
+
337
+ if (state.kind === 'mmprojDownloading') {
338
+ const total = state.progress.total ?? state.model.mmprojSizeBytes ?? 0
339
+ const completed = state.progress.completed ?? 0
340
+ const progress = total > 0 ? completed / total : 0
341
+ const suffix = total > 0 ? `${formatBytes(completed)} / ${formatBytes(total)}` : formatBytes(completed)
342
+ return (
343
+ <Surface title="Downloading Vision Encoder" subtitle={state.model.displayName}>
344
+ <Text color={theme.dim}>{state.progress.status}</Text>
345
+ <ProgressBar progress={progress} suffix={suffix} />
346
+ </Surface>
347
+ )
348
+ }
349
+
350
+ if (state.kind === 'mmprojError') {
351
+ return (
352
+ <Surface title="Vision Encoder Download Failed" subtitle={state.message} tone="error" footer="enter select · esc back">
353
+ <Select<'retry' | 'skip' | 'back'>
354
+ options={[
355
+ { value: 'retry', label: 'Retry Download' },
356
+ { value: 'skip', label: 'Use Without Image Support' },
357
+ { value: 'back', label: 'Back To Picker' },
358
+ ]}
359
+ onSubmit={choice => {
360
+ if (choice === 'retry') setState({ kind: 'mmprojOffer', data: state.data, model: state.model })
361
+ else if (choice === 'skip') void startAndPickHfModel({ ...state.model, mmprojAvailable: false }, state, setState, onPick)
362
+ else setState({ kind: 'list', data: state.data })
363
+ }}
364
+ onCancel={() => setState({ kind: 'list', data: state.data })}
365
+ />
366
+ </Surface>
367
+ )
368
+ }
369
+
294
370
  if (state.kind === 'hfDone') {
295
371
  return (
296
372
  <Surface
@@ -834,6 +910,18 @@ function handleSubmit(
834
910
  })()
835
911
  return
836
912
  }
913
+ if (value.startsWith('hfmmproj:') && state.kind === 'list') {
914
+ const id = value.slice('hfmmproj:'.length)
915
+ void (async () => {
916
+ const local = await findLocalHfModel(id)
917
+ if (!local) {
918
+ setState({ kind: 'hfError', data: state.data, message: 'local model metadata was not found' })
919
+ return
920
+ }
921
+ setState({ kind: 'mmprojOffer', data: state.data, model: local })
922
+ })()
923
+ return
924
+ }
837
925
  if (value.startsWith('uc:') && state.kind === 'localCatalog') {
838
926
  const entry = state.catalog.find(item => catalogOptionValue(item.repo.repoId, item.file.filename) === value)
839
927
  if (entry) void reviewCatalogModel(state, entry, setState)
@@ -1250,6 +1338,8 @@ function localRunnerStartFailureSubtitle(result: Extract<LlamaCppStartResult, {
1250
1338
  return result.message
1251
1339
  case 'runner-not-installed':
1252
1340
  return 'this machine still needs a local runner'
1341
+ case 'untracked-server':
1342
+ return result.message
1253
1343
  }
1254
1344
  }
1255
1345
 
@@ -1445,6 +1535,39 @@ async function uninstallLocalModel(
1445
1535
  }
1446
1536
  }
1447
1537
 
1538
/**
 * Downloads the vision encoder (mmproj) for an installed local model, then
 * restarts the local server with it and selects the model.
 *
 * Steps, in order:
 *  1. Stream `addMmprojToInstalledModel` progress into the `mmprojDownloading` state.
 *  2. Re-read the model metadata and require a persisted `mmprojPath`.
 *  3. Stop the running llama.cpp server (best effort); if the stop result
 *     reports an `untracked-server`, surface manual instructions instead.
 *  4. Reload the picker's model list and hand off to `startAndPickHfModel`.
 *
 * Any failure in steps 1-4 before the hand-off moves the picker to the
 * `mmprojError` state. Callers invoke this with `void`, so a rejection here
 * would otherwise go unhandled and leave the UI on the downloading screen.
 *
 * @param state - The `mmprojOffer` state the user confirmed from.
 * @param setState - Picker state setter.
 * @param onPick - Invoked by `startAndPickHfModel` once the model is running.
 */
async function downloadMmprojAndContinue(
  state: Extract<State, { kind: 'mmprojOffer' }>,
  setState: (s: State) => void,
  onPick: (sel: ModelPickerSelection) => void,
): Promise<void> {
  const fail = (message: string, model: LocalHfModel = state.model): void => {
    setState({ kind: 'mmprojError', data: state.data, model, message })
  }

  setState({ kind: 'mmprojDownloading', data: state.data, model: state.model, progress: { status: 'starting' } })

  let updated: LocalHfModel
  let data: LoadedData
  try {
    for await (const progress of addMmprojToInstalledModel(state.model.id)) {
      setState({ kind: 'mmprojDownloading', data: state.data, model: state.model, progress })
    }

    const found = await findLocalHfModel(state.model.id)
    if (!found || !found.mmprojPath) {
      fail('projector downloaded but path was not persisted')
      return
    }

    // Best effort: a failure to stop the server is deliberately ignored here;
    // startAndPickHfModel reports start problems itself.
    const stopResult = await stopLlamaCppServer().catch(() => null)
    if (stopResult && stopResult.ok && stopResult.reason === 'untracked-server') {
      fail(
        'Vision encoder downloaded, but a llama-server is already running and ethagent did not launch it. Quit ethagent, stop the external llama-server (taskkill /F /IM llama-server.exe on Windows, pkill llama-server on macOS or Linux), then reopen ethagent to load the projector.',
        found,
      )
      return
    }

    updated = found
    data = { ...state.data, hfModels: await loadHfPickerModels() }
  } catch (err: unknown) {
    // Narrow instead of asserting: a non-Error throw has no `.message`.
    fail(err instanceof Error ? err.message : String(err))
    return
  }

  await startAndPickHfModel(updated, { kind: 'mmprojOffer', data, model: updated }, setState, onPick)
}
1570
+
1448
1571
  async function refreshLocalModelData(data: LoadedData): Promise<LoadedData> {
1449
1572
  const hfModels = await loadHfPickerModels()
1450
1573
  return {
@@ -1455,7 +1578,7 @@ async function refreshLocalModelData(data: LoadedData): Promise<LoadedData> {
1455
1578
 
1456
1579
  async function startAndPickHfModel(
1457
1580
  model: LocalHfModel,
1458
- state: Extract<State, { kind: 'list' | 'localCatalog' | 'hfDone' }>,
1581
+ state: Extract<State, { kind: 'list' | 'localCatalog' | 'hfDone' | 'mmprojOffer' | 'mmprojError' }>,
1459
1582
  setState: (s: State) => void,
1460
1583
  onPick: (sel: ModelPickerSelection) => void,
1461
1584
  ): Promise<void> {
@@ -1463,10 +1586,15 @@ async function startAndPickHfModel(
1463
1586
  setState({ kind: 'hfError', data: state.data, message: 'blocked high-risk model; choose a model from a more credible source' })
1464
1587
  return
1465
1588
  }
1589
+ if (model.mmprojAvailable && !model.mmprojPath && state.kind !== 'mmprojOffer' && state.kind !== 'mmprojError') {
1590
+ setState({ kind: 'mmprojOffer', data: state.data, model })
1591
+ return
1592
+ }
1466
1593
  setState({ kind: 'localRunnerStarting', data: state.data, model, startedAt: Date.now() })
1467
1594
  const result = await startLlamaCppServer({
1468
1595
  modelPath: model.localPath,
1469
1596
  modelAlias: model.id,
1597
+ mmprojPath: model.mmprojPath,
1470
1598
  })
1471
1599
  const llamaCpp = await probeLlamaCpp()
1472
1600
  const data = { ...state.data, llamaCpp }
@@ -1478,7 +1606,7 @@ async function startAndPickHfModel(
1478
1606
  setState({ kind: 'localRunnerStartFail', data, model, result })
1479
1607
  return
1480
1608
  }
1481
- onPick({ kind: 'llamacpp', model: model.id })
1609
+ onPick({ kind: 'llamacpp', model: model.id, mmprojPath: model.mmprojPath })
1482
1610
  }
1483
1611
 
1484
1612
  async function installRunnerAndStart(
@@ -1576,7 +1704,8 @@ function formatContextWindow(tokens: number): string {
1576
1704
 
1577
1705
  async function loadHfPickerModels(): Promise<ModelPickerOptionsData['hfModels']> {
1578
1706
  const installed = await loadLocalHfModels()
1579
- return installed.map(model => ({
1707
+ const backfilled = await backfillMmprojForModels(installed)
1708
+ return backfilled.map(model => ({
1580
1709
  id: model.id,
1581
1710
  displayName: model.displayName,
1582
1711
  sizeBytes: model.sizeBytes,
@@ -1584,6 +1713,9 @@ async function loadHfPickerModels(): Promise<ModelPickerOptionsData['hfModels']>
1584
1713
  risk: model.risk,
1585
1714
  task: model.task,
1586
1715
  status: model.status,
1716
+ mmprojPath: model.mmprojPath,
1717
+ mmprojAvailable: model.mmprojAvailable,
1718
+ mmprojSizeBytes: model.mmprojSizeBytes,
1587
1719
  }))
1588
1720
  }
1589
1721