@pentoshi/clai 0.13.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/bin/clai.mjs +25 -0
  2. package/dist/agent/runner.d.ts +31 -1
  3. package/dist/agent/runner.js +416 -28
  4. package/dist/agent/runner.js.map +1 -1
  5. package/dist/commands/update.js +1 -1
  6. package/dist/commands/update.js.map +1 -1
  7. package/dist/llm/anthropic.js +31 -12
  8. package/dist/llm/anthropic.js.map +1 -1
  9. package/dist/llm/capabilities.d.ts +13 -0
  10. package/dist/llm/capabilities.js +107 -24
  11. package/dist/llm/capabilities.js.map +1 -1
  12. package/dist/llm/gemini.js +17 -4
  13. package/dist/llm/gemini.js.map +1 -1
  14. package/dist/llm/http.d.ts +12 -1
  15. package/dist/llm/http.js +50 -25
  16. package/dist/llm/http.js.map +1 -1
  17. package/dist/llm/ollama.js +16 -8
  18. package/dist/llm/ollama.js.map +1 -1
  19. package/dist/modes/agent.d.ts +2 -1
  20. package/dist/modes/agent.js.map +1 -1
  21. package/dist/modes/ask.d.ts +2 -1
  22. package/dist/modes/ask.js +5 -1
  23. package/dist/modes/ask.js.map +1 -1
  24. package/dist/os/cwd.d.ts +30 -0
  25. package/dist/os/cwd.js +76 -0
  26. package/dist/os/cwd.js.map +1 -0
  27. package/dist/os/detect.js +2 -1
  28. package/dist/os/detect.js.map +1 -1
  29. package/dist/prompts/index.d.ts +1 -1
  30. package/dist/prompts/index.js +66 -21
  31. package/dist/prompts/index.js.map +1 -1
  32. package/dist/repl.d.ts +10 -0
  33. package/dist/repl.js +258 -28
  34. package/dist/repl.js.map +1 -1
  35. package/dist/safety/classifier.js +121 -26
  36. package/dist/safety/classifier.js.map +1 -1
  37. package/dist/safety/patterns.d.ts +26 -0
  38. package/dist/safety/patterns.js +167 -0
  39. package/dist/safety/patterns.js.map +1 -1
  40. package/dist/store/config.js +2 -1
  41. package/dist/store/config.js.map +1 -1
  42. package/dist/store/history.js +19 -5
  43. package/dist/store/history.js.map +1 -1
  44. package/dist/store/plan.d.ts +43 -0
  45. package/dist/store/plan.js +201 -0
  46. package/dist/store/plan.js.map +1 -0
  47. package/dist/store/project.js +3 -2
  48. package/dist/store/project.js.map +1 -1
  49. package/dist/tools/capabilities.js +6 -1
  50. package/dist/tools/capabilities.js.map +1 -1
  51. package/dist/tools/fs.js +3 -2
  52. package/dist/tools/fs.js.map +1 -1
  53. package/dist/tools/image.d.ts +13 -0
  54. package/dist/tools/image.js +81 -0
  55. package/dist/tools/image.js.map +1 -0
  56. package/dist/tools/jobs.js +2 -1
  57. package/dist/tools/jobs.js.map +1 -1
  58. package/dist/tools/pdf.d.ts +18 -0
  59. package/dist/tools/pdf.js +200 -0
  60. package/dist/tools/pdf.js.map +1 -0
  61. package/dist/tools/registry.js +79 -7
  62. package/dist/tools/registry.js.map +1 -1
  63. package/dist/tools/shell.js +3 -2
  64. package/dist/tools/shell.js.map +1 -1
  65. package/dist/types.d.ts +16 -0
  66. package/dist/ui/keys.d.ts +1 -0
  67. package/dist/ui/keys.js +4 -0
  68. package/dist/ui/keys.js.map +1 -1
  69. package/dist/ui/mentions.d.ts +32 -1
  70. package/dist/ui/mentions.js +304 -27
  71. package/dist/ui/mentions.js.map +1 -1
  72. package/dist/ui/plan-pane.d.ts +19 -0
  73. package/dist/ui/plan-pane.js +101 -0
  74. package/dist/ui/plan-pane.js.map +1 -0
  75. package/package.json +4 -1
package/dist/repl.js CHANGED
@@ -12,11 +12,15 @@ import { renderBanner, renderSessionInfo, renderSuggestions, renderModeSwitch, r
12
12
  import { clearThinking, createThinkingStreamParser, getLastThinking, rememberThinkingFromText, renderThinkingBlock, renderThinkingSummary, renderThinkingToggleMessage, } from "./ui/thinking.js";
13
13
  import { createMarkdownStreamWriter, renderMarkdown } from "./ui/markdown.js";
14
14
  import { startThinkingSpinner } from "./ui/spinner.js";
15
- import { modelSupportsThinking } from "./llm/capabilities.js";
16
- import { clearViewports, getLastViewport, getViewport, isPagerActive, listViewports, openViewportPager, toggleViewport, } from "./ui/output-pane.js";
15
+ import { modelSupportsThinking, modelSupportsVision, preferredVisionModel, } from "./llm/capabilities.js";
16
+ import { clearViewports, getLastViewport, getViewport, isPagerActive, listViewports, openPager, openViewportPager, toggleViewport, } from "./ui/output-pane.js";
17
+ import { loadPlan, savePlan } from "./store/plan.js";
18
+ import { renderPlanDocument, renderPlanChecklist } from "./ui/plan-pane.js";
19
+ import { safeCwd, cwdIsBroken, recoverCwd } from "./os/cwd.js";
17
20
  import { compactMessages, estimateMessagesTokens, } from "./agent/context-manager.js";
18
- import { isCtrlC, isCtrlO, isCtrlT, isEscape } from "./ui/keys.js";
19
- import { getMentionQuery, findFileSuggestions, expandMentions, } from "./ui/mentions.js";
21
+ import { isCtrlC, isCtrlO, isCtrlP, isCtrlT, isEscape } from "./ui/keys.js";
22
+ import { getMentionQuery, findFileSuggestions, expandMentions, loadImageAttachments, imageAttachmentPaths, } from "./ui/mentions.js";
23
+ import { imageOcr } from "./tools/image.js";
20
24
  const slashCommands = [
21
25
  { command: "/ask", description: "switch to ask mode" },
22
26
  { command: "/agent", description: "switch to agent mode" },
@@ -89,6 +93,14 @@ const slashCommands = [
89
93
  },
90
94
  { command: "/compact", description: "compact session history now" },
91
95
  { command: "/context", description: "show estimated context size" },
96
+ {
97
+ command: "/plan",
98
+ description: "view the current session plan (also Ctrl+P)",
99
+ },
100
+ {
101
+ command: "/implement",
102
+ description: "approve the current plan and have clai execute it",
103
+ },
92
104
  {
93
105
  command: "/scope",
94
106
  usage: "[show|clear|new|add <targets>]",
@@ -234,6 +246,76 @@ function splitCommand(line) {
234
246
  function stripAnsi(text) {
235
247
  return text.replace(/\x1b\[[0-9;]*m/g, "");
236
248
  }
249
+ /** Lower-cased set of known slash-command names (without the leading "/"), derived from slashCommands. */
250
+ const knownSlashNames = new Set(slashCommands.map((c) => c.command.slice(1).toLowerCase()));
251
+ /**
252
+ * Build an OCR text layer for attached images. Some providers/proxies accept
253
+ * multimodal `image_url` parts but silently ignore the bytes upstream — the
254
+ * model then hallucinates an answer from the filename ("Screenshot…AM.png" →
255
+ * "a dark terminal"). To make image handling robust regardless of whether the
256
+ * provider's vision actually fired, we OCR each attached image locally and
257
+ * append the extracted text as supplementary grounding. Vision models still
258
+ * get the real bytes for colors/layout/style; this only ADDS a safety net.
259
+ * Paths come from imageAttachmentPaths(line, baseDir) and are OCR'd one at a time, in order.
260
+ * Best-effort: an image is skipped when OCR throws, reports !ok, or yields fewer than 8 ASCII letters/digits; returns "" when no image qualifies.
261
+ */
262
+ async function buildImageOcrGrounding(line, baseDir) {
263
+ const paths = imageAttachmentPaths(line, baseDir);
264
+ if (paths.length === 0)
265
+ return "";
266
+ const sections = [];
267
+ for (const path of paths) {
268
+ try {
269
+ const result = await imageOcr({ path });
270
+ const text = result.output.trim();
271
+ // tesseract emits noise/garbage on non-text images; only include a
272
+ // section when there is a meaningful amount of recognized text.
273
+ const meaningful = (text.match(/[A-Za-z0-9]/g) ?? []).length;
274
+ if (result.ok && meaningful >= 8) {
275
+ sections.push(`----- OCR of ${path} -----\n${text}\n----- end OCR -----`);
276
+ }
277
+ }
278
+ catch {
279
+ // tesseract missing or failed — skip silently; vision bytes still sent.
280
+ }
281
+ }
282
+ if (sections.length === 0)
283
+ return "";
284
+ return ('<image-ocr note="Text extracted locally from the attached image(s) via OCR, in case the model cannot see the image bytes directly. Use it to ground your answer; if you CAN see the image, prefer your own visual reading and use this only to confirm text.">\n' +
285
+ sections.join("\n\n") +
286
+ "\n</image-ocr>");
287
+ }
288
+ /**
289
+ * Decide whether a line that starts with "/" is actually a slash command
290
+ * versus an absolute filesystem path the user typed or drag-dropped (e.g.
291
+ * `/Users/me/Desktop/Screenshot.png`). A real command is "/" + a single
292
+ * known command word (optionally followed by arguments). An absolute path
293
+ * has extra "/" segments in its first token and won't match a known command,
294
+ * so we route it to the normal prompt path where expandMentions() turns it
295
+ * into a file attachment.
296
+ */
297
+ export function looksLikeSlashCommand(line) {
298
+ if (!line.startsWith("/") || line.length < 2)
299
+ return false;
300
+ // First whitespace-delimited token, minus the leading slash.
301
+ const firstToken = line.slice(1).split(/\s/)[0] ?? "";
302
+ // A path-like first token (contains another "/" or a backslash escape) is
303
+ // never a command; tokens with a filename extension fail the final regex.
304
+ if (firstToken.includes("/") || firstToken.includes("\\"))
305
+ return false;
306
+ const name = firstToken.toLowerCase();
307
+ // An exact match against a known command is always a command. Prefixes
308
+ // (partial typing like "/imp") are NOT matched here; they pass the
309
+ // bare-word test at the end and reach the command handler, which is
310
+ // expected to resolve abbreviations and to print "unknown command" help
311
+ // for genuine typos or single-segment path tokens (historical behavior).
312
+ if (knownSlashNames.has(name))
313
+ return true;
314
+ // Only treat as a (mistyped) command when it has no path/extension shape.
315
+ // Multi-segment paths ("/Users/me") were rejected by the "/" check above;
316
+ // a lone "/Users" is a bare alpha word and is accepted as a command attempt.
317
+ return /^[a-z][a-z0-9-]*$/i.test(firstToken);
318
+ }
237
319
  function isAbortLikeError(error) {
238
320
  if (!error)
239
321
  return false;
@@ -258,6 +340,12 @@ function slashCommandFilter(line) {
258
340
  // but let Enter submit a raw '/' unless they explicitly navigate the menu.
259
341
  if (!line.startsWith("/") || line.length < 1 || /\s/.test(line))
260
342
  return null;
343
+ // Don't show the command menu for an absolute path the user is typing or
344
+ // drag-dropped (e.g. "/Users/me/file.png"): a path's first token has more
345
+ // "/" or backslash escapes in it. Those go to the normal prompt path.
346
+ const firstToken = line.slice(1).split(/\s/)[0] ?? "";
347
+ if (firstToken.includes("/") || firstToken.includes("\\"))
348
+ return null;
261
349
  return line.slice(1).toLowerCase();
262
350
  }
263
351
  export function getSlashCommandSuggestions(line) {
@@ -308,9 +396,7 @@ export function renderFileMentionMenu(query, suggestions, selectedIndex) {
308
396
  const cols = terminalColumns();
309
397
  const maxWidth = Math.max(1, cols - 1);
310
398
  if (suggestions.length === 0) {
311
- return [
312
- chalk.dim(fitPlain(` no files matching @${query}`, maxWidth)),
313
- ];
399
+ return [chalk.dim(fitPlain(` no files matching @${query}`, maxWidth))];
314
400
  }
315
401
  const termRows = process.stdout.rows || 24;
316
402
  const maxVisible = Math.max(5, termRows - 4);
@@ -440,7 +526,12 @@ async function readPromptLine(options) {
440
526
  const cols = terminalColumns();
441
527
  const menu = getMenuState();
442
528
  const mention = menu.visible
443
- ? { visible: false, query: "", start: 0, suggestions: [] }
529
+ ? {
530
+ visible: false,
531
+ query: "",
532
+ start: 0,
533
+ suggestions: [],
534
+ }
444
535
  : getMentionState();
445
536
  const menuLines = menu.visible
446
537
  ? renderSlashCommandMenu(line, menu.suggestions, selectedIndex)
@@ -520,7 +611,12 @@ async function readPromptLine(options) {
520
611
  return;
521
612
  const menu = getMenuState();
522
613
  const mention = menu.visible
523
- ? { visible: false, query: "", start: 0, suggestions: [] }
614
+ ? {
615
+ visible: false,
616
+ query: "",
617
+ start: 0,
618
+ suggestions: [],
619
+ }
524
620
  : getMentionState();
525
621
  // Cmd+C on macOS terminals is handled by the OS (it never reaches us),
526
622
  // but some Linux terminals forward Meta+C. Treat that as a no-op so
@@ -560,6 +656,12 @@ async function readPromptLine(options) {
560
656
  void options.onOutputShortcut().finally(refresh);
561
657
  return;
562
658
  }
659
+ if (isCtrlP(key)) {
660
+ clearPromptDisplay();
661
+ output.write("\n");
662
+ void options.onPlanShortcut().finally(refresh);
663
+ return;
664
+ }
563
665
  if (key.name === "return" || key.name === "enter") {
564
666
  if (mention.visible && mention.suggestions.length > 0) {
565
667
  applyMention(mention.suggestions[selectedIndex] ?? mention.suggestions[0], mention.start);
@@ -1177,6 +1279,7 @@ async function handleSlash(line, state) {
1177
1279
  case "/clear":
1178
1280
  state.messages.length = 0;
1179
1281
  state.resumedMessageCount = 0;
1282
+ state.session.planApproved.value = false;
1180
1283
  console.log(chalk.dim(" context cleared"));
1181
1284
  return true;
1182
1285
  case "/new": {
@@ -1271,15 +1374,28 @@ async function handleSlash(line, state) {
1271
1374
  }
1272
1375
  case "/cwd": {
1273
1376
  const dir = args.join(" ");
1274
- if (!dir)
1275
- console.log(chalk.dim(` ${process.cwd()}`));
1377
+ if (!dir) {
1378
+ if (cwdIsBroken()) {
1379
+ const recovered = recoverCwd();
1380
+ console.log(chalk.yellow(` ⚠ the previous working directory no longer exists — moved to ${recovered}`));
1381
+ }
1382
+ else {
1383
+ console.log(chalk.dim(` ${safeCwd()}`));
1384
+ }
1385
+ }
1276
1386
  else {
1277
- process.chdir(dir);
1387
+ try {
1388
+ process.chdir(dir);
1389
+ }
1390
+ catch (error) {
1391
+ console.log(chalk.red(` ✗ cannot change to ${dir}: ${error instanceof Error ? error.message : String(error)}`));
1392
+ return true;
1393
+ }
1278
1394
  const config = getConfig();
1279
1395
  updateConfig({
1280
- sandboxRoots: Array.from(new Set([...config.sandboxRoots, process.cwd()])),
1396
+ sandboxRoots: Array.from(new Set([...config.sandboxRoots, safeCwd()])),
1281
1397
  });
1282
- console.log(chalk.dim(` cwd → ${process.cwd()}`));
1398
+ console.log(chalk.dim(` cwd → ${safeCwd()}`));
1283
1399
  }
1284
1400
  return true;
1285
1401
  }
@@ -1323,6 +1439,23 @@ async function handleSlash(line, state) {
1323
1439
  console.log(chalk.dim(` ${state.messages.length} message(s), ~${tokens.toLocaleString()} tokens estimated`));
1324
1440
  return true;
1325
1441
  }
1442
+ case "/plan": {
1443
+ const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
1444
+ if (!plan) {
1445
+ console.log(chalk.dim(' no plan yet — ask clai to plan a multi-step task (e.g. "build a react blog app")'));
1446
+ return true;
1447
+ }
1448
+ if (process.stdout.isTTY && input.isTTY) {
1449
+ await openPager({
1450
+ title: `plan · ${plan.goal}`,
1451
+ body: renderPlanDocument(plan),
1452
+ });
1453
+ }
1454
+ else {
1455
+ console.log(renderPlanDocument(plan));
1456
+ }
1457
+ return true;
1458
+ }
1326
1459
  case "/compact": {
1327
1460
  const before = state.messages.length;
1328
1461
  const compacted = compactMessages(state.messages, { budgetTokens: 0 });
@@ -1577,13 +1710,13 @@ async function handleSlash(line, state) {
1577
1710
  // Re-render the startup banner
1578
1711
  console.log(renderBanner(getCurrentVersion()));
1579
1712
  console.log(renderSessionInfo({
1580
- workdir: process.cwd(),
1713
+ workdir: safeCwd(),
1581
1714
  model: state.model,
1582
1715
  provider: state.provider,
1583
1716
  mode: state.mode,
1584
1717
  }));
1585
1718
  console.log(renderSuggestions());
1586
- console.log(chalk.dim(" ESC abort │ Ctrl+C clears input │ @ to attach files │ Ctrl+T thinking │ Ctrl+O tool output (q to close)\n"));
1719
+ console.log(chalk.dim(" ESC abort │ Ctrl+C clears input │ @ to attach files │ Ctrl+T thinking │ Ctrl+O tool output │ Ctrl+P plan (q to close)\n"));
1587
1720
  return true;
1588
1721
  }
1589
1722
  case "/update":
@@ -1671,6 +1804,31 @@ export async function startRepl(options = {}) {
1671
1804
  outputShortcutBusy = false;
1672
1805
  }
1673
1806
  };
1807
+ let planShortcutBusy = false;
1808
+ const handlePlanShortcut = async () => {
1809
+ if (planShortcutBusy)
1810
+ return;
1811
+ planShortcutBusy = true;
1812
+ try {
1813
+ // Only open the pager when idle (same reasoning as Ctrl+O).
1814
+ if (currentAbortController || !isReadingPrompt) {
1815
+ process.stdout.write(chalk.dim("\n (press Ctrl+P at the prompt when idle to view the plan)\n"));
1816
+ return;
1817
+ }
1818
+ const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
1819
+ if (!plan) {
1820
+ process.stdout.write(chalk.dim('\n (no plan yet — ask clai to plan a multi-step task, e.g. "build a react blog app")\n'));
1821
+ return;
1822
+ }
1823
+ await openPager({
1824
+ title: `plan · ${plan.goal}`,
1825
+ body: renderPlanDocument(plan),
1826
+ });
1827
+ }
1828
+ finally {
1829
+ planShortcutBusy = false;
1830
+ }
1831
+ };
1674
1832
  const handleKeypress = (_sequence, key) => {
1675
1833
  if (isPagerActive())
1676
1834
  return;
@@ -1679,6 +1837,9 @@ export async function startRepl(options = {}) {
1679
1837
  if (isCtrlO(key) && !isReadingPrompt) {
1680
1838
  void handleOutputShortcut();
1681
1839
  }
1840
+ if (isCtrlP(key) && !isReadingPrompt) {
1841
+ void handlePlanShortcut();
1842
+ }
1682
1843
  if ((isEscape(key) || isCtrlC(key)) && currentAbortController) {
1683
1844
  abortPressCount += 1;
1684
1845
  currentAbortController.abort();
@@ -1732,13 +1893,13 @@ export async function startRepl(options = {}) {
1732
1893
  // ── Startup banner ──────────────────────────────────────────────────────
1733
1894
  console.log(renderBanner(getCurrentVersion()));
1734
1895
  console.log(renderSessionInfo({
1735
- workdir: process.cwd(),
1896
+ workdir: safeCwd(),
1736
1897
  model: state.model,
1737
1898
  provider: state.provider,
1738
1899
  mode: state.mode,
1739
1900
  }));
1740
1901
  console.log(renderSuggestions());
1741
- console.log(chalk.dim(" ESC abort │ Ctrl+C clears input │ @ to attach files │ Ctrl+T thinking │ Ctrl+O tool output (q to close)\n"));
1902
+ console.log(chalk.dim(" ESC abort │ Ctrl+C clears input │ @ to attach files │ Ctrl+T thinking │ Ctrl+O tool output │ Ctrl+P plan (q to close)\n"));
1742
1903
  // Hint thinking-capable users that the toggle exists. We default it to
1743
1904
  // off for speed, since on NIM many models route through a much slower
1744
1905
  // chat-template path when reasoning is enabled.
@@ -1760,18 +1921,45 @@ export async function startRepl(options = {}) {
1760
1921
  history: promptHistory,
1761
1922
  onThinkingShortcut: handleThinkingShortcut,
1762
1923
  onOutputShortcut: handleOutputShortcut,
1924
+ onPlanShortcut: handlePlanShortcut,
1763
1925
  })).trim();
1764
1926
  isReadingPrompt = false;
1765
1927
  if (!line)
1766
1928
  continue;
1929
+ // ── /implement — approve the active plan and execute it ──────────
1930
+ // Handled here (not in handleSlash) because it must trigger a full
1931
+ // agent run with the plan marked approved, not just print something.
1932
+ let implementApproved = false;
1933
+ let effectiveLine = line;
1934
+ if (line === "/implement" || line.startsWith("/implement ")) {
1935
+ const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
1936
+ if (!plan) {
1937
+ console.log(chalk.dim(" no plan to implement — ask clai to plan a multi-step task first"));
1938
+ continue;
1939
+ }
1940
+ if (plan.tasks.every((t) => t.state === "done")) {
1941
+ console.log(chalk.dim(" this plan is already complete ✓"));
1942
+ continue;
1943
+ }
1944
+ plan.status = "approved";
1945
+ await savePlan(plan).catch(() => undefined);
1946
+ state.session.planApproved.value = true;
1947
+ console.log(chalk.cyan(" ✦ plan approved — clai will now execute it\n"));
1948
+ console.log(renderPlanChecklist(plan) + "\n");
1949
+ implementApproved = true;
1950
+ effectiveLine =
1951
+ "I approve the plan. Execute it now, task by task: mark each task in_progress before " +
1952
+ "you start it and done after it actually succeeds. Run real commands (installs, servers, " +
1953
+ "verification) — do not claim anything ran without a successful tool call.";
1954
+ }
1767
1955
  // Only remember real prompts in the history ring. Slash commands
1768
1956
  // are operational toggles (eg /model, /provider) and surfacing them
1769
1957
  // when the user presses ↑ to recall a past prompt is just noise.
1770
- if (!line.startsWith("/") &&
1958
+ if (!looksLikeSlashCommand(line) &&
1771
1959
  promptHistory[promptHistory.length - 1] !== line) {
1772
1960
  promptHistory.push(line);
1773
1961
  }
1774
- if (line.startsWith("/")) {
1962
+ if (looksLikeSlashCommand(line) && !implementApproved) {
1775
1963
  // Slash commands may call inquirer/password prompts, which expect the
1776
1964
  // terminal in cooked mode. Normal model runs keep raw mode enabled so
1777
1965
  // ESC/Ctrl+C can abort while streaming.
@@ -1790,17 +1978,48 @@ export async function startRepl(options = {}) {
1790
1978
  // Expand @file mentions and drag-and-dropped paths into real context.
1791
1979
  // The user-visible `line` stays readable in history; the model gets
1792
1980
  // the line plus an appended block of file contents / path notes.
1793
- const expansion = expandMentions(line);
1794
- const modelInput = expansion.contextBlock.length > 0
1795
- ? `${line}\n\n${expansion.contextBlock}`
1796
- : line;
1981
+ let requestModel = state.model;
1982
+ let visionCapable = modelSupportsVision(state.provider, requestModel);
1983
+ let expansion = expandMentions(effectiveLine, safeCwd(), visionCapable);
1984
+ const hasImageAttachment = expansion.attachments.some((att) => att.kind === "image");
1985
+ if (hasImageAttachment && !visionCapable) {
1986
+ const fallbackVisionModel = preferredVisionModel(state.provider, requestModel);
1987
+ if (fallbackVisionModel && fallbackVisionModel !== requestModel) {
1988
+ const previousModel = requestModel;
1989
+ requestModel = fallbackVisionModel;
1990
+ visionCapable = true;
1991
+ expansion = expandMentions(effectiveLine, safeCwd(), true);
1992
+ console.log(chalk.dim(" ↳ vision model: ") +
1993
+ chalk.dim(`${requestModel} (auto for image; ${previousModel} can't view images)`));
1994
+ }
1995
+ }
1996
+ const images = visionCapable
1997
+ ? loadImageAttachments(effectiveLine, safeCwd())
1998
+ : [];
1999
+ const sentImagePaths = new Set(images.map((img) => img.path).filter((p) => Boolean(p)));
2000
+ // OCR grounding: extract text from any attached image locally and
2001
+ // append it. This is the safety net for the case the user hit — a
2002
+ // provider that accepts image bytes but silently ignores them, so the
2003
+ // model otherwise hallucinates from the filename. Cheap, best-effort,
2004
+ // and additive (vision models still get the real bytes).
2005
+ const ocrGrounding = hasImageAttachment
2006
+ ? await buildImageOcrGrounding(effectiveLine, safeCwd())
2007
+ : "";
2008
+ const contextParts = [expansion.contextBlock, ocrGrounding].filter((part) => part.length > 0);
2009
+ const modelInput = contextParts.length > 0
2010
+ ? `${effectiveLine}\n\n${contextParts.join("\n\n")}`
2011
+ : effectiveLine;
1797
2012
  if (expansion.attachments.length > 0) {
1798
2013
  for (const att of expansion.attachments) {
1799
2014
  const tag = att.kind === "text"
1800
2015
  ? chalk.green("attached")
1801
2016
  : att.kind === "missing"
1802
2017
  ? chalk.red("not found")
1803
- : chalk.yellow(att.kind);
2018
+ : att.kind === "image" && sentImagePaths.has(att.path)
2019
+ ? chalk.green("image (sent to model)")
2020
+ : att.kind === "image" && visionCapable
2021
+ ? chalk.yellow("image (not sent)")
2022
+ : chalk.yellow(att.kind);
1804
2023
  console.log(chalk.dim(` ↳ ${tag}: `) + chalk.dim(att.path));
1805
2024
  }
1806
2025
  }
@@ -1808,9 +2027,10 @@ export async function startRepl(options = {}) {
1808
2027
  assistantContent = await withAbortableInput(async (signal) => streamWithAbort(async (runSignal, onToken) => {
1809
2028
  return await runAskStream(modelInput, onToken, {
1810
2029
  provider: state.provider,
1811
- model: state.model,
2030
+ model: requestModel,
1812
2031
  history: state.messages,
1813
2032
  signal: runSignal,
2033
+ images,
1814
2034
  });
1815
2035
  }, signal));
1816
2036
  process.stdout.write("\n");
@@ -1818,14 +2038,24 @@ export async function startRepl(options = {}) {
1818
2038
  else {
1819
2039
  assistantContent = await withAbortableInput(async (signal) => runAgent(modelInput, {
1820
2040
  provider: state.provider,
1821
- model: state.model,
2041
+ model: requestModel,
1822
2042
  history: state.messages,
1823
2043
  signal,
1824
2044
  session: state.session,
2045
+ images,
1825
2046
  }));
1826
2047
  }
1827
2048
  console.log();
1828
- state.messages.push({ role: "user", content: modelInput }, { role: "assistant", content: assistantContent });
2049
+ const userHistoryMessage = {
2050
+ role: "user",
2051
+ content: modelInput,
2052
+ };
2053
+ if (images.length > 0)
2054
+ userHistoryMessage.images = images;
2055
+ state.messages.push(userHistoryMessage, {
2056
+ role: "assistant",
2057
+ content: assistantContent,
2058
+ });
1829
2059
  }
1830
2060
  catch (error) {
1831
2061
  if (error instanceof AbortRunError) {