npm - @pentoshi/clai - Versions diffs - 0.13.0 → 1.0.0 - Mend

@pentoshi/clai 0.13.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/bin/clai.mjs +25 -0
package/dist/agent/runner.d.ts +31 -1
package/dist/agent/runner.js +416 -28
package/dist/agent/runner.js.map +1 -1
package/dist/commands/update.js +1 -1
package/dist/commands/update.js.map +1 -1
package/dist/llm/anthropic.js +31 -12
package/dist/llm/anthropic.js.map +1 -1
package/dist/llm/capabilities.d.ts +13 -0
package/dist/llm/capabilities.js +107 -24
package/dist/llm/capabilities.js.map +1 -1
package/dist/llm/gemini.js +17 -4
package/dist/llm/gemini.js.map +1 -1
package/dist/llm/http.d.ts +12 -1
package/dist/llm/http.js +50 -25
package/dist/llm/http.js.map +1 -1
package/dist/llm/ollama.js +16 -8
package/dist/llm/ollama.js.map +1 -1
package/dist/modes/agent.d.ts +2 -1
package/dist/modes/agent.js.map +1 -1
package/dist/modes/ask.d.ts +2 -1
package/dist/modes/ask.js +5 -1
package/dist/modes/ask.js.map +1 -1
package/dist/os/cwd.d.ts +30 -0
package/dist/os/cwd.js +76 -0
package/dist/os/cwd.js.map +1 -0
package/dist/os/detect.js +2 -1
package/dist/os/detect.js.map +1 -1
package/dist/prompts/index.d.ts +1 -1
package/dist/prompts/index.js +66 -21
package/dist/prompts/index.js.map +1 -1
package/dist/repl.d.ts +10 -0
package/dist/repl.js +258 -28
package/dist/repl.js.map +1 -1
package/dist/safety/classifier.js +121 -26
package/dist/safety/classifier.js.map +1 -1
package/dist/safety/patterns.d.ts +26 -0
package/dist/safety/patterns.js +167 -0
package/dist/safety/patterns.js.map +1 -1
package/dist/store/config.js +2 -1
package/dist/store/config.js.map +1 -1
package/dist/store/history.js +19 -5
package/dist/store/history.js.map +1 -1
package/dist/store/plan.d.ts +43 -0
package/dist/store/plan.js +201 -0
package/dist/store/plan.js.map +1 -0
package/dist/store/project.js +3 -2
package/dist/store/project.js.map +1 -1
package/dist/tools/capabilities.js +6 -1
package/dist/tools/capabilities.js.map +1 -1
package/dist/tools/fs.js +3 -2
package/dist/tools/fs.js.map +1 -1
package/dist/tools/image.d.ts +13 -0
package/dist/tools/image.js +81 -0
package/dist/tools/image.js.map +1 -0
package/dist/tools/jobs.js +2 -1
package/dist/tools/jobs.js.map +1 -1
package/dist/tools/pdf.d.ts +18 -0
package/dist/tools/pdf.js +200 -0
package/dist/tools/pdf.js.map +1 -0
package/dist/tools/registry.js +79 -7
package/dist/tools/registry.js.map +1 -1
package/dist/tools/shell.js +3 -2
package/dist/tools/shell.js.map +1 -1
package/dist/types.d.ts +16 -0
package/dist/ui/keys.d.ts +1 -0
package/dist/ui/keys.js +4 -0
package/dist/ui/keys.js.map +1 -1
package/dist/ui/mentions.d.ts +32 -1
package/dist/ui/mentions.js +304 -27
package/dist/ui/mentions.js.map +1 -1
package/dist/ui/plan-pane.d.ts +19 -0
package/dist/ui/plan-pane.js +101 -0
package/dist/ui/plan-pane.js.map +1 -0
package/package.json +4 -1

package/dist/repl.js CHANGED Viewed

@@ -12,11 +12,15 @@ import { renderBanner, renderSessionInfo, renderSuggestions, renderModeSwitch, r
 import { clearThinking, createThinkingStreamParser, getLastThinking, rememberThinkingFromText, renderThinkingBlock, renderThinkingSummary, renderThinkingToggleMessage, } from "./ui/thinking.js";
 import { createMarkdownStreamWriter, renderMarkdown } from "./ui/markdown.js";
 import { startThinkingSpinner } from "./ui/spinner.js";
-import { modelSupportsThinking } from "./llm/capabilities.js";
-import { clearViewports, getLastViewport, getViewport, isPagerActive, listViewports, openViewportPager, toggleViewport, } from "./ui/output-pane.js";
+import { modelSupportsThinking, modelSupportsVision, preferredVisionModel, } from "./llm/capabilities.js";
+import { clearViewports, getLastViewport, getViewport, isPagerActive, listViewports, openPager, openViewportPager, toggleViewport, } from "./ui/output-pane.js";
+import { loadPlan, savePlan } from "./store/plan.js";
+import { renderPlanDocument, renderPlanChecklist } from "./ui/plan-pane.js";
+import { safeCwd, cwdIsBroken, recoverCwd } from "./os/cwd.js";
 import { compactMessages, estimateMessagesTokens, } from "./agent/context-manager.js";
-import { isCtrlC, isCtrlO, isCtrlT, isEscape } from "./ui/keys.js";
-import { getMentionQuery, findFileSuggestions, expandMentions, } from "./ui/mentions.js";
+import { isCtrlC, isCtrlO, isCtrlP, isCtrlT, isEscape } from "./ui/keys.js";
+import { getMentionQuery, findFileSuggestions, expandMentions, loadImageAttachments, imageAttachmentPaths, } from "./ui/mentions.js";
+import { imageOcr } from "./tools/image.js";
 const slashCommands = [
     { command: "/ask", description: "switch to ask mode" },
     { command: "/agent", description: "switch to agent mode" },
@@ -89,6 +93,14 @@ const slashCommands = [
     },
     { command: "/compact", description: "compact session history now" },
     { command: "/context", description: "show estimated context size" },
+    {
+        command: "/plan",
+        description: "view the current session plan (also Ctrl+P)",
+    },
+    {
+        command: "/implement",
+        description: "approve the current plan and have clai execute it",
+    },
     {
         command: "/scope",
         usage: "[show|clear|new|add <targets>]",
@@ -234,6 +246,76 @@ function splitCommand(line) {
 function stripAnsi(text) {
     return text.replace(/\x1b\[[0-9;]*m/g, "");
 }
+/** Set of known slash-command names (without the leading "/"). */
+const knownSlashNames = new Set(slashCommands.map((c) => c.command.slice(1).toLowerCase()));
+/**
+ * Build an OCR text layer for attached images. Some providers/proxies accept
+ * multimodal `image_url` parts but silently ignore the bytes upstream — the
+ * model then hallucinates an answer from the filename ("Screenshot…AM.png" →
+ * "a dark terminal"). To make image handling robust regardless of whether the
+ * provider's vision actually fired, we OCR each attached image locally and
+ * append the extracted text as supplementary grounding. Vision models still
+ * get the real bytes for colors/layout/style; this only ADDS a safety net.
+ *
+ * Best-effort: if tesseract is missing or OCR yields nothing, returns "".
+ */
+async function buildImageOcrGrounding(line, baseDir) {
+    const paths = imageAttachmentPaths(line, baseDir);
+    if (paths.length === 0)
+        return "";
+    const sections = [];
+    for (const path of paths) {
+        try {
+            const result = await imageOcr({ path });
+            const text = result.output.trim();
+            // tesseract emits noise/garbage on non-text images; only include a
+            // section when there is a meaningful amount of recognized text.
+            const meaningful = (text.match(/[A-Za-z0-9]/g) ?? []).length;
+            if (result.ok && meaningful >= 8) {
+                sections.push(`----- OCR of ${path} -----\n${text}\n----- end OCR -----`);
+            }
+        }
+        catch {
+            // tesseract missing or failed — skip silently; vision bytes still sent.
+        }
+    }
+    if (sections.length === 0)
+        return "";
+    return ('<image-ocr note="Text extracted locally from the attached image(s) via OCR, in case the model cannot see the image bytes directly. Use it to ground your answer; if you CAN see the image, prefer your own visual reading and use this only to confirm text.">\n' +
+        sections.join("\n\n") +
+        "\n</image-ocr>");
+}
+/**
+ * Decide whether a line that starts with "/" is actually a slash command
+ * versus an absolute filesystem path the user typed or drag-dropped (e.g.
+ * `/Users/me/Desktop/Screenshot.png`). A command is "/" + a single bare
+ * word, known or not (optionally followed by arguments). An absolute path
+ * has extra "/" segments in its first token, so it is rejected here and
+ * routed to the normal prompt path where expandMentions() turns it into a
+ * file attachment.
+ */
+export function looksLikeSlashCommand(line) {
+    if (!line.startsWith("/") || line.length < 2)
+        return false;
+    // First whitespace-delimited token, minus the leading slash.
+    const firstToken = line.slice(1).split(/\s/)[0] ?? "";
+    // A path-like first token (one that contains another "/" or a backslash
+    // escape) is never a command.
+    if (firstToken.includes("/") || firstToken.includes("\\"))
+        return false;
+    const name = firstToken.toLowerCase();
+    // An exact match against a known command is always a command. Any other
+    // bare word (a partial like "/imp", a typo, or a single-segment path
+    // token like "/tmp") is accepted by the regex below and routed to
+    // handleSlash, which reports unknown commands — the historical behavior
+    // for genuine typos. No prefix matching is done in this function.
+    if (knownSlashNames.has(name))
+        return true;
+    // Only treat as a (mistyped) command when it has no path/extension shape:
+    // "/Users/me" was already rejected by the "/" check above, and a token
+    // with a dot ("/file.png") fails this pattern; bare words are accepted.
+    return /^[a-z][a-z0-9-]*$/i.test(firstToken);
+}
 function isAbortLikeError(error) {
     if (!error)
         return false;
@@ -258,6 +340,12 @@ function slashCommandFilter(line) {
     // but let Enter submit a raw '/' unless they explicitly navigate the menu.
     if (!line.startsWith("/") || line.length < 1 || /\s/.test(line))
         return null;
+    // Don't show the command menu for an absolute path the user is typing or
+    // drag-dropped (e.g. "/Users/me/file.png"): a path's first token has more
+    // "/" or backslash escapes in it. Those go to the normal prompt path.
+    const firstToken = line.slice(1).split(/\s/)[0] ?? "";
+    if (firstToken.includes("/") || firstToken.includes("\\"))
+        return null;
     return line.slice(1).toLowerCase();
 }
 export function getSlashCommandSuggestions(line) {
@@ -308,9 +396,7 @@ export function renderFileMentionMenu(query, suggestions, selectedIndex) {
     const cols = terminalColumns();
     const maxWidth = Math.max(1, cols - 1);
     if (suggestions.length === 0) {
-        return [
-            chalk.dim(fitPlain(`  no files matching @${query}`, maxWidth)),
-        ];
+        return [chalk.dim(fitPlain(`  no files matching @${query}`, maxWidth))];
     }
     const termRows = process.stdout.rows || 24;
     const maxVisible = Math.max(5, termRows - 4);
@@ -440,7 +526,12 @@ async function readPromptLine(options) {
             const cols = terminalColumns();
             const menu = getMenuState();
             const mention = menu.visible
-                ? { visible: false, query: "", start: 0, suggestions: [] }
+                ? {
+                    visible: false,
+                    query: "",
+                    start: 0,
+                    suggestions: [],
+                }
                 : getMentionState();
             const menuLines = menu.visible
                 ? renderSlashCommandMenu(line, menu.suggestions, selectedIndex)
@@ -520,7 +611,12 @@ async function readPromptLine(options) {
                 return;
             const menu = getMenuState();
             const mention = menu.visible
-                ? { visible: false, query: "", start: 0, suggestions: [] }
+                ? {
+                    visible: false,
+                    query: "",
+                    start: 0,
+                    suggestions: [],
+                }
                 : getMentionState();
             // Cmd+C on macOS terminals is handled by the OS (it never reaches us),
             // but some Linux terminals forward Meta+C. Treat that as a no-op so
@@ -560,6 +656,12 @@ async function readPromptLine(options) {
                 void options.onOutputShortcut().finally(refresh);
                 return;
             }
+            if (isCtrlP(key)) {
+                clearPromptDisplay();
+                output.write("\n");
+                void options.onPlanShortcut().finally(refresh);
+                return;
+            }
             if (key.name === "return" || key.name === "enter") {
                 if (mention.visible && mention.suggestions.length > 0) {
                     applyMention(mention.suggestions[selectedIndex] ?? mention.suggestions[0], mention.start);
@@ -1177,6 +1279,7 @@ async function handleSlash(line, state) {
         case "/clear":
             state.messages.length = 0;
             state.resumedMessageCount = 0;
+            state.session.planApproved.value = false;
             console.log(chalk.dim("  context cleared"));
             return true;
         case "/new": {
@@ -1271,15 +1374,28 @@ async function handleSlash(line, state) {
         }
         case "/cwd": {
             const dir = args.join(" ");
-            if (!dir)
-                console.log(chalk.dim(`  ${process.cwd()}`));
+            if (!dir) {
+                if (cwdIsBroken()) {
+                    const recovered = recoverCwd();
+                    console.log(chalk.yellow(`  ⚠ the previous working directory no longer exists — moved to ${recovered}`));
+                }
+                else {
+                    console.log(chalk.dim(`  ${safeCwd()}`));
+                }
+            }
             else {
-                process.chdir(dir);
+                try {
+                    process.chdir(dir);
+                }
+                catch (error) {
+                    console.log(chalk.red(`  ✗ cannot change to ${dir}: ${error instanceof Error ? error.message : String(error)}`));
+                    return true;
+                }
                 const config = getConfig();
                 updateConfig({
-                    sandboxRoots: Array.from(new Set([...config.sandboxRoots, process.cwd()])),
+                    sandboxRoots: Array.from(new Set([...config.sandboxRoots, safeCwd()])),
                 });
-                console.log(chalk.dim(`  cwd → ${process.cwd()}`));
+                console.log(chalk.dim(`  cwd → ${safeCwd()}`));
             }
             return true;
         }
@@ -1323,6 +1439,23 @@ async function handleSlash(line, state) {
             console.log(chalk.dim(`  ${state.messages.length} message(s), ~${tokens.toLocaleString()} tokens estimated`));
             return true;
         }
+        case "/plan": {
+            const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
+            if (!plan) {
+                console.log(chalk.dim('  no plan yet — ask clai to plan a multi-step task (e.g. "build a react blog app")'));
+                return true;
+            }
+            if (process.stdout.isTTY && input.isTTY) {
+                await openPager({
+                    title: `plan · ${plan.goal}`,
+                    body: renderPlanDocument(plan),
+                });
+            }
+            else {
+                console.log(renderPlanDocument(plan));
+            }
+            return true;
+        }
         case "/compact": {
             const before = state.messages.length;
             const compacted = compactMessages(state.messages, { budgetTokens: 0 });
@@ -1577,13 +1710,13 @@ async function handleSlash(line, state) {
             // Re-render the startup banner
             console.log(renderBanner(getCurrentVersion()));
             console.log(renderSessionInfo({
-                workdir: process.cwd(),
+                workdir: safeCwd(),
                 model: state.model,
                 provider: state.provider,
                 mode: state.mode,
             }));
             console.log(renderSuggestions());
-            console.log(chalk.dim("  ESC abort  │  Ctrl+C clears input  │  @ to attach files  │  Ctrl+T thinking  │  Ctrl+O tool output (q to close)\n"));
+            console.log(chalk.dim("  ESC abort  │  Ctrl+C clears input  │  @ to attach files  │  Ctrl+T thinking  │  Ctrl+O tool output  │  Ctrl+P plan (q to close)\n"));
             return true;
         }
         case "/update":
@@ -1671,6 +1804,31 @@ export async function startRepl(options = {}) {
             outputShortcutBusy = false;
         }
     };
+    let planShortcutBusy = false;
+    const handlePlanShortcut = async () => {
+        if (planShortcutBusy)
+            return;
+        planShortcutBusy = true;
+        try {
+            // Only open the pager when idle (same reasoning as Ctrl+O).
+            if (currentAbortController || !isReadingPrompt) {
+                process.stdout.write(chalk.dim("\n  (press Ctrl+P at the prompt when idle to view the plan)\n"));
+                return;
+            }
+            const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
+            if (!plan) {
+                process.stdout.write(chalk.dim('\n  (no plan yet — ask clai to plan a multi-step task, e.g. "build a react blog app")\n'));
+                return;
+            }
+            await openPager({
+                title: `plan · ${plan.goal}`,
+                body: renderPlanDocument(plan),
+            });
+        }
+        finally {
+            planShortcutBusy = false;
+        }
+    };
     const handleKeypress = (_sequence, key) => {
         if (isPagerActive())
             return;
@@ -1679,6 +1837,9 @@ export async function startRepl(options = {}) {
         if (isCtrlO(key) && !isReadingPrompt) {
             void handleOutputShortcut();
         }
+        if (isCtrlP(key) && !isReadingPrompt) {
+            void handlePlanShortcut();
+        }
         if ((isEscape(key) || isCtrlC(key)) && currentAbortController) {
             abortPressCount += 1;
             currentAbortController.abort();
@@ -1732,13 +1893,13 @@ export async function startRepl(options = {}) {
     // ── Startup banner ──────────────────────────────────────────────────────
     console.log(renderBanner(getCurrentVersion()));
     console.log(renderSessionInfo({
-        workdir: process.cwd(),
+        workdir: safeCwd(),
         model: state.model,
         provider: state.provider,
         mode: state.mode,
     }));
     console.log(renderSuggestions());
-    console.log(chalk.dim("  ESC abort  │  Ctrl+C clears input  │  @ to attach files  │  Ctrl+T thinking  │  Ctrl+O tool output (q to close)\n"));
+    console.log(chalk.dim("  ESC abort  │  Ctrl+C clears input  │  @ to attach files  │  Ctrl+T thinking  │  Ctrl+O tool output  │  Ctrl+P plan (q to close)\n"));
     // Hint thinking-capable users that the toggle exists. We default it to
     // off for speed, since on NIM many models route through a much slower
     // chat-template path when reasoning is enabled.
@@ -1760,18 +1921,45 @@ export async function startRepl(options = {}) {
                 history: promptHistory,
                 onThinkingShortcut: handleThinkingShortcut,
                 onOutputShortcut: handleOutputShortcut,
+                onPlanShortcut: handlePlanShortcut,
             })).trim();
             isReadingPrompt = false;
             if (!line)
                 continue;
+            // ── /implement — approve the active plan and execute it ──────────
+            // Handled here (not in handleSlash) because it must trigger a full
+            // agent run with the plan marked approved, not just print something.
+            let implementApproved = false;
+            let effectiveLine = line;
+            if (line === "/implement" || line.startsWith("/implement ")) {
+                const plan = await loadPlan(state.session.sessionId).catch(() => undefined);
+                if (!plan) {
+                    console.log(chalk.dim("  no plan to implement — ask clai to plan a multi-step task first"));
+                    continue;
+                }
+                if (plan.tasks.every((t) => t.state === "done")) {
+                    console.log(chalk.dim("  this plan is already complete ✓"));
+                    continue;
+                }
+                plan.status = "approved";
+                await savePlan(plan).catch(() => undefined);
+                state.session.planApproved.value = true;
+                console.log(chalk.cyan("  ✦ plan approved — clai will now execute it\n"));
+                console.log(renderPlanChecklist(plan) + "\n");
+                implementApproved = true;
+                effectiveLine =
+                    "I approve the plan. Execute it now, task by task: mark each task in_progress before " +
+                        "you start it and done after it actually succeeds. Run real commands (installs, servers, " +
+                        "verification) — do not claim anything ran without a successful tool call.";
+            }
             // Only remember real prompts in the history ring. Slash commands
             // are operational toggles (eg /model, /provider) and surfacing them
             // when the user presses ↑ to recall a past prompt is just noise.
-            if (!line.startsWith("/") &&
+            if (!looksLikeSlashCommand(line) &&
                 promptHistory[promptHistory.length - 1] !== line) {
                 promptHistory.push(line);
             }
-            if (line.startsWith("/")) {
+            if (looksLikeSlashCommand(line) && !implementApproved) {
                 // Slash commands may call inquirer/password prompts, which expect the
                 // terminal in cooked mode. Normal model runs keep raw mode enabled so
                 // ESC/Ctrl+C can abort while streaming.
@@ -1790,17 +1978,48 @@ export async function startRepl(options = {}) {
                 // Expand @file mentions and drag-and-dropped paths into real context.
                 // The user-visible `line` stays readable in history; the model gets
                 // the line plus an appended block of file contents / path notes.
-                const expansion = expandMentions(line);
-                const modelInput = expansion.contextBlock.length > 0
-                    ? `${line}\n\n${expansion.contextBlock}`
-                    : line;
+                let requestModel = state.model;
+                let visionCapable = modelSupportsVision(state.provider, requestModel);
+                let expansion = expandMentions(effectiveLine, safeCwd(), visionCapable);
+                const hasImageAttachment = expansion.attachments.some((att) => att.kind === "image");
+                if (hasImageAttachment && !visionCapable) {
+                    const fallbackVisionModel = preferredVisionModel(state.provider, requestModel);
+                    if (fallbackVisionModel && fallbackVisionModel !== requestModel) {
+                        const previousModel = requestModel;
+                        requestModel = fallbackVisionModel;
+                        visionCapable = true;
+                        expansion = expandMentions(effectiveLine, safeCwd(), true);
+                        console.log(chalk.dim("  ↳ vision model: ") +
+                            chalk.dim(`${requestModel} (auto for image; ${previousModel} can't view images)`));
+                    }
+                }
+                const images = visionCapable
+                    ? loadImageAttachments(effectiveLine, safeCwd())
+                    : [];
+                const sentImagePaths = new Set(images.map((img) => img.path).filter((p) => Boolean(p)));
+                // OCR grounding: extract text from any attached image locally and
+                // append it. This is the safety net for the case the user hit — a
+                // provider that accepts image bytes but silently ignores them, so the
+                // model otherwise hallucinates from the filename. Cheap, best-effort,
+                // and additive (vision models still get the real bytes).
+                const ocrGrounding = hasImageAttachment
+                    ? await buildImageOcrGrounding(effectiveLine, safeCwd())
+                    : "";
+                const contextParts = [expansion.contextBlock, ocrGrounding].filter((part) => part.length > 0);
+                const modelInput = contextParts.length > 0
+                    ? `${effectiveLine}\n\n${contextParts.join("\n\n")}`
+                    : effectiveLine;
                 if (expansion.attachments.length > 0) {
                     for (const att of expansion.attachments) {
                         const tag = att.kind === "text"
                             ? chalk.green("attached")
                             : att.kind === "missing"
                                 ? chalk.red("not found")
-                                : chalk.yellow(att.kind);
+                                : att.kind === "image" && sentImagePaths.has(att.path)
+                                    ? chalk.green("image (sent to model)")
+                                    : att.kind === "image" && visionCapable
+                                        ? chalk.yellow("image (not sent)")
+                                        : chalk.yellow(att.kind);
                         console.log(chalk.dim(`  ↳ ${tag}: `) + chalk.dim(att.path));
                     }
                 }
@@ -1808,9 +2027,10 @@ export async function startRepl(options = {}) {
                     assistantContent = await withAbortableInput(async (signal) => streamWithAbort(async (runSignal, onToken) => {
                         return await runAskStream(modelInput, onToken, {
                             provider: state.provider,
-                            model: state.model,
+                            model: requestModel,
                             history: state.messages,
                             signal: runSignal,
+                            images,
                         });
                     }, signal));
                     process.stdout.write("\n");
@@ -1818,14 +2038,24 @@ export async function startRepl(options = {}) {
                 else {
                     assistantContent = await withAbortableInput(async (signal) => runAgent(modelInput, {
                         provider: state.provider,
-                        model: state.model,
+                        model: requestModel,
                         history: state.messages,
                         signal,
                         session: state.session,
+                        images,
                     }));
                 }
                 console.log();
-                state.messages.push({ role: "user", content: modelInput }, { role: "assistant", content: assistantContent });
+                const userHistoryMessage = {
+                    role: "user",
+                    content: modelInput,
+                };
+                if (images.length > 0)
+                    userHistoryMessage.images = images;
+                state.messages.push(userHistoryMessage, {
+                    role: "assistant",
+                    content: assistantContent,
+                });
             }
             catch (error) {
                 if (error instanceof AbortRunError) {