npm - agent-sh - Versions diffs - 0.12.19 → 0.12.21 - Mend

agent-sh 0.12.19 → 0.12.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +9 -1
package/dist/agent/agent-loop.js +6 -0
package/dist/agent/normalize-args.d.ts +29 -0
package/dist/agent/normalize-args.js +56 -0
package/dist/agent/subagent.js +2 -0
package/dist/event-bus.d.ts +3 -1
package/dist/extensions/agent-backend.js +58 -21
package/dist/extensions/index.js +8 -3
package/dist/extensions/providers/deepseek.d.ts +8 -0
package/dist/extensions/providers/deepseek.js +23 -0
package/dist/extensions/providers/openai-compatible.d.ts +7 -0
package/dist/extensions/providers/openai-compatible.js +30 -0
package/dist/extensions/providers/openai.d.ts +7 -0
package/dist/extensions/providers/openai.js +39 -0
package/dist/extensions/{openrouter.d.ts → providers/openrouter.d.ts} +1 -1
package/dist/extensions/{openrouter.js → providers/openrouter.js} +5 -3
package/dist/extensions/tui-renderer.js +38 -35
package/dist/settings.d.ts +5 -0
package/dist/settings.js +3 -2
package/dist/types.d.ts +16 -1
package/dist/utils/box-frame.js +14 -8
package/dist/utils/llm-client.d.ts +5 -1
package/dist/utils/llm-client.js +7 -2
package/dist/utils/llm-facade.js +5 -5
package/package.json +1 -1
package/dist/extensions/openai.d.ts +0 -9
package/dist/extensions/openai.js +0 -49

package/README.md CHANGED Viewed

@@ -57,14 +57,22 @@ export OPENAI_API_KEY=sk-...
 agent-sh
 ```
+**DeepSeek:**
+```bash
+export DEEPSEEK_API_KEY=sk-...
+agent-sh
+```
 **Local models** (Ollama, llama.cpp server, LM Studio, vLLM — anything OpenAI-compatible):
 ```bash
-export OPENAI_API_KEY=ollama                        # any value; dummy is fine
 export OPENAI_BASE_URL=http://localhost:11434/v1    # point at your server
 agent-sh
 ```
+Set `OPENAI_API_KEY` too if your server requires auth.
 Once running, switch models at any time with `/model <name>` (tab-completes; selection persists across sessions).
 For richer configuration (multiple providers, extensions), run `agent-sh init` to scaffold `~/.agent-sh/settings.json` with copy-pasteable examples. See the [Usage Guide](docs/usage.md) for the full list of supported providers.

package/dist/agent/agent-loop.js CHANGED Viewed

@@ -4,6 +4,7 @@ import * as path from "node:path";
 import * as os from "node:os";
 import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
 import { ToolRegistry } from "./tool-registry.js";
+import { normalizeToolArgs } from "./normalize-args.js";
 import { ConversationState } from "./conversation-state.js";
 import { HistoryFile } from "./history-file.js";
 import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
@@ -1188,6 +1189,10 @@ export class AgentLoop {
                     });
                     return;
                 }
+                // Normalize against the tool's input_schema: some LLMs stringify
+                // nested object/array args despite the schema. See
+                // normalize-args.ts for the diagnostic that uncovered this.
+                args = normalizeToolArgs(args, tool.input_schema);
                 // ── Round-scoped cache for cacheable read-only tools ──
                 const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
                 const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
@@ -1527,6 +1532,7 @@ export class AgentLoop {
             messages,
             tools: apiTools,
             model: this.currentModel,
+            max_tokens: this.currentMode.maxTokens ?? 65536,
             ...this.reasoningParams(),
         };
         this.bus.emit("llm:request", requestParams);

package/dist/agent/normalize-args.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * Schema-aware tool-arg normalization.
+ *
+ * Some LLMs (notably Claude) occasionally emit nested object/array
+ * tool-call arguments as JSON-encoded strings instead of native
+ * objects, despite the schema declaring `type: "object"` /
+ * `type: "array"`. The discrepancy was diagnosed by the superash field
+ * test (2026-05-03 / commit `b9efd47`):
+ *
+ *     describe_demos: 'task' arrived as a string (length 1267)
+ *       last char code: 93 (']')
+ *       truncation suspected: true
+ *
+ * Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
+ * helper centralizes the fix at the kernel boundary: after parsing the
+ * outer `argumentsJson`, walk each top-level field; for any field whose
+ * schema declares `object` or `array` but whose value is a string, run
+ * a single JSON.parse pass. On parse failure (e.g. truncated content),
+ * the string is left as-is — the tool can produce a clean error.
+ *
+ * Top-level only by design. Recursing into nested object schemas would
+ * change semantics for tools that legitimately accept stringified
+ * payloads as inner fields, and the observed wild cases all stringify
+ * at the top level.
+ */
+/** Normalize tool-call args against the tool's input_schema. Never mutates
+ *  `args`: returns a shallow copy with the decoded fields swapped in, or the
+ *  very same `args` reference when no field needed decoding. */
+export declare function normalizeToolArgs(args: Record<string, unknown>, schema: unknown): Record<string, unknown>;

package/dist/agent/normalize-args.js ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * Schema-aware tool-arg normalization.
+ *
+ * Some LLMs (notably Claude) occasionally emit nested object/array
+ * tool-call arguments as JSON-encoded strings instead of native
+ * objects, despite the schema declaring `type: "object"` /
+ * `type: "array"`. The discrepancy was diagnosed by the superash field
+ * test (2026-05-03 / commit `b9efd47`):
+ *
+ *     describe_demos: 'task' arrived as a string (length 1267)
+ *       last char code: 93 (']')
+ *       truncation suspected: true
+ *
+ * Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
+ * helper centralizes the fix at the kernel boundary: after parsing the
+ * outer `argumentsJson`, walk each top-level field; for any field whose
+ * schema declares `object` or `array` but whose value is a string, run
+ * a single JSON.parse pass. On parse failure (e.g. truncated content),
+ * the string is left as-is — the tool can produce a clean error.
+ *
+ * Top-level only by design. Recursing into nested object schemas would
+ * change semantics for tools that legitimately accept stringified
+ * payloads as inner fields, and the observed wild cases all stringify
+ * at the top level.
+ */
+/**
+ * Normalize tool-call args against the tool's input_schema.
+ *
+ * For every top-level property the schema declares as `object` or `array`,
+ * a string value is run through a single JSON.parse pass and replaced by
+ * the decoded value, but only when the decoded value really has the
+ * declared shape (a non-null, non-array object for `object`; an array for
+ * `array`). Strings that fail to parse, or that decode to some other shape
+ * (e.g. "42" or "null"), are left untouched so the tool can produce a
+ * clean error.
+ *
+ * Pure: never mutates `args`. Returns a shallow copy when at least one
+ * field was decoded, otherwise the original `args` reference.
+ *
+ * @param args   Parsed tool-call arguments; non-object values are returned as-is.
+ * @param schema The tool's input_schema; ignored unless it carries an object `properties`.
+ * @returns The normalized arguments.
+ */
+export function normalizeToolArgs(args, schema) {
+    // Outer arguments such as `null` have no fields to normalize.
+    if (!args || typeof args !== "object")
+        return args;
+    if (!schema || typeof schema !== "object")
+        return args;
+    const properties = schema.properties;
+    if (!properties || typeof properties !== "object")
+        return args;
+    let out = null;
+    for (const [field, fieldSchema] of Object.entries(properties)) {
+        if (!fieldSchema || typeof fieldSchema !== "object")
+            continue;
+        const expectedType = fieldSchema.type;
+        if (expectedType !== "object" && expectedType !== "array")
+            continue;
+        const value = args[field];
+        if (typeof value !== "string")
+            continue;
+        let parsed;
+        try {
+            parsed = JSON.parse(value);
+        }
+        catch {
+            // Leave as string — downstream tool can produce a useful error.
+            continue;
+        }
+        // Valid JSON is not enough: only accept a value of the declared shape.
+        const shapeMatches = expectedType === "array"
+            ? Array.isArray(parsed)
+            : parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
+        if (!shapeMatches)
+            continue;
+        // Copy lazily so untouched calls keep returning the caller's object.
+        if (out === null)
+            out = { ...args };
+        out[field] = parsed;
+    }
+    return out ?? args;
+}

package/dist/agent/subagent.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { ConversationState } from "./conversation-state.js";
+import { normalizeToolArgs } from "./normalize-args.js";
 import { wrapTrailingWithDynamicContext } from "../utils/message-utils.js";
 /**
  * Run a subagent to completion.
@@ -56,6 +57,7 @@ export async function runSubagent(opts) {
                 conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`, true);
                 continue;
             }
+            args = normalizeToolArgs(args, tool.input_schema);
             // Emit tool events for TUI (if bus provided)
             if (bus) {
                 const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };

package/dist/event-bus.d.ts CHANGED Viewed

@@ -69,6 +69,7 @@ export interface ShellEvents {
         messages: unknown[];
         tools?: unknown;
         model?: string;
+        max_tokens?: number;
         reasoning_effort?: string;
     };
     "llm:chunk": {
@@ -318,6 +319,7 @@ export interface ShellEvents {
             id: string;
             reasoning?: boolean;
             contextWindow?: number;
+            maxTokens?: number;
             echoReasoning?: boolean;
         })[];
         /** Provider supports the reasoning_effort parameter. Default: true. */
@@ -325,7 +327,7 @@ export interface ShellEvents {
     };
     "provider:configure": {
         id: string;
-        reasoningParams?: (level: string) => Record<string, unknown>;
+        reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
     };
     "agent:register-tool": {
         tool: import("./agent/types.js").ToolDefinition;

package/dist/extensions/agent-backend.js CHANGED Viewed

@@ -11,24 +11,50 @@ function persistedModelFor(providerName) {
 /** Fallback reasoning-params builder: nothing for "off", otherwise the level is passed through as `reasoning_effort`. */
 function defaultReasoningBuilder(level) {
     if (level === "off")
         return {};
     return { reasoning_effort: level };
 }
+/**
+ * Merge per-model capabilities from settings with those carried by a
+ * provider:register payload. Settings win field by field; the payload
+ * fills in whatever settings leave undefined.
+ *
+ * @param settingsCaps Capability map from settings, or undefined when there is none.
+ * @param payloadCaps  Capability map built from the registration payload.
+ * @param modelIds     Model ids to produce entries for.
+ * @returns The merged map, or undefined when it would be empty. Without
+ *          settings caps the payload map itself is returned (if non-empty).
+ */
+function mergeCaps(settingsCaps, payloadCaps, modelIds) {
+    if (!settingsCaps) {
+        if (payloadCaps.size > 0)
+            return payloadCaps;
+        return undefined;
+    }
+    const merged = new Map();
+    for (const modelId of modelIds) {
+        const fromSettings = settingsCaps.get(modelId);
+        const fromPayload = payloadCaps.get(modelId);
+        if (fromSettings || fromPayload) {
+            // Settings value first, payload value as the fallback.
+            const prefer = (key) => fromSettings?.[key] ?? fromPayload?.[key];
+            merged.set(modelId, {
+                reasoning: prefer("reasoning"),
+                contextWindow: prefer("contextWindow"),
+                maxTokens: prefer("maxTokens"),
+                echoReasoning: prefer("echoReasoning"),
+            });
+        }
+    }
+    return merged.size === 0 ? undefined : merged;
+}
 export default function agentBackend(ctx) {
     const { bus } = ctx;
     const config = ctx.call("config:get-shell-config") ?? {};
-    // Seed from settings.json; runtime provider:register events add more.
+    // Immutable settings snapshot; provider:register payloads merge against it.
     const providerRegistry = new Map();
+    const settingsProviders = new Map();
     for (const name of getProviderNames()) {
         const p = resolveProvider(name);
-        if (p)
+        if (p) {
             providerRegistry.set(name, p);
+            settingsProviders.set(name, p);
+        }
     }
     const providerHooks = new Map();
+    // Bakes model id into the hook so AgentMode.buildReasoningParams keeps
+    // its (level) signature while the hook can branch on model.
+    const bindReasoning = (shapeId, model) => {
+        const hook = providerHooks.get(shapeId)?.reasoningParams;
+        return hook ? (level) => hook(level, model) : defaultReasoningBuilder;
+    };
     const buildModes = () => {
         const allModes = [];
         for (const [id, p] of providerRegistry) {
             if (!p.apiKey)
                 continue;
             const shapeId = p.reasoningShape ?? id;
-            const buildReasoningParams = providerHooks.get(shapeId)?.reasoningParams ?? defaultReasoningBuilder;
             for (const model of p.models) {
                 const mc = p.modelCapabilities?.get(model);
                 allModes.push({
@@ -36,10 +62,11 @@ export default function agentBackend(ctx) {
                     provider: id,
                     providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
                     contextWindow: mc?.contextWindow ?? p.contextWindow,
+                    maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
                     reasoning: mc?.reasoning,
                     supportsReasoningEffort: p.supportsReasoningEffort,
                     echoReasoning: mc?.echoReasoning,
-                    buildReasoningParams,
+                    buildReasoningParams: bindReasoning(shapeId, model),
                 });
             }
         }
@@ -54,6 +81,8 @@ export default function agentBackend(ctx) {
         return llmClient.complete({
             messages: messages,
             max_tokens: opts?.maxTokens,
+            model: opts?.model,
+            reasoning_effort: opts?.reasoningEffort,
         });
     });
     let modes = [];
@@ -141,38 +170,45 @@ export default function agentBackend(ctx) {
     });
     bus.on("provider:register", (p) => {
         const rawModels = p.models ?? (p.defaultModel ? [p.defaultModel] : []);
-        const modelIds = [];
-        const caps = new Map();
+        const payloadModelIds = [];
+        const payloadCaps = new Map();
         for (const m of rawModels) {
             if (typeof m === "string") {
-                modelIds.push(m);
+                payloadModelIds.push(m);
             }
             else {
-                modelIds.push(m.id);
-                caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, echoReasoning: m.echoReasoning });
+                payloadModelIds.push(m.id);
+                payloadCaps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
             }
         }
-        providerRegistry.set(p.id, {
+        const settings = settingsProviders.get(p.id);
+        const modelIds = settings?.modelsExplicit && settings.models.length > 0 ? settings.models : payloadModelIds;
+        const mergedCaps = mergeCaps(settings?.modelCapabilities, payloadCaps, modelIds);
+        const merged = {
             id: p.id,
-            apiKey: p.apiKey,
-            baseURL: p.baseURL,
-            defaultModel: p.defaultModel,
+            apiKey: settings?.apiKey ?? p.apiKey,
+            baseURL: settings?.baseURL ?? p.baseURL,
+            defaultModel: settings?.defaultModel ?? p.defaultModel,
             models: modelIds,
-            supportsReasoningEffort: p.supportsReasoningEffort,
-            modelCapabilities: caps.size > 0 ? caps : undefined,
-        });
-        const buildReasoningParams = providerHooks.get(p.id)?.reasoningParams ?? defaultReasoningBuilder;
+            modelsExplicit: settings?.modelsExplicit ?? false,
+            contextWindow: settings?.contextWindow,
+            supportsReasoningEffort: settings?.supportsReasoningEffort ?? p.supportsReasoningEffort,
+            modelCapabilities: mergedCaps,
+            reasoningShape: settings?.reasoningShape,
+        };
+        providerRegistry.set(p.id, merged);
         const addModes = modelIds.map((m) => {
-            const mc = caps.get(m);
+            const mc = mergedCaps?.get(m);
             return {
                 model: m,
                 provider: p.id,
-                providerConfig: { apiKey: p.apiKey ?? "", baseURL: p.baseURL },
+                providerConfig: { apiKey: merged.apiKey ?? "", baseURL: merged.baseURL },
                 contextWindow: mc?.contextWindow,
+                maxTokens: mc?.maxTokens,
                 reasoning: mc?.reasoning,
-                supportsReasoningEffort: p.supportsReasoningEffort,
+                supportsReasoningEffort: merged.supportsReasoningEffort,
                 echoReasoning: mc?.echoReasoning,
-                buildReasoningParams,
+                buildReasoningParams: bindReasoning(p.id, m),
             };
         });
         bus.emit("config:add-modes", { modes: addModes });
@@ -212,6 +248,7 @@ export default function agentBackend(ctx) {
                 provider: name,
                 providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
                 contextWindow: mc?.contextWindow ?? p.contextWindow,
+                maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
                 reasoning: mc?.reasoning,
                 supportsReasoningEffort: p.supportsReasoningEffort,
                 echoReasoning: mc?.echoReasoning,

package/dist/extensions/index.js CHANGED Viewed

@@ -3,10 +3,15 @@ export const BUILTIN_EXTENSIONS = [
     { name: "agent-backend", load: () => import("./agent-backend.js").then(m => m.default) },
     { name: "openrouter",
         when: () => !!process.env.OPENROUTER_API_KEY,
-        load: () => import("./openrouter.js").then(m => m.default) },
+        load: () => import("./providers/openrouter.js").then(m => m.default) },
     { name: "openai",
-        when: () => !!process.env.OPENAI_API_KEY,
-        load: () => import("./openai.js").then(m => m.default) },
+        when: () => !!process.env.OPENAI_API_KEY && !process.env.OPENAI_BASE_URL,
+        load: () => import("./providers/openai.js").then(m => m.default) },
+    { name: "openai-compatible",
+        when: () => !!process.env.OPENAI_BASE_URL,
+        load: () => import("./providers/openai-compatible.js").then(m => m.default) },
+    { name: "deepseek",
+        load: () => import("./providers/deepseek.js").then(m => m.default) },
     { name: "tui-renderer", load: () => import("./tui-renderer.js").then(m => m.default) },
     { name: "slash-commands", load: () => import("./slash-commands.js").then(m => m.default) },
     { name: "file-autocomplete", load: () => import("./file-autocomplete.js").then(m => m.default) },

package/dist/extensions/providers/deepseek.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * Native DeepSeek (api.deepseek.com). V4 ignores reasoning_effort for
+ * on/off — disable lives in a separate `thinking` field that defaults
+ * to enabled. The hook always attaches; provider registration via env
+ * is opt-in alongside any settings.json entry.
+ */
+import type { ExtensionContext } from "../../types.js";
+export default function activate(ctx: ExtensionContext): void;

package/dist/extensions/providers/deepseek.js ADDED Viewed

@@ -0,0 +1,23 @@
+// Endpoint sent as `baseURL` when the provider is registered from the environment.
+const BASE_URL = "https://api.deepseek.com";
+// Built-in model catalog; the first entry doubles as the default model.
+// Both entries are flagged as reasoning models whose reasoning is echoed back.
+const DEFAULT_MODELS = [
+    { id: "deepseek-v4-flash", reasoning: true, echoReasoning: true },
+    { id: "deepseek-v4-pro", reasoning: true, echoReasoning: true },
+];
+/**
+ * Reasoning request params for native DeepSeek. "off" disables thinking
+ * through the dedicated `thinking` field; any other level enables it and
+ * forwards the level as `reasoning_effort`.
+ *
+ * @param level  Reasoning level, or "off".
+ * @param _model Unused.
+ */
+function buildReasoningParams(level, _model) {
+    if (level === "off") {
+        return { thinking: { type: "disabled" } };
+    }
+    return { thinking: { type: "enabled" }, reasoning_effort: level };
+}
+/**
+ * Extension entry point. Always installs the DeepSeek reasoning hook;
+ * registers the provider itself only when DEEPSEEK_API_KEY is set.
+ */
+export default function activate(ctx) {
+    ctx.providers.configure("deepseek", { reasoningParams: buildReasoningParams });
+    const { DEEPSEEK_API_KEY: apiKey } = process.env;
+    if (apiKey) {
+        const [firstModel] = DEFAULT_MODELS;
+        ctx.bus.emit("provider:register", {
+            id: "deepseek",
+            apiKey,
+            baseURL: BASE_URL,
+            defaultModel: firstModel.id,
+            models: DEFAULT_MODELS,
+        });
+    }
+}

package/dist/extensions/providers/openai-compatible.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * OpenAI Chat Completions-compatible local/3rd-party server (Ollama, LM
+ * Studio, vLLM, llama.cpp, …). No reasoning hook — the right shape depends
+ * on which model the server is serving; user extensions can add one.
+ */
+import type { ExtensionContext } from "../../types.js";
+export default function activate(ctx: ExtensionContext): void;

package/dist/extensions/providers/openai-compatible.js ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Extension entry point for OpenAI-compatible servers. Does nothing unless
+ * OPENAI_BASE_URL is set. Registers the provider immediately with an empty
+ * model list, then registers it again once the server's catalog has been
+ * fetched and is non-empty. Catalog failures are ignored (best effort).
+ */
+export default function activate(ctx) {
+    const { OPENAI_BASE_URL: baseURL, OPENAI_API_KEY: envKey } = process.env;
+    if (!baseURL)
+        return;
+    // Local servers often need no key; SDK still wants a non-empty string.
+    const apiKey = envKey || "no-key";
+    const id = "openai-compatible";
+    const register = (extra) => ctx.bus.emit("provider:register", { id, apiKey, baseURL, ...extra });
+    register({ models: [] });
+    const onCatalog = (models) => {
+        if (models.length > 0)
+            register({ defaultModel: models[0], models });
+    };
+    fetchModels(baseURL, apiKey).then(onCatalog).catch(() => { });
+}
+/**
+ * Fetch the model catalog from an OpenAI-compatible `/models` endpoint.
+ *
+ * @param baseURL Server base URL; trailing slashes are stripped before
+ *                `/models` is appended.
+ * @param apiKey  API key. The "no-key" placeholder (or an empty value) is
+ *                never sent as a credential.
+ * @returns The model ids reported by the server. Entries without a
+ *          non-empty string `id` are dropped, and a non-OK response or a
+ *          payload without a `data` array yields an empty list.
+ */
+async function fetchModels(baseURL, apiKey) {
+    const headers = {};
+    if (apiKey && apiKey !== "no-key")
+        headers.Authorization = `Bearer ${apiKey}`;
+    const res = await fetch(`${baseURL.replace(/\/+$/, "")}/models`, { headers });
+    if (!res.ok)
+        return [];
+    const data = await res.json();
+    const entries = Array.isArray(data?.data) ? data.data : [];
+    // Keep only usable ids so `undefined` never reaches the model list or defaultModel.
+    return entries
+        .map((m) => m?.id)
+        .filter((id) => typeof id === "string" && id.length > 0);
+}

package/dist/extensions/providers/openai.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * Cloud OpenAI (api.openai.com). reasoning_effort vocabulary diverges per
+ * family: o-series has no off; gpt-5-codex floors at "low"; plain gpt-5
+ * floors at "minimal"; gpt-5.1+ accepts "none" as documented full off.
+ */
+import type { ExtensionContext } from "../../types.js";
+export default function activate(ctx: ExtensionContext): void;

package/dist/extensions/providers/openai.js ADDED Viewed

@@ -0,0 +1,39 @@
+// Built-in OpenAI model catalog registered with the provider; the first
+// entry is used as the default model. `reasoning` marks which models are
+// flagged as reasoning-capable.
+const CLOUD_MODELS = [
+    { id: "gpt-5", reasoning: true },
+    { id: "gpt-4.1", reasoning: false },
+    { id: "gpt-4o", reasoning: false },
+    { id: "gpt-4o-mini", reasoning: false },
+    { id: "o3", reasoning: true },
+    { id: "o3-mini", reasoning: true },
+];
+/**
+ * Map a model id to the `reasoning_effort` value that stands in for "off",
+ * or null when no substitute applies. Rules are checked in order and the
+ * first match wins.
+ *
+ * @param model Model id, e.g. "gpt-5" or "o3-mini".
+ * @returns "low", "none", "minimal", or null.
+ */
+function offEffortFor(model) {
+    const rules = [
+        { applies: (id) => /^o\d/.test(id), effort: null },
+        { applies: (id) => id.startsWith("gpt-5-codex"), effort: "low" },
+        { applies: (id) => /^gpt-5\.[1-9]/.test(id), effort: "none" },
+        { applies: (id) => /^gpt-5(?!\.)/.test(id), effort: "minimal" },
+    ];
+    const hit = rules.find((rule) => rule.applies(model));
+    return hit ? hit.effort : null;
+}
+/**
+ * Reasoning request params for cloud OpenAI. Any level other than "off" is
+ * forwarded as `reasoning_effort`. For "off", the model's substitute value
+ * from offEffortFor is sent when there is one; otherwise no params at all.
+ *
+ * @param level Reasoning level, or "off".
+ * @param model Optional model id used to pick the "off" substitute.
+ */
+function buildReasoningParams(level, model) {
+    if (level === "off") {
+        const substitute = model ? offEffortFor(model) : null;
+        if (substitute)
+            return { reasoning_effort: substitute };
+        return {};
+    }
+    return { reasoning_effort: level };
+}
+/**
+ * Extension entry point for cloud OpenAI. Requires OPENAI_API_KEY and
+ * stands down when OPENAI_BASE_URL is set; otherwise installs the
+ * reasoning hook and registers the provider with the built-in catalog.
+ */
+export default function activate(ctx) {
+    const { OPENAI_API_KEY: apiKey, OPENAI_BASE_URL: customBaseURL } = process.env;
+    // With a custom base URL, openai-compatible handles this.
+    if (!apiKey || customBaseURL)
+        return;
+    ctx.providers.configure("openai", { reasoningParams: buildReasoningParams });
+    const [firstModel] = CLOUD_MODELS;
+    ctx.bus.emit("provider:register", {
+        id: "openai",
+        apiKey,
+        defaultModel: firstModel.id,
+        models: CLOUD_MODELS,
+    });
+}

package/dist/extensions/{openrouter.d.ts → providers/openrouter.d.ts} RENAMED Viewed

@@ -3,5 +3,5 @@
  * Registers curated defaults synchronously so the first query works, then
  * fetches the full catalog to populate /model autocomplete.
  */
-import type { ExtensionContext } from "../types.js";
+import type { ExtensionContext } from "../../types.js";
 export default function activate(ctx: ExtensionContext): void;

package/dist/extensions/{openrouter.js → providers/openrouter.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { getSettings } from "../settings.js";
+import { getSettings } from "../../settings.js";
 const BASE_URL = "https://openrouter.ai/api/v1";
 const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
 // Built-in defaults for models requiring reasoning_content echoed back
@@ -6,9 +6,11 @@ const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
 //   providers.openrouter.echoReasoningPatterns = ["deepseek", "..."]
 //   providers.openrouter.models[*].echoReasoning = true | false
 const BUILTIN_ECHO_REASONING_PATTERNS = [/deepseek/i];
-function buildReasoningParams(level) {
+// `effort: "none"` is the documented disable; honored by OpenAI/Grok, ignored
+// by Anthropic/Gemini/DeepSeek-via-OpenRouter (use native deepseek for a hard off).
+function buildReasoningParams(level, _model) {
     return level === "off"
-        ? { reasoning: { enabled: false } }
+        ? { reasoning: { effort: "none" } }
         : { reasoning: { effort: level } };
 }
 export default function activate(ctx) {

package/dist/extensions/tui-renderer.js CHANGED Viewed

@@ -67,6 +67,8 @@ function createRenderState() {
         isThinking: false,
         showThinkingText: false,
         thinkingPending: false,
+        previewedDiffPending: false,
+        previewedDiffToolIds: new Set(),
     };
 }
 export default function activate(ctx) {
@@ -175,21 +177,20 @@ export default function activate(ctx) {
         s.thinkingPending = true;
         if (!s.isThinking) {
             s.isThinking = true;
-            if (s.showThinkingText) {
-                stopCurrentSpinner();
-                if (!s.renderer)
-                    startAgentResponse();
-            }
-            else {
+            if (!s.showThinkingText)
                 startThinkingSpinner();
-            }
         }
-        if (s.showThinkingText && e.text) {
-            s.thinkingPending = false;
+        if (s.showThinkingText) {
+            stopCurrentSpinner();
             if (!s.renderer)
                 startAgentResponse();
-            s.renderer.push(`${p.dim}${e.text}${p.reset}`);
-            drain();
+            if (e.text) {
+                s.thinkingPending = false;
+                // Wrap each sub-line so dim survives \n boundaries in the renderer.
+                const wrapped = `${p.dim}${e.text.replace(/\n/g, `${p.reset}\n${p.dim}`)}${p.reset}`;
+                s.renderer.push(wrapped);
+                drain();
+            }
         }
     });
     bus.on("agent:response-chunk", (e) => {
@@ -272,6 +273,10 @@ export default function activate(ctx) {
         s.currentToolKind = e.kind;
         s.toolStartTime = Date.now();
         s.orphanContHeaderKind = undefined;
+        if (s.previewedDiffPending && e.toolCallId) {
+            s.previewedDiffToolIds.add(e.toolCallId);
+        }
+        s.previewedDiffPending = false;
         if (e.title === "user_shell") {
             finalizeToolGroup();
             closeToolLine();
@@ -335,11 +340,18 @@ export default function activate(ctx) {
         s.toolExitCode = e.exitCode;
         if (e.exitCode !== 0)
             s.toolGroupAllOk = false;
+        let resultDisplay = e.resultDisplay;
+        if (e.toolCallId && s.previewedDiffToolIds.has(e.toolCallId)) {
+            s.previewedDiffToolIds.delete(e.toolCallId);
+            if (resultDisplay?.body?.kind === "diff") {
+                resultDisplay = { ...resultDisplay, body: undefined };
+            }
+        }
         if (s.toolGroupKind) {
             // Grouped tool — track success/failure and summaries, show aggregate on ⎿ line.
             // Don't restart spinner between grouped tools — it's already running from group start.
-            if (e.resultDisplay?.summary)
-                s.toolGroupSummaries.push(e.resultDisplay.summary);
+            if (resultDisplay?.summary)
+                s.toolGroupSummaries.push(resultDisplay.summary);
             if (e.toolCallId)
                 s.pendingToolCompletes.delete(e.toolCallId);
             s.toolGroupCompletedCount++;
@@ -358,10 +370,10 @@ export default function activate(ctx) {
             if (pending)
                 s.pendingToolCompletes.delete(e.toolCallId);
             if (pending?.orphaned) {
-                showOrphanedComplete(e.exitCode, e.resultDisplay, pending.title, pending.kind, pending.displayDetail);
+                showOrphanedComplete(e.exitCode, resultDisplay, pending.title, pending.kind, pending.displayDetail);
             }
             else {
-                showToolComplete(e.exitCode, e.resultDisplay, pending?.displayDetail ?? pending?.title);
+                showToolComplete(e.exitCode, resultDisplay, pending?.displayDetail ?? pending?.title);
             }
             s.currentToolKind = undefined;
             s.spinnerStartTime = 0;
@@ -432,6 +444,7 @@ export default function activate(ctx) {
             // Mark lastContentKind as "tool" so the tool call line that follows
             // doesn't inject an extra gap between the diff box and the checkmark.
             s.lastContentKind = "tool";
+            s.previewedDiffPending = true;
         }
         // Don't endAgentResponse() here — permission requests that aren't
         // file-write diffs are handled inline (auto-approved or by extensions).
@@ -654,26 +667,16 @@ export default function activate(ctx) {
             return [];
         const boxW = Math.min(120, width - 2);
         const contentW = boxW - 4;
-        let body;
-        if (diff.isNewFile) {
-            const lines = diff.hunks.flatMap(h => h.lines.map(l => l.text));
-            const preview = getSettings().newFilePreviewLines;
-            const head = lines.slice(0, preview);
-            const truncated = head.map(l => l.length > contentW ? l.slice(0, contentW - 1) + "…" : l);
-            const more = lines.length > preview
-                ? [`${p.dim}… ${lines.length - preview} more lines${p.reset}`]
-                : [];
-            body = ["", ...truncated, ...more, ""];
-        }
-        else {
-            const diffLines = renderDiff(diff, {
-                width: contentW,
-                filePath,
-                maxLines: getSettings().diffMaxLines,
-                trueColor: true,
-            });
-            body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
-        }
+        const maxLines = diff.isNewFile
+            ? getSettings().newFilePreviewLines
+            : getSettings().diffMaxLines;
+        const diffLines = renderDiff(diff, {
+            width: contentW,
+            filePath,
+            maxLines,
+            trueColor: true,
+        });
+        const body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
         return renderBoxFrame(body, {
             width: boxW,
             style: "rounded",

package/dist/settings.d.ts CHANGED Viewed

@@ -9,6 +9,8 @@ export interface ModelCapabilityConfig {
     reasoning?: boolean;
     /** Context window size in tokens for this specific model. */
     contextWindow?: number;
+    /** Max output tokens for this model. */
+    maxTokens?: number;
     /** Echo reasoning_content back on assistant turns. Required by DeepSeek. */
     echoReasoning?: boolean;
 }
@@ -141,6 +143,8 @@ export interface ResolvedProvider {
     baseURL?: string;
     defaultModel?: string;
     models: string[];
+    /** User explicitly listed `models` (locks the catalog to that list). */
+    modelsExplicit: boolean;
     contextWindow?: number;
     /** Provider supports the reasoning_effort parameter. Default: true. */
     supportsReasoningEffort?: boolean;
@@ -148,6 +152,7 @@ export interface ResolvedProvider {
     modelCapabilities?: Map<string, {
         reasoning?: boolean;
         contextWindow?: number;
+        maxTokens?: number;
         echoReasoning?: boolean;
     }>;
     /** Borrow another registered provider's reasoning request shape by id. */

package/dist/settings.js CHANGED Viewed

@@ -148,8 +148,8 @@ export function resolveProvider(name) {
         }
         else {
             modelIds.push(m.id);
-            if (m.reasoning !== undefined || m.contextWindow !== undefined || m.echoReasoning !== undefined) {
-                caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, echoReasoning: m.echoReasoning });
+            if (m.reasoning !== undefined || m.contextWindow !== undefined || m.maxTokens !== undefined || m.echoReasoning !== undefined) {
+                caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
             }
         }
     }
@@ -160,6 +160,7 @@ export function resolveProvider(name) {
         baseURL: provider.baseURL,
         defaultModel,
         models: modelIds.length ? modelIds : (defaultModel ? [defaultModel] : []),
+        modelsExplicit: Array.isArray(provider.models),
         contextWindow: provider.contextWindow,
         modelCapabilities: caps.size > 0 ? caps : undefined,
         reasoningShape: provider.reasoningShape,

package/dist/types.d.ts CHANGED Viewed

@@ -41,6 +41,8 @@ export interface AgentMode {
     };
     /** Context window size in tokens (for usage display). */
     contextWindow?: number;
+    /** Max output tokens for this mode. */
+    maxTokens?: number;
     /** Model supports reasoning/thinking tokens. */
     reasoning?: boolean;
     /** Provider supports the reasoning_effort parameter. */
@@ -65,14 +67,27 @@ export interface LlmSession {
 }
 export interface LlmInterface {
     readonly available: boolean;
+    /** `model` overrides the globally-configured model for this call only.
+     *  Provider-specific identifier (e.g. "claude-haiku-4-5"). When omitted,
+     *  the active provider's configured default is used.
+     *
+     *  `reasoningEffort` controls thinking-model token allocation between
+     *  reasoning and final content (e.g. "low", "medium", "high", or
+     *  provider-specific). For non-reasoning models it is ignored. Set to
+     *  "low" for cheap structured-output calls so reasoning doesn't exhaust
+     *  the max-tokens budget and leave content empty. */
     ask(opts: {
         query: string;
         system?: string;
         maxTokens?: number;
+        model?: string;
+        reasoningEffort?: string;
     }): Promise<string>;
     session(opts?: {
         system?: string;
         maxTokens?: number;
+        model?: string;
+        reasoningEffort?: string;
     }): LlmSession;
 }
 export interface AgentShellConfig {
@@ -156,7 +171,7 @@ export interface ExtensionContext {
     }) => () => void;
     providers: {
         configure: (id: string, opts: {
-            reasoningParams?: (level: string) => Record<string, unknown>;
+            reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
         }) => void;
     };
     llm: LlmInterface;

package/dist/utils/box-frame.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * never writes to stdout. Supports multiple border styles and
  * optional title/footer sections with dividers.
  */
-import { visibleLen, truncateToWidth } from "./ansi.js";
+import { visibleLen, truncateToWidth, truncateAnsiToWidth } from "./ansi.js";
 import { palette as p } from "./palette.js";
 const BORDERS = {
     rounded: { tl: "╭", tr: "╮", bl: "╰", br: "╯", h: "─", v: "│", ml: "├", mr: "┤" },
@@ -32,14 +32,20 @@ export function renderBoxFrame(content, opts) {
     const output = [];
     // Top border (with optional left/right titles)
     if (opts.title || opts.titleRight) {
-        const leftPart = opts.title
-            ? `${p.reset} ${opts.title} ${bc}`
-            : "";
-        const leftVis = opts.title ? visibleLen(opts.title) + 2 : 0; // +2 for spaces
-        const rightPart = opts.titleRight
-            ? `${p.reset} ${opts.titleRight} ${bc}`
-            : "";
+        // Budget: 2 corners + 1 minimum dash + space-padding around each title.
+        // Truncate the left title first if combined widths overflow — titleRight
+        // is typically short metadata (model name, stats) worth preserving.
+        let title = opts.title;
         const rightVis = opts.titleRight ? visibleLen(opts.titleRight) + 2 : 0;
+        const leftBudget = width - 2 - 1 - rightVis; // total - corners - min dash - right
+        let leftVis = title ? visibleLen(title) + 2 : 0;
+        if (title && leftVis > leftBudget) {
+            const maxTitleVis = Math.max(1, leftBudget - 2);
+            title = truncateAnsiToWidth(title, maxTitleVis);
+            leftVis = visibleLen(title) + 2;
+        }
+        const leftPart = title ? `${p.reset} ${title} ${bc}` : "";
+        const rightPart = opts.titleRight ? `${p.reset} ${opts.titleRight} ${bc}` : "";
         const dashCount = Math.max(1, width - 2 - leftVis - rightVis);
         output.push(`${bc}${b.tl}${leftPart}${b.h.repeat(dashCount)}${rightPart}${b.tr}${p.reset}`);
     }

package/dist/utils/llm-client.d.ts CHANGED Viewed

@@ -33,7 +33,8 @@ export declare class LlmClient {
         tools?: ChatCompletionTool[];
         model?: string;
         max_tokens?: number;
-        /** Reasoning effort level (e.g. "low", "medium", "high"). Provider-dependent. */
+        /** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
+         *  "off" matches agent-loop's thinkingLevel and omits the field. */
         reasoning_effort?: string;
         signal?: AbortSignal;
     }): import("openai").APIPromise<import("openai/core/streaming.mjs").Stream<OpenAI.Chat.Completions.ChatCompletionChunk>>;
@@ -45,5 +46,8 @@ export declare class LlmClient {
         messages: ChatCompletionMessageParam[];
         model?: string;
         max_tokens?: number;
+        /** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
+         *  "off" matches agent-loop's thinkingLevel and omits the field. */
+        reasoning_effort?: string;
     }): Promise<string>;
 }

package/dist/utils/llm-client.js CHANGED Viewed

@@ -40,14 +40,15 @@ export class LlmClient {
      * Returns an async iterable of chunks.
      */
     stream(opts) {
+        const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
         const body = {
             model: opts.model ?? this.model,
             messages: opts.messages,
             tools: opts.tools?.length ? opts.tools : undefined,
-            max_tokens: opts.max_tokens ?? 8192,
+            max_tokens: opts.max_tokens ?? 65536,
             stream: true,
             stream_options: { include_usage: true },
-            ...(opts.reasoning_effort
+            ...(sendEffort
                 ? { reasoning_effort: opts.reasoning_effort }
                 : {}),
         };
@@ -58,10 +59,14 @@ export class LlmClient {
      * Returns the text content of the first choice.
      */
     async complete(opts) {
+        const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
         const response = await this.client.chat.completions.create({
             model: opts.model ?? this.model,
             messages: opts.messages,
             max_tokens: opts.max_tokens ?? 1024,
+            ...(sendEffort
+                ? { reasoning_effort: opts.reasoning_effort }
+                : {}),
         });
         return response.choices[0]?.message?.content ?? "";
     }

package/dist/utils/llm-facade.js CHANGED Viewed

@@ -1,18 +1,18 @@
 export function createLlmFacade(handlers) {
-    const invoke = (messages, maxTokens) => {
-        const result = handlers.call("llm:invoke", messages, { maxTokens });
+    const invoke = (messages, maxTokens, model, reasoningEffort) => {
+        const result = handlers.call("llm:invoke", messages, { maxTokens, model, reasoningEffort });
         if (result === undefined)
             return Promise.reject(new Error("ctx.llm: no LLM backend available"));
         return result;
     };
     return {
         get available() { return handlers.list().includes("llm:invoke"); },
-        ask: ({ query, system, maxTokens }) => {
+        ask: ({ query, system, maxTokens, model, reasoningEffort }) => {
             const messages = [];
             if (system)
                 messages.push({ role: "system", content: system });
             messages.push({ role: "user", content: query });
-            return invoke(messages, maxTokens);
+            return invoke(messages, maxTokens, model, reasoningEffort);
         },
         session: (opts = {}) => {
             const messages = [];
@@ -21,7 +21,7 @@ export function createLlmFacade(handlers) {
             const session = {
                 async send(message) {
                     messages.push({ role: "user", content: message });
-                    const reply = await invoke(messages, opts.maxTokens);
+                    const reply = await invoke(messages, opts.maxTokens, opts.model, opts.reasoningEffort);
                     messages.push({ role: "assistant", content: reply });
                     return reply;
                 },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-sh",
-  "version": "0.12.19",
+  "version": "0.12.21",
   "description": "A shell-first terminal where AI is one keystroke away",
   "type": "module",
   "main": "dist/core.js",

package/dist/extensions/openai.d.ts DELETED Viewed

@@ -1,9 +0,0 @@
-/**
- * Built-in OpenAI-compatible provider. Two activation paths:
- *   - OPENAI_API_KEY only       → cloud OpenAI, ships a curated catalog.
- *   - OPENAI_BASE_URL (any key) → local/3rd-party server (Ollama, LM Studio,
- *                                  vLLM, llama.cpp); the catalog is fetched
- *                                  from the server's /models endpoint.
- */
-import type { ExtensionContext } from "../types.js";
-export default function activate(ctx: ExtensionContext): void;

package/dist/extensions/openai.js DELETED Viewed

@@ -1,49 +0,0 @@
-const OPENAI_CLOUD_MODELS = [
-    { id: "gpt-5", reasoning: true },
-    { id: "gpt-4.1", reasoning: false },
-    { id: "gpt-4o", reasoning: false },
-    { id: "gpt-4o-mini", reasoning: false },
-    { id: "o3", reasoning: true },
-    { id: "o3-mini", reasoning: true },
-];
-export default function activate(ctx) {
-    const apiKey = process.env.OPENAI_API_KEY ?? "";
-    const baseURL = process.env.OPENAI_BASE_URL;
-    if (!baseURL) {
-        if (!apiKey)
-            return;
-        ctx.bus.emit("provider:register", {
-            id: "openai",
-            apiKey,
-            defaultModel: OPENAI_CLOUD_MODELS[0].id,
-            models: OPENAI_CLOUD_MODELS,
-        });
-        return;
-    }
-    const id = "openai-compatible";
-    // Local servers (Ollama, llama.cpp) often need no key; the SDK still
-    // requires a non-empty string for construction.
-    const sdkKey = apiKey || "no-key";
-    ctx.bus.emit("provider:register", { id, apiKey: sdkKey, baseURL, models: [] });
-    fetchModels(baseURL, apiKey).then((models) => {
-        if (models.length === 0)
-            return;
-        ctx.bus.emit("provider:register", {
-            id,
-            apiKey: sdkKey,
-            baseURL,
-            defaultModel: models[0],
-            models,
-        });
-    }).catch(() => { });
-}
-async function fetchModels(baseURL, apiKey) {
-    const headers = {};
-    if (apiKey)
-        headers.Authorization = `Bearer ${apiKey}`;
-    const res = await fetch(`${baseURL.replace(/\/$/, "")}/models`, { headers });
-    if (!res.ok)
-        return [];
-    const data = await res.json();
-    return (data.data ?? []).map((m) => m.id);
-}