npm - agent-sh - Versions diffs - 0.15.6 → 0.15.7 - Mend

agent-sh 0.15.6 → 0.15.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (50)

package/LICENSE +21 -0
package/README.md +1 -1
package/dist/agent/agent-loop.js +2 -5
package/dist/agent/extensions/rolling-history/index.js +20 -8
package/dist/agent/extensions/rolling-history/recall.d.ts +2 -2
package/dist/agent/extensions/rolling-history/recall.js +17 -7
package/dist/agent/providers/openai-compatible.d.ts +8 -0
package/dist/agent/providers/openai-compatible.js +9 -2
package/dist/agent/store.js +6 -1
package/dist/agent/token-budget.d.ts +2 -1
package/dist/agent/token-budget.js +6 -1
package/dist/cli/index.js +1 -1
package/dist/core/event-bus.d.ts +16 -1
package/dist/core/event-bus.js +73 -11
package/dist/core/index.js +18 -0
package/dist/shell/tui-renderer.js +115 -174
package/dist/utils/executor.js +19 -11
package/dist/utils/floating-panel.d.ts +1 -0
package/dist/utils/floating-panel.js +28 -26
package/dist/utils/markdown.js +19 -21
package/dist/utils/palette.d.ts +11 -0
package/dist/utils/palette.js +11 -0
package/docs/agent.md +13 -11
package/docs/architecture.md +3 -5
package/docs/extensions.md +21 -20
package/docs/library.md +6 -3
package/docs/troubleshooting.md +2 -2
package/docs/tui-composition.md +11 -3
package/docs/usage.md +70 -50
package/examples/extensions/ashi/src/chat/assistant.ts +6 -4
package/examples/extensions/ashi/src/compaction.ts +4 -7
package/examples/extensions/ashi/src/frontend.ts +2 -0
package/examples/extensions/ashi/src/schema.ts +8 -2
package/examples/extensions/command-suggest.ts +4 -0
package/examples/extensions/solarized-theme.ts +11 -0
package/package.json +1 -1
package/src/agent/agent-loop.ts +2 -5
package/src/agent/extensions/rolling-history/index.ts +20 -8
package/src/agent/extensions/rolling-history/recall.ts +28 -7
package/src/agent/providers/openai-compatible.ts +19 -4
package/src/agent/store.ts +5 -1
package/src/agent/token-budget.ts +10 -1
package/src/cli/index.ts +1 -1
package/src/core/event-bus.ts +67 -12
package/src/core/index.ts +18 -0
package/src/shell/tui-renderer.ts +130 -207
package/src/utils/executor.ts +17 -14
package/src/utils/floating-panel.ts +24 -22
package/src/utils/markdown.ts +17 -20
package/src/utils/palette.ts +30 -5

package/dist/utils/palette.d.ts CHANGED Viewed

@@ -23,7 +23,18 @@ export interface ColorPalette {
     dim: string;
     italic: string;
     underline: string;
+    strikethrough: string;
     reset: string;
+    mdHeading: string;
+    mdLink: string;
+    mdLinkUrl: string;
+    mdCode: string;
+    mdCodeBlock: string;
+    mdCodeBlockBorder: string;
+    mdQuote: string;
+    mdQuoteBorder: string;
+    mdHr: string;
+    mdListBullet: string;
 }
 /** Active palette — import and use directly in components. */
 export declare const palette: ColorPalette;

package/dist/utils/palette.js CHANGED Viewed

@@ -23,7 +23,18 @@ const defaultPalette = {
     dim: "\x1b[2m",
     italic: "\x1b[3m",
     underline: "\x1b[4m",
+    strikethrough: "\x1b[9m",
     reset: "\x1b[0m",
+    mdHeading: "\x1b[38;2;240;198;116m", // #f0c674 gold
+    mdLink: "\x1b[38;2;129;162;190m", // #81a2be blue
+    mdLinkUrl: "\x1b[38;2;102;102;102m", // #666666 dim gray
+    mdCode: "\x1b[38;2;138;190;183m", // #8abeb7 teal
+    mdCodeBlock: "\x1b[38;2;181;189;104m", // #b5bd68 green
+    mdCodeBlockBorder: "\x1b[38;2;128;128;128m", // #808080 gray
+    mdQuote: "\x1b[38;2;128;128;128m", // #808080 gray
+    mdQuoteBorder: "\x1b[38;2;128;128;128m", // #808080 gray
+    mdHr: "\x1b[38;2;128;128;128m", // #808080 gray
+    mdListBullet: "\x1b[38;2;138;190;183m", // #8abeb7 teal
 };
 /** Active palette — import and use directly in components. */
 export const palette = { ...defaultPalette };

package/docs/agent.md CHANGED Viewed

@@ -43,14 +43,16 @@ Compaction is pluggable: the `conversation:compact` handler is advisable, so ext
 The system prompt is assembled once per `cwd` and cached (invalidated when the working directory changes), so the prefix is stable for provider-side prompt caching. It includes:
-1. **Identity** — "You are an AI coding assistant running inside agent-sh..."
-2. **Tool decision guide** — when to use which built-in tool
-3. **Tool usage guidelines** — read before editing, prefer edit over write, use grep/glob to find files, etc.
-4. **Project conventions** — `CLAUDE.md`/`AGENT.md` walked from cwd to root (cwd-stable; see next section)
-5. **Skills** — discovered project/global skills (cwd-stable)
-6. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
-7. **Available tools** — name + description of every registered tool
-8. **Extension-appended content** — extensions can advise `system-prompt:build` to append additional context (instance IDs, memory files, etc.)
+1. **Identity** — "You are ash, an AI coding assistant running inside agent-sh..." (advisable via `system-prompt:identity`)
+2. **Frontend surface** — the active frontend's self-description, placed right after the identity (advisable via `system-prompt:frontend`; omitted when none)
+3. **Static guide** — agent-sh's own code map (paths to `docs/`, `src/`, `examples/extensions/`), generic tool guidance, and the `<query_context>`/`<dynamic_context>` envelope contract
+4. **Global memory** — `~/.agent-sh/AGENTS.md`, if present
+5. **Global skills** — discovered global skills (cwd-stable)
+6. **Project conventions + skills** — `CLAUDE.md`/`AGENT.md` walked from cwd to root, plus discovered project skills (cwd-stable; see next section)
+7. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
+8. **Image support** — appended when the active model accepts image input
+Built-in tools are not inlined here — they're passed to the provider via the API `tools` parameter. Extensions can advise `system-prompt:build` directly to append further context (instance IDs, memory files, etc.).
 Per-turn signals live in two symmetric handlers, both empty by default:
@@ -218,7 +220,7 @@ When the LLM requests multiple tool calls in a single response, the agent groups
 2. **Parallel execution** — side-effect-free tools (`modifiesFiles` unset) run in parallel via `Promise.all`. Side-effecting tools run sequentially.
-3. **Output truncation** — tool results over 16KB (~4K tokens) are head+tail truncated before being added to the conversation, preventing a single tool call from blowing through the context window.
+3. **Output truncation** — tool results over the tool's `maxResultBytes` (default 100KB, ~25K tokens) are head+tail truncated (60/40 split) before being added to the conversation, preventing a single tool call from blowing through the context window.
 ### Structured result display
@@ -260,7 +262,7 @@ For OpenRouter, the flag is set automatically: model ids matching the built-in p
       "echoReasoningPatterns": ["my-custom-deepseek-fork"],
       "models": [
         { "id": "deepseek/deepseek-v3.2", "echoReasoning": false },
-        { "id": "openai/gpt-5.5",         "reasoning": true }
+        { "id": "z-ai/glm-5.1",           "reasoning": true }
       ]
     }
   }
@@ -367,7 +369,7 @@ Each entry is a `(provider, model)` target — a serializable identity plus capa
 ```typescript
 interface Model {
-  id: string;               // model id, e.g. "openai/gpt-5"
+  id: string;               // model id, e.g. "deepseek/deepseek-v4-flash"
   provider: string;         // identity is the (provider, id) pair
   contextWindow?: number;   // per-model override for the auto-compact threshold
   maxTokens?: number;

package/docs/architecture.md CHANGED Viewed

@@ -20,7 +20,7 @@ index.ts — interactive terminal frontend:
   ├── Agent host (always activated via activateAgent(ctx) before built-ins load):
   │     ash backend       — provider resolution, LlmClient, lazy AgentLoop
   │     core tools        — bash/read/write/edit/grep/glob/ls/list_skills registered at activate time
-  │     built-in providers — openrouter, openai, openai-compatible, deepseek (unconditional)
+  │     built-in providers — openrouter, openai, deepseek, ollama, zai-coding-plan, opencode (unconditional); openai-compatible when OPENAI_BASE_URL is set
   │
   ├── Backend registry (owned by core; backends register via `agent:register-backend`):
   │     core.activateBackend() — picks the named/persisted/first backend and calls its start()
@@ -28,7 +28,7 @@ index.ts — interactive terminal frontend:
   ├── Built-in extensions (loaded via declarative manifest, individually disableable):
   │     shell-context     — PTY exchange tracking, cwd advisor, <cwd>/<shell_events> producer
   │     tui-renderer      — markdown rendering, inline diffs, thinking display, spinner
-  │     slash-commands    — /help, /model, /backend, /thinking, /compact, /context, /reload
+  │     slash-commands    — /help, /model, /thinking, /backend, /reload (the ash backend adds /compact, /context)
   │     file-autocomplete — @ file path completion
   │
   ├── Shared utilities:
@@ -36,7 +36,6 @@ index.ts — interactive terminal frontend:
   │     diff-renderer     — syntax-highlighted diffs (split/unified/summary)
   │     box-frame         — bordered TUI panels
   │     tool-display      — width-adaptive tool call rendering + pure spinner
-  │     output-writer     — OutputWriter interface (StdoutWriter, BufferWriter for tests)
   │     stream-transform  — content block transforms for response pipeline
   │
   └── User extensions (opt-in, loaded from -e flag / settings.json / extensions dir):
@@ -147,7 +146,7 @@ agent-sh/
 │   │   ├── types.ts          # AgentBackend, ToolDefinition, ToolResult
 │   │   ├── agent-loop.ts     # ash AgentLoop (constructed lazily in start())
 │   │   ├── llm-client.ts, llm-facade.ts  # ash LLM transport + ctx.agent.llm facade
-│   │   ├── providers/        # openai, openrouter, deepseek, openai-compatible
+│   │   ├── providers/        # openai, openrouter, deepseek, openai-compatible, ollama, zai-coding-plan, opencode
 │   │   ├── token-budget.ts   # Shared constants (RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW)
 │   │   ├── tool-registry.ts, tool-protocol.ts
 │   │   ├── live-view.ts       # In-memory messages array + compaction + recall archive
@@ -185,7 +184,6 @@ agent-sh/
 │       ├── solarized-theme.ts   # Theme example
 │       ├── secret-guard.ts      # Secret redaction
 │       ├── latex-images.ts      # LaTeX equation rendering
-│       ├── ollama.ts            # Ollama provider (local + cloud)
 │       ├── claude-code-bridge/  # Claude Code SDK backend
 │       ├── pi-bridge/           # Pi agent backend
 │       ├── ash-mcp-bridge/      # MCP server bridge

package/docs/extensions.md CHANGED Viewed

@@ -468,17 +468,17 @@ Per-request producers (`mode: "per-request"`) only fire under backends that expo
 ## Custom Providers
-Providers describe the OpenAI-compatible endpoints the `ash` backend can talk to. The built-ins (openrouter, openai, openai-compatible, deepseek) register from `src/agent/providers/`; extensions can register their own — local daemons, hosted gateways, fine-tuned model catalogs — and they show up under `agent-sh auth list` and `/model`.
+Providers describe the OpenAI-compatible endpoints the `ash` backend can talk to. The built-ins (openrouter, openai, openai-compatible, deepseek, ollama, zai-coding-plan, opencode) register from `src/agent/providers/`; extensions can register their own — local daemons, hosted gateways, fine-tuned model catalogs — and they show up under `agent-sh auth list` and `/model`.
 ```typescript
 import type { AgentContext } from "agent-sh/types";
 export default function activate(ctx: AgentContext): void {
   ctx.agent.providers.register({
-    id: "ollama",
-    baseURL: "http://localhost:11434/v1",
-    defaultModel: "llama3.2",
-    models: ["llama3.2", "qwen2.5-coder"],
+    id: "llama-cpp",
+    baseURL: "http://localhost:8080/v1",
+    defaultModel: "gemma4",
+    models: ["gemma4"],
     noAuth: true,
   });
 }
@@ -549,9 +549,6 @@ These are registered by AgentLoop (constructed when the ash backend's `start()`
 | `conversation:estimate-tokens` | `() → number` | Local chars/4 estimate of the conversation size. |
 | `conversation:estimate-prompt-tokens` | `() → number` | API-grounded estimate (last `prompt_tokens` + local delta since). Used by the auto-compact trigger. |
 | `conversation:inject-note` | `(text) → void` | Inject a `role:"user"` note mid-loop — how extensions deliver async results (subagent output, peer messages) into the next iteration. |
-| `conversation:nucleate-user` / `-agent` / `-tool` | `(msg) → NuclearEntry` | Turn a message into its one-line summary. Advise to extract extra metadata (e.g. `[why: ...]` annotations). |
-| `conversation:format-prior-history` | `(entries) → string` | Render prior-session history into a preamble. Advise for session-grouped output. |
-| `history:append` / `:search` / `:find-by-seq` / `:read-recent` | — | Shell-history-style persistent log at `~/.agent-sh/history`. Advise to add indexing, filtering, or external stores. |
 | `tool:execute` | `(ctx) → ToolResult` | Wrap the full tool lifecycle: permission → execute → emit events. |
 **`dynamic-context:build`** — Each advisor appends its own context. Multiple extensions compose independently:
@@ -717,7 +714,7 @@ agent-sh -e ./examples/extensions/latex-images.ts
 Input modes change what happens when the user types and presses Enter. Each mode binds a trigger character (typed at the start of an empty line) to a custom `onSubmit` handler. The built-in mode (`>` for agent) is registered this way — it's not special.
-The flow: user types trigger → prompt changes to show the mode → user types their input → presses Enter → `onSubmit` fires → your handler emits `agent:submit`. You can optionally include a `modeInstruction` that gets prepended to the user message.
+The flow: user types trigger → prompt changes to show the mode → user types their input → presses Enter → `onSubmit` fires → your handler emits `agent:submit`. To steer the agent for this mode, build your instruction into the `query` string before emitting — `agent:submit` carries only `query` (and optional `images`).
 ```typescript
 bus.emit("input-mode:register", {
@@ -728,8 +725,8 @@ bus.emit("input-mode:register", {
   indicator: "🌐",           // status indicator before the icon
   onSubmit(query, bus) {
     bus.emit("agent:submit", {
-      query,                 // what the user typed
-      modeInstruction: "[mode: translate] Translate the following to Spanish.",
+      // prepend the mode instruction to what the user typed
+      query: `[mode: translate] Translate the following to Spanish.\n\n${query}`,
     });
   },
   returnToSelf: true,        // re-enter this mode after agent finishes
@@ -743,7 +740,7 @@ bus.emit("input-mode:register", {
 | `label` | `string` | Shown in the prompt area |
 | `promptIcon` | `string` | Chevron/icon character in the prompt |
 | `indicator` | `string` | Status indicator before the icon |
-| `onSubmit` | `(query, bus) => void` | Called on Enter. Emits `agent:submit` with `query` + optional `modeInstruction` |
+| `onSubmit` | `(query, bus) => void` | Called on Enter. Emits `agent:submit` with `query` (build any mode instruction into the `query` string yourself) |
 | `returnToSelf` | `boolean` | Re-enter this mode after the agent finishes |
 Each trigger character can only be claimed by one mode. Slash commands and readline keybindings work in every mode.
@@ -826,7 +823,7 @@ If your extension wants to signal "this session is interactive — read the scre
 Internally, a remote session:
 1. **Redirects render streams** — `"agent"`, `"query"`, `"status"` all route to the provided surface
-2. **Keeps the shell interactive** — advises `shell:on-processing-start` and `shell:on-processing-done` to skip pause/unpause
+2. **Keeps the shell interactive** — advises `shell:on-processing-start` and `shell:on-processing-redraw` to skip pause/redraw (it deliberately leaves `shell:on-processing-done` alone so the agent-turn state cleanup always runs)
 3. **Suppresses chrome** — advises `tui:response-border`, `tui:render-user-query`, `tui:render-usage` based on options
 Calling `session.close()` removes all advisors and restores all compositor routing in one call.
@@ -860,11 +857,11 @@ session.close();
 ## Shell Lifecycle Handlers
-The shell's behavior during agent processing is controlled by two advisable handlers. Extensions advise these to change how the shell responds when the agent starts and stops working.
+The shell's behavior during agent processing is controlled by three handlers. Two are advisable; the third runs unconditional cleanup and should not be suppressed.
 ### `shell:on-processing-start`
-Default: pauses the shell (blocks PTY output and input) while the agent works. This is correct when agent output shares stdout with the terminal.
+Default: pauses the shell (blocks PTY output and input) and acquires the agent-turn mute scope while the agent works. This is correct when agent output shares stdout with the terminal.
 ```typescript
 // Skip pause — agent output goes to a separate surface
@@ -874,19 +871,23 @@ ctx.advise("shell:on-processing-start", (next) => {
 });
 ```
-### `shell:on-processing-done`
+### `shell:on-processing-redraw`
-Default: unpauses the shell, re-enters agent input mode or redraws the shell prompt.
+Default: re-enters agent input mode or redraws the shell prompt. This is the advisable half of "agent finished" — advise it to skip the redraw when your extension already owns the screen.
 ```typescript
 // Skip prompt redraw — already handled by the extension
-ctx.advise("shell:on-processing-done", (next) => {
+ctx.advise("shell:on-processing-redraw", (next) => {
   if (mySessionActive) return;  // skip
-  return next();                // default: unpause + redraw
+  return next();                // default: redraw / re-enter input mode
 });
 ```
-> **Note:** `createRemoteSession()` advises both of these automatically. You only need to advise them directly if you're building custom lifecycle behavior without using remote sessions.
+### `shell:on-processing-done`
+Runs when the agent turn ends: it releases the agent-turn mute scope (unconditional state cleanup) and then calls `shell:on-processing-redraw`. **Don't advise this to return early** — skipping it would strand the mute scope past the end of the turn. Suppress the redraw via `shell:on-processing-redraw` instead.
+> **Note:** `createRemoteSession()` advises `shell:on-processing-start` and `shell:on-processing-redraw` automatically. You only need to advise them directly if you're building custom lifecycle behavior without using remote sessions.
 ## Rendering Architecture

package/docs/library.md CHANGED Viewed

@@ -23,8 +23,9 @@ import { activateAgent } from "agent-sh/agent";
 import { loadBuiltinExtensions } from "agent-sh/extensions";
 const core = createCore({
-  apiKey: process.env.OPENAI_API_KEY,
-  model: "gpt-4o",
+  // These are ash-backend config, not kernel config — see note below.
+  provider: "deepseek",                 // built-in provider → DeepSeek endpoint + deepseek-v4-flash default
+  apiKey: process.env.DEEPSEEK_API_KEY,
 });
 const ctx = core.extensionContext({ quit: () => process.exit(0) });
@@ -44,6 +45,8 @@ core.bus.emit("agent:submit", { query: "explain this codebase" });
 `createCore()` returns a headless kernel — the event bus and handler registry, with no terminal, shell, LLM, or agent attached. `activateAgent(ctx)` attaches the agent surface (tools, LLM client, providers) and registers the built-in `ash` backend; `loadBuiltinExtensions(ctx)` adds the abstract backend registry, slash commands, and file autocomplete. `core:extensions-loaded` triggers provider resolution; `activateBackend()` then starts ash (or whichever backend is configured). Send queries by emitting `agent:submit` and consume responses by listening to bus events.
+> **The LLM fields are backend config, not kernel config.** `createCore()` doesn't read `provider`/`apiKey`/`model`/`baseURL` — it stores the config object opaquely and re-exposes it through the `config:get-app-config` handler. The **ash** backend is the only consumer (`src/agent/index.ts`); it resolves the provider, key, and model from those fields. Under a different backend they're inert: `pi` reads `~/.pi/agent/settings.json`, `claude-code` uses its own SDK config — for those you pass `{ backend: "pi" }` (a real kernel field) and configure the model the backend's own way. The `AppConfig` type bundles kernel + agent + shell config into one object for convenience; the kernel only owns the `extensions` and `backend` keys (`CoreConfig`).
 Tools run without confirmation by default; to gate them, register tool advisors via `ctx.agent.adviseTool` (see examples/extensions/interactive-prompts.ts).
 ## AgentShellCore API
@@ -68,7 +71,7 @@ import { activateAgent } from "agent-sh/agent";
 import { loadBuiltinExtensions } from "agent-sh/extensions";
 import myTheme from "./my-theme";
-const core = createCore({ apiKey: "...", model: "gpt-4o" });
+const core = createCore({ provider: "deepseek", apiKey: process.env.DEEPSEEK_API_KEY });
 const ctx = core.extensionContext({ quit: () => process.exit(0) });
 activateAgent(ctx);

package/docs/troubleshooting.md CHANGED Viewed

@@ -18,7 +18,7 @@
 **Problem**: Tool calls not working (agent responds but doesn't use tools)
-**Solution**: Some models have limited or no tool/function calling support. Try a more capable model (e.g., gpt-4o, claude-sonnet-4-6 via OpenRouter).
+**Solution**: Some models have limited or no tool/function calling support. Try a more capable model (e.g., deepseek-v4-flash, or a larger model via OpenRouter).
 **Problem**: Garbled output, startup banner overwritten, or messy prompt rendering
@@ -54,7 +54,7 @@ Your normal p10k prompt still works — only the "flash cached prompt then redra
 Enable debug mode for detailed protocol logging:
 ```bash
-DEBUG=1 agent-sh --api-key "$KEY" --model gpt-4o
+DEBUG=1 DEEPSEEK_API_KEY="$KEY" agent-sh
 ```
 ## Getting Help

package/docs/tui-composition.md CHANGED Viewed

@@ -43,9 +43,11 @@ A `RenderSurface` is anything that can accept rendered output:
 ```typescript
 interface RenderSurface {
-  write(text: string): void;    // raw — supports \r, escape codes
+  write(text: string): void;     // raw — supports \r, escape codes
   writeLine(line: string): void; // line + newline
   readonly columns: number;      // available width
+  readonly rows: number;         // available height
+  onResize(cb: (cols: number, rows: number) => void): () => void; // subscribe; returns unsubscribe
 }
 ```
@@ -71,6 +73,12 @@ const panelSurface: RenderSurface = {
   },
   writeLine(line) { panel.appendLine(line); },
   get columns() { return panel.computeGeometry().contentW; },
+  get rows() { return panel.computeGeometry().contentH; },
+  onResize(cb) {
+    const handler = () => { const g = panel.computeGeometry(); cb(g.contentW, g.contentH); };
+    process.stdout.on("resize", handler);
+    return () => process.stdout.off("resize", handler);
+  },
 };
 ```
@@ -94,7 +102,7 @@ interface Compositor {
 | `"query"` | User query display (the bordered input box) |
 | `"status"` | Info messages, errors, suggestions |
-The shell frontend (`src/shell/`) sets all three to `StdoutSurface` during `activateShell`. A library or web consumer that doesn't load the shell frontend has no defaults — it must call `compositor.setDefault(...)` itself.
+The shell frontend (`src/shell/`) sets all three to a `surfaceFromTerminal(terminal)` surface (which writes through the host `Terminal`, ultimately stdout) during `activateShell`. A library or web consumer that doesn't load the shell frontend has no defaults — it must call `compositor.setDefault(...)` itself.
 ### Redirecting a stream
@@ -155,7 +163,7 @@ export default function activate(ctx: ExtensionContext): void {
     bus.emit("agent:submit", { query });
   });
-  panel.handlers.advise("panel:dismiss", (next) => {
+  panel.handlers.advise("panel:hide", (next) => {
     next();
     restoreAgent?.(); restoreAgent = null;
     restoreQuery?.(); restoreQuery = null;

package/docs/usage.md CHANGED Viewed

@@ -2,17 +2,17 @@
 ## Running agent-sh
-The simplest way to run agent-sh — just provide an API key and model:
+The simplest way to run agent-sh — just provide an API key:
 ```bash
-# Using environment variables
-OPENAI_API_KEY="your-key" agent-sh --model gpt-4o
+# DeepSeek is a built-in provider — set the key and go (defaults to deepseek-v4-flash)
+DEEPSEEK_API_KEY="your-key" agent-sh
-# Using CLI flags
-agent-sh --api-key "your-key" --base-url http://localhost:11434/v1 --model llama3
+# Any OpenAI-compatible endpoint via CLI flags (e.g. a local Ollama server)
+agent-sh --api-key "your-key" --base-url http://localhost:11434/v1 --model gemma4
 # Using npx
-npx agent-sh --api-key "$KEY" --model gpt-4o
+DEEPSEEK_API_KEY="your-key" npx agent-sh --model deepseek-v4-flash
 ```
 Environment variables `OPENAI_API_KEY` and `OPENAI_BASE_URL` are supported as alternatives to CLI flags.
@@ -30,13 +30,13 @@ agent-sh --backend pi
 npm run dev
 # Debug mode
-DEBUG=1 agent-sh --api-key "$KEY" --model gpt-4o
+DEBUG=1 DEEPSEEK_API_KEY="$KEY" agent-sh
 ```
 ### Subcommands
 ```bash
-agent-sh init                   # scaffold ~/.agent-sh/ (settings, examples, AGENTS.md)
+agent-sh init                   # scaffold ~/.agent-sh/ (settings.json + settings.example.json, extensions/ dir)
 agent-sh install <name>         # install a bundled extension (e.g. agent-sh install pi-bridge)
 agent-sh install ./path/to/ext  # install from a local path
 agent-sh uninstall <name>       # remove an installed extension
@@ -55,8 +55,8 @@ Any provider you declare under `providers` in `settings.json` is also accepted b
   "providers": {
     "my-llama": {
       "baseURL": "http://localhost:8000/v1",
-      "defaultModel": "llama-3.1-70b",
-      "models": ["llama-3.1-70b"]
+      "defaultModel": "gemma4",
+      "models": ["gemma4"]
     }
   }
 }
@@ -84,26 +84,26 @@ For unreleased changes on `main`, use the clone-and-link flow from the [Quick St
 agent-sh works with any OpenAI-compatible API. Here are common configurations:
-### OpenAI
+### DeepSeek
 ```bash
-export OPENAI_API_KEY="sk-..."
-agent-sh --model gpt-4o
-# or: agent-sh --model gpt-4o-mini
+export DEEPSEEK_API_KEY="sk-..."
+agent-sh   # defaults to deepseek-v4-flash
 ```
-### DeepSeek
+### OpenAI
 ```bash
-export DEEPSEEK_API_KEY="sk-..."
-agent-sh
+export OPENAI_API_KEY="sk-..."
+agent-sh --model gpt-5.4
+# or: agent-sh --model gpt-5.4-mini
 ```
 ### Ollama (Local)
 ```bash
 # No API key needed — Ollama doesn't require authentication
-agent-sh --api-key dummy --base-url http://localhost:11434/v1 --model llama3
+agent-sh --api-key dummy --base-url http://localhost:11434/v1 --model gemma4
 ```
 ### OpenRouter
@@ -111,7 +111,7 @@ agent-sh --api-key dummy --base-url http://localhost:11434/v1 --model llama3
 ```bash
 agent-sh --api-key "$OPENROUTER_KEY" \
   --base-url https://openrouter.ai/api/v1 \
-  --model anthropic/claude-sonnet-4-20250514
+  --model deepseek/deepseek-v4-flash
 ```
 ### Together AI
@@ -119,7 +119,7 @@ agent-sh --api-key "$OPENROUTER_KEY" \
 ```bash
 agent-sh --api-key "$TOGETHER_KEY" \
   --base-url https://api.together.xyz/v1 \
-  --model meta-llama/Llama-3-70b-chat-hf
+  --model deepseek-ai/DeepSeek-V3
 ```
 ### Groq
@@ -127,7 +127,7 @@ agent-sh --api-key "$TOGETHER_KEY" \
 ```bash
 agent-sh --api-key "$GROQ_KEY" \
   --base-url https://api.groq.com/openai/v1 \
-  --model llama-3.3-70b-versatile
+  --model deepseek-r1-distill-llama-70b
 ```
 ### LM Studio
@@ -135,7 +135,7 @@ agent-sh --api-key "$GROQ_KEY" \
 ```bash
 agent-sh --api-key dummy \
   --base-url http://localhost:1234/v1 \
-  --model local-model
+  --model mimo
 ```
 ### vLLM
@@ -143,7 +143,7 @@ agent-sh --api-key dummy \
 ```bash
 agent-sh --api-key dummy \
   --base-url http://localhost:8000/v1 \
-  --model your-model
+  --model deepseek-v4-flash
 ```
 ## Using agent-sh as Your Default Shell
@@ -152,7 +152,7 @@ Add to the end of your `~/.zshrc` or `~/.bashrc`:
 ```bash
 if [[ -z "$AGENT_SH" && $- == *i* && -t 0 ]]; then
-  exec agent-sh --api-key "$OPENAI_API_KEY" --model gpt-4o
+  exec agent-sh   # uses DEEPSEEK_API_KEY from your env (deepseek-v4-flash)
 fi
 ```
@@ -168,27 +168,29 @@ Instead of passing `--api-key` and `--base-url` every time, define named provide
 ```json
 {
-  "defaultProvider": "openai",
+  "defaultProvider": "deepseek",
   "providers": {
-    "openai": {
-      "apiKey": "$OPENAI_API_KEY",
-      "defaultModel": "gpt-4o",
-      "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"],
-      "contextWindow": 128000
+    "deepseek": {
+      "apiKey": "$DEEPSEEK_API_KEY",
+      "defaultModel": "deepseek-v4-flash",
+      "models": ["deepseek-v4-flash", "deepseek-v4-pro"]
     },
     "ollama": {
       "apiKey": "not-needed",
       "baseURL": "http://localhost:11434/v1",
-      "defaultModel": "llama3",
-      "models": ["llama3", "mistral", "codellama"]
+      "defaultModel": "gemma4",
+      "models": [
+        "mimo",
+        { "id": "gemma4", "contextWindow": 128000, "modalities": ["text", "image"] }
+      ]
     },
     "openrouter": {
       "apiKey": "$OPENROUTER_KEY",
       "baseURL": "https://openrouter.ai/api/v1",
-      "defaultModel": "anthropic/claude-sonnet-4.5",
+      "defaultModel": "deepseek/deepseek-v4-flash",
       "models": [
-        { "id": "anthropic/claude-sonnet-4.5", "contextWindow": 200000, "reasoning": true },
-        { "id": "google/gemini-2.5-pro",       "contextWindow": 1000000 }
+        { "id": "deepseek/deepseek-v4-flash", "contextWindow": 1000000, "reasoning": true },
+        { "id": "deepseek/deepseek-v4-pro",   "contextWindow": 1048576, "reasoning": true }
       ]
     }
   }
@@ -198,32 +200,50 @@ Instead of passing `--api-key` and `--base-url` every time, define named provide
 Then just run:
 ```bash
-agent-sh                          # uses defaultProvider
+agent-sh                          # uses defaultProvider (deepseek)
 agent-sh --provider ollama        # use a specific provider
-agent-sh --provider openai --model gpt-4-turbo  # override the default model
+agent-sh --provider ollama --model gemma4  # override the default model
 ```
 The `apiKey` field supports `$ENV_VAR` and `${ENV_VAR}` syntax — variables are expanded at runtime, so you don't store secrets in the file.
-### Declaring the context window
-agent-sh adapts its auto-compaction trigger to the model's context window. There are two places to declare it:
-- **Provider-level `contextWindow`** — applies to every model in that provider unless a more specific value is set.
-- **Per-model `contextWindow`** (inside an entry of `models`) — overrides the provider-level value for a specific model, and also lets you tag reasoning-capable models via `reasoning: true`.
+### Declaring model capabilities
-If neither is set, agent-sh falls back to a conservative 60k-token default.
-Entries in `models` can be plain strings (just the model id, uses the provider-level `contextWindow`) or objects:
+Entries in a provider's `models` list can be plain strings (just the id) or objects that declare what the model can do. agent-sh uses these to size its context budget, cap output, route reasoning, and enable image input. Every field except `id` is optional.
 ```json
 "models": [
-  "gpt-4o-mini",
-  { "id": "gpt-4o",    "contextWindow": 128000 },
-  { "id": "o1-preview", "contextWindow": 128000, "reasoning": true }
+  "deepseek-v4-flash",
+  {
+    "id": "gemma4",
+    "contextWindow": 128000,
+    "maxTokens": 8192,
+    "modalities": ["text", "image"]
+  },
+  { "id": "mimo",            "reasoning": true },
+  { "id": "deepseek-v4-pro", "contextWindow": 1000000, "reasoning": true, "echoReasoning": true }
 ]
 ```
+| Field | Type | Default | Effect |
+|---|---|---|---|
+| `id` | `string` | — | Model identifier sent to the API (required). |
+| `contextWindow` | `number` | provider-level `contextWindow`, else `60000` | Total token budget. Drives the `/context` display and the `autoCompactThreshold` auto-compaction trigger. |
+| `maxTokens` | `number` | 40% of this model's `contextWindow` capped at `65536`, else `65536` | Max output (completion) tokens requested per turn. |
+| `reasoning` | `boolean` | `false` | Marks the model as thinking-capable, so `/thinking` levels apply to it. |
+| `modalities` | `("text" \| "image")[]` | `["text"]` | Input modalities. Include `"image"` to let the agent read image files (PNG/JPEG/GIF/WebP) with `read_file`; without it, attached images are dropped before the request. |
+| `echoReasoning` | `boolean` | `false` | Echo `reasoning_content` back on assistant turns. Required by DeepSeek's reasoner; leave off otherwise (leaky proxies may forward it to the model as malformed input). |
+A plain-string entry inherits the provider-level values and the defaults above. These provider-level fields apply to every model unless a per-model entry overrides them:
+| Provider field | Effect |
+|---|---|
+| `contextWindow` | Fallback context window for models that don't declare their own. |
+| `reasoningShape` | Borrow another registered provider's reasoning-request shape by id (e.g. `"openrouter"`). Defaults to the OpenAI-compatible shape. |
+| `echoReasoningPatterns` | Case-insensitive regex sources matched against model ids; a match defaults that model to `echoReasoning: true` (a per-model `echoReasoning` still wins). |
+If neither level declares a `contextWindow`, agent-sh falls back to a conservative 60k-token budget. Override that fallback globally with the `AGENT_SH_DEFAULT_CONTEXT_WINDOW` environment variable (a positive integer; ignored otherwise).
 ### Switching models at runtime
 - **`/model`** — show the current model
@@ -274,7 +294,7 @@ Switching mid-conversation preserves your conversation state — only the LLM en
 On launch, agent-sh displays a structured startup banner showing:
 - **Backend** — which agent backend is active (`ash`, `claude-code`, `pi`, etc.)
-- **Model** — current model with provider in brackets (e.g. `gpt-4o [openai]`)
+- **Model** — current model with provider in brackets (e.g. `deepseek-v4-flash [deepseek]`)
 - **Extensions** — loaded extensions (from CLI `-e`, settings, or `~/.agent-sh/extensions/`)
 - **Skills** — discovered skills (global + project)

package/examples/extensions/ashi/src/chat/assistant.ts CHANGED Viewed

@@ -7,6 +7,8 @@ export type RenderBlock =
 export type ContentTransform = (blocks: RenderBlock[]) => RenderBlock[];
+const stripTrailing = (s: string): string => s.replace(/\s+$/, "");
 export class AssistantMessage {
   readonly node: RenderNode;
   private container: ContainerView;
@@ -23,20 +25,20 @@ export class AssistantMessage {
   appendText(t: string): void {
     this.buffer += t;
-    this.md.setText(this.buffer);
+    this.md.setText(stripTrailing(this.buffer));
   }
   appendCodeBlock(language: string, code: string): void {
     const prefix = this.buffer && !this.buffer.endsWith("\n") ? "\n" : "";
     this.buffer += `${prefix}\`\`\`${language}\n${code}\n\`\`\`\n`;
-    this.md.setText(this.buffer);
+    this.md.setText(stripTrailing(this.buffer));
   }
   finalize(): void {
     if (this.buffer === "") this.buffer = " ";
     const blocks = this.transform([{ type: "text", text: this.buffer }]);
     if (blocks.every((b) => b.type === "text")) {
-      this.md.setText(this.buffer);
+      this.md.setText(stripTrailing(this.buffer));
       return;
     }
     this.rebuild(blocks);
@@ -55,7 +57,7 @@ export class AssistantMessage {
         this.container.addChild(m.node);
       } else if (block.text.trim()) {
         const m = this.nodes.markdown({ paddingX: 1 });
-        m.setText(block.text);
+        m.setText(stripTrailing(block.text));
         this.container.addChild(m.node);
       }
     }