npm - agent-sh - Versions diffs - 0.15.0 → 0.15.1 - Mend

agent-sh 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/docs/README.md +14 -0
package/docs/agent.md +398 -0
package/docs/architecture.md +196 -0
package/docs/context-management.md +200 -0
package/docs/extensions.md +951 -0
package/docs/library.md +84 -0
package/docs/troubleshooting.md +65 -0
package/docs/tui-composition.md +294 -0
package/docs/usage.md +306 -0
package/examples/extensions/ash-scheme/package.json +1 -1
package/examples/extensions/ashi/EXTENDING.md +2 -2
package/examples/extensions/ashi/README.md +2 -2
package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
package/examples/extensions/ashi/package.json +5 -3
package/examples/extensions/ashi/src/cli.ts +6 -5
package/examples/extensions/ashi/src/renderer.ts +22 -2
package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
package/examples/extensions/ashi-ink/package.json +2 -2
package/examples/extensions/claude-code-bridge/package.json +1 -1
package/examples/extensions/opencode-bridge/package.json +1 -1
package/package.json +3 -1
package/src/agent/agent-loop.ts +1563 -0
package/src/agent/entry-format.ts +19 -0
package/src/agent/events.ts +151 -0
package/src/agent/extensions/rolling-history/constants.ts +1 -0
package/src/agent/extensions/rolling-history/index.ts +202 -0
package/src/agent/extensions/rolling-history/recall.ts +131 -0
package/src/agent/extensions/rolling-history/strategy.ts +404 -0
package/src/agent/host-types.ts +192 -0
package/src/agent/index.ts +591 -0
package/src/agent/live-view.ts +279 -0
package/src/agent/llm-client.ts +111 -0
package/src/agent/llm-facade.ts +43 -0
package/src/agent/normalize-args.ts +61 -0
package/src/agent/nuclear-form.ts +382 -0
package/src/agent/providers/deepseek.ts +39 -0
package/src/agent/providers/ollama.ts +92 -0
package/src/agent/providers/openai-compatible.ts +36 -0
package/src/agent/providers/openai.ts +52 -0
package/src/agent/providers/opencode.ts +142 -0
package/src/agent/providers/openrouter.ts +105 -0
package/src/agent/providers/zai-coding-plan.ts +33 -0
package/src/agent/session-store.ts +336 -0
package/src/agent/skills.ts +228 -0
package/src/agent/store.ts +310 -0
package/src/agent/subagent.ts +305 -0
package/src/agent/system-prompt.ts +151 -0
package/src/agent/token-budget.ts +12 -0
package/src/agent/tool-protocol.ts +722 -0
package/src/agent/tool-registry.ts +66 -0
package/src/agent/tools/bash.ts +95 -0
package/src/agent/tools/edit-file.ts +154 -0
package/src/agent/tools/expand-home.ts +7 -0
package/src/agent/tools/glob.ts +108 -0
package/src/agent/tools/grep.ts +228 -0
package/src/agent/tools/list-skills.ts +37 -0
package/src/agent/tools/ls.ts +81 -0
package/src/agent/tools/pwsh.ts +140 -0
package/src/agent/tools/read-file.ts +164 -0
package/src/agent/tools/write-file.ts +72 -0
package/src/agent/types.ts +149 -0
package/src/cli/args.ts +91 -0
package/src/cli/auth/cli.ts +244 -0
package/src/cli/auth/discover.ts +52 -0
package/src/cli/auth/keys.ts +143 -0
package/src/cli/index.ts +295 -0
package/src/cli/init.ts +74 -0
package/src/cli/install.ts +439 -0
package/src/cli/shell-env.ts +68 -0
package/src/cli/subcommands.ts +24 -0
package/src/core/event-bus.ts +252 -0
package/src/core/extension-loader.ts +347 -0
package/src/core/index.ts +152 -0
package/src/core/settings.ts +398 -0
package/src/core/types.ts +61 -0
package/src/extensions/file-autocomplete.ts +71 -0
package/src/extensions/index.ts +38 -0
package/src/extensions/slash-commands/events.ts +14 -0
package/src/extensions/slash-commands/index.ts +269 -0
package/src/shell/events.ts +73 -0
package/src/shell/host-types.ts +150 -0
package/src/shell/index.ts +159 -0
package/src/shell/input-handler.ts +505 -0
package/src/shell/output-parser.ts +156 -0
package/src/shell/shell-context.ts +193 -0
package/src/shell/shell.ts +414 -0
package/src/shell/strategies/bash.ts +83 -0
package/src/shell/strategies/fish.ts +77 -0
package/src/shell/strategies/index.ts +24 -0
package/src/shell/strategies/types.ts +64 -0
package/src/shell/strategies/zsh.ts +92 -0
package/src/shell/terminal.ts +124 -0
package/src/shell/tui-input-view.ts +222 -0
package/src/shell/tui-renderer.ts +1126 -0
package/src/utils/ansi.ts +140 -0
package/src/utils/box-frame.ts +138 -0
package/src/utils/compositor.ts +157 -0
package/src/utils/diff-renderer.ts +829 -0
package/src/utils/diff.ts +244 -0
package/src/utils/executor.ts +305 -0
package/src/utils/file-watcher.ts +110 -0
package/src/utils/floating-panel.ts +1160 -0
package/src/utils/handler-registry.ts +110 -0
package/src/utils/line-editor.ts +636 -0
package/src/utils/markdown.ts +437 -0
package/src/utils/message-utils.ts +113 -0
package/src/utils/package-version.ts +12 -0
package/src/utils/palette.ts +64 -0
package/src/utils/ref-counter.ts +9 -0
package/src/utils/ripgrep-path.ts +17 -0
package/src/utils/shell-output-spill.ts +76 -0
package/src/utils/stream-transform.ts +292 -0
package/src/utils/terminal-buffer.ts +213 -0
package/src/utils/tool-display.ts +315 -0
package/src/utils/tool-interactive.ts +71 -0
package/src/utils/tty.ts +14 -0

package/docs/README.md ADDED Viewed

@@ -0,0 +1,14 @@
+# agent-sh Documentation
+Start with **Usage** to get running, then **Architecture** for the mental model. Everything else builds on those two.
+## Guides
+1. [Usage Guide](usage.md) — install, run, configure providers and models
+2. [Architecture](architecture.md) — pure kernel + extensions, the shell ↔ agent boundary, project structure
+3. [The Built-in Agent: ash](agent.md) — how the default backend works: query flow, tools, system prompt, model switching
+4. [Context Management](context-management.md) — shell-output spill, three-tier conversation compaction, recall APIs
+5. [Extensions](extensions.md) — event bus, content transforms, custom agent backends, theming
+6. [TUI Composition](tui-composition.md) — compositor, render surfaces, stream routing
+7. [Library Usage](library.md) — embedding agent-sh in your own apps
+8. [Troubleshooting](troubleshooting.md) — common errors and debug mode

package/docs/agent.md ADDED Viewed

@@ -0,0 +1,398 @@
+# The Built-in Agent: ash
+agent-sh is designed to be backend-agnostic. The agent that drives a query — assembling context, calling an LLM, executing tools in a loop — is a replaceable component. Any extension can register a backend via `agent:register-backend` and become the default via the `defaultBackend` setting or the `/backend` slash command. Bridge backends like `claude-code` and `pi` plug external CLI agents into the same shell and TUI surface.
+This document describes **ash**, the built-in backend that ships with agent-sh. The agent host (`src/agent/index.ts`) is activated unconditionally via `activateAgent(ctx)` — it attaches the `ctx.agent` surface, registers core tools, and emits `agent:register-backend` to register `ash` with the core's backend registry. ash only *activates* (constructs its `AgentLoop` and starts handling queries) when an LLM provider has both an apiKey and a model resolved, and `activateBackend("ash")` runs. It resolves providers from registered catalogs + settings overlay, configures an `LlmClient`, and calls any OpenAI-compatible API directly. It manages conversation state and executes tools in a loop until the LLM is done.
+If you're looking to write your own backend instead of reading how ash works internally, see [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends).
+## The Query Flow
+Here's what happens when you submit a query:
+```
+User types "> fix the failing test"
+  │
+  ├─ 1. Context assembly — gather recent shell commands, output, cwd
+  ├─ 2. System prompt (cached per cwd) + dynamic context (rebuilt every LLM call)
+  ├─ 3. LLM call — stream response from the API
+  ├─ 4. Tool loop — if LLM requested tool calls:
+  │     ├─ Execute each tool (with permission check if needed)
+  │     ├─ Add results to conversation
+  │     └─ Go back to step 3 (LLM sees tool results, decides next action)
+  └─ 5. Done — no more tool calls, emit response
+```
+The key insight: **the agent is a loop, not a single call**. The LLM calls tools, sees results, calls more tools, until it has enough information to respond. A single query might trigger dozens of LLM calls and tool executions.
+## Context Assembly
+Every query draws on two distinct streams of context:
+- **Shell context** — the user's terminal activity (commands + outputs) plus the live cwd. This is what lets ash understand "fix this" after you ran a failing command, and what keeps it anchored in the right working directory across compactions. The current cwd is wrapped as `<cwd>` (always) and new shell activity since the last turn as `<shell_events>` (when there is any), both nested inside the per-query `<query_context>` envelope and prepended to your user message.
+- **Conversation state** — the OpenAI chat messages array (`user`/`assistant`/`tool` messages). This is the LLM's memory of what it already said and did within this session.
+The two streams don't overlap: agent tool outputs live only in the conversation, and shell context tracks only user-initiated activity. When either stream grows large, ash has escape hatches rather than silent truncation:
+- **Long shell outputs** are spilled to tempfiles (`<tmpdir>/agent-sh-<pid>/<id>.out`) at capture time. The LLM sees a head+tail stub with the path and recovers the full output via plain `read_file`.
+- **Older conversation turns** are compacted by the built-in `rolling-history` extension: each is nucleated into a one-line summary in a persistent store (`~/.agent-sh/rolling-history/history.jsonl`), with the full message kept in an ephemeral per-session cache. The `conversation_recall` tool browses, searches, and expands those entries.
+Compaction is pluggable: the `conversation:compact` handler is advisable, so extensions can install richer strategies without changing the recall surface. See [Context Management](context-management.md) for the full design.
+## System Prompt
+The system prompt is assembled once per `cwd` and cached (invalidated when the working directory changes), so the prefix is stable for provider-side prompt caching. It includes:
+1. **Identity** — "You are an AI coding assistant running inside agent-sh..."
+2. **Tool decision guide** — when to use which built-in tool
+3. **Tool usage guidelines** — read before editing, prefer edit over write, use grep/glob to find files, etc.
+4. **Project conventions** — `CLAUDE.md`/`AGENT.md` walked from cwd to root (cwd-stable; see next section)
+5. **Skills** — discovered project/global skills (cwd-stable)
+6. **Extension instructions** — blocks registered by extensions via `registerInstruction()` (e.g. proactive recall guidance)
+7. **Available tools** — name + description of every registered tool
+8. **Extension-appended content** — extensions can advise `system-prompt:build` to append additional context (instance IDs, memory files, etc.)
+Per-turn signals live in two symmetric handlers, both empty by default:
+- **`query-context:build`** — fires once at user-query start. Output is wrapped in `<query_context>` and frozen into the user message, so it persists in conversation history. Shell context is the canonical example (`<cwd>` always, `<shell_events>` when there is fresh activity); other "what happened between turns" signals (notifications, calendar/inbox deltas) go here too.
+- **`dynamic-context:build`** — fires on every LLM call (each tool-loop iteration). Output is wrapped in `<dynamic_context>` and ephemerally prepended to the trailing message at request time, so the cacheable prefix stays byte-stable. Use for "current state" signals: in-flight subagents, threshold warnings, active mode markers.
+Extensions populate either handler via `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" | "per-request" })`. When no producer contributes, no envelope tag is emitted at all — vanilla sessions send exactly `[system, ...history]`.
+## Project Conventions
+The agent automatically loads `CLAUDE.md` or `AGENT.md` files from your working directory hierarchy. These are included in the system prompt on every query, so the agent respects project-specific conventions without being told each time.
+The agent scans from your current directory upward to the filesystem root. In each directory it checks for `CLAUDE.md` first, then `AGENT.md` as a fallback (only one per directory). Files are included root-first, so more specific project conventions appear last and take precedence.
+```
+~/projects/myapp/src/        ← cwd
+~/projects/myapp/CLAUDE.md   ← included (project-level)
+~/CLAUDE.md                  ← included first (global conventions)
+```
+Because the cached system prompt is invalidated whenever the working directory changes, `cd`-ing to a new project picks up its conventions automatically.
+This follows the same convention as Claude Code — if you already have `CLAUDE.md` files, they work out of the box.
+## Skills
+Skills are reusable instruction sets that the agent can load on demand. They follow the [Agent Skills standard](https://agentskills.io/specification).
+### Skill format
+A skill is a directory containing a `SKILL.md` file with YAML frontmatter:
+```markdown
+---
+name: docker-deploy
+description: Build and deploy Docker containers to production
+---
+# Docker Deploy
+## Steps
+1. Build the image: `docker build -t app .`
+2. Tag for registry: `docker tag app registry.example.com/app:latest`
+3. Push: `docker push registry.example.com/app:latest`
+...
+```
+The `name` and `description` fields are required. An optional `disable-model-invocation: true` hides the skill from the agent's automatic discovery.
+### Discovery
+**Global skills** are discovered from `~/.agent-sh/skills/` by default. Add more locations via `skillPaths` in `~/.agent-sh/settings.json`:
+```json
+{
+  "skillPaths": ["~/.agents/skills", "~/.claude/skills"]
+}
+```
+**Project skills** are discovered from `.agents/skills/` in your working directory hierarchy (up to the git root). When you `cd` into a directory with new project skills, the agent is notified with their names.
+### How the agent uses skills
+Only skill **metadata** (name, description, file path) is included in the system prompt — not the full skill content. This keeps the prompt small regardless of how many skills you have.
+1. The system prompt lists available skills with their descriptions and paths
+2. The agent decides which skill is relevant (no extra round-trip needed)
+3. The agent calls `read_file` on the skill's `SKILL.md` to load full instructions when ready to use it
+The `list_skills` tool is also available for broader discovery.
+### Slash command
+Users can force-load a skill directly:
+```
+> /skill:docker-deploy
+> /skill:docker-deploy deploy the staging branch
+```
+This injects the full skill content into the conversation. Tab completion works for skill names.
+## The Tool Loop
+This is the core of how the agent works. After each LLM call, the agent checks if the response includes tool calls. If yes, it executes them and feeds the results back to the LLM.
+```
+LLM response
+  ├─ Text only → done, emit response
+  └─ Tool calls → for each tool call:
+       ├─ Look up tool in registry
+       ├─ Execute via the `tool:<name>` handler chain (advisors can wrap)
+       ├─ Emit tool events (tool-started, tool-output-chunk, tool-completed)
+       ├─ Add tool result to conversation
+       └─ After all tools: call LLM again with updated conversation
+```
+The loop continues until the LLM returns a response with no tool calls. There's no hard limit on iterations — the LLM decides when it's done.
+### Permission gating
+The kernel has no opinion on permission. By default every tool runs (yolo
+mode). Gating extensions register tool advisors via `ctx.agent.adviseTool(name, ...)`
+to interpose a confirmation prompt, audit log, or policy check before calling
+`next(args, onChunk, ctx)`. See `examples/extensions/interactive-prompts.ts`
+for a reference implementation that gates `bash`, `pwsh`, `write_file`, and
+`edit_file`.
+## Built-in Tools
+Core tools are registered when `activateAgent(ctx)` runs — *before* extensions load. This means extensions can look up or advise tools at their own activate time. Additional tools come from extensions in `~/.agent-sh/extensions/`.
+### bash
+The primary tool for investigation and code execution. **`bash`** runs in an **isolated subprocess** (`/bin/bash -c`). The agent uses this for reading files, running tests, checking state, and executing commands. A `cd` here doesn't affect your shell. Output is captured and returned to the LLM.
+Extensions can add tools that cross the shell↔agent boundary via `shell:exec-request` — for example, running commands with lasting effects in the live PTY (`cd`, `export`, `source`). We don't include such a tool as built-in because the right behavior depends on user preference. See `examples/extensions/user-shell.ts` for a ready-made implementation to start from.
+### All tools
+| Tool | Purpose | Side effects |
+|---|---|---|
+| `bash` | Run commands in isolated subprocess | Yes |
+| `read_file` | Read file contents (line-numbered, with offset/limit) | No |
+| `write_file` | Create or overwrite a file | Yes |
+| `edit_file` | Find-and-replace in a file (old_text → new_text) | Yes |
+| `grep` | Search file contents with regex (via ripgrep) | No |
+| `glob` | Find files by name pattern | No |
+| `ls` | List directory contents (with timestamps and sizes) | No |
+| `list_skills` | List available skills (name, description, path) | No |
+| `conversation_recall`\* | Browse/search/expand evicted conversation turns from the rolling-history store | No |
+\* `conversation_recall` is not a core tool — it's registered by the built-in `rolling-history` extension, so it's absent under headless/bridge backends. Every other row is a core tool registered by `activateAgent(ctx)`.
+**Common pattern**: all file-based tools resolve relative paths from the current working directory, looked up via the `cwd` handler (`ctx.call("cwd")`). The shell-context built-in advises this with the PTY-tracked cwd; without it, tools fall back to `process.cwd()`.
+### Tool-specific enhancements
+**`grep`** supports three output modes and pagination:
+- `output_mode`: `files_with_matches` (default, file paths only), `content` (matching lines with optional `context_before`/`context_after`), or `count` (match counts per file)
+- `case_insensitive`: case-insensitive search
+- `head_limit` / `offset`: pagination — default limits are 200 entries for `files_with_matches`, 150 for `content`/`count`. Pass `head_limit=0` for unlimited. Long lines in `content` mode are capped at 500 characters.
+**`read_file`** deduplicates reads:
+- Tracks file modification time. If a file hasn't changed since the last read (same offset/limit), returns a stub instead of re-reading — saves context tokens.
+- Files over 2MB require `offset` and `limit` to prevent OOM.
+- Cache is automatically invalidated when a file-modifying tool (`write_file`, `edit_file`) succeeds on the same path.
+**`edit_file`** provides diagnostic hints:
+- When `old_text` isn't found, the tool searches for the closest match and suggests fixes (e.g. whitespace differences, wrong line location).
+**`glob`** returns results sorted by modification time (newest first), capped at 200 files.
+**`ls`** returns formatted output with timestamps (YYYY-MM-DD HH:MM) and human-readable file sizes.
+### Tool batching and parallel execution
+When the LLM requests multiple tool calls in a single response, the agent groups and executes them efficiently:
+1. **Batch event** — before execution, the agent emits `agent:tool-batch` with tools grouped by kind (`read`, `search`, `execute`, etc.). The TUI uses this to render group headers with tree-style connectors.
+2. **Parallel execution** — side-effect-free tools (`modifiesFiles` unset) run in parallel via `Promise.all`. Side-effecting tools run sequentially.
+3. **Output truncation** — tool results over 16KB (~4K tokens) are head+tail truncated before being added to the conversation, preventing a single tool call from blowing through the context window.
+### Structured result display
+Tools can provide structured result information for the TUI via two optional methods on `ToolDefinition`:
+- **`formatCall(args)`** — returns a short display string when the tool is called (e.g. the file path or search pattern). Shown in the TUI next to the tool icon.
+- **`formatResult(args, result)`** — returns a `ToolResultDisplay` with an optional `summary` string (e.g. "42 files", "cached") and an optional structured `body` for richer rendering (diffs, line lists). The TUI's `render:result-body` handler renders the body — extensions can advise it.
+### Retry and error handling
+The agent retries transient failures with exponential backoff:
+- **Context overflow** — compacts the conversation and retries immediately
+- **Rate limits (429)** — respects `Retry-After` header, otherwise backs off exponentially
+- **Transient errors (500/502/503, network)** — exponential backoff (1s, 2s, 4s..., capped at 30s), up to 3 retries
+- **Non-retryable errors** — reported with provider-aware context (model name, endpoint, actionable hints)
+### Thinking levels
+The agent supports configurable thinking/reasoning levels for models that support `reasoning_effort`:
+- Levels: `off` (default), `low`, `medium`, `high`, `xhigh` (`xhigh` falls back to `high` on providers that don't support it)
+- Set via the `config:set-thinking` event (wired to `/thinking` slash command)
+- Query current state via `config:get-thinking` pipe
+- The agent validates that the current model/provider supports reasoning before enabling
+### Echoing reasoning back to the model
+DeepSeek-family models require their previous-turn `reasoning_content` / `reasoning_details` to be echoed back on the next assistant message. Most other reasoning models do **not** want that — feeding prior chain-of-thought back through lenient OpenAI-compatible shims can register as out-of-distribution input and degrade quality.
+agent-sh gates this behavior per-model via the `Model.echoReasoning` flag (default `false`). Reasoning extras are only attached to assistant messages when the active model opts in.
+For OpenRouter, the flag is set automatically: model ids matching the built-in pattern `/deepseek/i` (V3, V3.2, V4, rebadges) get `echoReasoning: true`. You can extend or override this in `settings.json`:
+```json
+{
+  "providers": {
+    "openrouter": {
+      "echoReasoningPatterns": ["my-custom-deepseek-fork"],
+      "models": [
+        { "id": "deepseek/deepseek-v3.2", "echoReasoning": false },
+        { "id": "openai/gpt-5.5",         "reasoning": true }
+      ]
+    }
+  }
+}
+```
+- `echoReasoningPatterns` — regex **source strings** (no `/.../` delimiters), each compiled with the case-insensitive flag and tested against the model id. Examples: `"deepseek"` (substring match), `"^vendor/.*-r1$"` (anchored). Merged with the built-in `deepseek` pattern.
+- Per-model `echoReasoning` — explicit boolean override that always wins over patterns.
+- Invalid regexes are silently skipped, so a typo can't break provider registration.
+### Tool interface
+Every tool implements this interface:
+```typescript
+interface ToolDefinition {
+  name: string;
+  displayName?: string;           // short label for TUI (defaults to name)
+  description: string;
+  input_schema: Record<string, unknown>;  // JSON Schema for parameters
+  execute(
+    args: Record<string, unknown>,
+    onChunk?: (chunk: string) => void,    // optional streaming callback
+  ): Promise<ToolResult>;
+  modifiesFiles?: boolean;        // has side effects (skips caching + parallel execution)
+  readOnly?: boolean;             // safe to run without nuclear-form gating (registry tags accordingly)
+  showOutput?: boolean;           // stream output to TUI (default: true)
+  // Display hooks (all optional)
+  getDisplayInfo?: (args) => ToolDisplayInfo;  // icon, kind, file locations
+  formatCall?: (args) => string;               // short call summary for TUI
+  formatResult?: (args, result) => ToolResultDisplay;  // structured result
+}
+interface ToolResult {
+  content: string;       // text returned to the LLM
+  exitCode: number | null;
+  isError: boolean;
+}
+interface ToolResultDisplay {
+  summary?: string;      // one-line (e.g. "42 files", "+3/-1")
+  body?: ToolResultBody; // structured content for richer rendering
+}
+type ToolResultBody =
+  | { kind: "diff"; diff: unknown; filePath: string }
+  | { kind: "lines"; lines: string[]; maxLines?: number }
+interface ToolDisplayInfo {
+  kind: "read" | "write" | "execute" | "search";
+  locations?: { path: string; line?: number | null }[];
+  icon?: string;         // custom icon (e.g. "◆", "⌕")
+}
+```
+The `onChunk` callback enables streaming tool output to the TUI in real-time (used by `bash`). Tools that don't stream (like `read_file`) just return the final result. Extensions can wrap `onChunk` via the `tool:execute` handler to intercept or transform streamed output (e.g. secret redaction).
+## Streaming
+Response streaming has two phases:
+**Phase 1 — LLM stream**: The agent iterates chunks from the OpenAI-compatible streaming API. Each chunk can contain:
+- `delta.content` — response text
+- `delta.tool_calls` — tool call arguments (streamed incrementally, parsed by index)
+- `delta.reasoning_content` — thinking/reasoning tokens (non-standard, used by models like DeepSeek-R1)
+**Phase 2 — Content transform pipeline**: Text chunks are emitted via `bus.emitTransform("agent:response-chunk", { blocks })`. This runs the content through the extension transform pipeline (parsers, post-transforms) before the renderer sees it. See [Extensions: Content Transform Pipeline](extensions.md#content-transform-pipeline).
+The agent accumulates the full response text separately for the final `agent:response-done` event.
+## Conversation State
+The conversation state is an OpenAI-compatible chat messages array. Each query adds messages:
+```
+User submits query     → { role: "user", content: "fix the test" }
+LLM responds with text → { role: "assistant", content: "I'll look at..." }
+LLM requests tool call → { role: "assistant", tool_calls: [...] }
+Tool returns result    → { role: "tool", tool_call_id: "...", content: "..." }
+```
+This array grows with every turn. To prevent context overflow, ash auto-compacts when estimated prompt tokens cross `autoCompactThreshold` (default 0.5) of the model's usable context window.
+### Auto-compaction
+Before each LLM call, ash estimates the total prompt tokens. If it's over the threshold, it invokes the `conversation:compact` handler to free space, then proceeds. If the API still returns a context-overflow error, ash compacts more aggressively and retries once; if compaction frees nothing, it aborts rather than looping.
+The default compaction strategy evicts older turns into the nuclear archive and leaves a bridge note; `conversation_recall` can bring them back on demand. See [Context Management](context-management.md#conversation-compaction) for the three-tier design and how to swap the strategy.
+The user can also trigger compaction manually with `/compact`.
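+**Note**: reasoning/thinking tokens from the LLM stream are emitted as `agent:thinking-chunk` events for display but, by default, are **not stored in conversation state**. They're ephemeral — the LLM doesn't see its own reasoning on the next turn. The exception is a model with `echoReasoning: true` (e.g. DeepSeek-family models), whose reasoning extras are attached to assistant messages; see [Echoing reasoning back to the model](#echoing-reasoning-back-to-the-model).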
+## Provider Profiles & Model Switching
+ash supports multiple models and providers, switchable at runtime.
+### Models
+Each entry is a `(provider, model)` target — a serializable identity plus capabilities:
+```typescript
+interface Model {
+  id: string;               // model id, e.g. "openai/gpt-5"
+  provider: string;         // identity is the (provider, id) pair
+  contextWindow?: number;   // per-model context window override; the auto-compact threshold is a fraction of it
+  maxTokens?: number;
+  reasoning?: boolean;
+  supportsReasoningEffort?: boolean;
+  echoReasoning?: boolean;
+  modalities?: ("text" | "image")[];
+}
+```
+The credentials and provider-shape transforms needed to actually call a model — `apiKey`/`baseURL` plus the reasoning/cache encoders — live in a separate `ModelEndpoint`, resolved internally by `(provider, id)`. It never travels on a frontend-facing event, so `Model` stays secret-free and serializable.
+`agent:get-models` returns the catalog; `agent:models-changed` fires when it changes. When all models share the same provider, switching just changes the model. When they span providers (e.g. OpenAI + Anthropic via OpenRouter), switching also reconfigures the LLM client with the new endpoint's credentials and base URL.
+### Switching
+- **`/model`** — show the current model
+- **`/model <name>`** — switch to a specific model (may cross providers; credentials and base URL are reconfigured automatically)
+The current model is shown in the TUI prompt. Switching mid-conversation preserves the conversation state — only the LLM endpoint changes.
+To swap the backend itself (e.g. to `claude-code` or `pi`), use `/backend <name>` or set `defaultBackend` in settings.
+## Extension Tools
+Extensions can register custom tools via `ctx.agent.registerTool()`. These appear alongside built-in tools and follow the same `ToolDefinition` interface. This only works with the built-in `ash` backend — bridge backends manage their own tools.
+See [Extensions: ExtensionContext API](extensions.md#extensioncontext-api) for the interface and [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for writing backend extensions.

package/docs/architecture.md ADDED Viewed

@@ -0,0 +1,196 @@
+# Architecture
+agent-sh is a composable agent runtime: a pure kernel that any frontend can drive and any agent backend can plug into, over one shared extension layer. Frontends and backends are both bus-driven components that self-wire to events — the bundled shell is just one frontend among several.
+## Design Philosophy: Pure Kernel + Everything Is an Extension
+The core (`createCore()`) is a frontend-agnostic kernel — it wires up the EventBus, HandlerRegistry, and Compositor with zero knowledge of terminals, PTYs, LLMs, shells, or rendering. **The core has no agent, no LLM client, and no shell coupling.** The built-in agent backend, shell tracking, provider management, TUI rendering, and all other features are loaded as extensions.
+```
+createCore() — pure kernel:
+  │     EventBus          — typed pub/sub + transform pipelines
+  │     HandlerRegistry   — named function registry (define/advise/call)
+  │     Compositor        — routes named render streams to surfaces
+  │     Multi-backend     — coordinates which agent backend is active
+  │     Default `cwd` handler returning `process.cwd()`
+  │
+index.ts — interactive terminal frontend:
+  │     Shell             — PTY lifecycle (delegates to InputHandler + OutputParser)
+  │
+  ├── Agent host (always activated via activateAgent(ctx) before built-ins load):
+  │     ash backend       — provider resolution, LlmClient, lazy AgentLoop
+  │     core tools        — bash/read/write/edit/grep/glob/ls/list_skills registered at activate time
+  │     built-in providers — openrouter, openai, openai-compatible, deepseek (unconditional)
+  │
+  ├── Backend registry (owned by core; backends register via `agent:register-backend`):
+  │     core.activateBackend() — picks the named/persisted/first backend and calls its start()
+  │
+  ├── Built-in extensions (loaded via declarative manifest, individually disableable):
+  │     shell-context     — PTY exchange tracking, cwd advisor, <cwd>/<shell_events> producer
+  │     tui-renderer      — markdown rendering, inline diffs, thinking display, spinner
+  │     slash-commands    — /help, /model, /backend, /thinking, /compact, /context, /reload
+  │     file-autocomplete — @ file path completion
+  │
+  ├── Shared utilities:
+  │     palette           — semantic color system (accent, success, warning, error, muted)
+  │     diff-renderer     — syntax-highlighted diffs (split/unified/summary)
+  │     box-frame         — bordered TUI panels
+  │     tool-display      — width-adaptive tool call rendering + pure spinner
+  │     output-writer     — OutputWriter interface (StdoutWriter, BufferWriter for tests)
+  │     stream-transform  — content block transforms for response pipeline
+  │
+  └── User extensions (opt-in, loaded from -e flag / settings.json / extensions dir):
+        e.g. overlay-agent, interactive-prompts, solarized-theme, latex-images, peer-mesh
+```
+All components communicate exclusively through typed bus events. The backend has no reference to Shell — it emits lifecycle events and the TUI subscribes. Input flows the same way: any frontend emits `agent:submit` and the backend handles it.
+Built-in extensions are loaded from a declarative manifest and can be individually disabled via the `disabledBuiltins` setting in `~/.agent-sh/settings.json`. This means even the built-in agent can be disabled (e.g., for users who only use extension backends like Claude Code).
+**The core works without any frontend.** See [Library](library.md) for embedding agent-sh in your own apps.
+## How It Works
+1. agent-sh spawns a real PTY running your shell (zsh or bash, with your full rc config) and sets up raw stdin passthrough
+2. Built-in extensions load (including the agent backend, which registers via `agent:register-backend`), then user extensions
+3. `activateBackend()` wires the chosen backend to bus events
+4. All keyboard input goes directly to the PTY — zero latency, full terminal compatibility
+5. When you type `>` at the start of a line, agent-sh intercepts and enters agent input mode
+6. On Enter, the query is emitted as `agent:submit` and the active backend decides which tools to use
+7. The backend handles the query — streaming LLM responses, executing tools, emitting events. Read-only tools run in parallel; permission-requiring tools run sequentially.
+8. The TUI renderer extension renders streamed content inline (markdown, diffs, tool calls with tree-style grouping)
+9. When the backend finishes (`agent:processing-done`), normal shell operation resumes
+## Shell ↔ Agent Boundary
+The shell and the agent are **separate worlds** by default. The PTY runs your real shell; the agent runs its tools in isolated child processes. A `cd` by the agent's `bash` tool doesn't change your shell's cwd.
+### Command-boundary detection
+agent-sh injects three invisible OSC sequences into its inner shell — `\e]9999;id=<tag>;PROMPT\a` (precmd), `\e]9997;id=<tag>;<cmd>\a` (preexec), `\e]9998;id=<tag>;READY\a` (prompt rendered). `<tag>` is the process's `instanceId`. The OutputParser reacts only to its own tag; markers with a different tag (or none) are treated as opaque foreground output. That's what keeps a nested agent-sh — for example, an `ash` launched inside an SSH session — from cross-triggering the outer instance's command lifecycle.
+The connection between the shell and the agent is **context**: each query includes shell context (recent commands, output, cwd). The agent sees what you've been doing but can't touch your shell state.
+Extensions can cross this boundary using `shell:exec-request`. The core event bus makes this easy to wire up — an extension just registers a tool that emits the event and returns the result. We don't include a PTY tool as built-in because the right behavior depends on user preference (confirmation prompts? output capture? restricted commands?). See `examples/extensions/user-shell.ts` for a ready-made implementation.
+The pattern works like this:
+```
+agent calls user_shell({ command: "cd src" })
+  → bus.emitPipeAsync("shell:exec-request", { command })
+    → Shell writes command to PTY
+      → PTY executes in user's real shell
+        → shell:command-done fires with output
+          → result returned to agent
+```
+## Agent Backend
+The agent backend is a bus-driven component that registers via `agent:register-backend`. The core's multi-backend coordinator manages which backend is active — it has no knowledge of any specific backend's internals.
+### Built-in backend: ash
+The default backend is **ash**, registered from the agent host (`src/agent/index.ts`) when `activateAgent(ctx)` runs. It resolves LLM providers from registered catalogs + settings overlay, configures an `LlmClient`, and registers itself with the core's backend registry by emitting `agent:register-backend`. The `AgentLoop` that drives tool calls is constructed lazily — only when ash's `start()` runs (on `activateBackend("ash")`). See [The Built-in Agent: ash](agent.md) for the full guide.
+The agent host also defines an `llm:invoke` handler that backs the `ctx.agent.llm` facade, so any extension can call `ctx.agent.llm.ask(...)` or `ctx.agent.llm.session(...)` without knowing which backend is active. Backends with no LLM leave `ctx.agent.llm.available` false.
+### Extension Backends
+Extensions can register alternative backends by emitting `agent:register-backend` during activation — this is the same mechanism the built-in agent uses. See [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for the full protocol and a working example.
+All backends emit the same bus events. The TUI, extensions, and library consumers don't know which backend is active.
+## Key Extension Points
+The extension system provides several composable primitives for customizing agent-sh. Each is documented in detail in the [Extensions](extensions.md) guide:
+- **[Event Bus](extensions.md#event-bus)** — typed pub/sub (`on`/`emit`), synchronous transform chains (`onPipe`/`emitPipe`), async transform chains (`onPipeAsync`/`emitPipeAsync`), and transform-then-notify (`emitTransform`)
+- **[Custom Agent Backends](extensions.md#custom-agent-backends)** — replace the entire agent backend via `agent:register-backend`
+- **[Named Handlers](extensions.md#named-handlers-advice-system)** — `define`/`advise`/`call` registry for wrapping processing steps (e.g. code block rendering)
+- **[Content Transform Pipeline](extensions.md#content-transform-pipeline)** — typed content blocks (`text`, `code-block`, `image`, `raw`) flow through parsers and post-transforms before rendering
+- **[Custom Input Modes](extensions.md#custom-input-modes)** — register trigger characters (`?`, `>`, etc.) with custom `onSubmit` handlers
+- **[Terminal Buffer & Floating Panel](extensions.md#terminal-buffer--floating-panel)** — headless xterm.js terminal mirror + composited overlay with handler-based rendering customization
+- **[Theming](extensions.md#theming)** — semantic color palette overrides via `setPalette()`
+## Project Structure
+```
+agent-sh/
+├── src/
+│   ├── core/                 # Substrate kernel — no LLM, no agent, no shell
+│   │   ├── index.ts          # createCore(), backend registry, extensionContext()
+│   │   ├── types.ts          # CoreContext, CoreConfig
+│   │   ├── event-bus.ts      # Typed EventBus: emit/on, emitPipe, emitPipeAsync, emitTransform
+│   │   ├── settings.ts       # User settings (~/.agent-sh/settings.json)
+│   │   └── extension-loader.ts # Extension loading (-e, settings.json, extensions dir)
+│   │
+│   ├── cli/                  # CLI entry + subcommands (install, init, auth)
+│   │   ├── index.ts          # Interactive terminal entry point
+│   │   ├── subcommands.ts, install.ts, init.ts
+│   │   └── auth/             # Provider API key management
+│   │
+│   ├── shell/                # Shell host — TUI frontend, PTY, compositor, theming
+│   │   ├── index.ts          # registerShellHandlers/activateShell — attaches ctx.shell
+│   │   ├── events.ts         # BusEvents augmentation (shell:*, input:*, compositor:*, autocomplete:request)
+│   │   ├── host-types.ts     # ShellSurface, ShellContext, ExtensionContext, AppConfig
+│   │   ├── shell.ts          # PTY lifecycle + wiring (InputHandler + OutputParser)
+│   │   ├── shell-context.ts  # Shell exchange tracking, cwd advisor, <shell_events>
+│   │   ├── tui-renderer.ts   # Main renderer — writes to compositor streams
+│   │   ├── input-handler.ts  # Keyboard input, agent mode, bus-driven autocomplete
+│   │   ├── output-parser.ts  # OSC parsing, command boundary detection
+│   │   └── tui-input-view.ts # Input rendering + line editor integration
+│   │
+│   ├── agent/                # Agent host — ash backend, providers, tools, skills
+│   │   ├── index.ts          # activateAgent — attaches ctx.agent, registers core tools + ash backend
+│   │   ├── events.ts         # BusEvents augmentation (agent:providers, agent:models-changed, ...)
+│   │   ├── host-types.ts     # AgentSurface, AgentContext, ProviderRegistration, Model, ModelEndpoint
+│   │   ├── types.ts          # AgentBackend, ToolDefinition, ToolResult
+│   │   ├── agent-loop.ts     # ash AgentLoop (constructed lazily in start())
+│   │   ├── llm-client.ts, llm-facade.ts  # ash LLM transport + ctx.agent.llm facade
+│   │   ├── providers/        # openai, openrouter, deepseek, openai-compatible
+│   │   ├── token-budget.ts   # Shared constants (RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW)
+│   │   ├── tool-registry.ts, tool-protocol.ts
+│   │   ├── live-view.ts      # In-memory messages array + compaction + recall archive
+│   │   ├── store.ts, session-store.ts  # Append-only entry store; session/message persistence
+│   │   ├── nuclear-form.ts, system-prompt.ts
+│   │   ├── skills.ts, subagent.ts
+│   │   └── tools/            # Built-in tool implementations (bash, read/write/edit, grep, glob, ls, ...)
+│   │
+│   ├── extensions/           # Cross-cutting built-ins (loaded via manifest)
+│   │   ├── index.ts          # Declarative manifest + loader
+│   │   ├── slash-commands/   # /reload, /quit, command dispatch; events.ts ships command:* events
+│   │   └── file-autocomplete.ts
+│   │
+│   └── utils/                # Shared primitives
+│       ├── handler-registry.ts # Named function registry (define/advise/call)
+│       ├── compositor.ts       # Routes named render streams to surfaces
+│       ├── terminal-buffer.ts  # Headless xterm.js mirror of the terminal
+│       ├── floating-panel.ts   # Composited floating overlay
+│       ├── executor.ts         # Isolated child process execution
+│       ├── shell-output-spill.ts # Session-tempfile spill for long shell outputs
+│       ├── palette.ts, ansi.ts, diff.ts, diff-renderer.ts
+│       └── (markdown, line-editor, stream-transform, ...)
+│
+├── examples/                 # Example extensions and agent integrations
+│   └── extensions/
+│       ├── overlay-agent.ts     # Ctrl+\ floating overlay agent
+│       ├── interactive-prompts.ts # Permission prompts (opt-in safety)
+│       ├── peer-mesh.ts         # Cross-instance communication
+│       ├── terminal-buffer.ts   # Headless xterm.js terminal mirror extension
+│       ├── tmux-pane.ts         # Tmux side pane output/interactive modes
+│       ├── web-access.ts        # Web search and content extraction
+│       ├── user-shell.ts        # Run commands in the live PTY
+│       ├── questionnaire.ts     # Interactive question prompts
+│       ├── subagents.ts         # Subagent orchestration
+│       ├── solarized-theme.ts   # Theme example
+│       ├── secret-guard.ts      # Secret redaction
+│       ├── latex-images.ts      # LaTeX equation rendering
+│       ├── ollama.ts            # Ollama provider (local + cloud)
+│       ├── claude-code-bridge/  # Claude Code SDK backend
+│       ├── pi-bridge/           # Pi agent backend
+│       ├── ash-mcp-bridge/      # MCP server bridge
+│       └── ash-acp-bridge/      # ACP server (headless core)
+├── docs/                     # Documentation
+├── package.json
+└── tsconfig.json
+```