agent-sh 0.15.0 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/agent/agent-loop.js +11 -8
  2. package/dist/agent/events.d.ts +4 -0
  3. package/docs/README.md +14 -0
  4. package/docs/agent.md +398 -0
  5. package/docs/architecture.md +196 -0
  6. package/docs/context-management.md +200 -0
  7. package/docs/extensions.md +951 -0
  8. package/docs/library.md +84 -0
  9. package/docs/troubleshooting.md +65 -0
  10. package/docs/tui-composition.md +294 -0
  11. package/docs/usage.md +306 -0
  12. package/examples/extensions/ash-scheme/package.json +1 -1
  13. package/examples/extensions/ashi/EXTENDING.md +2 -2
  14. package/examples/extensions/ashi/README.md +2 -2
  15. package/examples/extensions/ashi/docs/ui-surface-protocol.md +1 -1
  16. package/examples/extensions/ashi/package.json +5 -3
  17. package/examples/extensions/ashi/src/chat/tool-group.ts +3 -2
  18. package/examples/extensions/ashi/src/cli.ts +9 -8
  19. package/examples/extensions/ashi/src/dialogs.ts +16 -1
  20. package/examples/extensions/ashi/src/events.ts +1 -0
  21. package/examples/extensions/ashi/src/frontend.ts +26 -6
  22. package/examples/extensions/ashi/src/renderer.ts +24 -4
  23. package/examples/extensions/ashi/src/renderers/pi-tui/schema-mount.ts +4 -3
  24. package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +5 -8
  25. package/examples/extensions/ashi/src/ui.ts +11 -0
  26. package/examples/extensions/ashi-ink/package.json +2 -2
  27. package/examples/extensions/claude-code-bridge/package.json +1 -1
  28. package/examples/extensions/opencode-bridge/package.json +1 -1
  29. package/package.json +3 -1
  30. package/src/agent/agent-loop.ts +1566 -0
  31. package/src/agent/entry-format.ts +19 -0
  32. package/src/agent/events.ts +153 -0
  33. package/src/agent/extensions/rolling-history/constants.ts +1 -0
  34. package/src/agent/extensions/rolling-history/index.ts +202 -0
  35. package/src/agent/extensions/rolling-history/recall.ts +131 -0
  36. package/src/agent/extensions/rolling-history/strategy.ts +404 -0
  37. package/src/agent/host-types.ts +192 -0
  38. package/src/agent/index.ts +591 -0
  39. package/src/agent/live-view.ts +279 -0
  40. package/src/agent/llm-client.ts +111 -0
  41. package/src/agent/llm-facade.ts +43 -0
  42. package/src/agent/normalize-args.ts +61 -0
  43. package/src/agent/nuclear-form.ts +382 -0
  44. package/src/agent/providers/deepseek.ts +39 -0
  45. package/src/agent/providers/ollama.ts +92 -0
  46. package/src/agent/providers/openai-compatible.ts +36 -0
  47. package/src/agent/providers/openai.ts +52 -0
  48. package/src/agent/providers/opencode.ts +142 -0
  49. package/src/agent/providers/openrouter.ts +105 -0
  50. package/src/agent/providers/zai-coding-plan.ts +33 -0
  51. package/src/agent/session-store.ts +336 -0
  52. package/src/agent/skills.ts +228 -0
  53. package/src/agent/store.ts +310 -0
  54. package/src/agent/subagent.ts +305 -0
  55. package/src/agent/system-prompt.ts +151 -0
  56. package/src/agent/token-budget.ts +12 -0
  57. package/src/agent/tool-protocol.ts +722 -0
  58. package/src/agent/tool-registry.ts +66 -0
  59. package/src/agent/tools/bash.ts +95 -0
  60. package/src/agent/tools/edit-file.ts +154 -0
  61. package/src/agent/tools/expand-home.ts +7 -0
  62. package/src/agent/tools/glob.ts +108 -0
  63. package/src/agent/tools/grep.ts +228 -0
  64. package/src/agent/tools/list-skills.ts +37 -0
  65. package/src/agent/tools/ls.ts +81 -0
  66. package/src/agent/tools/pwsh.ts +140 -0
  67. package/src/agent/tools/read-file.ts +164 -0
  68. package/src/agent/tools/write-file.ts +72 -0
  69. package/src/agent/types.ts +149 -0
  70. package/src/cli/args.ts +91 -0
  71. package/src/cli/auth/cli.ts +244 -0
  72. package/src/cli/auth/discover.ts +52 -0
  73. package/src/cli/auth/keys.ts +143 -0
  74. package/src/cli/index.ts +295 -0
  75. package/src/cli/init.ts +74 -0
  76. package/src/cli/install.ts +439 -0
  77. package/src/cli/shell-env.ts +68 -0
  78. package/src/cli/subcommands.ts +24 -0
  79. package/src/core/event-bus.ts +252 -0
  80. package/src/core/extension-loader.ts +347 -0
  81. package/src/core/index.ts +152 -0
  82. package/src/core/settings.ts +398 -0
  83. package/src/core/types.ts +61 -0
  84. package/src/extensions/file-autocomplete.ts +71 -0
  85. package/src/extensions/index.ts +38 -0
  86. package/src/extensions/slash-commands/events.ts +14 -0
  87. package/src/extensions/slash-commands/index.ts +269 -0
  88. package/src/shell/events.ts +73 -0
  89. package/src/shell/host-types.ts +150 -0
  90. package/src/shell/index.ts +159 -0
  91. package/src/shell/input-handler.ts +505 -0
  92. package/src/shell/output-parser.ts +156 -0
  93. package/src/shell/shell-context.ts +193 -0
  94. package/src/shell/shell.ts +414 -0
  95. package/src/shell/strategies/bash.ts +83 -0
  96. package/src/shell/strategies/fish.ts +77 -0
  97. package/src/shell/strategies/index.ts +24 -0
  98. package/src/shell/strategies/types.ts +64 -0
  99. package/src/shell/strategies/zsh.ts +92 -0
  100. package/src/shell/terminal.ts +124 -0
  101. package/src/shell/tui-input-view.ts +222 -0
  102. package/src/shell/tui-renderer.ts +1126 -0
  103. package/src/utils/ansi.ts +140 -0
  104. package/src/utils/box-frame.ts +138 -0
  105. package/src/utils/compositor.ts +157 -0
  106. package/src/utils/diff-renderer.ts +829 -0
  107. package/src/utils/diff.ts +244 -0
  108. package/src/utils/executor.ts +305 -0
  109. package/src/utils/file-watcher.ts +110 -0
  110. package/src/utils/floating-panel.ts +1160 -0
  111. package/src/utils/handler-registry.ts +110 -0
  112. package/src/utils/line-editor.ts +636 -0
  113. package/src/utils/markdown.ts +437 -0
  114. package/src/utils/message-utils.ts +113 -0
  115. package/src/utils/package-version.ts +12 -0
  116. package/src/utils/palette.ts +64 -0
  117. package/src/utils/ref-counter.ts +9 -0
  118. package/src/utils/ripgrep-path.ts +17 -0
  119. package/src/utils/shell-output-spill.ts +76 -0
  120. package/src/utils/stream-transform.ts +292 -0
  121. package/src/utils/terminal-buffer.ts +213 -0
  122. package/src/utils/tool-display.ts +315 -0
  123. package/src/utils/tool-interactive.ts +71 -0
  124. package/src/utils/tty.ts +14 -0
@@ -0,0 +1,196 @@
1
+ # Architecture
2
+
3
+ agent-sh is a composable agent runtime: a pure kernel that any frontend can drive and any agent backend can plug into, over one shared extension layer. Frontends and backends are both bus-driven components that self-wire to events — the bundled shell is just one frontend among several.
4
+
5
+ ## Design Philosophy: Pure Kernel + Everything Is an Extension
6
+
7
+ The core (`createCore()`) is a frontend-agnostic kernel — it wires up the EventBus, HandlerRegistry, and Compositor with zero knowledge of terminals, PTYs, LLMs, shells, or rendering. **The core has no agent, no LLM client, and no shell coupling.** The built-in agent backend, shell tracking, provider management, TUI rendering, and all other features are loaded as extensions.
8
+
9
+ ```
10
+ createCore() — pure kernel:
11
+ │ EventBus — typed pub/sub + transform pipelines
12
+ │ HandlerRegistry — named function registry (define/advise/call)
13
+ │ Compositor — routes named render streams to surfaces
14
+ │ Multi-backend — coordinates which agent backend is active
15
+ │ Default `cwd` handler returning `process.cwd()`
16
+
17
+ index.ts — interactive terminal frontend:
18
+ │ Shell — PTY lifecycle (delegates to InputHandler + OutputParser)
19
+
20
+ ├── Agent host (always activated via activateAgent(ctx) before built-ins load):
21
+ │ ash backend — provider resolution, LlmClient, lazy AgentLoop
22
+ │ core tools — bash/read/write/edit/grep/glob/ls/list_skills registered at activate time
23
+ │ built-in providers — openrouter, openai, openai-compatible, deepseek (unconditional)
24
+
25
+ ├── Backend registry (owned by core; backends register via `agent:register-backend`):
26
+ │ core.activateBackend() — picks the named/persisted/first backend and calls its start()
27
+
28
+ ├── Built-in extensions (loaded via declarative manifest, individually disableable):
29
+ │ shell-context — PTY exchange tracking, cwd advisor, <cwd>/<shell_events> producer
30
+ │ tui-renderer — markdown rendering, inline diffs, thinking display, spinner
31
+ │ slash-commands — /help, /model, /backend, /thinking, /compact, /context, /reload
32
+ │ file-autocomplete — @ file path completion
33
+
34
+ ├── Shared utilities:
35
+ │ palette — semantic color system (accent, success, warning, error, muted)
36
+ │ diff-renderer — syntax-highlighted diffs (split/unified/summary)
37
+ │ box-frame — bordered TUI panels
38
+ │ tool-display — width-adaptive tool call rendering + pure spinner
39
+ │ output-writer — OutputWriter interface (StdoutWriter, BufferWriter for tests)
40
+ │ stream-transform — content block transforms for response pipeline
41
+
42
+ └── User extensions (opt-in, loaded from -e flag / settings.json / extensions dir):
43
+ e.g. overlay-agent, interactive-prompts, solarized-theme, latex-images, peer-mesh
44
+ ```
45
+
46
+ All components communicate exclusively through typed bus events. The backend has no reference to Shell — it emits lifecycle events and the TUI subscribes. Input flows the same way: any frontend emits `agent:submit` and the backend handles it.
47
+
48
+ Built-in extensions are loaded from a declarative manifest and can be individually disabled via the `disabledBuiltins` setting in `~/.agent-sh/settings.json`. This means even the built-in agent can be disabled (e.g., for users who only use extension backends like Claude Code).
49
+
50
+ **The core works without any frontend.** See [Library](library.md) for embedding agent-sh in your own apps.
51
+
52
+ ## How It Works
53
+
54
+ 1. agent-sh spawns a real PTY running your shell (zsh, bash, or fish, with your full rc config) and sets up raw stdin passthrough
55
+ 2. Built-in extensions load (including the agent backend, which registers via `agent:register-backend`), then user extensions
56
+ 3. `activateBackend()` wires the chosen backend to bus events
57
+ 4. All keyboard input goes directly to the PTY — zero latency, full terminal compatibility
58
+ 5. When you type `>` at the start of a line, agent-sh intercepts and enters agent input mode
59
+ 6. On Enter, the query is emitted as `agent:submit` and the active backend decides which tools to use
60
+ 7. The backend handles the query — streaming LLM responses, executing tools, emitting events. Read-only tools run in parallel; permission-requiring tools run sequentially.
61
+ 8. The TUI renderer extension renders streamed content inline (markdown, diffs, tool calls with tree-style grouping)
62
+ 9. When the backend finishes (`agent:processing-done`), normal shell operation resumes
63
+
64
+ ## Shell ↔ Agent Boundary
65
+
66
+ The shell and the agent are **separate worlds** by default. The PTY runs your real shell; the agent runs its tools in isolated child processes. A `cd` by the agent's `bash` tool doesn't change your shell's cwd.
67
+
68
+ ### Command-boundary detection
69
+
70
+ agent-sh injects three invisible OSC sequences into its inner shell — `\e]9999;id=<tag>;PROMPT\a` (precmd), `\e]9997;id=<tag>;<cmd>\a` (preexec), `\e]9998;id=<tag>;READY\a` (prompt rendered). `<tag>` is the process's `instanceId`. The OutputParser reacts only to its own tag; markers with a different tag (or none) are treated as opaque foreground output. That's what keeps a nested agent-sh — for example, an `ash` launched inside an SSH session — from cross-triggering the outer instance's command lifecycle.
71
+
72
+ The connection between the shell and the agent is **context**: each query includes shell context (recent commands, output, cwd). The agent sees what you've been doing but can't touch your shell state.
73
+
74
+ Extensions can cross this boundary using `shell:exec-request`. The core event bus makes this easy to wire up — an extension just registers a tool that emits the event and returns the result. We don't include a PTY tool as built-in because the right behavior depends on user preference (confirmation prompts? output capture? restricted commands?). See `examples/extensions/user-shell.ts` for a ready-made implementation.
75
+
76
+ The pattern works like this:
77
+
78
+ ```
79
+ agent calls user_shell({ command: "cd src" })
80
+ → bus.emitPipeAsync("shell:exec-request", { command })
81
+ → Shell writes command to PTY
82
+ → PTY executes in user's real shell
83
+ → shell:command-done fires with output
84
+ → result returned to agent
85
+ ```
86
+
87
+ ## Agent Backend
88
+
89
+ The agent backend is a bus-driven component that registers via `agent:register-backend`. The core's multi-backend coordinator manages which backend is active — it has no knowledge of any specific backend's internals.
90
+
91
+ ### Built-in backend: ash
92
+
93
+ The default backend is **ash**, registered from the agent host (`src/agent/index.ts`) when `activateAgent(ctx)` runs. It resolves LLM providers from registered catalogs + settings overlay, configures an `LlmClient`, and registers itself with the core's backend registry by emitting `agent:register-backend`. The `AgentLoop` that drives tool calls is constructed lazily — only when ash's `start()` runs (on `activateBackend("ash")`). See [The Built-in Agent: ash](agent.md) for the full guide.
94
+
95
+ The agent host also defines an `llm:invoke` handler that backs the `ctx.agent.llm` facade, so any extension can call `ctx.agent.llm.ask(...)` or `ctx.agent.llm.session(...)` without knowing which backend is active. Backends with no LLM leave `ctx.agent.llm.available` false.
96
+
97
+ ### Extension Backends
98
+
99
+ Extensions can register alternative backends by emitting `agent:register-backend` during activation — this is the same mechanism the built-in agent uses. See [Extensions: Custom Agent Backends](extensions.md#custom-agent-backends) for the full protocol and a working example.
100
+
101
+ All backends emit the same bus events. The TUI, extensions, and library consumers don't know which backend is active.
102
+
103
+ ## Key Extension Points
104
+
105
+ The extension system provides several composable primitives for customizing agent-sh. Each is documented in detail in the [Extensions](extensions.md) guide:
106
+
107
+ - **[Event Bus](extensions.md#event-bus)** — typed pub/sub (`on`/`emit`), synchronous transform chains (`onPipe`/`emitPipe`), async transform chains (`onPipeAsync`/`emitPipeAsync`), and transform-then-notify (`emitTransform`)
108
+ - **[Custom Agent Backends](extensions.md#custom-agent-backends)** — replace the entire agent backend via `agent:register-backend`
109
+ - **[Named Handlers](extensions.md#named-handlers-advice-system)** — `define`/`advise`/`call` registry for wrapping processing steps (e.g. code block rendering)
110
+ - **[Content Transform Pipeline](extensions.md#content-transform-pipeline)** — typed content blocks (`text`, `code-block`, `image`, `raw`) flow through parsers and post-transforms before rendering
111
+ - **[Custom Input Modes](extensions.md#custom-input-modes)** — register trigger characters (`?`, `>`, etc.) with custom `onSubmit` handlers
112
+ - **[Terminal Buffer & Floating Panel](extensions.md#terminal-buffer--floating-panel)** — headless xterm.js terminal mirror + composited overlay with handler-based rendering customization
113
+ - **[Theming](extensions.md#theming)** — semantic color palette overrides via `setPalette()`
114
+
115
+ ## Project Structure
116
+
117
+ ```
118
+ agent-sh/
119
+ ├── src/
120
+ │ ├── core/ # Substrate kernel — no LLM, no agent, no shell
121
+ │ │ ├── index.ts # createCore(), backend registry, extensionContext()
122
+ │ │ ├── types.ts # CoreContext, CoreConfig
123
+ │ │ ├── event-bus.ts # Typed EventBus: emit/on, emitPipe, emitPipeAsync, emitTransform
124
+ │ │ ├── settings.ts # User settings (~/.agent-sh/settings.json)
125
+ │ │ └── extension-loader.ts # Extension loading (-e, settings.json, extensions dir)
126
+ │ │
127
+ │ ├── cli/ # CLI entry + subcommands (install, init, auth)
128
+ │ │ ├── index.ts # Interactive terminal entry point
129
+ │ │ ├── subcommands.ts, install.ts, init.ts
130
+ │ │ └── auth/ # Provider API key management
131
+ │ │
132
+ │ ├── shell/ # Shell host — TUI frontend, PTY, compositor, theming
133
+ │ │ ├── index.ts # registerShellHandlers/activateShell — attaches ctx.shell
134
+ │ │ ├── events.ts # BusEvents augmentation (shell:*, input:*, compositor:*, autocomplete:request)
135
+ │ │ ├── host-types.ts # ShellSurface, ShellContext, ExtensionContext, AppConfig
136
+ │ │ ├── shell.ts # PTY lifecycle + wiring (InputHandler + OutputParser)
137
+ │ │ ├── shell-context.ts # Shell exchange tracking, cwd advisor, <shell_events>
138
+ │ │ ├── tui-renderer.ts # Main renderer — writes to compositor streams
139
+ │ │ ├── input-handler.ts # Keyboard input, agent mode, bus-driven autocomplete
140
+ │ │ ├── output-parser.ts # OSC parsing, command boundary detection
141
+ │ │ └── tui-input-view.ts # Input rendering + line editor integration
142
+ │ │
143
+ │ ├── agent/ # Agent host — ash backend, providers, tools, skills
144
+ │ │ ├── index.ts # activateAgent — attaches ctx.agent, registers core tools + ash backend
145
+ │ │ ├── events.ts # BusEvents augmentation (agent:providers, agent:models-changed, ...)
146
+ │ │ ├── host-types.ts # AgentSurface, AgentContext, ProviderRegistration, Model, ModelEndpoint
147
+ │ │ ├── types.ts # AgentBackend, ToolDefinition, ToolResult
148
+ │ │ ├── agent-loop.ts # ash AgentLoop (constructed lazily in start())
149
+ │ │ ├── llm-client.ts, llm-facade.ts # ash LLM transport + ctx.agent.llm facade
150
+ │ │ ├── providers/ # openai, openrouter, deepseek, openai-compatible, ollama, opencode, zai-coding-plan
151
+ │ │ ├── token-budget.ts # Shared constants (RESPONSE_RESERVE, DEFAULT_CONTEXT_WINDOW)
152
+ │ │ ├── tool-registry.ts, tool-protocol.ts
153
+ │ │ ├── live-view.ts # In-memory messages array + compaction + recall archive
154
+ │ │ ├── store.ts, session-store.ts # Append-only entry store; session/message persistence
155
+ │ │ ├── nuclear-form.ts, system-prompt.ts
156
+ │ │ ├── skills.ts, subagent.ts
157
+ │ │ └── tools/ # Built-in tool implementations (bash, read/write/edit, grep, glob, ls, ...)
158
+ │ │
159
+ │ ├── extensions/ # Cross-cutting built-ins (loaded via manifest)
160
+ │ │ ├── index.ts # Declarative manifest + loader
161
+ │ │ ├── slash-commands/ # /reload, /quit, command dispatch; events.ts ships command:* events
162
+ │ │ └── file-autocomplete.ts
163
+ │ │
164
+ │ └── utils/ # Shared primitives
165
+ │ ├── handler-registry.ts # Named function registry (define/advise/call)
166
+ │ ├── compositor.ts # Routes named render streams to surfaces
167
+ │ ├── terminal-buffer.ts # Headless xterm.js mirror of the terminal
168
+ │ ├── floating-panel.ts # Composited floating overlay
169
+ │ ├── executor.ts # Isolated child process execution
170
+ │ ├── shell-output-spill.ts # Session-tempfile spill for long shell outputs
171
+ │ ├── palette.ts, ansi.ts, diff.ts, diff-renderer.ts
172
+ │ └── (markdown, line-editor, stream-transform, ...)
173
+
174
+ ├── examples/ # Example extensions and agent integrations
175
+ │ └── extensions/
176
+ │ ├── overlay-agent.ts # Ctrl+\ floating overlay agent
177
+ │ ├── interactive-prompts.ts # Permission prompts (opt-in safety)
178
+ │ ├── peer-mesh.ts # Cross-instance communication
179
+ │ ├── terminal-buffer.ts # Headless xterm.js terminal mirror extension
180
+ │ ├── tmux-pane.ts # Tmux side pane output/interactive modes
181
+ │ ├── web-access.ts # Web search and content extraction
182
+ │ ├── user-shell.ts # Run commands in the live PTY
183
+ │ ├── questionnaire.ts # Interactive question prompts
184
+ │ ├── subagents.ts # Subagent orchestration
185
+ │ ├── solarized-theme.ts # Theme example
186
+ │ ├── secret-guard.ts # Secret redaction
187
+ │ ├── latex-images.ts # LaTeX equation rendering
188
+ │ ├── ollama.ts # Ollama provider (local + cloud)
189
+ │ ├── claude-code-bridge/ # Claude Code SDK backend
190
+ │ ├── pi-bridge/ # Pi agent backend
191
+ │ ├── ash-mcp-bridge/ # MCP server bridge
192
+ │ └── ash-acp-bridge/ # ACP server (headless core)
193
+ ├── docs/ # Documentation
194
+ ├── package.json
195
+ └── tsconfig.json
196
+ ```
@@ -0,0 +1,200 @@
1
+ # Context Management
2
+
3
+ ## What is "context," and why manage it?
4
+
5
+ Large language models take text as input and produce text as output. Every model has a **context window** — a hard cap on how much text it can consider at once, measured in tokens (~4 characters each). A modern frontier model might offer 200k or 1M tokens; an older one might offer 8k. The window is always finite, and every token inside it costs money, costs latency, and — as windows grow — can degrade output quality.
6
+
7
+ "Context management" is the art of deciding *what* to keep inside that budget, *when* to evict things, and *how* to recover what you've pushed out. Different agents solve this differently. Most chat-style agents sidestep it: you get one window per conversation, and when it fills up you start a new chat. That works when the agent owns the entire interaction.
8
+
9
+ **agent-sh is different — it lives inside a terminal**, and terminals don't have sessions.
10
+
11
+ ## The terminal mental model
12
+
13
+ When you use a shell, you never think about "sessions." You run commands, switch between tasks, help a colleague, come back. Shell history is just *there* — always growing, searchable, persisting across restarts. Nobody invokes `/clear` or picks a new chat.
14
+
15
+ agent-sh adopts this mental model. The consequences shape everything below:
16
+
17
+ 1. **No sessions.** There's no new-chat button and no `/clear`. History is continuous and append-only, like `.zsh_history`.
18
+ 2. **No workflow guessing.** We don't try to detect topic changes or time gaps — any heuristic that guesses user intent will be wrong often enough to annoy. The only reason to evict content is mechanical: the window filled up.
19
+ 3. **Two streams.** Shell activity and agent reasoning are fundamentally different kinds of information; they deserve different mechanisms.
20
+ 4. **Model-aware where it matters.** Compaction triggers adapt to the model's real context window, not a hardcoded threshold.
21
+ 5. **Strategy is pluggable.** The kernel decides *when* to act; *how* to compact is behind an advisable handler so extensions can install richer strategies without touching core code.
22
+
23
+ ## The two streams
24
+
25
+ ### Shell context — "what has the user been doing?"
26
+
27
+ Captured and owned by the `shell-context` built-in (`src/shell/shell-context.ts`). Tracks user-initiated PTY activity: shell commands the user ran + their outputs.
28
+
29
+ Agent tool outputs are **not** here — those live in the conversation stream. The boundary is strict: if the user typed it at the PTY, it goes into shell context; if the agent called a tool, it goes into the conversation.
30
+
31
+ Frontends without a PTY (e.g. ashi, asHub) simply don't load this extension — the agent runs cwd-aware via the default `cwd` handler (`process.cwd()`) and no `<cwd>` / `<shell_events>` envelope is emitted.
32
+
33
+ ### Conversation — "what has the agent been working on?"
34
+
35
+ Owned by `LiveView` (`src/agent/live-view.ts`). This is the OpenAI-shaped messages array (`user` / `assistant` / `tool`) the LLM actually sees. Contains:
36
+
37
+ - User messages (queries the user sent to the agent)
38
+ - Assistant messages (the LLM's replies)
39
+ - Tool calls and tool results
40
+
41
+ The two streams merge at one point: when the user submits a new query, the current cwd is wrapped inside `<cwd>` and any new shell events inside `<shell_events>` (both nested in the per-query `<query_context>` envelope) and prepended to that user message. They then live inside the conversation array as regular bytes, but they are never stored separately in both places.
42
+
43
+ ## How shell activity reaches the LLM
44
+
45
+ Each exchange (a shell command + output) gets a sequential `id` as it's captured. The shell-context extension keeps an internal `lastSeq` cursor — the highest id it has already sent to the model.
46
+
47
+ Shell context contributes to the per-query `query-context:build` handler (the `shell-context` extension advises it directly; extensions can equivalently use `ctx.agent.registerContextProducer(name, fn, { mode: "per-query" })`):
48
+
49
+ 1. The producer always emits `<cwd>...</cwd>` with the live PTY-tracked cwd, so every user message anchors where the agent is right now (immune to compaction confusion over historical cwds).
50
+ 2. If there are exchanges with id > `lastSeq`, it appends `<shell_events>...</shell_events>` with the deltas; the cursor then advances to the new high-water mark.
51
+ 3. The dispatcher composes the result with any other per-query producer output and wraps the whole bundle in `<query_context>...</query_context>`, prepended to the user's query inside a single user message.
52
+
53
+ The delta is sent **once per user query**, not per tool-use step inside the agent loop. Inside the loop (where the LLM calls tools, sees results, calls more tools), no new shell events are injected — injecting mid-loop would break the `tool_call → tool_result` chain some providers require, and per-tool-call shell visibility isn't the right semantic anyway.
54
+
55
+ Prior-turn shell events remain visible in later turns because they're embedded in earlier user messages in the conversation history. They are not *re-sent* as fresh bytes — the provider's prefix cache amortizes them to O(1) per turn.
56
+
57
+ ## Handling long shell outputs
58
+
59
+ A `find /` or a verbose build can produce megabytes of output. Storing that verbatim in context is wasteful: most of it is never referenced.
60
+
61
+ At capture time, if an exchange's output exceeds `shellTruncateThreshold` lines:
62
+
63
+ 1. The full text is written to `<tmpdir>/agent-sh-<pid>/<id>.out`.
64
+ 2. The in-memory exchange keeps only `shellHeadLines` from the top + a marker + `shellTailLines` from the bottom:
65
+ ```
66
+ <first 10 lines verbatim>
67
+ [... 4823 lines truncated — full output at /tmp/agent-sh-12345/42.out; use read_file to expand ...]
68
+ <last 10 lines verbatim>
69
+ ```
70
+ 3. If the agent needs the full content later, it calls `read_file` on the path — with `offset`/`limit` for pagination on very large files.
71
+
72
+ This trades a little disk I/O for a lot of heap and token savings, and gives the user a side benefit: they can `cat /tmp/agent-sh-<pid>/42.out` directly to inspect what was captured, which is handy for debugging.
73
+
74
+ The session directory is removed on process exit (including `SIGINT` / `SIGTERM` / `SIGHUP`). Stale directories from crashed sessions are swept lazily the next time agent-sh starts.
75
+
76
+ ## Conversation compaction
77
+
78
+ Unlike shell context — which is a per-query delta and stays small — the conversation grows every turn. Without an active strategy it would eventually blow past the model's window. The kernel owns the *trigger*; the **built-in `rolling-history` extension** owns the *strategy* and the *store*. The result is a three-tier scheme designed to feel like shell history. (Headless or bridge backends that don't load the extension keep the live array and the kernel trigger, but have no summary store, recall, or cross-restart history.)
79
+
80
+ ### Tier 1 — eager capture
81
+
82
+ Every time a message is appended to the conversation, the kernel emits a `conversation:message-appended` event. The rolling-history extension listens and, for each message:
83
+
84
+ 1. Nucleates it into a one-line summary (`nucleate()` in `src/agent/nuclear-form.ts`) and appends that as a persisted `Entry` to its summary **Store**.
85
+ 2. Appends an *ephemeral* `recall-cache` child entry holding the full message, so the verbatim text stays expandable for the rest of the process without ever being written to disk.
86
+ 3. Links the live message back to its entry id (`conversation:link`, which stamps `meta.entryId`), so a later compaction won't re-summarize it.
87
+
88
+ Read-only tool results (`read_file`, `grep`, `glob`, `ls`) are filtered out of the persisted summaries — the agent can just re-run those tools.
89
+
90
+ #### The summary store on disk
91
+
92
+ The store (`SharedFileStore` in `src/agent/store.ts`) is an append-only JSONL log at `~/.agent-sh/rolling-history/history.jsonl` (`~/.agent-sh` is the config dir, overridable via `AGENT_SH_HOME`). One serialized `Entry` per line — `{ id, parentId?, ts, kind, payload }`, where a summary's payload carries `sum` (the one-liner), optional `body` (full content, capped per kind), and `iid` (the writing instance's id).
93
+
94
+ - **Concurrency-safe.** Lines are short enough that POSIX `O_APPEND` writes are atomic, so multiple agent-sh instances can share one file without a lock. Only front-truncation (which rewrites the file) takes a lock — `history.jsonl.lock` via `O_EXCL`, with a 10-second stale-lock timeout to recover from crashes.
95
+ - **Ephemeral entries never touch disk.** The `recall-cache` full-body entries are appended with `{ ephemeral: true }`, a no-op on the file store — they live only in the current process.
96
+ - **Front-truncation.** After each append, the file is checked against the extension's `maxBytes` (default 50MB). Past 150% of the cap, the oldest lines are dropped and the rest rewritten atomically via temp-file + `rename`; the overshoot avoids frequent rewrites.
97
+ - **Reverse-chunked reads.** `readRecent`, `findById`, and `search` stream the file backward in 1MB chunks, stitching lines across boundaries at the byte level so UTF-8 codepoints never split. Search caps at a 20MB scan budget to bound cost on large files.
98
+
99
+ The store sits behind a generic `Store` interface (`append` / `findById` / `readRecent` / `search`), so an extension can swap in a different backend (SQLite, remote service) without changing capture or recall.
100
+
101
+ ### Tier 2 — active context
102
+
103
+ The live `LiveView` array holds full messages for every turn the LLM currently sees. Alongside it, the rolling-history extension keeps two id-keyed views: the summary Store (one-liners, persisted) and the per-process `recall-cache` (full bodies, ephemeral). So once a turn is evicted from the live array, its summary stays browsable and its full text stays expandable for the rest of the session.
104
+
105
+ ### Tier 3 — compaction
106
+
107
+ The kernel watches estimated prompt size against `autoCompactThreshold × (contextWindow − RESPONSE_RESERVE)` (default threshold `0.5`). When it's crossed (or `/compact` is invoked, or the API returns a context-overflow error), the kernel calls the advisable `conversation:compact` handler with a token target. The rolling-history extension's advisor implements the strategy:
108
+
109
+ 1. Parse the live array into turns (a turn starts at each user message).
110
+ 2. Pin the first turn and the most recent turns — the newest kept verbatim, a band just behind it "slimmed" (read-only tool calls dropped, long tool/assistant bodies trimmed).
111
+ 3. Score the remaining middle turns by *priority × recency* (user messages and errors rank highest; large read-only tool results lowest) and evict lowest-first until the estimate is under target.
112
+ 4. Replace the evicted span in place with one synthetic block — `[Conversation history — use conversation_recall to expand any entry]` — built from the recent summary lines, topping up summaries for any messages that missed eager capture.
113
+
114
+ On startup, if `prefetchEntries > 0` (default 50) the extension reads the most recent summary lines from the Store and injects them as a `[Prior session history]` message — so context carries across restarts the way shell history does.
115
+
116
+ ### Token accounting
117
+
118
+ Compaction decisions use **API-grounded** token counts, not a chars/4 heuristic. After each API response, the provider's reported `prompt_tokens` is captured as an anchor. On the next iteration, `estimatePromptTokens()` returns that anchor plus a small local estimate for anything appended since. This keeps the trigger aligned with what the provider actually bills.
119
+
120
+ ## Two mechanisms that look similar but aren't
121
+
122
+ People often conflate shell output truncation and conversation compaction. They're different things:
123
+
124
+ | | Shell output truncation | Conversation compaction |
125
+ |---|---|---|
126
+ | **Stream** | Shell context (`<shell_events>` deltas) | Conversation messages array |
127
+ | **When** | Once, at the moment each exchange is captured | On threshold crossing, `/compact`, or overflow retry |
128
+ | **State change** | Permanent: `ex.output` becomes head+tail+path | Permanent: evicted turns collapse to one-liners |
129
+ | **Full-text location** | Tempfile on disk | Ephemeral recall cache (full bodies); one-line summaries persist in the summary store (`~/.agent-sh/rolling-history/history.jsonl`) |
130
+ | **Recovery tool** | `read_file` on the spill path | `conversation_recall` |
131
+
132
+ They fire independently. An exchange with a huge output spills as soon as it's captured; conversation compaction may not trigger until many turns later, for unrelated reasons.
133
+
134
+ ## Recall APIs
135
+
136
+ Both streams offer a way to retrieve full content that isn't in live context.
137
+
138
+ ### Shell output — `read_file` on the spill path
139
+
140
+ There's no dedicated shell-recall tool: the spill file is just a normal file. The agent uses `read_file`, which already supports `offset`/`limit` pagination for very large outputs.
141
+
142
+ ### Conversation — `conversation_recall` tool
143
+
144
+ Registered by the built-in `rolling-history` extension (only present when that extension is active; bridges and embedded uses don't ship it):
145
+
146
+ - `conversation_recall {"action": "browse"}` — list the 25 most recent summary entries from the store
147
+ - `conversation_recall {"action": "search", "query": "..."}` — regex search across stored entries (one-line summaries plus the ephemeral full-body cache), returning each hit's header and a first-match excerpt
148
+ - `conversation_recall {"action": "expand", "turn_id": "#a1b2c3d4"}` — full content of a specific entry, by the `#id` shown in browse/search output
149
+
150
+ Extensions that install a custom compaction strategy can reuse `conversation_recall` or advise it with their own semantics.
151
+
152
+ ## Extension hooks
153
+
154
+ | Handler / event | Purpose |
155
+ |---|---|
156
+ | `conversation:compact` *(advisable handler)* | Install a custom compaction strategy. Read the messages array via `conversation:get-messages`, compute a replacement, install it via `conversation:replace-messages`, return `{ before, after, evictedCount }`. |
157
+ | `conversation:message-appended` *(event)* | Fires every time a message is added (user/assistant/tool). Use it to build rolling indexes, summarize in the background, or feed external memory systems. |
158
+
159
+ Common override patterns: LLM-summarized compaction (summarize evicted turns before eviction), topic pinning (preserve turns matching pinned keywords), alternate persistence backends (SQLite, vector store, remote service).
160
+
161
+ ## Slash commands
162
+
163
+ | Command | Action |
164
+ |---|---|
165
+ | `/compact` | Fire the `conversation:compact` handler (effective behavior depends on active advisors) |
166
+ | `/context` | Show context budget usage (active tokens, total tokens, budget) |
167
+ | `/history [on\|off\|status]` | Pause/resume writes to the rolling-history store for this session. Recall stays available; the tool and instruction stay registered, so toggling doesn't perturb the tools array or system prompt (LLM prompt cache is preserved). |
168
+
169
+ There's no `/clear` — history is continuous by design.
170
+
171
+ ## Configuration
172
+
173
+ All settings live in `~/.agent-sh/settings.json`:
174
+
175
+ | Setting | Default | Description |
176
+ |---|---|---|
177
+ | `shellTruncateThreshold` | 20 | Output line count that triggers spill-to-tempfile at capture |
178
+ | `shellHeadLines` | 10 | Lines kept from the top when an output is spilled |
179
+ | `shellTailLines` | 10 | Lines kept from the bottom when an output is spilled |
180
+ | `autoCompactThreshold` | 0.5 | Fraction of available context window that triggers auto-compact |
181
+
182
+ The `rolling-history` extension reads its own settings, namespaced under `"rolling-history"`:
183
+
184
+ | Setting | Default | Description |
185
+ |---|---|---|
186
+ | `maxBytes` | 52428800 | Max size of the summary store before front-truncation (50 MiB) |
187
+ | `prefetchEntries` | 50 | Summary entries injected as `[Prior session history]` on startup (0 disables) |
188
+
189
+ ## Key files
190
+
191
+ | File | Role |
192
+ |---|---|
193
+ | `src/shell/shell-context.ts` | Built-in: shell exchange capture, spill-to-tempfile on long outputs, `<shell_events>` per-query producer, `cwd` handler advisor |
194
+ | `src/utils/shell-output-spill.ts` | Per-pid session dir, cleanup on exit + signals, stale-dir sweep for crashed sessions |
195
+ | `src/agent/live-view.ts` | The live messages array the LLM sees; estimate/replace/link + API-grounded token accounting |
196
+ | `src/agent/nuclear-form.ts` | One-line-summary primitives (nucleate, serialize, priority classification) |
197
+ | `src/agent/store.ts` | `Store` interface + `SharedFileStore`: append-only JSONL with chunked search/tail-read + front-truncation |
198
+ | `src/agent/agent-loop.ts` | Auto-compact trigger, `conversation:*` handler definitions, `conversation:message-appended` emits |
199
+ | `src/agent/extensions/rolling-history/` | The built-in rolling-history extension: eager capture (`strategy.ts`), `conversation:compact` advisor, `conversation_recall` (`recall.ts`), `/history` command (`index.ts`) |
200
+ | `src/agent/index.ts` | `/compact` and `/context` slash commands registered when the ash backend starts |