agent-sh 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -115
- package/dist/agent/agent-loop.d.ts +86 -0
- package/dist/agent/agent-loop.js +704 -0
- package/dist/agent/conversation-state.d.ts +27 -0
- package/dist/agent/conversation-state.js +59 -0
- package/dist/agent/index.d.ts +11 -0
- package/dist/agent/index.js +9 -0
- package/dist/agent/skills.d.ts +25 -0
- package/dist/agent/skills.js +186 -0
- package/dist/agent/subagent.d.ts +37 -0
- package/dist/agent/subagent.js +119 -0
- package/dist/agent/system-prompt.d.ts +14 -0
- package/dist/agent/system-prompt.js +103 -0
- package/dist/agent/tool-registry.d.ts +15 -0
- package/dist/agent/tool-registry.js +30 -0
- package/dist/agent/tools/bash.d.ts +7 -0
- package/dist/agent/tools/bash.js +71 -0
- package/dist/agent/tools/display.d.ts +13 -0
- package/dist/agent/tools/display.js +70 -0
- package/dist/agent/tools/edit-file.d.ts +2 -0
- package/dist/agent/tools/edit-file.js +148 -0
- package/dist/agent/tools/glob.d.ts +2 -0
- package/dist/agent/tools/glob.js +87 -0
- package/dist/agent/tools/grep.d.ts +2 -0
- package/dist/agent/tools/grep.js +168 -0
- package/dist/agent/tools/list-skills.d.ts +2 -0
- package/dist/agent/tools/list-skills.js +28 -0
- package/dist/agent/tools/ls.d.ts +2 -0
- package/dist/agent/tools/ls.js +72 -0
- package/dist/agent/tools/read-file.d.ts +10 -0
- package/dist/agent/tools/read-file.js +101 -0
- package/dist/agent/tools/user-shell.d.ts +13 -0
- package/dist/agent/tools/user-shell.js +84 -0
- package/dist/agent/tools/write-file.d.ts +2 -0
- package/dist/agent/tools/write-file.js +82 -0
- package/dist/agent/types.d.ts +78 -0
- package/dist/agent/types.js +1 -0
- package/dist/core.d.ts +22 -14
- package/dist/core.js +256 -36
- package/dist/event-bus.d.ts +98 -17
- package/dist/event-bus.js +10 -1
- package/dist/extension-loader.d.ts +1 -1
- package/dist/extension-loader.js +10 -1
- package/dist/extensions/command-suggest.d.ts +10 -0
- package/dist/extensions/command-suggest.js +41 -0
- package/dist/extensions/slash-commands.d.ts +1 -1
- package/dist/extensions/slash-commands.js +161 -64
- package/dist/extensions/tui-renderer.js +426 -126
- package/dist/index.js +110 -129
- package/dist/input-handler.js +78 -9
- package/dist/output-parser.d.ts +7 -0
- package/dist/output-parser.js +27 -0
- package/dist/settings.d.ts +53 -2
- package/dist/settings.js +46 -3
- package/dist/shell.js +35 -28
- package/dist/types.d.ts +33 -6
- package/dist/utils/box-frame.d.ts +3 -1
- package/dist/utils/box-frame.js +12 -5
- package/dist/utils/diff.js +10 -0
- package/dist/utils/llm-client.d.ts +45 -0
- package/dist/utils/llm-client.js +60 -0
- package/dist/utils/markdown.d.ts +1 -0
- package/dist/utils/markdown.js +25 -3
- package/dist/utils/stream-transform.js +20 -47
- package/dist/utils/tool-display.d.ts +4 -0
- package/dist/utils/tool-display.js +35 -8
- package/examples/extensions/claude-code-bridge/README.md +35 -0
- package/examples/extensions/claude-code-bridge/index.ts +194 -0
- package/examples/extensions/claude-code-bridge/package.json +11 -0
- package/examples/extensions/openrouter.ts +87 -0
- package/examples/extensions/pi-bridge/README.md +35 -0
- package/examples/extensions/pi-bridge/index.ts +263 -0
- package/examples/extensions/pi-bridge/package.json +13 -0
- package/examples/extensions/secret-guard.ts +100 -0
- package/examples/extensions/subagents.ts +87 -0
- package/package.json +3 -5
- package/dist/acp-client.d.ts +0 -105
- package/dist/acp-client.js +0 -684
- package/dist/extensions/shell-exec.d.ts +0 -24
- package/dist/extensions/shell-exec.js +0 -188
- package/dist/mcp-server.d.ts +0 -13
- package/dist/mcp-server.js +0 -234
- package/examples/pi-agent-sh.ts +0 -166
package/README.md
CHANGED
|
@@ -5,162 +5,84 @@
|
|
|
5
5
|
|
|
6
6
|
Not a shell that lives in an agent — an agent that lives in a shell.
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
I live in a terminal. I don't want an agent that can run shell commands when it needs to — I want my shell, with an agent I can reach for when *I* need to. Most AI tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
|
|
11
11
|
|
|
12
12
|
```
|
|
13
13
|
⚡ src $ ls -la # real shell command
|
|
14
14
|
⚡ src $ cd ../tests && npm test # real cd, env, aliases — all just work
|
|
15
15
|
⚡ src $ vim file.ts # opens vim in the same PTY
|
|
16
|
-
⚡ src $
|
|
17
|
-
⚡ src $ > deploy to staging #
|
|
16
|
+
⚡ src $ > explain the last error # agent investigates using its own tools
|
|
17
|
+
⚡ src $ > deploy to staging # agent runs it in your live shell
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
-
##
|
|
21
|
-
|
|
22
|
-
I live mostly in a terminal. I don't just want an agent that has access to my shell — I want a shell that has access to my agent.
|
|
23
|
-
|
|
24
|
-
Most AI coding tools get this backwards: the LLM drives the experience and the shell is bolted on. That means no real PTY, no job control, no interactive commands, and fragile `cd` tracking that reimplements what bash gives you for free.
|
|
20
|
+
## Key Features
|
|
25
21
|
|
|
26
|
-
|
|
22
|
+
**Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
|
|
27
23
|
|
|
28
|
-
|
|
24
|
+
**Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed.
|
|
29
25
|
|
|
30
|
-
|
|
26
|
+
**Agent decides how to help.** One entry point (`>`), three tool categories. The agent uses scratchpad tools to investigate, `display` to show you output, and `user_shell` for commands with lasting effects. No need to pick a mode — the agent reasons about which tools to use based on your intent.
|
|
31
27
|
|
|
32
|
-
-
|
|
33
|
-
- **Standard protocol** — JSON-RPC 2.0 over stdio, well-specified capability negotiation
|
|
34
|
-
- **Agent handles LLM details** — no API keys, tool definitions, or context windows to manage
|
|
35
|
-
- **Terminal delegation** — ACP defines `terminal/create`, `terminal/output`, `terminal/wait_for_exit` — exactly what an agent needs to run commands in your shell
|
|
28
|
+
**Any LLM, any backend.** Works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
|
|
36
29
|
|
|
37
|
-
|
|
30
|
+
**Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension — nothing is special.
|
|
38
31
|
|
|
39
|
-
|
|
40
|
-
- **Smart Connection** — Agent connects asynchronously in the background
|
|
41
|
-
- **Auto-Wait** — Queries automatically wait for agent to finish connecting
|
|
42
|
-
- **Real-time Streaming** — Agent responses stream live with syntax highlighting
|
|
43
|
-
- **Zero Latency** — Direct PTY access, full terminal compatibility
|
|
44
|
-
- **Context Aware** — Agent sees your cwd, recent commands, and their output
|
|
45
|
-
- **Dual Input Modes** — `?` for questions/tasks (agent tools), `>` for live shell execution
|
|
46
|
-
- **Extensible Modes** — Extensions can register custom input modes with their own triggers
|
|
47
|
-
- **Multiple Agents** — Easy switching between pi-acp, claude, and other ACP agents
|
|
48
|
-
- **Inline Diff Preview** — File writes show syntax-highlighted diffs inline (Ctrl+O to expand)
|
|
49
|
-
- **Thinking Display** — Toggle agent thinking/reasoning text with Ctrl+T
|
|
50
|
-
- **Themeable** — Semantic color palette, swappable via [extensions](docs/extensions.md)
|
|
32
|
+
**Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
|
|
51
33
|
|
|
52
34
|
## Quick Start
|
|
53
35
|
|
|
54
36
|
```bash
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
# 2. Set API keys
|
|
59
|
-
export ANTHROPIC_API_KEY="your-key"
|
|
60
|
-
|
|
61
|
-
# 3. Start
|
|
62
|
-
agent-sh # default agent (pi-acp)
|
|
63
|
-
agent-sh --agent claude-agent-acp # use a different agent
|
|
37
|
+
npm install -g agent-sh
|
|
38
|
+
agent-sh
|
|
64
39
|
```
|
|
65
40
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
> **Note**: The `claude` CLI tool (Claude Code) does **not** support ACP. Use `claude-agent-acp` or `pi-acp` with Anthropic models.
|
|
41
|
+
Set `OPENAI_API_KEY` in your environment (or configure providers in `~/.agent-sh/settings.json`). Works with any OpenAI-compatible API — see the [Usage Guide](docs/usage.md) for provider examples (OpenAI, Ollama, OpenRouter, Together, Groq, LM Studio, vLLM).
|
|
69
42
|
|
|
70
|
-
|
|
43
|
+
Requires Node.js 18+.
|
|
71
44
|
|
|
72
|
-
##
|
|
45
|
+
## Agent Mode
|
|
73
46
|
|
|
74
|
-
|
|
47
|
+
Type `>` at the start of a line to talk to the agent. The agent decides how to help:
|
|
75
48
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
| `>` | **Execute** | Agent runs a command in your live shell via `user_shell`. Your aliases, env vars, and cwd apply. Returns to shell after execution. |
|
|
49
|
+
- **Scratchpad tools** (`bash`, `read_file`, `grep`, `glob`, etc.) — for investigation. Output goes to the agent, not your terminal.
|
|
50
|
+
- **`display`** — shows output in your terminal (e.g. `cat`, `git log`). You see it; the agent doesn't process it.
|
|
51
|
+
- **`user_shell`** — runs commands with lasting effects (`cd`, `npm install`, etc.) in your live shell.
|
|
80
52
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
| Input | Behavior |
|
|
84
|
-
|---|---|
|
|
85
|
-
| `ls -la` | Runs in real shell (PTY), output displayed normally |
|
|
86
|
-
| `cd src && make` | Real shell — cd, env, aliases all just work |
|
|
87
|
-
| `vim file.ts` | Opens vim in the same PTY, no hacks needed |
|
|
88
|
-
| `? refactor this fn` | Query mode — agent investigates and responds |
|
|
89
|
-
| `> restart the server` | Execute mode — agent runs it in your live shell |
|
|
90
|
-
| `? /help` | Shows available slash commands (works in either mode) |
|
|
91
|
-
| `Ctrl-C` | Standard signal to shell, or cancels active agent response |
|
|
92
|
-
| `Ctrl-O` | Expand/collapse truncated diff preview |
|
|
93
|
-
| `Ctrl-T` | Toggle thinking/reasoning text display |
|
|
94
|
-
| `Shift-Tab` | Cycle thinking level (off → minimal → low → medium → high → xhigh) |
|
|
95
|
-
| `Escape` | Exit agent input mode |
|
|
96
|
-
|
|
97
|
-
Modes are extensible — extensions can register new modes via the `input-mode:register` event (see [Extensions](docs/extensions.md#custom-input-modes)).
|
|
98
|
-
|
|
99
|
-
### Agent Input Keybindings
|
|
100
|
-
|
|
101
|
-
When typing in either agent mode (`?` or `>`), full readline-style keybindings are available:
|
|
102
|
-
|
|
103
|
-
| Key | Action |
|
|
104
|
-
|---|---|
|
|
105
|
-
| `↑` / `↓` | Browse query history (persisted across sessions) |
|
|
106
|
-
| `Shift-Enter` | Insert newline (multiline input) |
|
|
107
|
-
| `Shift-Tab` | Cycle thinking level |
|
|
108
|
-
| `Ctrl-D` | Exit agent input mode (on empty line) |
|
|
109
|
-
| `Ctrl-A` / `Home` | Move to start of line |
|
|
110
|
-
| `Ctrl-E` / `End` | Move to end of line |
|
|
111
|
-
| `Ctrl-B` / `←` | Move back one character |
|
|
112
|
-
| `Ctrl-F` / `→` | Move forward one character |
|
|
113
|
-
| `Option-B` / `Option-←` | Move back one word |
|
|
114
|
-
| `Option-F` / `Option-→` | Move forward one word |
|
|
115
|
-
| `Ctrl-U` | Delete to start of line |
|
|
116
|
-
| `Ctrl-K` | Delete to end of line |
|
|
117
|
-
| `Ctrl-W` / `Option-Backspace` | Delete word backward |
|
|
118
|
-
| `Option-D` | Delete word forward |
|
|
119
|
-
|
|
120
|
-
### Thinking Level
|
|
121
|
-
|
|
122
|
-
The agent prompt shows the current thinking level next to the model name, with a mode-specific indicator:
|
|
123
|
-
|
|
124
|
-
```
|
|
125
|
-
pi (claude-sonnet-4-6) [medium] ❓ ❯ # query mode
|
|
126
|
-
pi (claude-sonnet-4-6) [medium] ● ⟩ # execute mode
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
Press **Shift-Tab** in agent input mode to cycle through levels. The levels are advertised by the agent via ACP session modes — different agents may offer different options. The spinner label reflects the mode: "Thinking" when thinking is enabled, "Working" when it's off.
|
|
53
|
+
Everything else works as a normal shell — commands go straight to the PTY. Input modes are extensible — see [Extensions: Custom Input Modes](docs/extensions.md#custom-input-modes).
|
|
130
54
|
|
|
131
55
|
### Slash Commands
|
|
132
56
|
|
|
133
57
|
| Command | Description |
|
|
134
58
|
|---|---|
|
|
135
59
|
| `/help` | Show available commands |
|
|
136
|
-
| `/
|
|
137
|
-
| `/
|
|
138
|
-
| `/compact` | Ask agent to summarize the conversation |
|
|
139
|
-
| `/quit` | Exit agent-sh |
|
|
60
|
+
| `/model [name]` | Cycle to the next model, or switch to a specific one |
|
|
61
|
+
| `/backend [name]` | List backends, or switch to a named backend |
|
|
140
62
|
|
|
141
63
|
## Configuration
|
|
142
64
|
|
|
143
|
-
|
|
65
|
+
Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference (providers, models, extensions, skills, and more).
|
|
66
|
+
|
|
67
|
+
## Documentation
|
|
144
68
|
|
|
145
|
-
|
|
69
|
+
- [Usage Guide](docs/usage.md) — providers, models, configuration, provider profiles
|
|
70
|
+
- [Internal Agent](docs/agent.md) — how the agent loop works: tools, context, streaming
|
|
71
|
+
- [Architecture](docs/architecture.md) — design philosophy, component overview, project structure
|
|
72
|
+
- [Extensions](docs/extensions.md) — event bus, content transforms, custom backends, theming
|
|
73
|
+
- [Library Usage](docs/library.md) — embedding agent-sh in your own apps
|
|
74
|
+
- [Troubleshooting](docs/troubleshooting.md) — common errors and debug mode
|
|
146
75
|
|
|
147
76
|
## Development
|
|
148
77
|
|
|
149
78
|
```bash
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
npm
|
|
153
|
-
|
|
79
|
+
git clone https://github.com/guanyilun/agent-sh.git
|
|
80
|
+
cd agent-sh
|
|
81
|
+
npm install
|
|
82
|
+
npm run build
|
|
83
|
+
npm start
|
|
154
84
|
```
|
|
155
85
|
|
|
156
|
-
## Documentation
|
|
157
|
-
|
|
158
|
-
- [Usage Guide](docs/usage.md) — models, providers, API keys, environment config
|
|
159
|
-
- [Architecture](docs/architecture.md) — design philosophy, protocol details, project structure
|
|
160
|
-
- [Extensions](docs/extensions.md) — writing extensions, theming, yolo mode
|
|
161
|
-
- [Library Usage](docs/library.md) — using agent-sh as a Node.js library
|
|
162
|
-
- [Troubleshooting](docs/troubleshooting.md) — common errors and debug mode
|
|
163
|
-
|
|
164
86
|
## License
|
|
165
87
|
|
|
166
88
|
MIT
|
|
package/dist/agent/agent-loop.d.ts
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Internal agent backend — bus-driven, self-wiring.
|
|
3
|
+
*
|
|
4
|
+
* Subscribes to bus events in constructor:
|
|
5
|
+
* - agent:submit → run query through LLM tool loop
|
|
6
|
+
* - agent:cancel-request → abort current loop
|
|
7
|
+
* - config:cycle → cycle through modes
|
|
8
|
+
*
|
|
9
|
+
* Emits bus events during execution:
|
|
10
|
+
* - agent:query, agent:processing-start/done, agent:response-chunk/done
|
|
11
|
+
* - agent:tool-started, agent:tool-call, agent:tool-output-chunk,
|
|
12
|
+
* agent:tool-completed, agent:tool-output
|
|
13
|
+
* - agent:thinking-chunk, agent:cancelled, agent:error
|
|
14
|
+
*/
|
|
15
|
+
import type { EventBus } from "../event-bus.js";
|
|
16
|
+
import type { AgentMode } from "../types.js";
|
|
17
|
+
import type { ContextManager } from "../context-manager.js";
|
|
18
|
+
import type { LlmClient } from "../utils/llm-client.js";
|
|
19
|
+
import type { HandlerRegistry } from "../utils/handler-registry.js";
|
|
20
|
+
import type { AgentBackend, ToolDefinition } from "./types.js";
|
|
21
|
+
export declare class AgentLoop implements AgentBackend {
|
|
22
|
+
private bus;
|
|
23
|
+
private contextManager;
|
|
24
|
+
private llmClient;
|
|
25
|
+
private handlers;
|
|
26
|
+
private abortController;
|
|
27
|
+
private toolRegistry;
|
|
28
|
+
private conversation;
|
|
29
|
+
private fileReadCache;
|
|
30
|
+
private modes;
|
|
31
|
+
private currentModeIndex;
|
|
32
|
+
private boundListeners;
|
|
33
|
+
private lastProjectSkillNames;
|
|
34
|
+
private static readonly THINKING_LEVELS;
|
|
35
|
+
private thinkingLevel;
|
|
36
|
+
constructor(bus: EventBus, contextManager: ContextManager, llmClient: LlmClient, handlers: HandlerRegistry, modeConfig?: AgentMode[], initialModeIndex?: number);
|
|
37
|
+
/** Subscribe to bus events — activates this backend. */
|
|
38
|
+
wire(): void;
|
|
39
|
+
/** Unsubscribe from bus events — deactivates this backend. */
|
|
40
|
+
unwire(): void;
|
|
41
|
+
/** Register a tool (used by extensions via ctx.registerTool). */
|
|
42
|
+
registerTool(tool: ToolDefinition): void;
|
|
43
|
+
/** Get all registered tools. */
|
|
44
|
+
getTools(): ToolDefinition[];
|
|
45
|
+
kill(): void;
|
|
46
|
+
private cancel;
|
|
47
|
+
/** Check if reasoning_effort should be sent for the current model/provider. */
|
|
48
|
+
private shouldSendReasoningEffort;
|
|
49
|
+
private cycleMode;
|
|
50
|
+
private get currentMode();
|
|
51
|
+
private get currentModel();
|
|
52
|
+
private isContextOverflow;
|
|
53
|
+
/** Check if an error is retryable (transient). */
|
|
54
|
+
private isRetryable;
|
|
55
|
+
/** Extract retry delay from error headers or use exponential backoff. */
|
|
56
|
+
private getRetryDelay;
|
|
57
|
+
/** Format an error with provider context for user-facing display. */
|
|
58
|
+
private formatError;
|
|
59
|
+
private registerCoreTools;
|
|
60
|
+
/**
|
|
61
|
+
* Register named handlers that extensions can advise.
|
|
62
|
+
* Only high-power use cases where multiple extensions compose.
|
|
63
|
+
*/
|
|
64
|
+
private registerHandlers;
|
|
65
|
+
private handleQuery;
|
|
66
|
+
/** Max tokens before auto-compaction (conservative default). */
|
|
67
|
+
private maxContextTokens;
|
|
68
|
+
/**
|
|
69
|
+
* Core agent loop: stream LLM response → execute tools → repeat.
|
|
70
|
+
* Returns the final accumulated response text.
|
|
71
|
+
*/
|
|
72
|
+
private executeLoop;
|
|
73
|
+
private readonly maxRetries;
|
|
74
|
+
/**
|
|
75
|
+
* Stream with retry logic. Handles:
|
|
76
|
+
* - Context overflow → compact and retry
|
|
77
|
+
* - Rate limits (429) → backoff with Retry-After
|
|
78
|
+
* - Transient errors (500/502/503, network) → exponential backoff
|
|
79
|
+
*/
|
|
80
|
+
private streamWithRetry;
|
|
81
|
+
/**
|
|
82
|
+
* Stream a single LLM response. Returns accumulated text, parsed tool calls,
|
|
83
|
+
* and the raw assistant message data for conversation recording.
|
|
84
|
+
*/
|
|
85
|
+
private streamResponse;
|
|
86
|
+
}
|