agent-sh 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +66 -113
  2. package/dist/agent/agent-loop.d.ts +85 -0
  3. package/dist/agent/agent-loop.js +611 -0
  4. package/dist/agent/conversation-state.d.ts +27 -0
  5. package/dist/agent/conversation-state.js +59 -0
  6. package/dist/agent/index.d.ts +11 -0
  7. package/dist/agent/index.js +9 -0
  8. package/dist/agent/skills.d.ts +25 -0
  9. package/dist/agent/skills.js +186 -0
  10. package/dist/agent/subagent.d.ts +37 -0
  11. package/dist/agent/subagent.js +117 -0
  12. package/dist/agent/system-prompt.d.ts +14 -0
  13. package/dist/agent/system-prompt.js +98 -0
  14. package/dist/agent/tool-registry.d.ts +15 -0
  15. package/dist/agent/tool-registry.js +30 -0
  16. package/dist/agent/tools/bash.d.ts +7 -0
  17. package/dist/agent/tools/bash.js +62 -0
  18. package/dist/agent/tools/edit-file.d.ts +2 -0
  19. package/dist/agent/tools/edit-file.js +95 -0
  20. package/dist/agent/tools/glob.d.ts +2 -0
  21. package/dist/agent/tools/glob.js +55 -0
  22. package/dist/agent/tools/grep.d.ts +2 -0
  23. package/dist/agent/tools/grep.js +77 -0
  24. package/dist/agent/tools/list-skills.d.ts +2 -0
  25. package/dist/agent/tools/list-skills.js +28 -0
  26. package/dist/agent/tools/ls.d.ts +2 -0
  27. package/dist/agent/tools/ls.js +43 -0
  28. package/dist/agent/tools/read-file.d.ts +2 -0
  29. package/dist/agent/tools/read-file.js +55 -0
  30. package/dist/agent/tools/user-shell.d.ts +13 -0
  31. package/dist/agent/tools/user-shell.js +57 -0
  32. package/dist/agent/tools/write-file.d.ts +2 -0
  33. package/dist/agent/tools/write-file.js +74 -0
  34. package/dist/agent/types.d.ts +44 -0
  35. package/dist/agent/types.js +1 -0
  36. package/dist/core.d.ts +24 -14
  37. package/dist/core.js +260 -36
  38. package/dist/event-bus.d.ts +80 -14
  39. package/dist/event-bus.js +10 -1
  40. package/dist/extension-loader.js +12 -1
  41. package/dist/extensions/command-suggest.d.ts +10 -0
  42. package/dist/extensions/command-suggest.js +41 -0
  43. package/dist/extensions/slash-commands.d.ts +1 -1
  44. package/dist/extensions/slash-commands.js +161 -64
  45. package/dist/extensions/tui-renderer.js +90 -48
  46. package/dist/index.js +98 -122
  47. package/dist/input-handler.js +74 -7
  48. package/dist/output-parser.d.ts +7 -0
  49. package/dist/output-parser.js +27 -0
  50. package/dist/settings.d.ts +53 -2
  51. package/dist/settings.js +45 -2
  52. package/dist/shell.js +33 -26
  53. package/dist/types.d.ts +33 -6
  54. package/dist/utils/box-frame.d.ts +3 -1
  55. package/dist/utils/box-frame.js +12 -5
  56. package/dist/utils/llm-client.d.ts +45 -0
  57. package/dist/utils/llm-client.js +60 -0
  58. package/dist/utils/markdown.js +2 -2
  59. package/dist/utils/stream-transform.js +20 -47
  60. package/dist/utils/tool-display.js +15 -5
  61. package/examples/extensions/claude-code-bridge/README.md +35 -0
  62. package/examples/extensions/claude-code-bridge/index.ts +198 -0
  63. package/examples/extensions/claude-code-bridge/package.json +11 -0
  64. package/examples/extensions/openrouter.ts +87 -0
  65. package/examples/extensions/pi-bridge/README.md +35 -0
  66. package/examples/extensions/pi-bridge/index.ts +265 -0
  67. package/examples/extensions/pi-bridge/package.json +13 -0
  68. package/examples/extensions/subagents.ts +87 -0
  69. package/package.json +3 -5
  70. package/dist/acp-client.d.ts +0 -105
  71. package/dist/acp-client.js +0 -684
  72. package/dist/extensions/shell-exec.d.ts +0 -24
  73. package/dist/extensions/shell-exec.js +0 -188
  74. package/dist/mcp-server.d.ts +0 -13
  75. package/dist/mcp-server.js +0 -234
  76. package/examples/pi-agent-sh.ts +0 -166
package/README.md CHANGED
@@ -5,162 +5,115 @@
5
5
 
6
6
  Not a shell that lives in an agent — an agent that lives in a shell.
7
7
 
8
- agent-sh is a real terminal first. Every keystroke goes to a real PTY. `cd`, pipes, vim, job control — they all just work. But type `?` or `>` at the start of a line, and you're talking to an AI agent that has full context of what you've been doing: your working directory, recent commands, their output.
8
+ I live in a terminal. I don't want an agent that can run shell commands when it needs to — I want my shell, with an agent I can reach for when *I* need to. Most AI tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
9
9
 
10
- The agent connects via the [Agent Client Protocol (ACP)](https://agentclientprotocol.com/), so you can plug in **any** ACP-compatible agent: [pi](https://github.com/svkozak/pi-acp), claude-code, codex, gemini-cli, goose, etc.
10
+ agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `?` or `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
11
11
 
12
12
  ```
13
13
  ⚡ src $ ls -la # real shell command
14
14
  ⚡ src $ cd ../tests && npm test # real cd, env, aliases — all just work
15
15
  ⚡ src $ vim file.ts # opens vim in the same PTY
16
- ⚡ src $ ? explain the last error # query mode → agent investigates using its own tools
17
- ⚡ src $ > deploy to staging # execute mode → agent runs it in your live shell
16
+ ⚡ src $ > explain the last error # execute mode → agent investigates using its own tools
17
+ ⚡ src $ ? deploy to staging # help mode → agent runs it in your live shell
18
18
  ```
19
19
 
20
- ## Why shell-first?
21
-
22
- I live mostly in a terminal. I don't just want an agent that has access to my shell — I want a shell that has access to my agent.
20
+ ## Key Features
23
21
 
24
- Most AI coding tools get this backwards: the LLM drives the experience and the shell is bolted on. That means no real PTY, no job control, no interactive commands, and fragile `cd` tracking that reimplements what bash gives you for free.
22
+ **Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
25
23
 
26
- agent-sh starts from the opposite end. The shell is the primary interface — it's your terminal, not the agent's. The agent is a tool you reach for when you need it, not the other way around. Two modes give you fine-grained control: `?` for questions and tasks (agent uses its own tools), `>` for commands that run directly in your live shell.
24
+ **Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `? fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed.
27
25
 
28
- ### Why ACP?
26
+ **Two input modes.** `>` for questions and tasks — the agent investigates using its own isolated tools. `?` for commands that run directly in your live shell, affecting your real environment. The agent knows which mode it's in and behaves accordingly.
29
27
 
30
- The [Agent Client Protocol](https://agentclientprotocol.com/) decouples the shell from any specific agent:
28
+ **Any LLM, any backend.** Works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
31
29
 
32
- - **Pluggable agents** — swap between pi-acp, claude-code, codex with a CLI flag
33
- - **Standard protocol** — JSON-RPC 2.0 over stdio, well-specified capability negotiation
34
- - **Agent handles LLM details** — no API keys, tool definitions, or context windows to manage
35
- - **Terminal delegation** — ACP defines `terminal/create`, `terminal/output`, `terminal/wait_for_exit` — exactly what an agent needs to run commands in your shell
30
+ **Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension — nothing is special.
36
31
 
37
- ## Key Features
38
-
39
- - **Instant Start** — Shell starts immediately, no waiting for agent connection
40
- - **Smart Connection** — Agent connects asynchronously in the background
41
- - **Auto-Wait** — Queries automatically wait for agent to finish connecting
42
- - **Real-time Streaming** — Agent responses stream live with syntax highlighting
43
- - **Zero Latency** — Direct PTY access, full terminal compatibility
44
- - **Context Aware** — Agent sees your cwd, recent commands, and their output
45
- - **Dual Input Modes** — `?` for questions/tasks (agent tools), `>` for live shell execution
46
- - **Extensible Modes** — Extensions can register custom input modes with their own triggers
47
- - **Multiple Agents** — Easy switching between pi-acp, claude, and other ACP agents
48
- - **Inline Diff Preview** — File writes show syntax-highlighted diffs inline (Ctrl+O to expand)
49
- - **Thinking Display** — Toggle agent thinking/reasoning text with Ctrl+T
50
- - **Themeable** — Semantic color palette, swappable via [extensions](docs/extensions.md)
32
+ **Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
51
33
 
52
34
  ## Quick Start
53
35
 
54
36
  ```bash
55
- # 1. Install agent-sh and an ACP-compatible agent
56
- npm install -g agent-sh pi-acp
57
-
58
- # 2. Set API keys
59
- export ANTHROPIC_API_KEY="your-key"
60
-
61
- # 3. Start
62
- agent-sh # default agent (pi-acp)
63
- agent-sh --agent claude-agent-acp # use a different agent
37
+ npm install -g agent-sh
38
+ agent-sh
64
39
  ```
65
40
 
66
- Requires Node.js 18+. Other ACP agents: `npm install -g @agentclientprotocol/claude-agent-acp`
67
-
68
- > **Note**: The `claude` CLI tool (Claude Code) does **not** support ACP. Use `claude-agent-acp` or `pi-acp` with Anthropic models.
41
+ Set `OPENAI_API_KEY` in your environment (or configure providers in `~/.agent-sh/settings.json`). Works with any OpenAI-compatible API — see the [Usage Guide](docs/usage.md) for provider examples (OpenAI, Ollama, OpenRouter, Together, Groq, LM Studio, vLLM).
69
42
 
70
- See the [Usage Guide](docs/usage.md) for all options, model configuration, and environment variables.
43
+ Requires Node.js 18+.
71
44
 
72
45
  ## Input Modes
73
46
 
74
- agent-sh has two agent input modes, each triggered by a single character at the start of an empty line:
47
+ - **`>` Execute mode** — Agent uses its own tools (bash, file read/write, search) to investigate and answer. Stays in execute mode for follow-ups.
48
+ - **`?` Help mode** — Agent runs a command in your live shell. Your aliases, env vars, and cwd apply. Returns to shell after.
75
49
 
76
- | Trigger | Mode | Behavior |
77
- |---|---|---|
78
- | `?` | **Query** | Agent uses its own tools (bash, file read/write, search) to investigate and answer. Stays in query mode after each response. |
79
- | `>` | **Execute** | Agent runs a command in your live shell via `user_shell`. Your aliases, env vars, and cwd apply. Returns to shell after execution. |
50
+ Everything else works as a normal shell — commands go straight to the PTY. Modes are extensible — see [Extensions: Custom Input Modes](docs/extensions.md#custom-input-modes).
80
51
 
81
- Regular shell input works as before — commands go straight to the PTY:
82
-
83
- | Input | Behavior |
84
- |---|---|
85
- | `ls -la` | Runs in real shell (PTY), output displayed normally |
86
- | `cd src && make` | Real shell — cd, env, aliases all just work |
87
- | `vim file.ts` | Opens vim in the same PTY, no hacks needed |
88
- | `? refactor this fn` | Query mode — agent investigates and responds |
89
- | `> restart the server` | Execute mode — agent runs it in your live shell |
90
- | `? /help` | Shows available slash commands (works in either mode) |
91
- | `Ctrl-C` | Standard signal to shell, or cancels active agent response |
92
- | `Ctrl-O` | Expand/collapse truncated diff preview |
93
- | `Ctrl-T` | Toggle thinking/reasoning text display |
94
- | `Shift-Tab` | Cycle thinking level (off → minimal → low → medium → high → xhigh) |
95
- | `Escape` | Exit agent input mode |
96
-
97
- Modes are extensible — extensions can register new modes via the `input-mode:register` event (see [Extensions](docs/extensions.md#custom-input-modes)).
98
-
99
- ### Agent Input Keybindings
100
-
101
- When typing in either agent mode (`?` or `>`), full readline-style keybindings are available:
102
-
103
- | Key | Action |
104
- |---|---|
105
- | `↑` / `↓` | Browse query history (persisted across sessions) |
106
- | `Shift-Enter` | Insert newline (multiline input) |
107
- | `Shift-Tab` | Cycle thinking level |
108
- | `Ctrl-D` | Exit agent input mode (on empty line) |
109
- | `Ctrl-A` / `Home` | Move to start of line |
110
- | `Ctrl-E` / `End` | Move to end of line |
111
- | `Ctrl-B` / `←` | Move back one character |
112
- | `Ctrl-F` / `→` | Move forward one character |
113
- | `Option-B` / `Option-←` | Move back one word |
114
- | `Option-F` / `Option-→` | Move forward one word |
115
- | `Ctrl-U` | Delete to start of line |
116
- | `Ctrl-K` | Delete to end of line |
117
- | `Ctrl-W` / `Option-Backspace` | Delete word backward |
118
- | `Option-D` | Delete word forward |
119
-
120
- ### Thinking Level
121
-
122
- The agent prompt shows the current thinking level next to the model name, with a mode-specific indicator:
123
-
124
- ```
125
- pi (claude-sonnet-4-6) [medium] ❓ ❯ # query mode
126
- pi (claude-sonnet-4-6) [medium] ● ⟩ # execute mode
127
- ```
128
-
129
- Press **Shift-Tab** in agent input mode to cycle through levels. The levels are advertised by the agent via ACP session modes — different agents may offer different options. The spinner label reflects the mode: "Thinking" when thinking is enabled, "Working" when it's off.
52
+ > **Why `>` for the main mode?** `>` is easy to type and the most common interaction — asking the agent to do things. `?` is reserved for when you need the agent to run something directly in your live shell.
130
53
 
131
54
  ### Slash Commands
132
55
 
133
56
  | Command | Description |
134
57
  |---|---|
135
58
  | `/help` | Show available commands |
136
- | `/clear` | Start a new agent session |
137
- | `/copy` | Copy last agent response to clipboard |
138
- | `/compact` | Ask agent to summarize the conversation |
139
- | `/quit` | Exit agent-sh |
59
+ | `/model [name]` | Cycle to the next model, or switch to a specific one |
60
+ | `/backend [name]` | List backends, or switch to a named backend |
140
61
 
141
62
  ## Configuration
142
63
 
143
- agent-sh stores settings and history in `~/.agent-sh/`. Behavior is configurable via `~/.agent-sh/settings.json` — context window size, truncation thresholds, display limits, and more. All fields are optional with sensible defaults.
64
+ Configure via `~/.agent-sh/settings.json`. Define named providers with multiple models:
65
+
66
+ ```json
67
+ {
68
+ "defaultProvider": "openai",
69
+ "providers": {
70
+ "openai": {
71
+ "apiKey": "$OPENAI_API_KEY",
72
+ "defaultModel": "gpt-4o",
73
+ "models": ["gpt-4o", "gpt-4o-mini"]
74
+ },
75
+ "ollama": {
76
+ "apiKey": "not-needed",
77
+ "baseURL": "http://localhost:11434/v1",
78
+ "defaultModel": "llama3",
79
+ "models": ["llama3", "mistral"]
80
+ }
81
+ }
82
+ }
83
+ ```
144
84
 
145
- See the [Usage Guide](docs/usage.md#configuration) for the full settings reference.
85
+ Cycle models with **Shift+Tab**, switch providers with `/provider <name>`, switch backends with `/backend <name>`. API keys support `$ENV_VAR` syntax.
146
86
 
147
- ## Development
87
+ Additional options:
148
88
 
149
- ```bash
150
- npm run dev # development mode (no build step)
151
- npm run build # build
152
- npm start # run built version
153
- DEBUG=1 npm start # debug mode (logs ACP protocol details)
154
- ```
89
+ | Key | Default | Description |
90
+ |---|---|---|
91
+ | `startupBanner` | `true` | Show startup banner with model info and usage hints |
92
+ | `promptIndicator` | `true` | Show `⚡ agent-sh` in terminal tab/window title |
93
+
94
+ Set either to `false` to disable.
95
+
96
+ See the [Usage Guide](docs/usage.md#configuration) for the full settings reference.
155
97
 
156
98
  ## Documentation
157
99
 
158
- - [Usage Guide](docs/usage.md) — models, providers, API keys, environment config
159
- - [Architecture](docs/architecture.md) — design philosophy, protocol details, project structure
160
- - [Extensions](docs/extensions.md) — writing extensions, theming, yolo mode
161
- - [Library Usage](docs/library.md) — using agent-sh as a Node.js library
100
+ - [Usage Guide](docs/usage.md) — providers, models, configuration, provider profiles
101
+ - [Internal Agent](docs/agent.md) — how the agent loop works: tools, context, streaming
102
+ - [Architecture](docs/architecture.md) — design philosophy, component overview, project structure
103
+ - [Extensions](docs/extensions.md) — event bus, content transforms, custom backends, theming
104
+ - [Library Usage](docs/library.md) — embedding agent-sh in your own apps
162
105
  - [Troubleshooting](docs/troubleshooting.md) — common errors and debug mode
163
106
 
107
+ ## Development
108
+
109
+ ```bash
110
+ git clone https://github.com/guanyilun/agent-sh.git
111
+ cd agent-sh
112
+ npm install
113
+ npm run build
114
+ npm start
115
+ ```
116
+
164
117
  ## License
165
118
 
166
119
  MIT
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Internal agent backend — bus-driven, self-wiring.
3
+ *
4
+ * Subscribes to bus events in constructor:
5
+ * - agent:submit → run query through LLM tool loop
6
+ * - agent:cancel-request → abort current loop
7
+ * - config:cycle → cycle through modes
8
+ *
9
+ * Emits bus events during execution:
10
+ * - agent:query, agent:processing-start/done, agent:response-chunk/done
11
+ * - agent:tool-started, agent:tool-call, agent:tool-output-chunk,
12
+ * agent:tool-completed, agent:tool-output
13
+ * - agent:thinking-chunk, agent:cancelled, agent:error
14
+ */
15
+ import type { EventBus } from "../event-bus.js";
16
+ import type { AgentMode } from "../types.js";
17
+ import type { ContextManager } from "../context-manager.js";
18
+ import type { LlmClient } from "../utils/llm-client.js";
19
+ import type { HandlerRegistry } from "../utils/handler-registry.js";
20
+ import type { AgentBackend, ToolDefinition } from "./types.js";
21
+ export declare class AgentLoop implements AgentBackend {
22
+ private bus;
23
+ private contextManager;
24
+ private llmClient;
25
+ private handlers;
26
+ private abortController;
27
+ private toolRegistry;
28
+ private conversation;
29
+ private modes;
30
+ private currentModeIndex;
31
+ private boundListeners;
32
+ private lastProjectSkillNames;
33
+ private static readonly THINKING_LEVELS;
34
+ private thinkingLevel;
35
+ constructor(bus: EventBus, contextManager: ContextManager, llmClient: LlmClient, handlers: HandlerRegistry, modeConfig?: AgentMode[], initialModeIndex?: number);
36
+ /** Subscribe to bus events — activates this backend. */
37
+ wire(): void;
38
+ /** Unsubscribe from bus events — deactivates this backend. */
39
+ unwire(): void;
40
+ /** Register a tool (used by extensions via ctx.registerTool). */
41
+ registerTool(tool: ToolDefinition): void;
42
+ /** Get all registered tools. */
43
+ getTools(): ToolDefinition[];
44
+ kill(): void;
45
+ private cancel;
46
+ /** Check if reasoning_effort should be sent for the current model/provider. */
47
+ private shouldSendReasoningEffort;
48
+ private cycleMode;
49
+ private get currentMode();
50
+ private get currentModel();
51
+ private isContextOverflow;
52
+ /** Check if an error is retryable (transient). */
53
+ private isRetryable;
54
+ /** Extract retry delay from error headers or use exponential backoff. */
55
+ private getRetryDelay;
56
+ /** Format an error with provider context for user-facing display. */
57
+ private formatError;
58
+ private registerCoreTools;
59
+ /**
60
+ * Register named handlers that extensions can advise.
61
+ * Only high-power use cases where multiple extensions compose.
62
+ */
63
+ private registerHandlers;
64
+ private handleQuery;
65
+ /** Max tokens before auto-compaction (conservative default). */
66
+ private maxContextTokens;
67
+ /**
68
+ * Core agent loop: stream LLM response → execute tools → repeat.
69
+ * Returns the final accumulated response text.
70
+ */
71
+ private executeLoop;
72
+ private readonly maxRetries;
73
+ /**
74
+ * Stream with retry logic. Handles:
75
+ * - Context overflow → compact and retry
76
+ * - Rate limits (429) → backoff with Retry-After
77
+ * - Transient errors (500/502/503, network) → exponential backoff
78
+ */
79
+ private streamWithRetry;
80
+ /**
81
+ * Stream a single LLM response. Returns accumulated text, parsed tool calls,
82
+ * and the raw assistant message data for conversation recording.
83
+ */
84
+ private streamResponse;
85
+ }