agent-sh 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import type { EventBus } from "./event-bus.js";
2
+ import type { HandlerRegistry } from "./utils/handler-registry.js";
2
3
  export declare class ContextManager {
3
4
  private exchanges;
4
5
  private nextId;
@@ -7,7 +8,8 @@ export declare class ContextManager {
7
8
  private pendingToolCalls;
8
9
  private firstPrompt;
9
10
  private agentShellActive;
10
- constructor(bus: EventBus);
11
+ private handlers;
12
+ constructor(bus: EventBus, handlers?: HandlerRegistry);
11
13
  getCwd(): string;
12
14
  /**
13
15
  * Build the <shell_context> block for the agent prompt.
@@ -13,7 +13,13 @@ export class ContextManager {
13
13
  pendingToolCalls = [];
14
14
  firstPrompt = true;
15
15
  agentShellActive = false; // true while user_shell command is executing
16
- constructor(bus) {
16
+ handlers = null;
17
+ constructor(bus, handlers) {
18
+ if (handlers) {
19
+ this.handlers = handlers;
20
+ // Extensions can advise this to inject extra context (e.g. terminal buffer)
21
+ handlers.define("context:build-extra", () => "");
22
+ }
17
23
  this.currentCwd = process.cwd();
18
24
  this.sessionStart = Date.now();
19
25
  // ── Subscribe to shell events ──
@@ -291,6 +297,10 @@ export class ContextManager {
291
297
  for (const ex of exchanges) {
292
298
  out += "\n" + this.formatExchangeTruncated(ex);
293
299
  }
300
+ // Allow extensions to inject extra context (e.g. terminal buffer snapshot)
301
+ const extra = this.handlers?.call("context:build-extra");
302
+ if (extra)
303
+ out += "\n" + extra + "\n";
294
304
  out += "\n</shell_context>\n";
295
305
  return out;
296
306
  }
package/dist/core.js CHANGED
@@ -25,6 +25,8 @@ import * as streamTransform from "./utils/stream-transform.js";
25
25
  import * as settingsMod from "./settings.js";
26
26
  import { resolveProvider, getProviderNames } from "./settings.js";
27
27
  import { HandlerRegistry } from "./utils/handler-registry.js";
28
+ import { TerminalBuffer } from "./utils/terminal-buffer.js";
29
+ import { FloatingPanel } from "./utils/floating-panel.js";
28
30
  // Re-export types that library consumers need
29
31
  export { EventBus } from "./event-bus.js";
30
32
  export { palette, setPalette, resetPalette } from "./utils/palette.js";
@@ -33,7 +35,7 @@ export { LlmClient } from "./utils/llm-client.js";
33
35
  export function createCore(config) {
34
36
  const bus = new EventBus();
35
37
  const handlers = new HandlerRegistry();
36
- const contextManager = new ContextManager(bus);
38
+ const contextManager = new ContextManager(bus, handlers);
37
39
  // ── Resolve provider ─────────────────────────────────────────
38
40
  const settings = settingsMod.getSettings();
39
41
  let activeProvider = null;
@@ -216,6 +218,14 @@ export function createCore(config) {
216
218
  bus.emit("ui:info", { message: `Switched to ${name} (${switchModel})` });
217
219
  bus.emit("config:changed", {});
218
220
  });
221
+ // ── Lazy singleton terminal buffer ──────────────────────────
222
+ let terminalBufferSingleton; // undefined = not yet created
223
+ const getTerminalBuffer = () => {
224
+ if (terminalBufferSingleton !== undefined)
225
+ return terminalBufferSingleton;
226
+ terminalBufferSingleton = TerminalBuffer.createWired(bus);
227
+ return terminalBufferSingleton;
228
+ };
219
229
  return {
220
230
  bus,
221
231
  contextManager,
@@ -292,6 +302,11 @@ export function createCore(config) {
292
302
  define: (name, fn) => handlers.define(name, fn),
293
303
  advise: (name, wrapper) => handlers.advise(name, wrapper),
294
304
  call: (name, ...args) => handlers.call(name, ...args),
305
+ get terminalBuffer() { return getTerminalBuffer(); },
306
+ createFloatingPanel: (config) => {
307
+ const tb = config.dimBackground !== false ? getTerminalBuffer() : null;
308
+ return new FloatingPanel(bus, { ...config, terminalBuffer: tb ?? undefined });
309
+ },
295
310
  };
296
311
  },
297
312
  kill() {
@@ -22,6 +22,21 @@ export interface ShellEvents {
22
22
  };
23
23
  "shell:agent-exec-start": Record<string, never>;
24
24
  "shell:agent-exec-done": Record<string, never>;
25
+ "shell:pty-data": {
26
+ raw: string;
27
+ };
28
+ "shell:pty-write": {
29
+ data: string;
30
+ };
31
+ "shell:buffer-request": Record<string, never>;
32
+ "shell:buffer-snapshot": {
33
+ text: string;
34
+ altScreen: boolean;
35
+ cursor: {
36
+ x: number;
37
+ y: number;
38
+ };
39
+ };
25
40
  "agent:submit": {
26
41
  query: string;
27
42
  };
@@ -123,6 +138,14 @@ export interface ShellEvents {
123
138
  "input:keypress": {
124
139
  key: string;
125
140
  };
141
+ "input:intercept": {
142
+ data: string;
143
+ consumed: boolean;
144
+ };
145
+ "shell:stdout-hold": Record<string, never>;
146
+ "shell:stdout-release": Record<string, never>;
147
+ "shell:stdout-show": Record<string, never>;
148
+ "shell:stdout-hide": Record<string, never>;
126
149
  "agent:terminal-intercept": {
127
150
  command: string;
128
151
  cwd: string;
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Built-in overlay agent.
3
+ *
4
+ * Provides a hotkey (Ctrl+\) to summon the agent from anywhere — even
5
+ * inside vim, htop, or ssh. Composites a floating response box on top
6
+ * of the current terminal content.
7
+ *
8
+ * Requires: npm install @xterm/headless@5.5.0 @xterm/addon-serialize@0.13.0
9
+ */
10
+ import type { ExtensionContext } from "../types.js";
11
+ export default function activate({ bus, createFloatingPanel }: ExtensionContext): void;
@@ -0,0 +1,43 @@
1
+ const BOLD = "\x1b[1m";
2
+ const CYAN = "\x1b[36m";
3
+ const RESET = "\x1b[0m";
4
+ export default function activate({ bus, createFloatingPanel }) {
5
+ const panel = createFloatingPanel({
6
+ trigger: "\x1c", // Ctrl+\
7
+ dimBackground: true,
8
+ autoDismissMs: 2000,
9
+ });
10
+ // ── Panel lifecycle ────────────────────────────────────────
11
+ panel.handlers.advise("panel:submit", (_next, query) => {
12
+ panel.setActive();
13
+ panel.appendLine(`${CYAN}${BOLD}❯${RESET} ${query}`);
14
+ panel.appendLine("");
15
+ bus.emit("agent:submit", { query });
16
+ });
17
+ // ── Stream agent response into panel ───────────────────────
18
+ bus.on("agent:response-chunk", (e) => {
19
+ if (!panel.active)
20
+ return;
21
+ for (const block of e.blocks) {
22
+ if (block.type === "text" && block.text) {
23
+ panel.appendText(block.text);
24
+ }
25
+ }
26
+ });
27
+ bus.on("agent:tool-started", (e) => {
28
+ if (!panel.active)
29
+ return;
30
+ panel.appendLine(`▶ ${e.title}${e.displayDetail ? " " + e.displayDetail : ""}`);
31
+ });
32
+ bus.on("agent:tool-completed", (e) => {
33
+ if (!panel.active)
34
+ return;
35
+ const mark = e.exitCode === 0 ? " ✓" : ` ✗ exit ${e.exitCode}`;
36
+ panel.updateLastLine((line) => line + mark);
37
+ });
38
+ bus.on("agent:processing-done", () => {
39
+ if (!panel.active)
40
+ return;
41
+ panel.setDone();
42
+ });
43
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Built-in terminal buffer extension.
3
+ *
4
+ * Registers two agent tools:
5
+ * - terminal_read: get the current screen contents + cursor position
6
+ * - terminal_keys: send raw keystrokes into the user's live PTY
7
+ *
8
+ * Together these let the agent operate inside interactive programs
9
+ * (vim, htop, less, etc.) by reading the screen and typing keys.
10
+ *
11
+ * Requires: npm install @xterm/headless@5.5.0 @xterm/addon-serialize@0.13.0
12
+ */
13
+ import type { ExtensionContext } from "../types.js";
14
+ export default function activate({ bus, terminalBuffer: tb, registerTool }: ExtensionContext): void;
@@ -0,0 +1,120 @@
1
+ /** Interpret C-style escape sequences (e.g. \r → CR, \x1b → ESC). */
2
+ function interpretEscapes(str) {
3
+ return str.replace(/\\(x[0-9a-fA-F]{2}|r|n|t|\\|0)/g, (_, seq) => {
4
+ if (seq === "r")
5
+ return "\r";
6
+ if (seq === "n")
7
+ return "\n";
8
+ if (seq === "t")
9
+ return "\t";
10
+ if (seq === "\\")
11
+ return "\\";
12
+ if (seq === "0")
13
+ return "\0";
14
+ if (seq.startsWith("x"))
15
+ return String.fromCharCode(parseInt(seq.slice(1), 16));
16
+ return seq;
17
+ });
18
+ }
19
+ function settle(ms = 100) {
20
+ return new Promise((resolve) => setTimeout(resolve, ms));
21
+ }
22
+ export default function activate({ bus, terminalBuffer: tb, registerTool }) {
23
+ if (!tb)
24
+ return; // @xterm/headless not installed
25
+ registerTool({
26
+ name: "terminal_read",
27
+ description: "Read the current terminal screen contents. Returns clean text (ANSI stripped) " +
28
+ "with cursor position and whether an alternate-screen program (vim, htop, less) is active. " +
29
+ "Use this to see what the user sees before sending keystrokes with terminal_keys.",
30
+ input_schema: {
31
+ type: "object",
32
+ properties: {},
33
+ },
34
+ showOutput: true,
35
+ getDisplayInfo: () => ({
36
+ kind: "read",
37
+ icon: "⊞",
38
+ locations: [],
39
+ }),
40
+ async execute() {
41
+ const { text, altScreen, cursorX, cursorY } = tb.readScreen();
42
+ const info = [
43
+ altScreen ? "mode: alternate screen" : "mode: normal",
44
+ `cursor: row=${cursorY} col=${cursorX}`,
45
+ ].join(", ");
46
+ return {
47
+ content: `[${info}]\n\n${text}`,
48
+ exitCode: 0,
49
+ isError: false,
50
+ };
51
+ },
52
+ });
53
+ registerTool({
54
+ name: "terminal_keys",
55
+ description: "Send keystrokes to the user's live terminal. The keys are written directly to the PTY " +
56
+ "as if the user typed them. Use escape sequences for special keys:\n" +
57
+ " - Escape: \\x1b\n" +
58
+ " - Enter/Return: \\r\n" +
59
+ " - Tab: \\t\n" +
60
+ " - Ctrl+C: \\x03\n" +
61
+ " - Ctrl+D: \\x04\n" +
62
+ " - Ctrl+Z: \\x1a\n" +
63
+ " - Arrow keys: \\x1b[A (up), \\x1b[B (down), \\x1b[C (right), \\x1b[D (left)\n" +
64
+ " - Backspace: \\x7f\n\n" +
65
+ "Example: to quit vim without saving, send keys=\"\\x1b:q!\\r\" (Escape, :q!, Enter).\n" +
66
+ "Always call terminal_read after sending keys to verify the result.",
67
+ input_schema: {
68
+ type: "object",
69
+ properties: {
70
+ keys: {
71
+ type: "string",
72
+ description: "The keystrokes to send. Use \\x1b for Escape, \\r for Enter, \\t for Tab, " +
73
+ "\\x03 for Ctrl+C, etc. Regular characters are sent as-is.",
74
+ },
75
+ settle_ms: {
76
+ type: "number",
77
+ description: "Milliseconds to wait after sending keys for the terminal to settle before " +
78
+ "returning (default: 150). Increase for slow programs.",
79
+ },
80
+ },
81
+ required: ["keys"],
82
+ },
83
+ showOutput: false,
84
+ getDisplayInfo: () => ({
85
+ kind: "execute",
86
+ icon: "⌨",
87
+ locations: [],
88
+ }),
89
+ formatCall: (args) => {
90
+ const keys = args.keys;
91
+ return keys
92
+ .replace(/\\x1b|\x1b/g, "ESC")
93
+ .replace(/\\r|\r/g, "⏎")
94
+ .replace(/\\n|\n/g, "↵")
95
+ .replace(/\\t|\t/g, "TAB")
96
+ .replace(/\\x03|\x03/g, "^C")
97
+ .replace(/\\x04|\x04/g, "^D")
98
+ .replace(/\\x7f|\x7f/g, "BS");
99
+ },
100
+ async execute(args) {
101
+ const raw = args.keys;
102
+ const keys = interpretEscapes(raw);
103
+ const settleMs = args.settle_ms ?? 150;
104
+ bus.emit("shell:stdout-show", {});
105
+ process.stdout.write("\n");
106
+ bus.emit("shell:pty-write", { data: keys });
107
+ await settle(settleMs);
108
+ const { text, altScreen, cursorX, cursorY } = tb.readScreen();
109
+ const info = [
110
+ altScreen ? "mode: alternate screen" : "mode: normal",
111
+ `cursor: row=${cursorY} col=${cursorX}`,
112
+ ].join(", ");
113
+ return {
114
+ content: `Keys sent. Screen after:\n[${info}]\n\n${text}`,
115
+ exitCode: 0,
116
+ isError: false,
117
+ };
118
+ },
119
+ });
120
+ }
@@ -71,6 +71,9 @@ export default function activate(ctx) {
71
71
  const { bus, llmClient, define } = ctx;
72
72
  const writer = new StdoutWriter();
73
73
  const s = createRenderState();
74
+ // Suppress all TUI output while stdout is held (overlay extensions)
75
+ bus.on("shell:stdout-hold", () => { writer.hold(); });
76
+ bus.on("shell:stdout-release", () => { writer.release(); });
74
77
  // Track backend/model info for display on response border
75
78
  let backendInfo = null;
76
79
  bus.on("agent:info", (info) => { backendInfo = info; });
package/dist/index.js CHANGED
@@ -9,6 +9,8 @@ import slashCommands from "./extensions/slash-commands.js";
9
9
  import fileAutocomplete from "./extensions/file-autocomplete.js";
10
10
  import shellRecall from "./extensions/shell-recall.js";
11
11
  import commandSuggest from "./extensions/command-suggest.js";
12
+ import terminalBuffer from "./extensions/terminal-buffer.js";
13
+ import overlayAgent from "./extensions/overlay-agent.js";
12
14
  import { loadExtensions } from "./extension-loader.js";
13
15
  import { getSettings } from "./settings.js";
14
16
  import { discoverSkills } from "./agent/skills.js";
@@ -232,6 +234,8 @@ async function main() {
232
234
  fileAutocomplete(extCtx);
233
235
  shellRecall(extCtx);
234
236
  commandSuggest(extCtx);
237
+ terminalBuffer(extCtx);
238
+ overlayAgent(extCtx);
235
239
  // Load user extensions (may register alternative agent backends)
236
240
  if (process.env.DEBUG) {
237
241
  console.error('[agent-sh] Loading extensions...');
@@ -137,6 +137,10 @@ export class InputHandler {
137
137
  }
138
138
  }
139
139
  handleInput(data) {
140
+ // Allow extensions to capture raw input (e.g. overlay prompt during vim)
141
+ const intercepted = this.bus.emitPipe("input:intercept", { data, consumed: false });
142
+ if (intercepted.consumed)
143
+ return;
140
144
  // If agent is running (processing a query), only Ctrl-C and control keys
141
145
  if (this.ctx.isAgentActive()) {
142
146
  if (data === "\x03") {
@@ -235,7 +239,8 @@ export class InputHandler {
235
239
  this.enterMode(mode);
236
240
  return; // don't process remaining chars
237
241
  }
238
- this.lineBuffer += ch;
242
+ if (!this.ctx.isForegroundBusy())
243
+ this.lineBuffer += ch;
239
244
  this.ctx.writeToPty(ch);
240
245
  }
241
246
  }
@@ -109,7 +109,15 @@ export class OutputParser {
109
109
  this.currentOutputCapture = "";
110
110
  }
111
111
  else {
112
+ // Cap capture buffer to avoid unbounded growth when a foreground
113
+ // program (tmux, vim, etc.) produces output without prompt markers.
114
+ // Keep only the tail — the final output is what matters for
115
+ // command-done context.
116
+ const MAX_CAPTURE = 128 * 1024; // 128 KB
112
117
  this.currentOutputCapture += data;
118
+ if (this.currentOutputCapture.length > MAX_CAPTURE) {
119
+ this.currentOutputCapture = this.currentOutputCapture.slice(-MAX_CAPTURE);
120
+ }
113
121
  }
114
122
  }
115
123
  /**
package/dist/shell.d.ts CHANGED
@@ -6,6 +6,8 @@ export declare class Shell implements InputContext {
6
6
  private inputHandler;
7
7
  private outputParser;
8
8
  private paused;
9
+ private stdoutHold;
10
+ private stdoutShow;
9
11
  private echoSkip;
10
12
  private agentActive;
11
13
  private isZsh;
@@ -37,6 +39,9 @@ export declare class Shell implements InputContext {
37
39
  * Heavy redraw: send \n to PTY to trigger a full precmd → prompt cycle.
38
40
  * Use this after agent responses where stdout has moved far from where
39
41
  * zle expects the cursor. The blank line is acceptable as a separator.
42
+ *
43
+ * Routed through shell:redraw-prompt pipe so extensions (e.g. overlay)
44
+ * can suppress it by setting `handled: true`.
40
45
  */
41
46
  freshPrompt(): void;
42
47
  onCommandEntered(command: string, cwd: string): void;
package/dist/shell.js CHANGED
@@ -5,12 +5,15 @@ import * as pty from "node-pty";
5
5
  import { InputHandler } from "./input-handler.js";
6
6
  import { OutputParser } from "./output-parser.js";
7
7
  import { getSettings } from "./settings.js";
8
+ import { RefCounter } from "./utils/output-writer.js";
8
9
  export class Shell {
9
10
  ptyProcess;
10
11
  bus;
11
12
  inputHandler;
12
13
  outputParser;
13
14
  paused = false;
15
+ stdoutHold = new RefCounter();
16
+ stdoutShow = new RefCounter();
14
17
  echoSkip = false;
15
18
  agentActive = false;
16
19
  isZsh = false;
@@ -156,6 +159,16 @@ export class Shell {
156
159
  this.setupOutput();
157
160
  this.setupInput();
158
161
  this.setupAgentLifecycle();
162
+ // Allow extensions to inject raw keystrokes into the PTY
163
+ this.bus.on("shell:pty-write", ({ data }) => {
164
+ this.ptyProcess.write(data);
165
+ });
166
+ // Ref-counted stdout hold — overlay extensions suppress PTY output
167
+ this.bus.on("shell:stdout-hold", () => { this.stdoutHold.increment(); });
168
+ this.bus.on("shell:stdout-release", () => { this.stdoutHold.decrement(); });
169
+ // Ref-counted stdout show — tools temporarily force output visible during agent processing
170
+ this.bus.on("shell:stdout-show", () => { this.stdoutShow.increment(); });
171
+ this.bus.on("shell:stdout-hide", () => { this.stdoutShow.decrement(); });
159
172
  }
160
173
  // ── InputContext implementation (delegates to OutputParser) ──
161
174
  isForegroundBusy() {
@@ -197,9 +210,18 @@ export class Shell {
197
210
  * Heavy redraw: send \n to PTY to trigger a full precmd → prompt cycle.
198
211
  * Use this after agent responses where stdout has moved far from where
199
212
  * zle expects the cursor. The blank line is acceptable as a separator.
213
+ *
214
+ * Routed through shell:redraw-prompt pipe so extensions (e.g. overlay)
215
+ * can suppress it by setting `handled: true`.
200
216
  */
201
217
  freshPrompt() {
202
- this.ptyProcess.write("\n");
218
+ const result = this.bus.emitPipe("shell:redraw-prompt", {
219
+ cwd: this.outputParser.getCwd(),
220
+ handled: false,
221
+ });
222
+ if (!result.handled) {
223
+ this.ptyProcess.write("\n");
224
+ }
203
225
  }
204
226
  onCommandEntered(command, cwd) {
205
227
  this.outputParser.onCommandEntered(command, cwd);
@@ -207,8 +229,11 @@ export class Shell {
207
229
  // ── PTY I/O wiring ─────────────────────────────────────────
208
230
  setupOutput() {
209
231
  this.ptyProcess.onData((data) => {
232
+ this.bus.emit("shell:pty-data", { raw: data });
210
233
  this.outputParser.processData(data);
211
- if (this.paused)
234
+ if (this.stdoutHold.active)
235
+ return;
236
+ if (this.paused && !this.stdoutShow.active)
212
237
  return;
213
238
  // During user_shell exec, skip the command echo (first line)
214
239
  if (this.echoSkip) {
package/dist/types.d.ts CHANGED
@@ -4,6 +4,8 @@ import type { LlmClient } from "./utils/llm-client.js";
4
4
  import type { ColorPalette } from "./utils/palette.js";
5
5
  import type { BlockTransformOptions, FencedBlockTransformOptions } from "./utils/stream-transform.js";
6
6
  import type { ToolDefinition } from "./agent/types.js";
7
+ import type { TerminalBuffer } from "./utils/terminal-buffer.js";
8
+ import type { FloatingPanel, FloatingPanelConfig } from "./utils/floating-panel.js";
7
9
  export type { ContentBlock } from "./event-bus.js";
8
10
  export type { BlockTransformOptions, FencedBlockTransformOptions } from "./utils/stream-transform.js";
9
11
  /** A model entry in the cycling list, optionally tied to a provider. */
@@ -66,6 +68,17 @@ export interface ExtensionContext {
66
68
  advise: (name: string, wrapper: (next: (...args: any[]) => any, ...args: any[]) => any) => void;
67
69
  /** Call a named handler. */
68
70
  call: (name: string, ...args: any[]) => any;
71
+ /**
72
+ * Shared headless terminal buffer mirroring PTY output.
73
+ * Lazily created on first access. Returns null if @xterm/headless is not installed.
74
+ */
75
+ terminalBuffer: TerminalBuffer | null;
76
+ /**
77
+ * Create a floating panel overlay. The panel composites a bordered box
78
+ * over the terminal with input routing, dimmed background, and
79
+ * handler-based customization.
80
+ */
81
+ createFloatingPanel: (config: FloatingPanelConfig) => FloatingPanel;
69
82
  }
70
83
  /**
71
84
  * Configuration for a registered input mode.