agent-sh 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -43
- package/dist/agent/agent-loop.d.ts +1 -0
- package/dist/agent/agent-loop.js +119 -26
- package/dist/agent/subagent.js +3 -1
- package/dist/agent/system-prompt.d.ts +1 -1
- package/dist/agent/system-prompt.js +21 -16
- package/dist/agent/tools/bash.js +10 -1
- package/dist/agent/tools/display.d.ts +13 -0
- package/dist/agent/tools/display.js +70 -0
- package/dist/agent/tools/edit-file.js +60 -7
- package/dist/agent/tools/glob.js +39 -7
- package/dist/agent/tools/grep.js +111 -20
- package/dist/agent/tools/ls.js +31 -2
- package/dist/agent/tools/read-file.d.ts +9 -1
- package/dist/agent/tools/read-file.js +50 -4
- package/dist/agent/tools/user-shell.js +40 -13
- package/dist/agent/tools/write-file.js +9 -1
- package/dist/agent/types.d.ts +35 -1
- package/dist/context-manager.d.ts +3 -1
- package/dist/context-manager.js +11 -1
- package/dist/core.d.ts +1 -3
- package/dist/core.js +23 -12
- package/dist/event-bus.d.ts +41 -3
- package/dist/extension-loader.d.ts +1 -1
- package/dist/extension-loader.js +1 -3
- package/dist/extensions/overlay-agent.d.ts +11 -0
- package/dist/extensions/overlay-agent.js +43 -0
- package/dist/extensions/terminal-buffer.d.ts +14 -0
- package/dist/extensions/terminal-buffer.js +120 -0
- package/dist/extensions/tui-renderer.js +344 -83
- package/dist/index.js +45 -36
- package/dist/input-handler.js +10 -3
- package/dist/output-parser.js +8 -0
- package/dist/settings.js +1 -1
- package/dist/shell.d.ts +5 -0
- package/dist/shell.js +29 -4
- package/dist/types.d.ts +13 -0
- package/dist/utils/diff.js +10 -0
- package/dist/utils/floating-panel.d.ts +198 -0
- package/dist/utils/floating-panel.js +590 -0
- package/dist/utils/markdown.d.ts +1 -0
- package/dist/utils/markdown.js +23 -1
- package/dist/utils/output-writer.d.ts +14 -0
- package/dist/utils/output-writer.js +16 -0
- package/dist/utils/terminal-buffer.d.ts +65 -0
- package/dist/utils/terminal-buffer.js +166 -0
- package/dist/utils/tool-display.d.ts +4 -0
- package/dist/utils/tool-display.js +22 -5
- package/examples/extensions/claude-code-bridge/index.ts +8 -12
- package/examples/extensions/overlay-agent.ts +70 -0
- package/examples/extensions/pi-bridge/index.ts +10 -12
- package/examples/extensions/secret-guard.ts +100 -0
- package/examples/extensions/terminal-buffer.ts +184 -0
- package/package.json +5 -1
package/README.md
CHANGED
|
@@ -7,23 +7,23 @@ Not a shell that lives in an agent — an agent that lives in a shell.
|
|
|
7
7
|
|
|
8
8
|
I live in a terminal. I don't want an agent that can run shell commands when it needs to — I want my shell, with an agent I can reach for when *I* need to. Most AI tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
|
|
9
9
|
|
|
10
|
-
agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type
|
|
10
|
+
agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
|
|
11
11
|
|
|
12
12
|
```
|
|
13
13
|
⚡ src $ ls -la # real shell command
|
|
14
14
|
⚡ src $ cd ../tests && npm test # real cd, env, aliases — all just work
|
|
15
15
|
⚡ src $ vim file.ts # opens vim in the same PTY
|
|
16
|
-
⚡ src $ > explain the last error #
|
|
17
|
-
⚡ src $
|
|
16
|
+
⚡ src $ > explain the last error # agent investigates using its own tools
|
|
17
|
+
⚡ src $ > deploy to staging # agent runs it in your live shell
|
|
18
18
|
```
|
|
19
19
|
|
|
20
20
|
## Key Features
|
|
21
21
|
|
|
22
22
|
**Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
|
|
23
23
|
|
|
24
|
-
**Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type
|
|
24
|
+
**Context-aware agent.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and the agent knows exactly what happened. It has built-in tools for file read/write/edit, bash, grep, glob — no external setup needed.
|
|
25
25
|
|
|
26
|
-
**
|
|
26
|
+
**Agent decides how to help.** One entry point (`>`), three tool categories. The agent uses scratchpad tools to investigate, `display` to show you output, and `user_shell` for commands with lasting effects. No need to pick a mode — the agent reasons about which tools to use based on your intent.
|
|
27
27
|
|
|
28
28
|
**Any LLM, any backend.** Works with any OpenAI-compatible API out of the box. Define multiple providers in settings and cycle between models at runtime with Shift+Tab. Or swap in a completely different agent — [Claude Code](examples/extensions/claude-code-bridge/) and [pi](examples/extensions/pi-bridge/) run as drop-in backend extensions.
|
|
29
29
|
|
|
@@ -42,14 +42,15 @@ Set `OPENAI_API_KEY` in your environment (or configure providers in `~/.agent-sh
|
|
|
42
42
|
|
|
43
43
|
Requires Node.js 18+.
|
|
44
44
|
|
|
45
|
-
##
|
|
45
|
+
## Agent Mode
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
- **`?` Help mode** — Agent runs a command in your live shell. Your aliases, env vars, and cwd apply. Returns to shell after.
|
|
47
|
+
Type `>` at the start of a line to talk to the agent. The agent decides how to help:
|
|
49
48
|
|
|
50
|
-
|
|
49
|
+
- **Scratchpad tools** (`bash`, `read_file`, `grep`, `glob`, etc.) — for investigation. Output goes to the agent, not your terminal.
|
|
50
|
+
- **`display`** — shows output in your terminal (e.g. `cat`, `git log`). You see it; the agent doesn't process it.
|
|
51
|
+
- **`user_shell`** — runs commands with lasting effects (`cd`, `npm install`, etc.) in your live shell.
|
|
51
52
|
|
|
52
|
-
|
|
53
|
+
Everything else works as a normal shell — commands go straight to the PTY. Input modes are extensible — see [Extensions: Custom Input Modes](docs/extensions.md#custom-input-modes).
|
|
53
54
|
|
|
54
55
|
### Slash Commands
|
|
55
56
|
|
|
@@ -61,39 +62,7 @@ Everything else works as a normal shell — commands go straight to the PTY. Mod
|
|
|
61
62
|
|
|
62
63
|
## Configuration
|
|
63
64
|
|
|
64
|
-
Configure via `~/.agent-sh/settings.json`.
|
|
65
|
-
|
|
66
|
-
```json
|
|
67
|
-
{
|
|
68
|
-
"defaultProvider": "openai",
|
|
69
|
-
"providers": {
|
|
70
|
-
"openai": {
|
|
71
|
-
"apiKey": "$OPENAI_API_KEY",
|
|
72
|
-
"defaultModel": "gpt-4o",
|
|
73
|
-
"models": ["gpt-4o", "gpt-4o-mini"]
|
|
74
|
-
},
|
|
75
|
-
"ollama": {
|
|
76
|
-
"apiKey": "not-needed",
|
|
77
|
-
"baseURL": "http://localhost:11434/v1",
|
|
78
|
-
"defaultModel": "llama3",
|
|
79
|
-
"models": ["llama3", "mistral"]
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
Cycle models with **Shift+Tab**, switch providers with `/provider <name>`, switch backends with `/backend <name>`. API keys support `$ENV_VAR` syntax.
|
|
86
|
-
|
|
87
|
-
Additional options:
|
|
88
|
-
|
|
89
|
-
| Key | Default | Description |
|
|
90
|
-
|---|---|---|
|
|
91
|
-
| `startupBanner` | `true` | Show startup banner with model info and usage hints |
|
|
92
|
-
| `promptIndicator` | `true` | Show `⚡ agent-sh` in terminal tab/window title |
|
|
93
|
-
|
|
94
|
-
Set either to `false` to disable.
|
|
95
|
-
|
|
96
|
-
See the [Usage Guide](docs/usage.md#configuration) for the full settings reference.
|
|
65
|
+
Configure via `~/.agent-sh/settings.json`. See the [Usage Guide](docs/usage.md#configuration) for the full settings reference (providers, models, extensions, skills, and more).
|
|
97
66
|
|
|
98
67
|
## Documentation
|
|
99
68
|
|
package/dist/agent/agent-loop.js
CHANGED
|
@@ -14,6 +14,7 @@ import { createGrepTool } from "./tools/grep.js";
|
|
|
14
14
|
import { createGlobTool } from "./tools/glob.js";
|
|
15
15
|
import { createLsTool } from "./tools/ls.js";
|
|
16
16
|
import { createUserShellTool } from "./tools/user-shell.js";
|
|
17
|
+
import { createDisplayTool } from "./tools/display.js";
|
|
17
18
|
import { createListSkillsTool } from "./tools/list-skills.js";
|
|
18
19
|
import { discoverProjectSkills } from "./skills.js";
|
|
19
20
|
export class AgentLoop {
|
|
@@ -24,6 +25,7 @@ export class AgentLoop {
|
|
|
24
25
|
abortController = null;
|
|
25
26
|
toolRegistry = new ToolRegistry();
|
|
26
27
|
conversation = new ConversationState();
|
|
28
|
+
fileReadCache = new Map();
|
|
27
29
|
modes;
|
|
28
30
|
currentModeIndex = 0;
|
|
29
31
|
boundListeners = [];
|
|
@@ -51,8 +53,8 @@ export class AgentLoop {
|
|
|
51
53
|
this.bus.on(event, fn);
|
|
52
54
|
this.boundListeners.push({ event, fn });
|
|
53
55
|
};
|
|
54
|
-
on("agent:submit", ({ query
|
|
55
|
-
this.handleQuery(query
|
|
56
|
+
on("agent:submit", ({ query }) => {
|
|
57
|
+
this.handleQuery(query).catch(() => { });
|
|
56
58
|
});
|
|
57
59
|
on("agent:cancel-request", (e) => {
|
|
58
60
|
this.abortController?.abort(e.silent ? "silent" : undefined);
|
|
@@ -278,13 +280,14 @@ export class AgentLoop {
|
|
|
278
280
|
return env;
|
|
279
281
|
};
|
|
280
282
|
this.toolRegistry.register(createBashTool({ getCwd, getEnv, bus: this.bus }));
|
|
281
|
-
this.toolRegistry.register(createReadFileTool(getCwd));
|
|
283
|
+
this.toolRegistry.register(createReadFileTool(getCwd, this.fileReadCache));
|
|
282
284
|
this.toolRegistry.register(createWriteFileTool(getCwd));
|
|
283
285
|
this.toolRegistry.register(createEditFileTool(getCwd));
|
|
284
286
|
this.toolRegistry.register(createGrepTool(getCwd));
|
|
285
287
|
this.toolRegistry.register(createGlobTool(getCwd));
|
|
286
288
|
this.toolRegistry.register(createLsTool(getCwd));
|
|
287
289
|
this.toolRegistry.register(createUserShellTool({ getCwd, bus: this.bus }));
|
|
290
|
+
this.toolRegistry.register(createDisplayTool({ getCwd, bus: this.bus }));
|
|
288
291
|
this.toolRegistry.register(createListSkillsTool(getCwd));
|
|
289
292
|
}
|
|
290
293
|
/**
|
|
@@ -301,6 +304,8 @@ export class AgentLoop {
|
|
|
301
304
|
h.define("conversation:prepare", (messages) => messages);
|
|
302
305
|
// Wraps each tool call: permission → execute → emit events.
|
|
303
306
|
// Extensions advise to add safe-mode, logging, metrics, custom policies.
|
|
307
|
+
// The ctx.onChunk callback is exposed so advisors can wrap it to
|
|
308
|
+
// intercept/transform streamed tool output (e.g. secret redaction).
|
|
304
309
|
h.define("tool:execute", async (ctx) => {
|
|
305
310
|
const { name, id, args, tool } = ctx;
|
|
306
311
|
const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
|
|
@@ -308,7 +313,9 @@ export class AgentLoop {
|
|
|
308
313
|
// Permission gating
|
|
309
314
|
if (tool.requiresPermission) {
|
|
310
315
|
let permKind = "tool-call";
|
|
311
|
-
let permTitle =
|
|
316
|
+
let permTitle = typeof args.description === "string"
|
|
317
|
+
? `${name}: ${args.description}`
|
|
318
|
+
: name;
|
|
312
319
|
let metadata = { args };
|
|
313
320
|
// For file-modifying tools, pre-compute diff for display
|
|
314
321
|
if (tool.modifiesFiles && typeof args.path === "string") {
|
|
@@ -359,20 +366,33 @@ export class AgentLoop {
|
|
|
359
366
|
}
|
|
360
367
|
}
|
|
361
368
|
// Emit tool-started for TUI
|
|
369
|
+
const label = tool.displayName ?? name;
|
|
362
370
|
this.bus.emit("agent:tool-started", {
|
|
363
|
-
title:
|
|
364
|
-
|
|
371
|
+
title: typeof args.description === "string" ? `${label}: ${args.description}` : label,
|
|
372
|
+
toolCallId: id,
|
|
373
|
+
kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
|
|
374
|
+
displayDetail: tool.formatCall?.(args),
|
|
375
|
+
batchIndex: ctx.batchIndex, batchTotal: ctx.batchTotal,
|
|
365
376
|
});
|
|
366
377
|
this.bus.emit("agent:tool-call", { tool: name, args });
|
|
367
|
-
// Execute —
|
|
378
|
+
// Execute — use ctx.onChunk so advisors can wrap the streaming callback.
|
|
379
|
+
// Suppress streaming output if diff was already shown.
|
|
368
380
|
const onChunk = (tool.showOutput !== false && !diffShown)
|
|
369
|
-
?
|
|
381
|
+
? ctx.onChunk
|
|
370
382
|
: undefined;
|
|
371
383
|
const result = await tool.execute(args, onChunk);
|
|
372
|
-
//
|
|
373
|
-
|
|
384
|
+
// Invalidate read cache when a file is modified
|
|
385
|
+
if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
|
|
386
|
+
const absPath = path.resolve(process.cwd(), args.path);
|
|
387
|
+
this.fileReadCache.delete(absPath);
|
|
388
|
+
}
|
|
389
|
+
// Compute result display: tool-provided → default (none)
|
|
390
|
+
const resultDisplay = tool.formatResult?.(args, result);
|
|
391
|
+
// Emit completion events (via transform pipe so extensions can override)
|
|
392
|
+
this.bus.emitTransform("agent:tool-completed", {
|
|
374
393
|
toolCallId: id, exitCode: result.exitCode,
|
|
375
394
|
rawOutput: result.content, kind: display.kind,
|
|
395
|
+
resultDisplay,
|
|
376
396
|
});
|
|
377
397
|
this.bus.emit("agent:tool-output", {
|
|
378
398
|
tool: name, output: result.content, exitCode: result.exitCode,
|
|
@@ -380,7 +400,7 @@ export class AgentLoop {
|
|
|
380
400
|
return result;
|
|
381
401
|
});
|
|
382
402
|
}
|
|
383
|
-
async handleQuery(query
|
|
403
|
+
async handleQuery(query) {
|
|
384
404
|
// Cancel any in-flight loop (concurrent prompt handling)
|
|
385
405
|
if (this.abortController) {
|
|
386
406
|
this.abortController.abort();
|
|
@@ -390,15 +410,11 @@ export class AgentLoop {
|
|
|
390
410
|
// Each loop iteration adds an abort listener (via OpenAI SDK stream);
|
|
391
411
|
// raise the limit to avoid spurious warnings on multi-tool queries.
|
|
392
412
|
setMaxListeners(50, signal);
|
|
393
|
-
this.bus.emit("agent:query", { query
|
|
413
|
+
this.bus.emit("agent:query", { query });
|
|
394
414
|
this.bus.emit("agent:processing-start", {});
|
|
395
415
|
let responseText = "";
|
|
396
416
|
try {
|
|
397
|
-
|
|
398
|
-
const userMessage = modeInstruction
|
|
399
|
-
? `${modeInstruction}\n${query}`
|
|
400
|
-
: query;
|
|
401
|
-
this.conversation.addUserMessage(userMessage);
|
|
417
|
+
this.conversation.addUserMessage(query);
|
|
402
418
|
responseText = await this.executeLoop(signal);
|
|
403
419
|
}
|
|
404
420
|
catch (e) {
|
|
@@ -453,14 +469,38 @@ export class AgentLoop {
|
|
|
453
469
|
// No tool calls → agent is done
|
|
454
470
|
if (toolCalls.length === 0)
|
|
455
471
|
break;
|
|
456
|
-
//
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
472
|
+
// Emit batch info so the TUI can render group headers upfront
|
|
473
|
+
{
|
|
474
|
+
const groupMap = new Map();
|
|
475
|
+
for (const tc of toolCalls) {
|
|
476
|
+
const tool = this.toolRegistry.get(tc.name);
|
|
477
|
+
const kind = tool?.getDisplayInfo?.((() => { try {
|
|
478
|
+
return JSON.parse(tc.argumentsJson);
|
|
479
|
+
}
|
|
480
|
+
catch {
|
|
481
|
+
return {};
|
|
482
|
+
} })())?.kind ?? "execute";
|
|
483
|
+
let args = {};
|
|
484
|
+
try {
|
|
485
|
+
args = JSON.parse(tc.argumentsJson);
|
|
486
|
+
}
|
|
487
|
+
catch { }
|
|
488
|
+
const detail = tool?.formatCall?.(args);
|
|
489
|
+
if (!groupMap.has(kind))
|
|
490
|
+
groupMap.set(kind, []);
|
|
491
|
+
groupMap.get(kind).push({ name: tc.name, displayDetail: detail });
|
|
492
|
+
}
|
|
493
|
+
const groups = Array.from(groupMap.entries()).map(([kind, tools]) => ({ kind, tools }));
|
|
494
|
+
this.bus.emit("agent:tool-batch", { groups });
|
|
495
|
+
}
|
|
496
|
+
// Execute tool calls — run read-only tools in parallel, permission-
|
|
497
|
+
// requiring tools sequentially (to avoid overlapping permission prompts).
|
|
498
|
+
const batchTotal = toolCalls.length;
|
|
499
|
+
const executeSingle = async (tc, batchIndex) => {
|
|
460
500
|
const tool = this.toolRegistry.get(tc.name);
|
|
461
501
|
if (!tool) {
|
|
462
502
|
this.conversation.addToolResult(tc.id, `Error: Unknown tool "${tc.name}"`);
|
|
463
|
-
|
|
503
|
+
return;
|
|
464
504
|
}
|
|
465
505
|
let args;
|
|
466
506
|
try {
|
|
@@ -468,16 +508,69 @@ export class AgentLoop {
|
|
|
468
508
|
}
|
|
469
509
|
catch {
|
|
470
510
|
this.conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`);
|
|
471
|
-
|
|
511
|
+
return;
|
|
472
512
|
}
|
|
473
513
|
// Execute via handler — extensions can advise to add safe-mode,
|
|
474
514
|
// logging, metrics, custom permission policies, etc.
|
|
475
|
-
const
|
|
476
|
-
|
|
477
|
-
|
|
515
|
+
const defaultOnChunk = (chunk) => {
|
|
516
|
+
this.bus.emit("agent:tool-output-chunk", { chunk });
|
|
517
|
+
};
|
|
518
|
+
const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
|
|
519
|
+
batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined });
|
|
520
|
+
// Add tool result to conversation (truncate large outputs to avoid
|
|
521
|
+
// blowing through the context window on a single tool call)
|
|
522
|
+
let content = result.isError
|
|
478
523
|
? `Error: ${result.content}`
|
|
479
524
|
: result.content;
|
|
525
|
+
const maxBytes = 16_384; // ~4k tokens
|
|
526
|
+
if (content.length > maxBytes) {
|
|
527
|
+
const headBytes = Math.floor(maxBytes * 0.6);
|
|
528
|
+
const tailBytes = maxBytes - headBytes;
|
|
529
|
+
const lines = content.split("\n");
|
|
530
|
+
let headEnd = 0, headLen = 0;
|
|
531
|
+
for (let i = 0; i < lines.length && headLen + lines[i].length + 1 <= headBytes; i++) {
|
|
532
|
+
headLen += lines[i].length + 1;
|
|
533
|
+
headEnd = i + 1;
|
|
534
|
+
}
|
|
535
|
+
let tailStart = lines.length, tailLen = 0;
|
|
536
|
+
for (let i = lines.length - 1; i >= headEnd && tailLen + lines[i].length + 1 <= tailBytes; i--) {
|
|
537
|
+
tailLen += lines[i].length + 1;
|
|
538
|
+
tailStart = i;
|
|
539
|
+
}
|
|
540
|
+
const omitted = tailStart - headEnd;
|
|
541
|
+
content = [
|
|
542
|
+
...lines.slice(0, headEnd),
|
|
543
|
+
`\n[… ${omitted} lines omitted (output truncated to ${Math.round(maxBytes / 1024)}KB) …]\n`,
|
|
544
|
+
...lines.slice(tailStart),
|
|
545
|
+
].join("\n");
|
|
546
|
+
}
|
|
480
547
|
this.conversation.addToolResult(tc.id, content);
|
|
548
|
+
};
|
|
549
|
+
// Partition into parallel-safe (read-only) and sequential (needs permission)
|
|
550
|
+
const parallel = [];
|
|
551
|
+
const sequential = [];
|
|
552
|
+
for (const tc of toolCalls) {
|
|
553
|
+
const tool = this.toolRegistry.get(tc.name);
|
|
554
|
+
if (tool && !tool.requiresPermission && !tool.modifiesFiles) {
|
|
555
|
+
parallel.push(tc);
|
|
556
|
+
}
|
|
557
|
+
else {
|
|
558
|
+
sequential.push(tc);
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
// Run read-only tools in parallel
|
|
562
|
+
let batchIdx = 0;
|
|
563
|
+
if (parallel.length > 0 && !signal.aborted) {
|
|
564
|
+
await Promise.all(parallel.map(tc => {
|
|
565
|
+
const idx = ++batchIdx;
|
|
566
|
+
return signal.aborted ? Promise.resolve() : executeSingle(tc, idx);
|
|
567
|
+
}));
|
|
568
|
+
}
|
|
569
|
+
// Run permission-requiring tools sequentially
|
|
570
|
+
for (const tc of sequential) {
|
|
571
|
+
if (signal.aborted)
|
|
572
|
+
break;
|
|
573
|
+
await executeSingle(tc, ++batchIdx);
|
|
481
574
|
}
|
|
482
575
|
// Loop back — LLM sees tool results
|
|
483
576
|
}
|
package/dist/agent/subagent.js
CHANGED
|
@@ -62,11 +62,13 @@ export async function runSubagent(opts) {
|
|
|
62
62
|
const result = await tool.execute(args, onChunk);
|
|
63
63
|
if (bus) {
|
|
64
64
|
const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
|
|
65
|
-
|
|
65
|
+
const resultDisplay = tool.formatResult?.(args, result);
|
|
66
|
+
bus.emitTransform("agent:tool-completed", {
|
|
66
67
|
toolCallId: tc.id,
|
|
67
68
|
exitCode: result.exitCode,
|
|
68
69
|
rawOutput: result.content,
|
|
69
70
|
kind: display.kind,
|
|
71
|
+
resultDisplay,
|
|
70
72
|
});
|
|
71
73
|
}
|
|
72
74
|
const content = result.isError ? `Error: ${result.content}` : result.content;
|
|
@@ -4,7 +4,7 @@ import type { ContextManager } from "../context-manager.js";
|
|
|
4
4
|
* Static system prompt — identical across all queries, cacheable.
|
|
5
5
|
* Contains only identity and behavioral instructions.
|
|
6
6
|
*/
|
|
7
|
-
export declare const STATIC_SYSTEM_PROMPT = "You are an AI coding assistant embedded in agent-sh, a terminal shell.\nYou have access to the user's shell environment and can read, write, and execute code.\nYou share the user's working directory, environment variables, and shell history.\n\n#
|
|
7
|
+
export declare const STATIC_SYSTEM_PROMPT = "You are an AI coding assistant embedded in agent-sh, a terminal shell.\nYou have access to the user's shell environment and can read, write, and execute code.\nYou share the user's working directory, environment variables, and shell history.\n\n# Tool Decision Guide\n\nYou have three categories of tools \u2014 choose based on who needs the output and\nwhether the command has lasting effects:\n\n**Scratchpad tools** (bash, read_file, grep, glob, ls, edit_file, write_file):\nUse these to investigate, search, read, and modify files. Output is returned\nto you for reasoning \u2014 the user doesn't see it directly.\n\n**Display** (display):\nUse this to show output to the user in their terminal. The user sees the\noutput directly, but it is NOT returned to you. Use when:\n- The user asks to see something (cat a file, git log, git diff, man page)\n- The output is for the user to read, not for you to process\n\n**Live shell** (user_shell):\nUse this to run commands with lasting effects in the user's real shell. Use for:\n- Commands that affect shell state (cd, export, source)\n- Installing packages, starting servers, running builds\n- Any command where the user wants real side effects\n- Set return_output=true only if you need to inspect the result\n\nDefault to scratchpad tools for your own investigation. Use display when the\nuser is the intended audience. Use user_shell when the command has real effects.\n\n# Tool Usage Guidelines\n- Use read_file before editing a file you haven't seen\n- Prefer edit_file over write_file for modifying existing files\n- Use grep/glob to find files before reading them\n- Keep bash commands focused; avoid long-running blocking commands\n- Always check command exit codes for errors";
|
|
8
8
|
/**
|
|
9
9
|
* Build the dynamic context — injected as a user message before each query.
|
|
10
10
|
* Contains everything that changes: tools, shell context, conventions, cwd.
|
|
@@ -40,32 +40,37 @@ export const STATIC_SYSTEM_PROMPT = `You are an AI coding assistant embedded in
|
|
|
40
40
|
You have access to the user's shell environment and can read, write, and execute code.
|
|
41
41
|
You share the user's working directory, environment variables, and shell history.
|
|
42
42
|
|
|
43
|
-
#
|
|
43
|
+
# Tool Decision Guide
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
You have three categories of tools — choose based on who needs the output and
|
|
46
|
+
whether the command has lasting effects:
|
|
46
47
|
|
|
47
|
-
|
|
48
|
-
Use
|
|
49
|
-
|
|
50
|
-
something in their live shell.
|
|
48
|
+
**Scratchpad tools** (bash, read_file, grep, glob, ls, edit_file, write_file):
|
|
49
|
+
Use these to investigate, search, read, and modify files. Output is returned
|
|
50
|
+
to you for reasoning — the user doesn't see it directly.
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
The user
|
|
56
|
-
|
|
52
|
+
**Display** (display):
|
|
53
|
+
Use this to show output to the user in their terminal. The user sees the
|
|
54
|
+
output directly, but it is NOT returned to you. Use when:
|
|
55
|
+
- The user asks to see something (cat a file, git log, git diff, man page)
|
|
56
|
+
- The output is for the user to read, not for you to process
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
**Live shell** (user_shell):
|
|
59
|
+
Use this to run commands with lasting effects in the user's real shell. Use for:
|
|
60
|
+
- Commands that affect shell state (cd, export, source)
|
|
61
|
+
- Installing packages, starting servers, running builds
|
|
62
|
+
- Any command where the user wants real side effects
|
|
63
|
+
- Set return_output=true only if you need to inspect the result
|
|
64
|
+
|
|
65
|
+
Default to scratchpad tools for your own investigation. Use display when the
|
|
66
|
+
user is the intended audience. Use user_shell when the command has real effects.
|
|
59
67
|
|
|
60
68
|
# Tool Usage Guidelines
|
|
61
69
|
- Use read_file before editing a file you haven't seen
|
|
62
70
|
- Prefer edit_file over write_file for modifying existing files
|
|
63
71
|
- Use grep/glob to find files before reading them
|
|
64
72
|
- Keep bash commands focused; avoid long-running blocking commands
|
|
65
|
-
- Always check command exit codes for errors
|
|
66
|
-
- user_shell runs commands in the user's live terminal — use for cd, export, source, etc.
|
|
67
|
-
- user_shell output is shown directly to the user but NOT returned to you by default.
|
|
68
|
-
Set return_output=true if you need to inspect the result to answer a question.`;
|
|
73
|
+
- Always check command exit codes for errors`;
|
|
69
74
|
/**
|
|
70
75
|
* Build the dynamic context — injected as a user message before each query.
|
|
71
76
|
* Contains everything that changes: tools, shell context, conventions, cwd.
|
package/dist/agent/tools/bash.js
CHANGED
|
@@ -2,7 +2,11 @@ import { executeCommand } from "../../executor.js";
|
|
|
2
2
|
export function createBashTool(opts) {
|
|
3
3
|
return {
|
|
4
4
|
name: "bash",
|
|
5
|
-
description: "Execute a bash command in an isolated subprocess. Output is captured and returned
|
|
5
|
+
description: "Execute a bash command in an isolated subprocess. Output is captured and returned. " +
|
|
6
|
+
"Does not affect the user's shell state (use user_shell for cd, export, source). " +
|
|
7
|
+
"Do NOT use bash for file searching — use grep/glob instead. " +
|
|
8
|
+
"Do NOT use bash for reading files — use read_file instead. " +
|
|
9
|
+
"Provide a description parameter to explain what the command does.",
|
|
6
10
|
input_schema: {
|
|
7
11
|
type: "object",
|
|
8
12
|
properties: {
|
|
@@ -14,6 +18,10 @@ export function createBashTool(opts) {
|
|
|
14
18
|
type: "number",
|
|
15
19
|
description: "Timeout in seconds (default: 60)",
|
|
16
20
|
},
|
|
21
|
+
description: {
|
|
22
|
+
type: "string",
|
|
23
|
+
description: "Short description of what this command does (e.g., 'Install dependencies', 'Run test suite')",
|
|
24
|
+
},
|
|
17
25
|
},
|
|
18
26
|
required: ["command"],
|
|
19
27
|
},
|
|
@@ -22,6 +30,7 @@ export function createBashTool(opts) {
|
|
|
22
30
|
requiresPermission: true,
|
|
23
31
|
getDisplayInfo: (args) => ({
|
|
24
32
|
kind: "execute",
|
|
33
|
+
icon: "▶",
|
|
25
34
|
locations: [],
|
|
26
35
|
}),
|
|
27
36
|
async execute(args, onChunk) {
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { EventBus } from "../../event-bus.js";
|
|
2
|
+
import type { ToolDefinition } from "../types.js";
|
|
3
|
+
/**
|
|
4
|
+
* display — shows command output to the user in their live terminal.
|
|
5
|
+
*
|
|
6
|
+
* Unlike bash (scratchpad), the user sees the output directly in their shell.
|
|
7
|
+
* Unlike user_shell, this is for read-only display — no lasting side effects.
|
|
8
|
+
* The agent does NOT receive the output back.
|
|
9
|
+
*/
|
|
10
|
+
export declare function createDisplayTool(opts: {
|
|
11
|
+
getCwd: () => string;
|
|
12
|
+
bus: EventBus;
|
|
13
|
+
}): ToolDefinition;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
 * display — shows command output to the user in their live terminal.
 *
 * Unlike bash (scratchpad), the user sees the output directly in their shell.
 * Unlike user_shell, this is for read-only display — no lasting side effects.
 * The agent does NOT receive the output back; it only gets a short status
 * string and the exit code.
 *
 * @param {object} opts
 * @param {() => string} opts.getCwd - Returns the working directory that is
 *   sent along with the exec request.
 * @param {object} opts.bus - Event bus; only `emitPipeAsync` is used here, to
 *   send a "shell:exec-request" payload and await the piped result.
 * @returns {object} Tool definition with `name`, `description`,
 *   `input_schema`, `showOutput`, `modifiesFiles`, `getDisplayInfo` and an
 *   async `execute(args)` that resolves to `{ content, exitCode, isError }`.
 */
export function createDisplayTool(opts) {
    const DEFAULT_TIMEOUT_SEC = 30;
    // Node coerces delays above 2^31-1 ms to 1 ms, which would turn a huge
    // timeout into an immediate one — clamp instead.
    const MAX_TIMEOUT_SEC = 2_147_483;
    // Unique marker so a timeout can never be confused with a bus error whose
    // message happens to be "timeout".
    const TIMED_OUT = Symbol("display-timeout");
    return {
        name: "display",
        description: "Show command output to the user in their terminal. Use when the user asks to see something (cat, git log, diff, man, etc.) and you don't need to process the output yourself. Output is NOT returned to you.",
        input_schema: {
            type: "object",
            properties: {
                command: {
                    type: "string",
                    description: "Command to run and display output to the user",
                },
                timeout: {
                    type: "number",
                    description: "Timeout in seconds (default: 30)",
                },
            },
            required: ["command"],
        },
        showOutput: false,
        modifiesFiles: false,
        getDisplayInfo: () => ({
            kind: "display",
            icon: "◇",
            locations: [],
        }),
        /**
         * Run `args.command` through the bus and report only success/failure.
         *
         * @param {{ command: string, timeout?: number }} args
         * @returns {Promise<{ content: string, exitCode: number, isError: boolean }>}
         *   Never rejects: bad input, bus failures and timeouts are all
         *   reported as `isError: true` results with `exitCode: -1`.
         */
        async execute(args) {
            const command = args?.command;
            if (typeof command !== "string" || command.trim() === "") {
                // Arguments come from the model; refuse rather than forward
                // `undefined` to the user's shell.
                return {
                    content: "Error: display requires a non-empty command string.",
                    exitCode: -1,
                    isError: true,
                };
            }
            const requested = args.timeout;
            const timeoutSec = typeof requested === "number" && Number.isFinite(requested) && requested > 0
                ? Math.min(requested, MAX_TIMEOUT_SEC)
                : DEFAULT_TIMEOUT_SEC;
            let timer;
            let result;
            try {
                const execPromise = opts.bus.emitPipeAsync("shell:exec-request", {
                    command,
                    output: "",
                    cwd: opts.getCwd(),
                    exitCode: null,
                    done: false,
                });
                const timeoutPromise = new Promise((resolve) => {
                    timer = setTimeout(() => resolve(TIMED_OUT), timeoutSec * 1000);
                });
                const outcome = await Promise.race([execPromise, timeoutPromise]);
                if (outcome === TIMED_OUT) {
                    return {
                        content: `Command timed out after ${timeoutSec}s.`,
                        exitCode: -1,
                        isError: true,
                    };
                }
                result = outcome;
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                return { content: `Error: ${msg}`, exitCode: -1, isError: true };
            }
            finally {
                // Always release the timer so it does not linger (and keep the
                // event loop busy) after the command has already finished.
                clearTimeout(timer);
            }
            // A missing result or missing exit code is treated as success (0).
            const exitCode = result?.exitCode ?? 0;
            const isError = exitCode !== 0;
            return {
                content: isError
                    ? `Command failed with exit code ${exitCode}.`
                    : "Output displayed to user.",
                exitCode,
                isError,
            };
        },
    };
}
|