agent-sh 0.14.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +7 -18
  2. package/dist/agent/agent-loop.d.ts +1 -1
  3. package/dist/agent/agent-loop.js +42 -31
  4. package/dist/agent/conversation-state.d.ts +3 -2
  5. package/dist/agent/conversation-state.js +20 -3
  6. package/dist/agent/events.d.ts +2 -0
  7. package/dist/agent/host-types.d.ts +3 -0
  8. package/dist/agent/index.js +2 -1
  9. package/dist/agent/llm-client.js +1 -0
  10. package/dist/agent/subagent.d.ts +1 -1
  11. package/dist/agent/subagent.js +5 -1
  12. package/dist/agent/tool-protocol.d.ts +2 -2
  13. package/dist/agent/tool-protocol.js +5 -4
  14. package/dist/agent/tools/glob.d.ts +1 -1
  15. package/dist/agent/tools/glob.js +4 -2
  16. package/dist/agent/tools/grep.d.ts +1 -1
  17. package/dist/agent/tools/grep.js +4 -2
  18. package/dist/agent/tools/ls.d.ts +1 -1
  19. package/dist/agent/tools/ls.js +4 -2
  20. package/dist/agent/tools/read-file.d.ts +1 -1
  21. package/dist/agent/tools/read-file.js +30 -2
  22. package/dist/agent/types.d.ts +13 -3
  23. package/dist/agent/types.js +6 -1
  24. package/dist/cli/args.js +3 -1
  25. package/dist/cli/index.js +0 -0
  26. package/dist/cli/install.d.ts +1 -0
  27. package/dist/cli/install.js +86 -2
  28. package/dist/cli/subcommands.js +4 -1
  29. package/dist/core/index.d.ts +1 -1
  30. package/dist/core/settings.d.ts +3 -0
  31. package/dist/core/settings.js +2 -2
  32. package/dist/shell/index.d.ts +6 -0
  33. package/dist/shell/index.js +10 -10
  34. package/dist/shell/shell.d.ts +4 -0
  35. package/dist/shell/shell.js +15 -29
  36. package/dist/shell/terminal.d.ts +33 -0
  37. package/dist/shell/terminal.js +62 -0
  38. package/dist/utils/tool-interactive.js +4 -2
  39. package/examples/extensions/ash-scheme/index.ts +2170 -0
  40. package/examples/extensions/ash-scheme/package.json +11 -0
  41. package/examples/extensions/ash-scheme-render.ts +58 -0
  42. package/examples/extensions/ashi/README.md +36 -26
  43. package/examples/extensions/ashi/package.json +9 -1
  44. package/examples/extensions/ashi/src/capture.ts +1 -0
  45. package/examples/extensions/ashi/src/cli.ts +25 -8
  46. package/examples/extensions/ashi/src/compaction.ts +25 -96
  47. package/examples/extensions/ashi/src/components.ts +64 -166
  48. package/examples/extensions/ashi/src/default-schema-renderers.ts +229 -0
  49. package/examples/extensions/ashi/src/display-config.ts +21 -22
  50. package/examples/extensions/ashi/src/frontend.ts +64 -65
  51. package/examples/extensions/ashi/src/hooks.ts +47 -63
  52. package/examples/extensions/ashi/src/multi-session-store.ts +44 -3
  53. package/examples/extensions/ashi/src/schema.ts +407 -0
  54. package/examples/extensions/ashi/src/session-store.ts +55 -4
  55. package/examples/extensions/ashi/src/status-footer.ts +27 -6
  56. package/examples/extensions/ashi-compact-llm.ts +93 -0
  57. package/examples/extensions/claude-code-bridge/index.ts +9 -2
  58. package/examples/extensions/claude-code-bridge/package.json +1 -1
  59. package/examples/extensions/opencode-bridge/index.ts +208 -53
  60. package/examples/extensions/opencode-bridge/package.json +1 -1
  61. package/examples/extensions/opencode-provider.ts +252 -0
  62. package/examples/extensions/pi-bridge/index.ts +1 -0
  63. package/package.json +12 -1
  64. package/examples/extensions/ashi/src/default-renderers.ts +0 -171
package/README.md CHANGED
@@ -19,7 +19,7 @@ So I built agent-sh. Under the hood it's a normal shell on top of node-pty — y
19
19
  ~ $ > draft a commit message # agent reads your diff and shell history
20
20
  ```
21
21
 
22
- agent-sh is built to be agent-agnostic. You can [bring your own coding agent](#bring-your-own-agent) or use the built-in agent `ash` — a lightweight, extensible agent if you'd like to build extensions on top of it.
22
+ agent-sh is built to be agent-agnostic. The recommended path is the built-in agent `ash` — a lightweight agent designed so extensions can plug into the same tool surface. If you'd rather host an existing coding agent (pi, claude-code, opencode), you can [bring your own](#bring-your-own-agent) — with the trade-off that it manages its own separate tools.
23
23
 
24
24
  ## Quick Start
25
25
 
@@ -55,24 +55,9 @@ alias ash="agent-sh"
55
55
 
56
56
  Once installed, pick a backend below.
57
57
 
58
- ### Option A: Bring your own coding agent
58
+ ### Option A: Use the built-in agent (ash) — recommended
59
59
 
60
- If you already use a coding agent, host it inside agent-sh — same terminal, same `>` entry point, same shell-context wiring. Three bridges ship in the box:
61
-
62
- - **pi** — [pi-mono](https://github.com/badlogic/pi-mono) coding agent
63
- - **claude-code** — official [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk)
64
- - **opencode** — [opencode](https://opencode.ai/) via `@opencode-ai/sdk`
65
-
66
- ```bash
67
- agent-sh install pi-bridge
68
- agent-sh --backend pi
69
- ```
70
-
71
- See [Bring your own agent](#bring-your-own-agent) below for full details and the other backends.
72
-
73
- ### Option B: Use the built-in agent (ash)
74
-
75
- `ash` is agent-sh's own lightweight agent. It works with any OpenAI-compatible API — pick one of the zero-config paths below, no settings file needed. The built-in providers (openrouter, openai, openai-compatible, deepseek) register on startup; ash activates the first one with a usable key.
60
+ `ash` is agent-sh's own lightweight agent, and the path most users should start with: it shares its tool surface with the rest of the system, so extensions you install (new tools, content transforms, slash commands, themes) compose with it directly. It works with any OpenAI-compatible API — pick one of the zero-config paths below, no settings file needed. The built-in providers (openrouter, openai, openai-compatible, deepseek) register on startup; ash activates the first one with a usable key.
76
61
 
77
62
  **Quickest path** — store a key once via the auth subcommand:
78
63
 
@@ -121,6 +106,10 @@ For richer configuration (multiple providers, extensions), run `agent-sh init` t
121
106
 
122
107
  `ash` is designed to be extended. Extensions can add tools, content transforms (e.g. render LaTeX or Mermaid), themes, slash commands, or new input modes — see [Extensions](docs/extensions.md) for the full surface.
123
108
 
109
+ ### Option B: Bring your own coding agent
110
+
111
+ If you already use pi, claude-code, or opencode, agent-sh can host it as the backend instead — see [Bring your own agent](#bring-your-own-agent) just below for the full setup and the trade-offs.
112
+
124
113
  ## Bring your own agent
125
114
 
126
115
  The built-in agent (`ash`) is the default, but agent-sh can host a different coding agent as its backend — same terminal, same `>` entry point, same shell-context wiring. Three bridges ship in the box:
@@ -15,7 +15,7 @@ import type { EventBus } from "../core/event-bus.js";
15
15
  import type { AgentMode } from "./host-types.js";
16
16
  import type { LlmClient } from "./llm-client.js";
17
17
  import type { HandlerFunctions } from "../utils/handler-registry.js";
18
- import type { AgentBackend, ToolDefinition } from "./types.js";
18
+ import { type AgentBackend, type ToolDefinition } from "./types.js";
19
19
  import { type HistoryAdapter } from "./history-file.js";
20
20
  import type { Compositor } from "../utils/compositor.js";
21
21
  export interface AgentLoopConfig {
@@ -1,5 +1,6 @@
1
1
  import { setMaxListeners } from "node:events";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "./types.js";
3
4
  import { ToolRegistry } from "./tool-registry.js";
4
5
  import { normalizeToolArgs } from "./normalize-args.js";
5
6
  import { ConversationState } from "./conversation-state.js";
@@ -244,9 +245,9 @@ export class AgentLoop {
244
245
  this.conversation = new ConversationState(this.handlers, this.instanceId);
245
246
  this.lastProjectSkillNames.clear();
246
247
  });
247
- on("agent:compact-request", () => {
248
+ on("agent:compact-request", async () => {
248
249
  // Force compaction. Strategy lives behind `conversation:compact`.
249
- const stats = this.compactWithHooks(0, 0, true);
250
+ const stats = await this.compactWithHooks(0, 0, true);
250
251
  if (stats) {
251
252
  this.bus.emit("ui:info", {
252
253
  message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -454,13 +455,13 @@ export class AgentLoop {
454
455
  * compaction, emit `conversation:after-compact` so listeners
455
456
  * (metrics, UI, agent-awareness notes) can react.
456
457
  */
457
- compactWithHooks(target, keepRecent, force, strategy) {
458
- const stats = this.handlers.call("conversation:compact", {
458
+ async compactWithHooks(target, keepRecent, force, strategy) {
459
+ const stats = (await this.handlers.call("conversation:compact", {
459
460
  target,
460
461
  keepRecent,
461
462
  force: !!force,
462
463
  strategy,
463
- });
464
+ }));
464
465
  if (stats) {
465
466
  this.bus.emit("conversation:after-compact", {
466
467
  beforeTokens: stats.before,
@@ -597,7 +598,7 @@ export class AgentLoop {
597
598
  },
598
599
  formatResult: (args, result) => {
599
600
  const action = args.action;
600
- const text = result.content;
601
+ const text = contentText(result.content);
601
602
  if (result.isError)
602
603
  return { summary: "error" };
603
604
  if (action === "search") {
@@ -658,6 +659,13 @@ export class AgentLoop {
658
659
  if (extensionSections.length > 0) {
659
660
  parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
660
661
  }
662
+ if (this.currentMode.modalities?.includes("image")) {
663
+ parts.push("# Image Support\n\n"
664
+ + "This model supports image input. When you need visual information, "
665
+ + "you can read image files (PNG, JPEG, GIF, WebP) with read_file — "
666
+ + "they will be shown to you directly. Use this to inspect screenshots, "
667
+ + "diagrams, UI mockups, charts, or any visual content relevant to the task.");
668
+ }
661
669
  return parts.join("\n\n");
662
670
  });
663
671
  // ── Orthogonal core-state accessors ──────────────────────────
@@ -838,6 +846,7 @@ export class AgentLoop {
838
846
  const label = tool.displayName ?? name;
839
847
  this.bus.emit("agent:tool-started", {
840
848
  title: typeof args.description === "string" ? `${label}: ${args.description}` : label,
849
+ name,
841
850
  toolCallId: id,
842
851
  kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
843
852
  displayDetail: tool.formatCall?.(args),
@@ -878,7 +887,7 @@ export class AgentLoop {
878
887
  resultDisplay,
879
888
  });
880
889
  this.bus.emit("agent:tool-output", {
881
- tool: name, output: result.content, exitCode: result.exitCode,
890
+ tool: name, output: contentText(result.content), exitCode: result.exitCode,
882
891
  });
883
892
  return result;
884
893
  });
@@ -959,7 +968,7 @@ export class AgentLoop {
959
968
  // Compact deeply — shallow targets buy only 1–2 turns of runway on
960
969
  // tool-heavy workloads.
961
970
  const target = Math.floor(threshold * 0.25);
962
- const result = this.compactWithHooks(target, 1);
971
+ const result = await this.compactWithHooks(target, 1);
963
972
  if (!result) {
964
973
  // Auto-compact fired but nothing was evictable. This can happen
965
974
  // in short conversations with heavy tool output where the pin
@@ -1081,6 +1090,7 @@ export class AgentLoop {
1081
1090
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
1082
1091
  this.bus.emit("agent:tool-started", {
1083
1092
  title: tool.displayName ?? tc.name,
1093
+ name: tc.name,
1084
1094
  toolCallId: tc.id,
1085
1095
  kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
1086
1096
  displayDetail: tool.formatCall?.(args),
@@ -1097,7 +1107,7 @@ export class AgentLoop {
1097
1107
  resultDisplay,
1098
1108
  });
1099
1109
  this.bus.emit("agent:tool-output", {
1100
- tool: tc.name, output: cached.content, exitCode: 0,
1110
+ tool: tc.name, output: contentText(cached.content), exitCode: 0,
1101
1111
  });
1102
1112
  collectedResults.push({
1103
1113
  callId: tc.id, toolName: tc.name,
@@ -1114,29 +1124,30 @@ export class AgentLoop {
1114
1124
  const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
1115
1125
  batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
1116
1126
  signal });
1117
- // Truncate large outputs to avoid blowing context.
1118
1127
  let content = result.content;
1119
- const maxBytes = tool.maxResultBytes ?? 100_000; // ~25k tokens
1120
- if (content.length > maxBytes) {
1121
- const headBytes = Math.floor(maxBytes * 0.6);
1122
- const tailBytes = maxBytes - headBytes;
1123
- const lines = content.split("\n");
1124
- let headEnd = 0, headLen = 0;
1125
- for (let i = 0; i < lines.length && headLen + lines[i].length + 1 <= headBytes; i++) {
1126
- headLen += lines[i].length + 1;
1127
- headEnd = i + 1;
1128
- }
1129
- let tailStart = lines.length, tailLen = 0;
1130
- for (let i = lines.length - 1; i >= headEnd && tailLen + lines[i].length + 1 <= tailBytes; i--) {
1131
- tailLen += lines[i].length + 1;
1132
- tailStart = i;
1128
+ if (typeof content === "string") {
1129
+ const maxBytes = tool.maxResultBytes ?? 100_000; // ~25k tokens
1130
+ if (content.length > maxBytes) {
1131
+ const headBytes = Math.floor(maxBytes * 0.6);
1132
+ const tailBytes = maxBytes - headBytes;
1133
+ const lines = content.split("\n");
1134
+ let headEnd = 0, headLen = 0;
1135
+ for (let i = 0; i < lines.length && headLen + lines[i].length + 1 <= headBytes; i++) {
1136
+ headLen += lines[i].length + 1;
1137
+ headEnd = i + 1;
1138
+ }
1139
+ let tailStart = lines.length, tailLen = 0;
1140
+ for (let i = lines.length - 1; i >= headEnd && tailLen + lines[i].length + 1 <= tailBytes; i--) {
1141
+ tailLen += lines[i].length + 1;
1142
+ tailStart = i;
1143
+ }
1144
+ const omitted = tailStart - headEnd;
1145
+ content = [
1146
+ ...lines.slice(0, headEnd),
1147
+ `\n[… ${omitted} lines omitted (output truncated to ${Math.round(maxBytes / 1024)}KB) …]\n`,
1148
+ ...lines.slice(tailStart),
1149
+ ].join("\n");
1133
1150
  }
1134
- const omitted = tailStart - headEnd;
1135
- content = [
1136
- ...lines.slice(0, headEnd),
1137
- `\n[… ${omitted} lines omitted (output truncated to ${Math.round(maxBytes / 1024)}KB) …]\n`,
1138
- ...lines.slice(tailStart),
1139
- ].join("\n");
1140
1151
  }
1141
1152
  const finalResult = {
1142
1153
  callId: tc.id, toolName: tc.name,
@@ -1347,7 +1358,7 @@ export class AgentLoop {
1347
1358
  if (this.isContextOverflow(e)) {
1348
1359
  const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1349
1360
  const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
1350
- const stats = this.compactWithHooks(target, 1);
1361
+ const stats = await this.compactWithHooks(target, 1);
1351
1362
  // If compaction freed nothing, retrying will hit the same error.
1352
1363
  // Surface the real failure instead of looping until exhaustion.
1353
1364
  if (!stats || stats.after >= stats.before) {
@@ -1,4 +1,5 @@
1
1
  import type { ChatCompletionMessageParam } from "./llm-client.js";
2
+ import type { ImageContent } from "./types.js";
2
3
  import { type NuclearEntry } from "./nuclear-form.js";
3
4
  import type { HandlerFunctions } from "../utils/handler-registry.js";
4
5
  /** Search hit shape returned by the `history:search` handler. */
@@ -51,7 +52,7 @@ export declare class ConversationState {
51
52
  arguments: string;
52
53
  };
53
54
  }[], extras?: Record<string, unknown>): void;
54
- addToolResult(toolCallId: string, content: string, isError?: boolean): void;
55
+ addToolResult(toolCallId: string, content: string | ImageContent[], isError?: boolean): void;
55
56
  /** Add tool results as a user message (for inline tool protocol). */
56
57
  addToolResultInline(content: string): void;
57
58
  /** Safe from any context: queues if mid-tool-pair, appends otherwise. */
@@ -91,7 +92,7 @@ export declare class ConversationState {
91
92
  eagerNucleateTools(results: Array<{
92
93
  toolName: string;
93
94
  args: Record<string, unknown>;
94
- content: string;
95
+ content: string | ImageContent[];
95
96
  isError: boolean;
96
97
  }>): void;
97
98
  /** Track an entry in memory (nuclear list + recall archive). */
@@ -123,7 +123,23 @@ export class ConversationState {
123
123
  this.invalidateMessagesCache();
124
124
  }
125
125
  addToolResult(toolCallId, content, isError = false) {
126
- this.messages.push({ role: "tool", tool_call_id: toolCallId, content });
126
+ if (typeof content === "string") {
127
+ this.messages.push({ role: "tool", tool_call_id: toolCallId, content });
128
+ }
129
+ else {
130
+ // Assembles OpenAI vision content parts for multimodal tool results.
131
+ // This format (array of text + image_url blocks on a tool message) is
132
+ // supported by OpenAI and most OpenAI-compatible providers. Providers
133
+ // that don't support it should not declare image modalities, so this
134
+ // path is only reached for providers known to handle it.
135
+ const parts = [];
136
+ for (const img of content) {
137
+ parts.push({ type: "image_url", image_url: { url: `data:${img.mimeType};base64,${img.data}` } });
138
+ }
139
+ const label = isError ? `Error: [${content.length} image(s)]` : `[${content.length} image(s)]`;
140
+ parts.unshift({ type: "text", text: label });
141
+ this.messages.push({ role: "tool", tool_call_id: toolCallId, content: parts });
142
+ }
127
143
  if (isError)
128
144
  this.toolErrors.add(toolCallId);
129
145
  this.invalidateMessagesCache();
@@ -311,11 +327,12 @@ export class ConversationState {
311
327
  const entries = [];
312
328
  for (const r of results) {
313
329
  const seq = this.nextSeq++;
314
- const entry = this.handlers.call("conversation:nucleate-tool", r.toolName, r.args, r.content, r.isError, this.instanceId, seq);
330
+ const text = typeof r.content === "string" ? r.content : `[${r.content.length} image(s)]`;
331
+ const entry = this.handlers.call("conversation:nucleate-tool", r.toolName, r.args, text, r.isError, this.instanceId, seq);
315
332
  entries.push(entry);
316
333
  this.recordNuclearEntry(entry, [
317
334
  { role: "assistant", content: null, tool_calls: [{ id: `seq_${seq}`, type: "function", function: { name: r.toolName, arguments: JSON.stringify(r.args) } }] },
318
- { role: "tool", tool_call_id: `seq_${seq}`, content: r.content },
335
+ { role: "tool", tool_call_id: `seq_${seq}`, content: text },
319
336
  ]);
320
337
  }
321
338
  this.appendToHistory(entries);
@@ -99,6 +99,8 @@ declare module "../core/event-bus.js" {
99
99
  };
100
100
  "agent:tool-started": {
101
101
  title: string;
102
+ /** Canonical tool name; `title` is the display label and may differ. */
103
+ name?: string;
102
104
  toolCallId?: string;
103
105
  kind?: string;
104
106
  icon?: string;
@@ -51,6 +51,7 @@ export interface ProviderRegistration {
51
51
  contextWindow?: number;
52
52
  maxTokens?: number;
53
53
  echoReasoning?: boolean;
54
+ modalities?: ("text" | "image")[];
54
55
  })[];
55
56
  supportsReasoningEffort?: boolean;
56
57
  /** Local daemons etc. — `auth list/login` shows "no auth required". */
@@ -77,6 +78,8 @@ export interface AgentMode {
77
78
  /** Echo reasoning_content back on assistant turns. Required by DeepSeek;
78
79
  * default off (leaky shims may forward it to the model as OOD input). */
79
80
  echoReasoning?: boolean;
81
+ /** Input modalities the model supports. Defaults to ["text"]. */
82
+ modalities?: ("text" | "image")[];
80
83
  buildReasoningParams?: (level: string) => Record<string, unknown>;
81
84
  }
82
85
  /**
@@ -61,7 +61,7 @@ function splitRegistration(p) {
61
61
  }
62
62
  else {
63
63
  ids.push(m.id);
64
- caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
64
+ caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning, modalities: m.modalities });
65
65
  }
66
66
  }
67
67
  return { ids, caps };
@@ -287,6 +287,7 @@ export default function agentBackend(ctx) {
287
287
  reasoning: mc?.reasoning,
288
288
  supportsReasoningEffort: p.supportsReasoningEffort,
289
289
  echoReasoning: mc?.echoReasoning,
290
+ modalities: mc?.modalities,
290
291
  buildReasoningParams: bindReasoning(shapeId, model),
291
292
  });
292
293
  }
@@ -16,6 +16,7 @@ function attributionHeaders(config) {
16
16
  return {
17
17
  "HTTP-Referer": config.appUrl ?? "https://agent-sh.dev",
18
18
  "X-Title": config.appName ?? "agent-sh",
19
+ "X-OpenRouter-Categories": "cli-agent,programming-app",
19
20
  };
20
21
  }
21
22
  export class LlmClient {
@@ -11,7 +11,7 @@
11
11
  */
12
12
  import type { EventBus } from "../core/event-bus.js";
13
13
  import type { LlmClient } from "./llm-client.js";
14
- import type { ToolDefinition } from "./types.js";
14
+ import { type ToolDefinition } from "./types.js";
15
15
  export interface SubagentOptions {
16
16
  /** LLM client to use. */
17
17
  llmClient: LlmClient;
@@ -1,3 +1,4 @@
1
+ import { contentText } from "./types.js";
1
2
  import { ConversationState } from "./conversation-state.js";
2
3
  import { normalizeToolArgs } from "./normalize-args.js";
3
4
  import { wrapTrailingWithDynamicContext } from "../utils/message-utils.js";
@@ -63,6 +64,7 @@ export async function runSubagent(opts) {
63
64
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
64
65
  bus.emit("agent:tool-started", {
65
66
  title: tc.name,
67
+ name: tc.name,
66
68
  toolCallId: tc.id,
67
69
  kind: display.kind,
68
70
  locations: display.locations,
@@ -84,7 +86,9 @@ export async function runSubagent(opts) {
84
86
  resultDisplay,
85
87
  });
86
88
  }
87
- const content = result.isError ? `Error: ${result.content}` : result.content;
89
+ const content = result.isError
90
+ ? `Error: ${contentText(result.content)}`
91
+ : result.content;
88
92
  conversation.addToolResult(tc.id, content, !!result.isError);
89
93
  }
90
94
  }
@@ -10,7 +10,7 @@
10
10
  * doesn't need to know which mode is active.
11
11
  */
12
12
  import type { ChatCompletionTool } from "./llm-client.js";
13
- import type { ToolDefinition } from "./types.js";
13
+ import { type ToolDefinition } from "./types.js";
14
14
  import type { ConversationState } from "./conversation-state.js";
15
15
  export interface PendingToolCall {
16
16
  id: string;
@@ -20,7 +20,7 @@ export interface PendingToolCall {
20
20
  export interface ToolResult {
21
21
  callId: string;
22
22
  toolName: string;
23
- content: string;
23
+ content: string | import("./types.js").ImageContent[];
24
24
  isError: boolean;
25
25
  }
26
26
  /** Streaming filter — strips tool calls from display output. */
@@ -1,3 +1,4 @@
1
+ import { contentText } from "./types.js";
1
2
  // ── API mode (current behavior) ──────────────────────────────────
2
3
  export class ApiToolProtocol {
3
4
  mode = "api";
@@ -33,7 +34,7 @@ export class ApiToolProtocol {
33
34
  }
34
35
  recordResults(conv, results) {
35
36
  for (const r of results) {
36
- const content = r.isError ? `Error: ${r.content}` : r.content;
37
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
37
38
  conv.addToolResult(r.callId, content, r.isError);
38
39
  }
39
40
  }
@@ -105,7 +106,7 @@ export class InlineToolProtocol {
105
106
  return;
106
107
  const parts = results.map((r) => {
107
108
  const status = r.isError ? "error" : "ok";
108
- return `[${r.toolName} ${r.callId} ${status}]\n${r.content}`;
109
+ return `[${r.toolName} ${r.callId} ${status}]\n${contentText(r.content)}`;
109
110
  });
110
111
  conv.addToolResultInline(parts.join("\n\n"));
111
112
  }
@@ -362,7 +363,7 @@ export class DeferredToolProtocol {
362
363
  }
363
364
  recordResults(conv, results) {
364
365
  for (const r of results) {
365
- const content = r.isError ? `Error: ${r.content}` : r.content;
366
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
366
367
  conv.addToolResult(r.callId, content, r.isError);
367
368
  }
368
369
  }
@@ -455,7 +456,7 @@ export class DeferredLookupProtocol {
455
456
  }
456
457
  recordResults(conv, results) {
457
458
  for (const r of results) {
458
- const content = r.isError ? `Error: ${r.content}` : r.content;
459
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
459
460
  conv.addToolResult(r.callId, content, r.isError);
460
461
  }
461
462
  }
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createGlobTool(getCwd: () => string): ToolDefinition;
@@ -2,6 +2,7 @@ import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
3
  import { executeArgv } from "../../utils/executor.js";
4
4
  import { resolveRgPath } from "../../utils/ripgrep-path.js";
5
+ import { contentText } from "../types.js";
5
6
  import { expandHome } from "./expand-home.js";
6
7
  export function createGlobTool(getCwd) {
7
8
  return {
@@ -27,9 +28,10 @@ export function createGlobTool(getCwd) {
27
28
  },
28
29
  showOutput: false,
29
30
  formatResult: (_args, result) => {
30
- if (result.isError || result.content === "No files matched.")
31
+ const text = contentText(result.content);
32
+ if (result.isError || text === "No files matched.")
31
33
  return { summary: "0 files" };
32
- const lines = result.content.split("\n").filter(l => l && !l.startsWith("["));
34
+ const lines = text.split("\n").filter(l => l && !l.startsWith("["));
33
35
  return { summary: `${lines.length} files` };
34
36
  },
35
37
  getDisplayInfo: (args) => ({
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createGrepTool(getCwd: () => string): ToolDefinition;
@@ -1,5 +1,6 @@
1
1
  import { executeArgv } from "../../utils/executor.js";
2
2
  import { resolveRgPath } from "../../utils/ripgrep-path.js";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
4
5
  export function createGrepTool(getCwd) {
5
6
  return {
@@ -58,9 +59,10 @@ export function createGrepTool(getCwd) {
58
59
  },
59
60
  showOutput: false,
60
61
  formatResult: (args, result) => {
61
- if (result.isError || result.content === "No matches found.")
62
+ const text = contentText(result.content);
63
+ if (result.isError || text === "No matches found.")
62
64
  return { summary: "0 matches" };
63
- const lines = result.content.split("\n").filter(Boolean);
65
+ const lines = text.split("\n").filter(Boolean);
64
66
  // Strip pagination info line from count
65
67
  const resultLines = lines.filter(l => !l.startsWith("[Showing "));
66
68
  const mode = args.output_mode ?? "files_with_matches";
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createLsTool(getCwd: () => string): ToolDefinition;
@@ -1,5 +1,6 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
4
5
  function formatSize(bytes) {
5
6
  if (bytes < 1024)
@@ -33,9 +34,10 @@ export function createLsTool(getCwd) {
33
34
  : [],
34
35
  }),
35
36
  formatResult: (_args, result) => {
36
- if (result.isError || result.content === "(empty directory)")
37
+ const text = contentText(result.content);
38
+ if (result.isError || text === "(empty directory)")
37
39
  return { summary: "0 entries" };
38
- const lines = result.content.split("\n").filter(Boolean);
40
+ const lines = text.split("\n").filter(Boolean);
39
41
  return { summary: `${lines.length} entries` };
40
42
  },
41
43
  async execute(args) {
@@ -1,4 +1,4 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  /** Tracks the last-read state of a file for deduplication. */
3
3
  export interface FileReadState {
4
4
  mtimeMs: number;
@@ -1,6 +1,14 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
5
+ const IMAGE_MIME_TYPES = {
6
+ ".png": "image/png",
7
+ ".jpg": "image/jpeg",
8
+ ".jpeg": "image/jpeg",
9
+ ".gif": "image/gif",
10
+ ".webp": "image/webp",
11
+ };
4
12
  export function createReadFileTool(getCwd, cache) {
5
13
  return {
6
14
  name: "read_file",
@@ -32,11 +40,12 @@ export function createReadFileTool(getCwd, cache) {
32
40
  locations: [{ path: args.path }],
33
41
  }),
34
42
  formatResult: (_args, result) => {
43
+ const text = contentText(result.content);
35
44
  if (result.isError)
36
45
  return {};
37
- if (result.content.startsWith("File unchanged"))
46
+ if (text.startsWith("File unchanged"))
38
47
  return { summary: "cached" };
39
- const lines = result.content.split("\n").filter(l => !l.startsWith("["));
48
+ const lines = text.split("\n").filter(l => !l.startsWith("["));
40
49
  return { summary: `${lines.length} lines` };
41
50
  },
42
51
  async execute(args) {
@@ -72,6 +81,25 @@ export function createReadFileTool(getCwd, cache) {
72
81
  isError: true,
73
82
  };
74
83
  }
84
+ const ext = path.extname(absPath).toLowerCase();
85
+ const mimeType = IMAGE_MIME_TYPES[ext];
86
+ if (mimeType) {
87
+ const MAX_IMAGE_BYTES = 5 * 1024 * 1024; // 5MB — base64 adds ~33%
88
+ if (stat.size > MAX_IMAGE_BYTES) {
89
+ return {
90
+ content: `Image is ${(stat.size / (1024 * 1024)).toFixed(1)}MB — too large. Images are capped at 5MB.`,
91
+ exitCode: 1,
92
+ isError: true,
93
+ };
94
+ }
95
+ const buf = await fs.readFile(absPath);
96
+ const data = buf.toString("base64");
97
+ return {
98
+ content: [{ type: "image", data, mimeType }],
99
+ exitCode: 0,
100
+ isError: false,
101
+ };
102
+ }
75
103
  const content = await fs.readFile(absPath, "utf-8");
76
104
  const lines = content.split("\n");
77
105
  const start = reqOffset - 1; // 1-indexed → 0-indexed
@@ -15,8 +15,18 @@ export interface AgentBackend {
15
15
  start?(): Promise<void>;
16
16
  kill(): void;
17
17
  }
18
+ /** Image content block for multimodal tool results. */
19
+ export interface ImageContent {
20
+ type: "image";
21
+ /** Base64-encoded image data (no data: URL prefix). */
22
+ data: string;
23
+ /** MIME type (e.g. "image/png", "image/jpeg"). */
24
+ mimeType: string;
25
+ }
26
+ /** Extract the text portion of a tool result's content. Returns "" for image-only results. */
27
+ export declare function contentText(content: string | ImageContent[]): string;
18
28
  export interface ToolResult {
19
- content: string;
29
+ content: string | ImageContent[];
20
30
  exitCode: number | null;
21
31
  isError: boolean;
22
32
  /** When set, takes precedence over `tool.formatResult()`. */
@@ -57,8 +67,8 @@ export interface InteractiveSession<T> {
57
67
  render(width: number): string[];
58
68
  /** Handle raw input. Call done(result) to finish the session. */
59
69
  handleInput(data: string, done: (result: T) => void): void;
60
- /** Called when session starts. Receives invalidate() for async re-renders. */
61
- onMount?(invalidate: () => void): void;
70
+ /** done() lets the session resolve itself from outside handleInput. */
71
+ onMount?(invalidate: () => void, done: (result: T) => void): void;
62
72
  /** Called when session ends (cleanup). */
63
73
  onUnmount?(): void;
64
74
  }
@@ -1 +1,6 @@
1
- export {};
1
+ /** Extract the text portion of a tool result's content. Returns "" for image-only results. */
2
+ export function contentText(content) {
3
+ if (typeof content === "string")
4
+ return content;
5
+ return content.map(c => `[image: ${c.mimeType}]`).join("\n");
6
+ }
package/dist/cli/args.js CHANGED
@@ -3,7 +3,9 @@ const HELP_TEXT = `agent-sh — a shell-first terminal where AI is one keystroke
3
3
 
4
4
  Usage: agent-sh [options]
5
5
  agent-sh init [--force] Scaffold ~/.agent-sh/ (settings, examples, AGENTS.md)
6
- agent-sh install <spec> [--force] Install an extension (bundled name, file:, npm:, github:)
6
+ agent-sh install <spec> [--force] [--sync-deps]
7
+ Install an extension (bundled name, file:, npm:, github:)
8
+ --sync-deps rewrites a stale agent-sh pin to the host version
7
9
  agent-sh uninstall <name> Remove an installed extension
8
10
  agent-sh list List installed extensions
9
11
  agent-sh auth login [provider] Store an API key for a built-in provider
package/dist/cli/index.js CHANGED
File without changes
@@ -1,5 +1,6 @@
1
1
  interface InstallOpts {
2
2
  force?: boolean;
3
+ syncDeps?: boolean;
3
4
  }
4
5
  export declare function listBundled(): string[];
5
6
  /** Heuristic: a backend named "pi" is typically provided by an extension called "pi-bridge". */