agent-sh 0.14.1 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/agent/agent-loop.d.ts +1 -1
  2. package/dist/agent/agent-loop.js +42 -31
  3. package/dist/agent/conversation-state.d.ts +3 -2
  4. package/dist/agent/conversation-state.js +20 -3
  5. package/dist/agent/events.d.ts +2 -0
  6. package/dist/agent/host-types.d.ts +3 -0
  7. package/dist/agent/index.js +2 -1
  8. package/dist/agent/subagent.d.ts +1 -1
  9. package/dist/agent/subagent.js +5 -1
  10. package/dist/agent/tool-protocol.d.ts +2 -2
  11. package/dist/agent/tool-protocol.js +5 -4
  12. package/dist/agent/tools/glob.d.ts +1 -1
  13. package/dist/agent/tools/glob.js +4 -2
  14. package/dist/agent/tools/grep.d.ts +1 -1
  15. package/dist/agent/tools/grep.js +4 -2
  16. package/dist/agent/tools/ls.d.ts +1 -1
  17. package/dist/agent/tools/ls.js +4 -2
  18. package/dist/agent/tools/read-file.d.ts +1 -1
  19. package/dist/agent/tools/read-file.js +30 -2
  20. package/dist/agent/types.d.ts +11 -1
  21. package/dist/agent/types.js +6 -1
  22. package/dist/cli/index.js +0 -0
  23. package/dist/core/index.d.ts +1 -1
  24. package/dist/core/settings.d.ts +3 -0
  25. package/dist/core/settings.js +2 -2
  26. package/dist/shell/index.d.ts +6 -0
  27. package/dist/shell/index.js +10 -10
  28. package/dist/shell/shell.d.ts +4 -0
  29. package/dist/shell/shell.js +15 -29
  30. package/dist/shell/terminal.d.ts +33 -0
  31. package/dist/shell/terminal.js +62 -0
  32. package/examples/extensions/ash-scheme/index.ts +2170 -0
  33. package/examples/extensions/ash-scheme/package.json +11 -0
  34. package/examples/extensions/ash-scheme-render.ts +58 -0
  35. package/examples/extensions/ashi/README.md +36 -26
  36. package/examples/extensions/ashi/package.json +9 -1
  37. package/examples/extensions/ashi/src/capture.ts +1 -0
  38. package/examples/extensions/ashi/src/cli.ts +21 -7
  39. package/examples/extensions/ashi/src/compaction.ts +25 -96
  40. package/examples/extensions/ashi/src/components.ts +64 -166
  41. package/examples/extensions/ashi/src/default-schema-renderers.ts +229 -0
  42. package/examples/extensions/ashi/src/display-config.ts +21 -22
  43. package/examples/extensions/ashi/src/frontend.ts +64 -65
  44. package/examples/extensions/ashi/src/hooks.ts +47 -63
  45. package/examples/extensions/ashi/src/multi-session-store.ts +44 -3
  46. package/examples/extensions/ashi/src/schema.ts +407 -0
  47. package/examples/extensions/ashi/src/session-store.ts +55 -4
  48. package/examples/extensions/ashi/src/status-footer.ts +27 -6
  49. package/examples/extensions/ashi-compact-llm.ts +93 -0
  50. package/examples/extensions/claude-code-bridge/index.ts +2 -0
  51. package/examples/extensions/opencode-bridge/index.ts +3 -0
  52. package/examples/extensions/opencode-provider.ts +252 -0
  53. package/examples/extensions/pi-bridge/index.ts +1 -0
  54. package/package.json +12 -1
  55. package/examples/extensions/ashi/src/default-renderers.ts +0 -171
@@ -15,7 +15,7 @@ import type { EventBus } from "../core/event-bus.js";
15
15
  import type { AgentMode } from "./host-types.js";
16
16
  import type { LlmClient } from "./llm-client.js";
17
17
  import type { HandlerFunctions } from "../utils/handler-registry.js";
18
- import type { AgentBackend, ToolDefinition } from "./types.js";
18
+ import { type AgentBackend, type ToolDefinition } from "./types.js";
19
19
  import { type HistoryAdapter } from "./history-file.js";
20
20
  import type { Compositor } from "../utils/compositor.js";
21
21
  export interface AgentLoopConfig {
@@ -1,5 +1,6 @@
1
1
  import { setMaxListeners } from "node:events";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "./types.js";
3
4
  import { ToolRegistry } from "./tool-registry.js";
4
5
  import { normalizeToolArgs } from "./normalize-args.js";
5
6
  import { ConversationState } from "./conversation-state.js";
@@ -244,9 +245,9 @@ export class AgentLoop {
244
245
  this.conversation = new ConversationState(this.handlers, this.instanceId);
245
246
  this.lastProjectSkillNames.clear();
246
247
  });
247
- on("agent:compact-request", () => {
248
+ on("agent:compact-request", async () => {
248
249
  // Force compaction. Strategy lives behind `conversation:compact`.
249
- const stats = this.compactWithHooks(0, 0, true);
250
+ const stats = await this.compactWithHooks(0, 0, true);
250
251
  if (stats) {
251
252
  this.bus.emit("ui:info", {
252
253
  message: `(compacted: ~${stats.before.toLocaleString()} → ~${stats.after.toLocaleString()} tokens)`,
@@ -454,13 +455,13 @@ export class AgentLoop {
454
455
  * compaction, emit `conversation:after-compact` so listeners
455
456
  * (metrics, UI, agent-awareness notes) can react.
456
457
  */
457
- compactWithHooks(target, keepRecent, force, strategy) {
458
- const stats = this.handlers.call("conversation:compact", {
458
+ async compactWithHooks(target, keepRecent, force, strategy) {
459
+ const stats = (await this.handlers.call("conversation:compact", {
459
460
  target,
460
461
  keepRecent,
461
462
  force: !!force,
462
463
  strategy,
463
- });
464
+ }));
464
465
  if (stats) {
465
466
  this.bus.emit("conversation:after-compact", {
466
467
  beforeTokens: stats.before,
@@ -597,7 +598,7 @@ export class AgentLoop {
597
598
  },
598
599
  formatResult: (args, result) => {
599
600
  const action = args.action;
600
- const text = result.content;
601
+ const text = contentText(result.content);
601
602
  if (result.isError)
602
603
  return { summary: "error" };
603
604
  if (action === "search") {
@@ -658,6 +659,13 @@ export class AgentLoop {
658
659
  if (extensionSections.length > 0) {
659
660
  parts.push("# Extension Instructions\n\n" + extensionSections.join("\n\n"));
660
661
  }
662
+ if (this.currentMode.modalities?.includes("image")) {
663
+ parts.push("# Image Support\n\n"
664
+ + "This model supports image input. When you need visual information, "
665
+ + "you can read image files (PNG, JPEG, GIF, WebP) with read_file — "
666
+ + "they will be shown to you directly. Use this to inspect screenshots, "
667
+ + "diagrams, UI mockups, charts, or any visual content relevant to the task.");
668
+ }
661
669
  return parts.join("\n\n");
662
670
  });
663
671
  // ── Orthogonal core-state accessors ──────────────────────────
@@ -838,6 +846,7 @@ export class AgentLoop {
838
846
  const label = tool.displayName ?? name;
839
847
  this.bus.emit("agent:tool-started", {
840
848
  title: typeof args.description === "string" ? `${label}: ${args.description}` : label,
849
+ name,
841
850
  toolCallId: id,
842
851
  kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
843
852
  displayDetail: tool.formatCall?.(args),
@@ -878,7 +887,7 @@ export class AgentLoop {
878
887
  resultDisplay,
879
888
  });
880
889
  this.bus.emit("agent:tool-output", {
881
- tool: name, output: result.content, exitCode: result.exitCode,
890
+ tool: name, output: contentText(result.content), exitCode: result.exitCode,
882
891
  });
883
892
  return result;
884
893
  });
@@ -959,7 +968,7 @@ export class AgentLoop {
959
968
  // Compact deeply — shallow targets buy only 1–2 turns of runway on
960
969
  // tool-heavy workloads.
961
970
  const target = Math.floor(threshold * 0.25);
962
- const result = this.compactWithHooks(target, 1);
971
+ const result = await this.compactWithHooks(target, 1);
963
972
  if (!result) {
964
973
  // Auto-compact fired but nothing was evictable. This can happen
965
974
  // in short conversations with heavy tool output where the pin
@@ -1081,6 +1090,7 @@ export class AgentLoop {
1081
1090
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
1082
1091
  this.bus.emit("agent:tool-started", {
1083
1092
  title: tool.displayName ?? tc.name,
1093
+ name: tc.name,
1084
1094
  toolCallId: tc.id,
1085
1095
  kind: display.kind, icon: display.icon, locations: display.locations, rawInput: args,
1086
1096
  displayDetail: tool.formatCall?.(args),
@@ -1097,7 +1107,7 @@ export class AgentLoop {
1097
1107
  resultDisplay,
1098
1108
  });
1099
1109
  this.bus.emit("agent:tool-output", {
1100
- tool: tc.name, output: cached.content, exitCode: 0,
1110
+ tool: tc.name, output: contentText(cached.content), exitCode: 0,
1101
1111
  });
1102
1112
  collectedResults.push({
1103
1113
  callId: tc.id, toolName: tc.name,
@@ -1114,29 +1124,30 @@ export class AgentLoop {
1114
1124
  const result = await this.handlers.call("tool:execute", { name: tc.name, id: tc.id, args, tool, onChunk: defaultOnChunk,
1115
1125
  batchIndex, batchTotal: batchTotal > 1 ? batchTotal : undefined,
1116
1126
  signal });
1117
- // Truncate large outputs to avoid blowing context.
1118
1127
  let content = result.content;
1119
- const maxBytes = tool.maxResultBytes ?? 100_000; // ~25k tokens
1120
- if (content.length > maxBytes) {
1121
- const headBytes = Math.floor(maxBytes * 0.6);
1122
- const tailBytes = maxBytes - headBytes;
1123
- const lines = content.split("\n");
1124
- let headEnd = 0, headLen = 0;
1125
- for (let i = 0; i < lines.length && headLen + lines[i].length + 1 <= headBytes; i++) {
1126
- headLen += lines[i].length + 1;
1127
- headEnd = i + 1;
1128
- }
1129
- let tailStart = lines.length, tailLen = 0;
1130
- for (let i = lines.length - 1; i >= headEnd && tailLen + lines[i].length + 1 <= tailBytes; i--) {
1131
- tailLen += lines[i].length + 1;
1132
- tailStart = i;
1128
+ if (typeof content === "string") {
1129
+ const maxBytes = tool.maxResultBytes ?? 100_000; // ~25k tokens
1130
+ if (content.length > maxBytes) {
1131
+ const headBytes = Math.floor(maxBytes * 0.6);
1132
+ const tailBytes = maxBytes - headBytes;
1133
+ const lines = content.split("\n");
1134
+ let headEnd = 0, headLen = 0;
1135
+ for (let i = 0; i < lines.length && headLen + lines[i].length + 1 <= headBytes; i++) {
1136
+ headLen += lines[i].length + 1;
1137
+ headEnd = i + 1;
1138
+ }
1139
+ let tailStart = lines.length, tailLen = 0;
1140
+ for (let i = lines.length - 1; i >= headEnd && tailLen + lines[i].length + 1 <= tailBytes; i--) {
1141
+ tailLen += lines[i].length + 1;
1142
+ tailStart = i;
1143
+ }
1144
+ const omitted = tailStart - headEnd;
1145
+ content = [
1146
+ ...lines.slice(0, headEnd),
1147
+ `\n[… ${omitted} lines omitted (output truncated to ${Math.round(maxBytes / 1024)}KB) …]\n`,
1148
+ ...lines.slice(tailStart),
1149
+ ].join("\n");
1133
1150
  }
1134
- const omitted = tailStart - headEnd;
1135
- content = [
1136
- ...lines.slice(0, headEnd),
1137
- `\n[… ${omitted} lines omitted (output truncated to ${Math.round(maxBytes / 1024)}KB) …]\n`,
1138
- ...lines.slice(tailStart),
1139
- ].join("\n");
1140
1151
  }
1141
1152
  const finalResult = {
1142
1153
  callId: tc.id, toolName: tc.name,
@@ -1347,7 +1358,7 @@ export class AgentLoop {
1347
1358
  if (this.isContextOverflow(e)) {
1348
1359
  const contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
1349
1360
  const target = Math.floor((contextWindow - RESPONSE_RESERVE) * 0.6);
1350
- const stats = this.compactWithHooks(target, 1);
1361
+ const stats = await this.compactWithHooks(target, 1);
1351
1362
  // If compaction freed nothing, retrying will hit the same error.
1352
1363
  // Surface the real failure instead of looping until exhaustion.
1353
1364
  if (!stats || stats.after >= stats.before) {
@@ -1,4 +1,5 @@
1
1
  import type { ChatCompletionMessageParam } from "./llm-client.js";
2
+ import type { ImageContent } from "./types.js";
2
3
  import { type NuclearEntry } from "./nuclear-form.js";
3
4
  import type { HandlerFunctions } from "../utils/handler-registry.js";
4
5
  /** Search hit shape returned by the `history:search` handler. */
@@ -51,7 +52,7 @@ export declare class ConversationState {
51
52
  arguments: string;
52
53
  };
53
54
  }[], extras?: Record<string, unknown>): void;
54
- addToolResult(toolCallId: string, content: string, isError?: boolean): void;
55
+ addToolResult(toolCallId: string, content: string | ImageContent[], isError?: boolean): void;
55
56
  /** Add tool results as a user message (for inline tool protocol). */
56
57
  addToolResultInline(content: string): void;
57
58
  /** Safe from any context: queues if mid-tool-pair, appends otherwise. */
@@ -91,7 +92,7 @@ export declare class ConversationState {
91
92
  eagerNucleateTools(results: Array<{
92
93
  toolName: string;
93
94
  args: Record<string, unknown>;
94
- content: string;
95
+ content: string | ImageContent[];
95
96
  isError: boolean;
96
97
  }>): void;
97
98
  /** Track an entry in memory (nuclear list + recall archive). */
@@ -123,7 +123,23 @@ export class ConversationState {
123
123
  this.invalidateMessagesCache();
124
124
  }
125
125
  addToolResult(toolCallId, content, isError = false) {
126
- this.messages.push({ role: "tool", tool_call_id: toolCallId, content });
126
+ if (typeof content === "string") {
127
+ this.messages.push({ role: "tool", tool_call_id: toolCallId, content });
128
+ }
129
+ else {
130
+ // Assembles OpenAI vision content parts for multimodal tool results.
131
+ // This format (array of text + image_url blocks on a tool message) is
132
+ // supported by OpenAI and most OpenAI-compatible providers. Providers
133
+ // that don't support it should not declare image modalities, so this
134
+ // path is only reached for providers known to handle it.
135
+ const parts = [];
136
+ for (const img of content) {
137
+ parts.push({ type: "image_url", image_url: { url: `data:${img.mimeType};base64,${img.data}` } });
138
+ }
139
+ const label = isError ? `Error: [${content.length} image(s)]` : `[${content.length} image(s)]`;
140
+ parts.unshift({ type: "text", text: label });
141
+ this.messages.push({ role: "tool", tool_call_id: toolCallId, content: parts });
142
+ }
127
143
  if (isError)
128
144
  this.toolErrors.add(toolCallId);
129
145
  this.invalidateMessagesCache();
@@ -311,11 +327,12 @@ export class ConversationState {
311
327
  const entries = [];
312
328
  for (const r of results) {
313
329
  const seq = this.nextSeq++;
314
- const entry = this.handlers.call("conversation:nucleate-tool", r.toolName, r.args, r.content, r.isError, this.instanceId, seq);
330
+ const text = typeof r.content === "string" ? r.content : `[${r.content.length} image(s)]`;
331
+ const entry = this.handlers.call("conversation:nucleate-tool", r.toolName, r.args, text, r.isError, this.instanceId, seq);
315
332
  entries.push(entry);
316
333
  this.recordNuclearEntry(entry, [
317
334
  { role: "assistant", content: null, tool_calls: [{ id: `seq_${seq}`, type: "function", function: { name: r.toolName, arguments: JSON.stringify(r.args) } }] },
318
- { role: "tool", tool_call_id: `seq_${seq}`, content: r.content },
335
+ { role: "tool", tool_call_id: `seq_${seq}`, content: text },
319
336
  ]);
320
337
  }
321
338
  this.appendToHistory(entries);
@@ -99,6 +99,8 @@ declare module "../core/event-bus.js" {
99
99
  };
100
100
  "agent:tool-started": {
101
101
  title: string;
102
+ /** Canonical tool name; `title` is the display label and may differ. */
103
+ name?: string;
102
104
  toolCallId?: string;
103
105
  kind?: string;
104
106
  icon?: string;
@@ -51,6 +51,7 @@ export interface ProviderRegistration {
51
51
  contextWindow?: number;
52
52
  maxTokens?: number;
53
53
  echoReasoning?: boolean;
54
+ modalities?: ("text" | "image")[];
54
55
  })[];
55
56
  supportsReasoningEffort?: boolean;
56
57
  /** Local daemons etc. — `auth list/login` shows "no auth required". */
@@ -77,6 +78,8 @@ export interface AgentMode {
77
78
  /** Echo reasoning_content back on assistant turns. Required by DeepSeek;
78
79
  * default off (leaky shims may forward it to the model as OOD input). */
79
80
  echoReasoning?: boolean;
81
+ /** Input modalities the model supports. Defaults to ["text"]. */
82
+ modalities?: ("text" | "image")[];
80
83
  buildReasoningParams?: (level: string) => Record<string, unknown>;
81
84
  }
82
85
  /**
@@ -61,7 +61,7 @@ function splitRegistration(p) {
61
61
  }
62
62
  else {
63
63
  ids.push(m.id);
64
- caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
64
+ caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning, modalities: m.modalities });
65
65
  }
66
66
  }
67
67
  return { ids, caps };
@@ -287,6 +287,7 @@ export default function agentBackend(ctx) {
287
287
  reasoning: mc?.reasoning,
288
288
  supportsReasoningEffort: p.supportsReasoningEffort,
289
289
  echoReasoning: mc?.echoReasoning,
290
+ modalities: mc?.modalities,
290
291
  buildReasoningParams: bindReasoning(shapeId, model),
291
292
  });
292
293
  }
@@ -11,7 +11,7 @@
11
11
  */
12
12
  import type { EventBus } from "../core/event-bus.js";
13
13
  import type { LlmClient } from "./llm-client.js";
14
- import type { ToolDefinition } from "./types.js";
14
+ import { type ToolDefinition } from "./types.js";
15
15
  export interface SubagentOptions {
16
16
  /** LLM client to use. */
17
17
  llmClient: LlmClient;
@@ -1,3 +1,4 @@
1
+ import { contentText } from "./types.js";
1
2
  import { ConversationState } from "./conversation-state.js";
2
3
  import { normalizeToolArgs } from "./normalize-args.js";
3
4
  import { wrapTrailingWithDynamicContext } from "../utils/message-utils.js";
@@ -63,6 +64,7 @@ export async function runSubagent(opts) {
63
64
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
64
65
  bus.emit("agent:tool-started", {
65
66
  title: tc.name,
67
+ name: tc.name,
66
68
  toolCallId: tc.id,
67
69
  kind: display.kind,
68
70
  locations: display.locations,
@@ -84,7 +86,9 @@ export async function runSubagent(opts) {
84
86
  resultDisplay,
85
87
  });
86
88
  }
87
- const content = result.isError ? `Error: ${result.content}` : result.content;
89
+ const content = result.isError
90
+ ? `Error: ${contentText(result.content)}`
91
+ : result.content;
88
92
  conversation.addToolResult(tc.id, content, !!result.isError);
89
93
  }
90
94
  }
@@ -10,7 +10,7 @@
10
10
  * doesn't need to know which mode is active.
11
11
  */
12
12
  import type { ChatCompletionTool } from "./llm-client.js";
13
- import type { ToolDefinition } from "./types.js";
13
+ import { type ToolDefinition } from "./types.js";
14
14
  import type { ConversationState } from "./conversation-state.js";
15
15
  export interface PendingToolCall {
16
16
  id: string;
@@ -20,7 +20,7 @@ export interface PendingToolCall {
20
20
  export interface ToolResult {
21
21
  callId: string;
22
22
  toolName: string;
23
- content: string;
23
+ content: string | import("./types.js").ImageContent[];
24
24
  isError: boolean;
25
25
  }
26
26
  /** Streaming filter — strips tool calls from display output. */
@@ -1,3 +1,4 @@
1
+ import { contentText } from "./types.js";
1
2
  // ── API mode (current behavior) ──────────────────────────────────
2
3
  export class ApiToolProtocol {
3
4
  mode = "api";
@@ -33,7 +34,7 @@ export class ApiToolProtocol {
33
34
  }
34
35
  recordResults(conv, results) {
35
36
  for (const r of results) {
36
- const content = r.isError ? `Error: ${r.content}` : r.content;
37
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
37
38
  conv.addToolResult(r.callId, content, r.isError);
38
39
  }
39
40
  }
@@ -105,7 +106,7 @@ export class InlineToolProtocol {
105
106
  return;
106
107
  const parts = results.map((r) => {
107
108
  const status = r.isError ? "error" : "ok";
108
- return `[${r.toolName} ${r.callId} ${status}]\n${r.content}`;
109
+ return `[${r.toolName} ${r.callId} ${status}]\n${contentText(r.content)}`;
109
110
  });
110
111
  conv.addToolResultInline(parts.join("\n\n"));
111
112
  }
@@ -362,7 +363,7 @@ export class DeferredToolProtocol {
362
363
  }
363
364
  recordResults(conv, results) {
364
365
  for (const r of results) {
365
- const content = r.isError ? `Error: ${r.content}` : r.content;
366
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
366
367
  conv.addToolResult(r.callId, content, r.isError);
367
368
  }
368
369
  }
@@ -455,7 +456,7 @@ export class DeferredLookupProtocol {
455
456
  }
456
457
  recordResults(conv, results) {
457
458
  for (const r of results) {
458
- const content = r.isError ? `Error: ${r.content}` : r.content;
459
+ const content = r.isError ? `Error: ${contentText(r.content)}` : r.content;
459
460
  conv.addToolResult(r.callId, content, r.isError);
460
461
  }
461
462
  }
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createGlobTool(getCwd: () => string): ToolDefinition;
@@ -2,6 +2,7 @@ import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
3
  import { executeArgv } from "../../utils/executor.js";
4
4
  import { resolveRgPath } from "../../utils/ripgrep-path.js";
5
+ import { contentText } from "../types.js";
5
6
  import { expandHome } from "./expand-home.js";
6
7
  export function createGlobTool(getCwd) {
7
8
  return {
@@ -27,9 +28,10 @@ export function createGlobTool(getCwd) {
27
28
  },
28
29
  showOutput: false,
29
30
  formatResult: (_args, result) => {
30
- if (result.isError || result.content === "No files matched.")
31
+ const text = contentText(result.content);
32
+ if (result.isError || text === "No files matched.")
31
33
  return { summary: "0 files" };
32
- const lines = result.content.split("\n").filter(l => l && !l.startsWith("["));
34
+ const lines = text.split("\n").filter(l => l && !l.startsWith("["));
33
35
  return { summary: `${lines.length} files` };
34
36
  },
35
37
  getDisplayInfo: (args) => ({
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createGrepTool(getCwd: () => string): ToolDefinition;
@@ -1,5 +1,6 @@
1
1
  import { executeArgv } from "../../utils/executor.js";
2
2
  import { resolveRgPath } from "../../utils/ripgrep-path.js";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
4
5
  export function createGrepTool(getCwd) {
5
6
  return {
@@ -58,9 +59,10 @@ export function createGrepTool(getCwd) {
58
59
  },
59
60
  showOutput: false,
60
61
  formatResult: (args, result) => {
61
- if (result.isError || result.content === "No matches found.")
62
+ const text = contentText(result.content);
63
+ if (result.isError || text === "No matches found.")
62
64
  return { summary: "0 matches" };
63
- const lines = result.content.split("\n").filter(Boolean);
65
+ const lines = text.split("\n").filter(Boolean);
64
66
  // Strip pagination info line from count
65
67
  const resultLines = lines.filter(l => !l.startsWith("[Showing "));
66
68
  const mode = args.output_mode ?? "files_with_matches";
@@ -1,2 +1,2 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  export declare function createLsTool(getCwd: () => string): ToolDefinition;
@@ -1,5 +1,6 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
4
5
  function formatSize(bytes) {
5
6
  if (bytes < 1024)
@@ -33,9 +34,10 @@ export function createLsTool(getCwd) {
33
34
  : [],
34
35
  }),
35
36
  formatResult: (_args, result) => {
36
- if (result.isError || result.content === "(empty directory)")
37
+ const text = contentText(result.content);
38
+ if (result.isError || text === "(empty directory)")
37
39
  return { summary: "0 entries" };
38
- const lines = result.content.split("\n").filter(Boolean);
40
+ const lines = text.split("\n").filter(Boolean);
39
41
  return { summary: `${lines.length} entries` };
40
42
  },
41
43
  async execute(args) {
@@ -1,4 +1,4 @@
1
- import type { ToolDefinition } from "../types.js";
1
+ import { type ToolDefinition } from "../types.js";
2
2
  /** Tracks the last-read state of a file for deduplication. */
3
3
  export interface FileReadState {
4
4
  mtimeMs: number;
@@ -1,6 +1,14 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
+ import { contentText } from "../types.js";
3
4
  import { expandHome } from "./expand-home.js";
5
+ const IMAGE_MIME_TYPES = {
6
+ ".png": "image/png",
7
+ ".jpg": "image/jpeg",
8
+ ".jpeg": "image/jpeg",
9
+ ".gif": "image/gif",
10
+ ".webp": "image/webp",
11
+ };
4
12
  export function createReadFileTool(getCwd, cache) {
5
13
  return {
6
14
  name: "read_file",
@@ -32,11 +40,12 @@ export function createReadFileTool(getCwd, cache) {
32
40
  locations: [{ path: args.path }],
33
41
  }),
34
42
  formatResult: (_args, result) => {
43
+ const text = contentText(result.content);
35
44
  if (result.isError)
36
45
  return {};
37
- if (result.content.startsWith("File unchanged"))
46
+ if (text.startsWith("File unchanged"))
38
47
  return { summary: "cached" };
39
- const lines = result.content.split("\n").filter(l => !l.startsWith("["));
48
+ const lines = text.split("\n").filter(l => !l.startsWith("["));
40
49
  return { summary: `${lines.length} lines` };
41
50
  },
42
51
  async execute(args) {
@@ -72,6 +81,25 @@ export function createReadFileTool(getCwd, cache) {
72
81
  isError: true,
73
82
  };
74
83
  }
84
+ const ext = path.extname(absPath).toLowerCase();
85
+ const mimeType = IMAGE_MIME_TYPES[ext];
86
+ if (mimeType) {
87
+ const MAX_IMAGE_BYTES = 5 * 1024 * 1024; // 5MB — base64 adds ~33%
88
+ if (stat.size > MAX_IMAGE_BYTES) {
89
+ return {
90
+ content: `Image is ${(stat.size / (1024 * 1024)).toFixed(1)}MB — too large. Images are capped at 5MB.`,
91
+ exitCode: 1,
92
+ isError: true,
93
+ };
94
+ }
95
+ const buf = await fs.readFile(absPath);
96
+ const data = buf.toString("base64");
97
+ return {
98
+ content: [{ type: "image", data, mimeType }],
99
+ exitCode: 0,
100
+ isError: false,
101
+ };
102
+ }
75
103
  const content = await fs.readFile(absPath, "utf-8");
76
104
  const lines = content.split("\n");
77
105
  const start = reqOffset - 1; // 1-indexed → 0-indexed
@@ -15,8 +15,18 @@ export interface AgentBackend {
15
15
  start?(): Promise<void>;
16
16
  kill(): void;
17
17
  }
18
+ /** Image content block for multimodal tool results. */
19
+ export interface ImageContent {
20
+ type: "image";
21
+ /** Base64-encoded image data (no data: URL prefix). */
22
+ data: string;
23
+ /** MIME type (e.g. "image/png", "image/jpeg"). */
24
+ mimeType: string;
25
+ }
26
+ /** Convert a tool result's content to text. Strings pass through unchanged; an image array becomes one "[image: <mimeType>]" placeholder line per image. */
27
+ export declare function contentText(content: string | ImageContent[]): string;
18
28
  export interface ToolResult {
19
- content: string;
29
+ content: string | ImageContent[];
20
30
  exitCode: number | null;
21
31
  isError: boolean;
22
32
  /** When set, takes precedence over `tool.formatResult()`. */
@@ -1 +1,6 @@
1
- export {};
1
+ /** Convert a tool result's content to text. Strings pass through unchanged; an image array becomes one "[image: <mimeType>]" placeholder line per image. */
2
+ export function contentText(content) {
3
+ if (typeof content === "string")
4
+ return content;
5
+ return content.map(c => `[image: ${c.mimeType}]`).join("\n");
6
+ }
package/dist/cli/index.js CHANGED
File without changes
@@ -17,7 +17,7 @@ export type { AgentContext, AgentConfig, AgentSurface, AgentConfigSurface, Agent
17
17
  export type { ShellContext, ShellConfig, ShellSurface, ShellConfigSurface, ExtensionContext, RemoteSession, RemoteSessionOptions, RenderSurface, InputModeConfig, TerminalSession, BlockTransformOptions, FencedBlockTransformOptions, AppConfig } from "../shell/host-types.js";
18
18
  export { palette, setPalette, resetPalette } from "../utils/palette.js";
19
19
  export type { ColorPalette } from "../utils/palette.js";
20
- export type { AgentBackend, ToolDefinition } from "../agent/types.js";
20
+ export type { AgentBackend, ToolDefinition, ImageContent } from "../agent/types.js";
21
21
  export { runSubagent, type SubagentOptions } from "../agent/subagent.js";
22
22
  export { LlmClient } from "../agent/llm-client.js";
23
23
  export { HistoryFile, InMemoryHistory, NoopHistory, type HistoryAdapter } from "../agent/history-file.js";
@@ -13,6 +13,8 @@ export interface ModelCapabilityConfig {
13
13
  maxTokens?: number;
14
14
  /** Echo reasoning_content back on assistant turns. Required by DeepSeek. */
15
15
  echoReasoning?: boolean;
16
+ /** Content modalities the model supports (e.g. ["text", "image"]). */
17
+ modalities?: ("text" | "image")[];
16
18
  }
17
19
  /** Provider profile — a named LLM configuration. */
18
20
  export interface ProviderConfig {
@@ -163,6 +165,7 @@ export interface ResolvedProvider {
163
165
  contextWindow?: number;
164
166
  maxTokens?: number;
165
167
  echoReasoning?: boolean;
168
+ modalities?: ("text" | "image")[];
166
169
  }>;
167
170
  /** Borrow another registered provider's reasoning request shape by id. */
168
171
  reasoningShape?: string;
@@ -150,8 +150,8 @@ export function resolveProvider(name) {
150
150
  }
151
151
  else {
152
152
  modelIds.push(m.id);
153
- if (m.reasoning !== undefined || m.contextWindow !== undefined || m.maxTokens !== undefined || m.echoReasoning !== undefined) {
154
- caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
153
+ if (m.reasoning !== undefined || m.contextWindow !== undefined || m.maxTokens !== undefined || m.echoReasoning !== undefined || m.modalities !== undefined) {
154
+ caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning, modalities: m.modalities });
155
155
  }
156
156
  }
157
157
  }
@@ -5,6 +5,7 @@
5
5
  */
6
6
  import "./events.js";
7
7
  import type { ExtensionContext } from "./host-types.js";
8
+ import { type Terminal } from "./terminal.js";
8
9
  export interface ShellActivateOptions {
9
10
  cols: number;
10
11
  rows: number;
@@ -16,6 +17,11 @@ export interface ShellActivateOptions {
16
17
  info: string;
17
18
  model?: string;
18
19
  };
20
+ /**
21
+ * Host-side I/O endpoint. Defaults to processTerminal() so the CLI
22
+ * works unchanged; headless callers (web hubs, tests) supply their own.
23
+ */
24
+ terminal?: Terminal;
19
25
  }
20
26
  export interface ShellHandle {
21
27
  /** Terminate the PTY. */