agent-sh 0.14.9 → 0.14.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +47 -20
  2. package/dist/agent/agent-loop.js +20 -15
  3. package/dist/agent/events.d.ts +2 -1
  4. package/dist/agent/index.js +44 -7
  5. package/dist/agent/live-view.d.ts +3 -3
  6. package/dist/agent/live-view.js +15 -7
  7. package/dist/agent/providers/ollama.d.ts +11 -0
  8. package/dist/agent/providers/ollama.js +72 -0
  9. package/dist/agent/providers/opencode.d.ts +10 -0
  10. package/dist/agent/providers/opencode.js +112 -0
  11. package/dist/agent/providers/openrouter.js +9 -0
  12. package/dist/agent/providers/zai-coding-plan.d.ts +5 -0
  13. package/dist/agent/providers/zai-coding-plan.js +26 -0
  14. package/dist/agent/subagent.js +1 -1
  15. package/dist/cli/args.js +2 -2
  16. package/dist/cli/install.js +10 -1
  17. package/dist/shell/events.d.ts +3 -0
  18. package/dist/shell/shell.js +3 -0
  19. package/dist/utils/diff-renderer.d.ts +4 -0
  20. package/dist/utils/diff-renderer.js +15 -20
  21. package/examples/extensions/ads/SKILL.md +170 -0
  22. package/examples/extensions/ads/index.ts +695 -0
  23. package/examples/extensions/ash-scheme/index.ts +339 -605
  24. package/examples/extensions/ash-scheme/package.json +1 -1
  25. package/examples/extensions/ashi/EXTENDING.md +116 -0
  26. package/examples/extensions/ashi/README.md +10 -54
  27. package/examples/extensions/ashi/package.json +6 -2
  28. package/examples/extensions/ashi/src/autocomplete-controller.ts +95 -0
  29. package/examples/extensions/ashi/src/autocomplete.ts +1 -23
  30. package/examples/extensions/ashi/src/capture.ts +9 -3
  31. package/examples/extensions/ashi/src/chat/assistant.ts +87 -0
  32. package/examples/extensions/ashi/src/chat/lines.ts +20 -0
  33. package/examples/extensions/ashi/src/chat/thinking.ts +42 -0
  34. package/examples/extensions/ashi/src/chat/tool-group.ts +84 -0
  35. package/examples/extensions/ashi/src/chat/user-message.ts +20 -0
  36. package/examples/extensions/ashi/src/cli.ts +58 -12
  37. package/examples/extensions/ashi/src/clipboard-image.ts +41 -0
  38. package/examples/extensions/ashi/src/commands.ts +11 -1
  39. package/examples/extensions/ashi/src/display-config.ts +9 -1
  40. package/examples/extensions/ashi/src/frontend.ts +340 -259
  41. package/examples/extensions/ashi/src/hooks.ts +33 -40
  42. package/examples/extensions/ashi/src/renderer.ts +222 -0
  43. package/examples/extensions/ashi/src/renderers/pi-tui/app.ts +122 -0
  44. package/examples/extensions/ashi/src/renderers/pi-tui/index.ts +23 -0
  45. package/examples/extensions/ashi/src/renderers/pi-tui/nodes.ts +133 -0
  46. package/examples/extensions/ashi/src/renderers/pi-tui/schema-mount.ts +193 -0
  47. package/examples/extensions/ashi/src/renderers/pi-tui/theme-adapters.ts +48 -0
  48. package/examples/extensions/ashi/src/renderers/pi-tui/tool-group.ts +21 -0
  49. package/examples/extensions/ashi/src/schema.ts +43 -205
  50. package/examples/extensions/ashi/src/status-footer.ts +15 -23
  51. package/examples/extensions/ashi/src/terminal-mode.ts +9 -0
  52. package/examples/extensions/ashi/src/theme.ts +1 -47
  53. package/examples/extensions/ashi-ink/README.md +59 -0
  54. package/examples/extensions/ashi-ink/package.json +30 -0
  55. package/examples/extensions/ashi-ink/src/index.ts +6 -0
  56. package/examples/extensions/ashi-ink/src/ink-renderer.tsx +865 -0
  57. package/examples/extensions/ashi-ink/src/shims.d.ts +5 -0
  58. package/examples/extensions/ashi-ink/test/render.test.tsx +408 -0
  59. package/examples/extensions/ashi-ink/tsconfig.json +14 -0
  60. package/examples/extensions/ashi-scheme-render.ts +4 -10
  61. package/examples/extensions/ashi-shell-passthrough.ts +95 -0
  62. package/examples/extensions/latex-images.ts +22 -19
  63. package/examples/extensions/terminal-buffer.ts +4 -2
  64. package/package.json +3 -9
  65. package/examples/extensions/ashi/src/components.ts +0 -238
  66. package/examples/extensions/ollama.ts +0 -108
  67. package/examples/extensions/opencode-provider.ts +0 -251
  68. package/examples/extensions/zai-coding-plan.ts +0 -35
package/README.md CHANGED
@@ -1,15 +1,19 @@
1
1
  # agent-sh
2
2
 
3
- A real shell with an AI agent one keystroke away.
4
-
5
3
  [![npm version](https://img.shields.io/npm/v/agent-sh.svg)](https://www.npmjs.com/package/agent-sh)
6
4
  [![license](https://img.shields.io/npm/l/agent-sh.svg)](https://github.com/guanyilun/agent-sh/blob/main/LICENSE)
7
5
 
8
- ![demo](assets/demo.gif)
6
+ A composable agent runtime — pair any frontend with any agent backend, over one shared extension layer.
7
+
8
+ ## Three example apps built on agent-sh
9
9
 
10
- I live in my terminal. A lot of the time I'm not coding — I'm deploying something, poking at a failing `rsync`, figuring out why `docker build` won't start, fixing a one-liner. And very often I need an AI agent to help. Spinning up a full coding agent for this stuff is overkill, and I got tired of copy-pasting errors into a chat window every time.
10
+ agent-sh is small at its core and does its real work through extensions, so the same runtime drives very different apps. Here are three to start with, all sharing the same agent backends, tools, providers, and `~/.agent-sh/settings.json`:
11
11
 
12
- So I built agent-sh. Under the hood it's a normal shell on top of node-pty — your rc config, your aliases, vim and tmux all just work. But at the start of any line, type `>` and you're talking to a small agent that already sees your cwd, your last command, and its output. Nothing to set up, no project to explain.
12
+ ### 1. A shell with the agent one keystroke away — bundled with agent-sh
13
+
14
+ A normal shell on top of node-pty — your rc config, your aliases, vim and tmux all just work. But at the start of any line, type `>` and you're talking to a small agent that already sees your cwd, your last command, and its output. Nothing to set up, no project to explain.
15
+
16
+ ![demo](assets/demo.gif)
13
17
 
14
18
  ```
15
19
  ~ $ ls -la # real shell command
@@ -19,10 +23,47 @@ So I built agent-sh. Under the hood it's a normal shell on top of node-pty — y
19
23
  ~ $ > draft a commit message # agent reads your diff and shell history
20
24
  ```
21
25
 
22
- agent-sh is built to be agent-agnostic. The recommended path is the built-in agent `ash` — a lightweight agent designed so extensions can plug into the same tool surface. If you'd rather host an existing coding agent (pi, claude-code, opencode), you can [bring your own](#bring-your-own-agent) — with the trade-off that it manages its own separate tools.
26
+ ```bash
27
+ npm install -g agent-sh
28
+ ```
29
+
30
+ [Quick Start ↓](#quick-start)
31
+
32
+ ### 2. ashi — a standalone coding agent
33
+
34
+ [**`@guanyilun/ashi`**](examples/extensions/ashi/) is the same `ash` agent in a chat-style TUI, with no shell underneath — just the agent. Installed separately, it reuses agent-sh's backend, tools, slash commands, providers, and skills, and adds session history, in-session branching, and LLM-driven compaction.
35
+
36
+ ```bash
37
+ npm install -g @guanyilun/ashi
38
+ ashi
39
+ ```
40
+
41
+ ashi makes the runtime's **decoupled rendering** concrete: the frontend is itself an extension, and even *how* it draws tool calls and results is a swappable render extension. Same agent backend, same conversation — load a different render extension and the whole TUI restyles, no code changes:
42
+
43
+ | pi-style rendering | claude-code-style rendering |
44
+ |---|---|
45
+ | ![ashi rendering tool calls pi-style](assets/ashi-pi-style.png) | ![ashi rendering tool calls claude-code-style](assets/ashi-claude-code-style.png) |
46
+
47
+ ### 3. asHub — a GUI coding agent
48
+
49
+ [**firslov/asHub**](https://github.com/firslov/asHub) is a third-party cross-platform desktop app (Electron) built on the agent-sh runtime: a multi-session sidebar, persistence across restarts, and a live-streaming interface with Markdown, syntax-highlighted code, diffs, and tool-call rendering. macOS / Windows / Linux.
50
+
51
+ It pushes the same decoupling one step further — the frontend isn't a terminal at all, but a full desktop GUI on the same runtime, backends, and tools:
52
+
53
+ ![asHub desktop GUI](assets/ashub.png)
54
+
55
+ ## How it works
56
+
57
+ agent-sh is a **composable agent runtime**. At its center is a pure kernel — a typed event bus, a named-handler registry, and an extension loader — that knows nothing about terminals, LLMs, shells, or rendering. Everything else plugs into it: the agent backend, its tools, provider management, and the frontend that drives it.
58
+
59
+ The frontend and the agent backend are both just components on the bus, so you **mix and match** them freely — wire several frontends to one backend, or keep one frontend and swap the backend underneath — all sharing the **same extension layer** of tools, content transforms, slash commands, and themes. `import { createCore } from "agent-sh"` gives you the headless kernel; load the pieces you want and wire your own I/O.
60
+
61
+ For the kernel design in full — the bus, handlers, the compositor, and the shell ↔ agent boundary — see [Architecture](docs/architecture.md). To embed the runtime in your own frontend, see the [Library Guide](docs/library.md). The rest of this README covers the bundled shell.
23
62
 
24
63
  ## Quick Start
25
64
 
65
+ **This sets up the agent-sh shell** — the frontend bundled in the `agent-sh` package. (For the other frontends, install [ashi](examples/extensions/ashi/) or [asHub](https://github.com/firslov/asHub) instead.)
66
+
26
67
  ### Installation
27
68
 
28
69
  Install from npm:
@@ -139,20 +180,6 @@ All three bridges receive agent-sh's per-query shell context (`<shell_events>`)
139
180
 
140
181
  **Caveat:** pi, claude-code, and opencode each manage their own tool surfaces, so agent-sh extensions that register tools (or skills, instructions, etc.) for the built-in `ash` agent generally won't be visible to a hosted backend. Frontend extensions (themes, content transforms, slash commands, the TUI renderer) keep working — only the agent-side capabilities differ. Use the bridges when you want that agent's toolset; stay on `ash` when you want agent-sh's extension ecosystem.
141
182
 
142
- ## Key Features
143
-
144
- **Real terminal, zero compromise.** Full PTY with your shell config, aliases, and environment. Shell starts instantly — the agent connects asynchronously in the background.
145
-
146
- **One entry point, smart tool selection.** Type `>` and agent-sh figures out how to help. Scratchpad tools (`bash`, `read_file`, `grep`, `glob`) for investigation. Extensions add capabilities like running commands in your live shell. No modes to pick — the agent reasons about which tools to use based on your intent.
147
-
148
- **Context that just works.** Every query includes your cwd, recent commands, and their output. Run a failing test, type `> fix this`, and agent-sh knows exactly what happened. Context management works like shell history — continuous, persistent across restarts, no sessions to manage. See [Context Management](docs/context-management.md).
149
-
150
- **Any LLM, any backend.** agent-sh works with any OpenAI-compatible API out of the box. Define multiple providers in settings and switch models at runtime with `/model <name>`. Or swap in a completely different agent — bundled bridges run [pi](examples/extensions/pi-bridge/), [claude-code](examples/extensions/claude-code-bridge/), or [opencode](examples/extensions/opencode-bridge/) as a drop-in backend (see [Bring your own agent](#bring-your-own-agent)).
151
-
152
- **Extensible by design.** The entire system is built on a typed event bus. Extensions can add custom input modes, content transforms (render LaTeX as images, Mermaid as diagrams), themes, slash commands, or replace the agent backend entirely. The built-in TUI renderer is itself just an extension.
153
-
154
- **Embeddable as a library.** The core is a headless kernel — `import { createCore } from "agent-sh"` to build WebSocket servers, REST APIs, Electron apps, or test harnesses. No terminal required.
155
-
156
183
  ## Documentation
157
184
 
158
185
  Start with **Usage** to get running, then **Architecture** for the mental model.
@@ -135,8 +135,8 @@ export class AgentLoop {
135
135
  }
136
136
  return acc;
137
137
  });
138
- on("agent:submit", ({ query }) => {
139
- this.handleQuery(query).catch(() => { });
138
+ on("agent:submit", ({ query, images }) => {
139
+ this.handleQuery(query, images).catch(() => { });
140
140
  });
141
141
  on("agent:cancel-request", (e) => {
142
142
  this.abortController?.abort(e.silent ? "silent" : undefined);
@@ -260,7 +260,7 @@ export class AgentLoop {
260
260
  budgetTokens: this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
261
261
  }));
262
262
  onPipe("context:snapshot", (payload) => {
263
- payload.messages = this.conversation.getMessages();
263
+ payload.messages = this.conversation.get();
264
264
  payload.contextWindow = this.currentMode.contextWindow ?? DEFAULT_CONTEXT_WINDOW;
265
265
  payload.activeTokens = this.conversation.estimateTokens();
266
266
  return payload;
@@ -657,12 +657,10 @@ export class AgentLoop {
657
657
  // filter, reorder, inject — whatever strategy fits.
658
658
  h.define("conversation:prepare", (messages) => messages);
659
659
  // ── Conversation primitives for compaction strategies ─────────
660
- // Read messages (for inspection / computing new arrays) and replace
661
- // the whole array (write side). Extensions implementing
662
- // `conversation:compact` use these to observe and mutate.
663
- h.define("conversation:get-messages", () => this.conversation.getMessages());
660
+ // Canonical array (link/replace index space), not forLLM().
661
+ h.define("conversation:get-messages", () => this.conversation.get());
664
662
  h.define("conversation:replace-messages", (msgs) => {
665
- this.conversation.replaceMessages(msgs);
663
+ this.conversation.replace(msgs);
666
664
  });
667
665
  h.define("conversation:estimate-tokens", () => this.conversation.estimateTokens());
668
666
  h.define("conversation:estimate-prompt-tokens", () => this.conversation.estimatePromptTokens());
@@ -671,13 +669,13 @@ export class AgentLoop {
671
669
  const strategy = opts.strategy;
672
670
  if (strategy?.kind === "rewind" || strategy?.kind === "replace") {
673
671
  const before = this.conversation.estimatePromptTokens();
674
- const beforeLen = this.conversation.getMessages().length;
672
+ const beforeLen = this.conversation.get().length;
675
673
  const next = strategy.kind === "rewind"
676
- ? this.conversation.getMessages().slice(0, strategy.toIndex)
674
+ ? this.conversation.get().slice(0, strategy.toIndex)
677
675
  : strategy.messages;
678
- this.conversation.replaceMessages(next);
676
+ this.conversation.replace(next);
679
677
  const after = this.conversation.estimatePromptTokens();
680
- const afterLen = this.conversation.getMessages().length;
678
+ const afterLen = this.conversation.get().length;
681
679
  return { before, after, evictedCount: Math.max(0, beforeLen - afterLen) };
682
680
  }
683
681
  return null;
@@ -755,7 +753,7 @@ export class AgentLoop {
755
753
  return result;
756
754
  });
757
755
  }
758
- async handleQuery(query) {
756
+ async handleQuery(query, images) {
759
757
  // Cancel any in-flight loop (concurrent prompt handling)
760
758
  if (this.abortController) {
761
759
  this.abortController.abort();
@@ -778,7 +776,14 @@ export class AgentLoop {
778
776
  const userContent = queryContext
779
777
  ? `<query_context>\n${queryContext}\n</query_context>\n\n${query}`
780
778
  : query;
781
- this.conversation.addUserMessage(userContent);
779
+ // Fail closed: an image sent to a non-vision model errors and leaves an
780
+ // unsendable message poisoning history, so require declared image support.
781
+ let userImages = images?.length ? images : undefined;
782
+ if (userImages && !this.currentMode.modalities?.includes("image")) {
783
+ this.bus.emit("ui:info", { message: `Current model has no declared image support — ${userImages.length} image(s) dropped.` });
784
+ userImages = undefined;
785
+ }
786
+ this.conversation.addUserMessage(userContent, userImages);
782
787
  this.bus.emit("conversation:message-appended", { role: "user", content: query });
783
788
  responseText = await this.executeLoop(signal);
784
789
  }
@@ -1262,7 +1267,7 @@ export class AgentLoop {
1262
1267
  // wrapTrailingWithDynamicContext for the cache-stability rationale.
1263
1268
  const rawMessages = [
1264
1269
  { role: "system", content: systemPrompt },
1265
- ...wrapTrailingWithDynamicContext(this.conversation.getMessages(), dynamicContext, toolPrompt),
1270
+ ...wrapTrailingWithDynamicContext(this.conversation.forLLM(), dynamicContext, toolPrompt),
1266
1271
  ];
1267
1272
  // Let extensions transform the message array (compact, summarize, filter, etc.)
1268
1273
  const messages = this.handlers.call("conversation:prepare", rawMessages);
@@ -1,5 +1,5 @@
1
1
  import type { ProviderRegistration } from "./host-types.js";
2
- import type { ToolDefinition, ToolResultDisplay } from "./types.js";
2
+ import type { ImageContent, ToolDefinition, ToolResultDisplay } from "./types.js";
3
3
  export interface AgentIdentity {
4
4
  name: string;
5
5
  version: string;
@@ -44,6 +44,7 @@ declare module "../core/event-bus.js" {
44
44
  };
45
45
  "agent:submit": {
46
46
  query: string;
47
+ images?: ImageContent[];
47
48
  };
48
49
  "agent:cancel-request": {
49
50
  silent?: boolean;
@@ -14,6 +14,9 @@ import activateOpenrouter from "./providers/openrouter.js";
14
14
  import activateOpenai from "./providers/openai.js";
15
15
  import activateOpenaiCompatible from "./providers/openai-compatible.js";
16
16
  import activateDeepseek from "./providers/deepseek.js";
17
+ import activateOllama from "./providers/ollama.js";
18
+ import activateZaiCodingPlan from "./providers/zai-coding-plan.js";
19
+ import activateOpencode from "./providers/opencode.js";
17
20
  import { findBash } from "../utils/executor.js";
18
21
  import { createBashTool } from "./tools/bash.js";
19
22
  import { createPwshTool } from "./tools/pwsh.js";
@@ -29,6 +32,12 @@ function persistedModelFor(providerName) {
29
32
  return undefined;
30
33
  return getSettings().providers?.[providerName]?.defaultModel;
31
34
  }
35
+ /** The OpenAI SDK silently defaults an empty baseURL to api.openai.com, so a
36
+ * provider with a key but no endpoint would misroute its key there. `openai`
37
+ * is exempt: that default is its endpoint. */
38
+ function usableProvider(p) {
39
+ return !!p?.apiKey && (!!p.baseURL || p.id === "openai");
40
+ }
32
41
  function defaultReasoningBuilder(level) {
33
42
  if (level === "off")
34
43
  return {};
@@ -276,6 +285,8 @@ export default function agentBackend(ctx) {
276
285
  for (const [id, p] of resolvedProviders) {
277
286
  if (!p.apiKey)
278
287
  continue;
288
+ if (!usableProvider(p))
289
+ continue;
279
290
  const shapeId = p.reasoningShape ?? id;
280
291
  for (const model of p.models) {
281
292
  const mc = p.modelCapabilities?.get(model);
@@ -344,13 +355,32 @@ export default function agentBackend(ctx) {
344
355
  loadedExtensionNames = names;
345
356
  resolvedProviders = computeResolvedProviders();
346
357
  const settings = getSettings();
347
- // Built-ins register unconditionally so `auth list` can enumerate them;
348
- // the fallback must skip keyless entries or it lands on openrouter and
349
- // bails at the `!effectiveApiKey` guard below.
350
- const providerName = config.provider
351
- ?? settings.defaultProvider
352
- ?? [...resolvedProviders].find(([, p]) => p.apiKey)?.[0];
353
- const activeProvider = providerName ? resolvedProviders.get(providerName) ?? null : null;
358
+ let providerName = config.provider ?? settings.defaultProvider;
359
+ let activeProvider = providerName ? resolvedProviders.get(providerName) ?? null : null;
360
+ // Inline CLI credentials carry their own endpoint, so they skip the
361
+ // usable-provider fallback that registry-driven selection needs.
362
+ if (!config.apiKey) {
363
+ if (!providerName) {
364
+ const first = [...resolvedProviders].find(([, p]) => usableProvider(p));
365
+ providerName = first?.[0];
366
+ activeProvider = first?.[1] ?? null;
367
+ }
368
+ else if (!usableProvider(activeProvider)) {
369
+ const reason = !activeProvider ? "is not registered"
370
+ : !activeProvider.apiKey ? "has no API key configured"
371
+ : "has no endpoint configured";
372
+ const next = [...resolvedProviders].find(([, p]) => usableProvider(p));
373
+ if (next) {
374
+ bus.emit("ui:error", { message: `Provider "${providerName}" ${reason}; falling back to "${next[0]}".` });
375
+ providerName = next[0];
376
+ activeProvider = next[1];
377
+ }
378
+ else {
379
+ bus.emit("ui:error", { message: `Provider "${providerName}" ${reason}, and no other configured provider has both an API key and an endpoint. Run \`agent-sh auth\` to configure one.` });
380
+ return;
381
+ }
382
+ }
383
+ }
354
384
  // Persisted defaultModel wins over openrouter's hardcoded DEFAULT_MODELS[0].
355
385
  const effectiveApiKey = config.apiKey ?? activeProvider?.apiKey;
356
386
  const effectiveBaseURL = config.baseURL ?? activeProvider?.baseURL;
@@ -465,6 +495,10 @@ export default function agentBackend(ctx) {
465
495
  bus.emit("ui:error", { message: `Provider "${name}" has no API key configured` });
466
496
  return;
467
497
  }
498
+ if (!p.baseURL && p.id !== "openai") {
499
+ bus.emit("ui:error", { message: `Provider "${name}" has no endpoint configured` });
500
+ return;
501
+ }
468
502
  const switchModel = p.defaultModel ?? p.models[0];
469
503
  if (!switchModel) {
470
504
  bus.emit("ui:error", { message: `Provider "${name}" has no models configured` });
@@ -501,4 +535,7 @@ export function activateAgent(ctx) {
501
535
  if (process.env.OPENAI_BASE_URL)
502
536
  activateOpenaiCompatible(agentCtx);
503
537
  activateDeepseek(agentCtx);
538
+ activateOllama(agentCtx);
539
+ activateZaiCodingPlan(agentCtx);
540
+ activateOpencode(agentCtx);
504
541
  }
@@ -19,7 +19,7 @@ export declare class LiveView {
19
19
  constructor(handlers?: HandlerFunctions, instanceId?: string);
20
20
  private getMessagesJson;
21
21
  private invalidateMessagesCache;
22
- addUserMessage(text: string): void;
22
+ addUserMessage(text: string, images?: ImageContent[]): void;
23
23
  addAssistantMessage(content: string | null, toolCalls?: {
24
24
  id: string;
25
25
  function: {
@@ -34,9 +34,9 @@ export declare class LiveView {
34
34
  appendUserMessage(text: string): void;
35
35
  private hasOpenToolCalls;
36
36
  private flushPendingMessages;
37
- getMessages(): ChatCompletionMessageParam[];
38
- get(): AgentShMessage[];
37
+ /** Send-shaped; may be longer than get() (dangling calls stubbed) — never link()/replace() by these indices. */
39
38
  forLLM(): ChatCompletionMessageParam[];
39
+ get(): AgentShMessage[];
40
40
  replace(msgs: AgentShMessage[]): void;
41
41
  link(index: number, entryId: string): void;
42
42
  /** DeepSeek 400s on tool messages without a matching tool_call;
@@ -1,4 +1,3 @@
1
- import { stripMeta } from "./llm-client.js";
2
1
  export class LiveView {
3
2
  messages = [];
4
3
  messagesDirty = true;
@@ -26,8 +25,19 @@ export class LiveView {
26
25
  this.messagesDirty = true;
27
26
  this.cachedMessagesJson = null;
28
27
  }
29
- addUserMessage(text) {
30
- this.messages.push({ role: "user", content: text });
28
+ addUserMessage(text, images) {
29
+ if (images?.length) {
30
+ const parts = [];
31
+ if (text)
32
+ parts.push({ type: "text", text });
33
+ for (const img of images) {
34
+ parts.push({ type: "image_url", image_url: { url: `data:${img.mimeType};base64,${img.data}` } });
35
+ }
36
+ this.messages.push({ role: "user", content: parts });
37
+ }
38
+ else {
39
+ this.messages.push({ role: "user", content: text });
40
+ }
31
41
  this.invalidateMessagesCache();
32
42
  }
33
43
  addAssistantMessage(content, toolCalls, extras) {
@@ -131,15 +141,13 @@ export class LiveView {
131
141
  }
132
142
  this.invalidateMessagesCache();
133
143
  }
134
- getMessages() {
144
+ /** Send-shaped; may be longer than get() (dangling calls stubbed) — never link()/replace() by these indices. */
145
+ forLLM() {
135
146
  return this.normalizeReasoningConsistency(this.stubDanglingToolCalls(this.dropOrphanToolMessages(this.messages)));
136
147
  }
137
148
  get() {
138
149
  return this.messages;
139
150
  }
140
- forLLM() {
141
- return this.getMessages().map(stripMeta);
142
- }
143
151
  replace(msgs) {
144
152
  this.replaceMessages(msgs);
145
153
  }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Ollama provider — local daemon or Ollama Cloud.
3
+ *
4
+ * Cloud auth: agent-sh auth login ollama-cloud
5
+ * Local host: OLLAMA_HOST (default http://localhost:11434)
6
+ *
7
+ * Catalog comes from /api/tags; per-model context length is fetched
8
+ * from /api/show. Chat goes through the OpenAI-compatible /v1 shim.
9
+ */
10
+ import type { AgentContext } from "../host-types.js";
11
+ export default function activate(ctx: AgentContext): void;
@@ -0,0 +1,72 @@
1
+ import { resolveApiKey } from "../../cli/auth/keys.js";
2
+ const ECHO_REASONING_PATTERNS = [/deepseek/i];
3
+ export default function activate(ctx) {
4
+ const cloudKey = resolveApiKey("ollama-cloud").key ?? process.env.OLLAMA_API_KEY;
5
+ const host = cloudKey
6
+ ? "https://ollama.com"
7
+ : (process.env.OLLAMA_HOST ?? "http://localhost:11434").replace(/\/$/, "");
8
+ const id = cloudKey ? "ollama-cloud" : "ollama";
9
+ const sdkKey = cloudKey || "no-key";
10
+ const noAuth = !cloudKey;
11
+ const baseURL = `${host}/v1`;
12
+ const headers = {};
13
+ if (cloudKey)
14
+ headers.Authorization = `Bearer ${cloudKey}`;
15
+ ctx.agent.providers.configure(id, {
16
+ reasoningParams: (level) => {
17
+ if (level === "off")
18
+ return { reasoning_effort: "none" };
19
+ return { reasoning_effort: level === "xhigh" ? "high" : level };
20
+ },
21
+ });
22
+ ctx.agent.providers.register({ id, apiKey: sdkKey, baseURL, models: [], noAuth });
23
+ fetchCatalog(host, headers).then((models) => {
24
+ if (models.length === 0)
25
+ return;
26
+ ctx.agent.providers.register({
27
+ id,
28
+ apiKey: sdkKey,
29
+ baseURL,
30
+ defaultModel: models[0].id,
31
+ models,
32
+ noAuth,
33
+ });
34
+ }).catch(() => { });
35
+ }
36
+ async function fetchCatalog(host, headers) {
37
+ const tagsRes = await fetch(`${host}/api/tags`, { headers });
38
+ if (!tagsRes.ok)
39
+ return [];
40
+ const tagsData = await tagsRes.json();
41
+ const names = (tagsData.models ?? []).map((m) => m.name);
42
+ if (names.length === 0)
43
+ return [];
44
+ const ctxs = await Promise.all(names.map((name) => fetchContextLength(host, headers, name).catch(() => undefined)));
45
+ return names.map((name, i) => ({
46
+ id: name,
47
+ contextWindow: ctxs[i],
48
+ echoReasoning: ECHO_REASONING_PATTERNS.some((re) => re.test(name)),
49
+ }));
50
+ }
51
+ async function fetchContextLength(host, headers, name) {
52
+ const res = await fetch(`${host}/api/show`, {
53
+ method: "POST",
54
+ headers: { ...headers, "Content-Type": "application/json" },
55
+ body: JSON.stringify({ name }),
56
+ });
57
+ if (!res.ok)
58
+ return undefined;
59
+ const data = await res.json();
60
+ const info = data.model_info ?? {};
61
+ const arch = info["general.architecture"];
62
+ if (arch) {
63
+ const ctx = info[`${arch}.context_length`];
64
+ if (typeof ctx === "number")
65
+ return ctx;
66
+ }
67
+ for (const [k, v] of Object.entries(info)) {
68
+ if (k.endsWith(".context_length") && typeof v === "number")
69
+ return v;
70
+ }
71
+ return undefined;
72
+ }
@@ -0,0 +1,10 @@
1
+ /**
2
+ * OpenCode Zen & Go providers — runtime model discovery via /models +
3
+ * models.dev metadata enrichment.
4
+ *
5
+ * Registers two providers:
6
+ * - opencode — Zen tier (https://opencode.ai/zen/v1)
7
+ * - opencode-go — Go tier (https://opencode.ai/zen/go/v1)
8
+ */
9
+ import type { AgentContext } from "../host-types.js";
10
+ export default function activate(ctx: AgentContext): void;
@@ -0,0 +1,112 @@
1
+ import { resolveApiKey } from "../../cli/auth/keys.js";
2
+ const ZEN_BASE_URL = "https://opencode.ai/zen/v1";
3
+ const GO_BASE_URL = "https://opencode.ai/zen/go/v1";
4
+ const MODELS_DEV_ENDPOINT = "https://models.dev/api.json";
5
+ const DEFAULT_CTX = 128_000;
6
+ const DEFAULT_MAX_TOKENS = 16_384;
7
+ const ZEN_FALLBACK = ["claude-sonnet-4-6"];
8
+ const GO_FALLBACK = ["gpt-5.2"];
9
+ // ── Helpers ──────────────────────────────────────────────────────
10
+ async function fetchJson(url) {
11
+ const res = await fetch(url, {
12
+ headers: { Accept: "application/json" },
13
+ signal: AbortSignal.timeout(15_000),
14
+ });
15
+ if (!res.ok)
16
+ throw new Error(`HTTP ${res.status}`);
17
+ return res.json();
18
+ }
19
+ function findEntry(provider, id) {
20
+ const direct = provider?.models?.[id];
21
+ if (direct)
22
+ return direct;
23
+ if (!provider?.models)
24
+ return undefined;
25
+ return Object.values(provider.models).find((m) => m.id === id);
26
+ }
27
+ function resolveModel(id, meta) {
28
+ const raw = meta?.modalities?.input;
29
+ const modalities = Array.isArray(raw)
30
+ ? raw.filter((v) => v === "text" || v === "image")
31
+ : ["text"];
32
+ return {
33
+ id,
34
+ reasoning: meta?.reasoning ?? false,
35
+ contextWindow: (typeof meta?.limit?.context === "number" && meta.limit.context > 0)
36
+ ? meta.limit.context : DEFAULT_CTX,
37
+ maxTokens: (typeof meta?.limit?.output === "number" && meta.limit.output > 0)
38
+ ? meta.limit.output : DEFAULT_MAX_TOKENS,
39
+ modalities,
40
+ };
41
+ }
42
+ function reasoningParams(level) {
43
+ if (level === "off")
44
+ return { reasoning_effort: "none" };
45
+ return { reasoning_effort: level === "xhigh" ? "high" : level };
46
+ }
47
+ // ── Activation ───────────────────────────────────────────────────
48
+ export default function activate(ctx) {
49
+ const apiKey = process.env.OPENCODE_API_KEY ??
50
+ resolveApiKey("opencode").key ?? undefined;
51
+ ctx.agent.providers.configure("opencode", { reasoningParams });
52
+ ctx.agent.providers.register({
53
+ id: "opencode", apiKey, baseURL: ZEN_BASE_URL,
54
+ defaultModel: ZEN_FALLBACK[0], models: ZEN_FALLBACK,
55
+ supportsReasoningEffort: true,
56
+ });
57
+ ctx.agent.providers.configure("opencode-go", { reasoningParams });
58
+ ctx.agent.providers.register({
59
+ id: "opencode-go", apiKey, baseURL: GO_BASE_URL,
60
+ defaultModel: GO_FALLBACK[0], models: GO_FALLBACK,
61
+ supportsReasoningEffort: true,
62
+ });
63
+ if (!apiKey)
64
+ return;
65
+ fetchModelsDev()
66
+ .then(async (md) => {
67
+ const zenIds = await fetchModelIds(ZEN_BASE_URL);
68
+ const goIds = await fetchModelIds(GO_BASE_URL);
69
+ const resolve = (ids, prov, fb) => (ids.length > 0 ? ids : fb).map((id) => resolveModel(id, findEntry(prov, id)));
70
+ const zen = resolve(zenIds, md?.opencode, ZEN_FALLBACK);
71
+ const go = resolve(goIds, md?.["opencode-go"], GO_FALLBACK);
72
+ ctx.agent.providers.register({
73
+ id: "opencode", apiKey, baseURL: ZEN_BASE_URL,
74
+ defaultModel: zen[0]?.id ?? ZEN_FALLBACK[0], models: zen,
75
+ supportsReasoningEffort: true,
76
+ });
77
+ ctx.agent.providers.register({
78
+ id: "opencode-go", apiKey, baseURL: GO_BASE_URL,
79
+ defaultModel: go[0]?.id ?? GO_FALLBACK[0], models: go,
80
+ supportsReasoningEffort: true,
81
+ });
82
+ })
83
+ .catch(() => { });
84
+ }
85
+ async function fetchModelsDev() {
86
+ try {
87
+ const payload = await fetchJson(MODELS_DEV_ENDPOINT);
88
+ if (!payload || typeof payload !== "object" || Array.isArray(payload))
89
+ return undefined;
90
+ return payload;
91
+ }
92
+ catch {
93
+ return undefined;
94
+ }
95
+ }
96
+ async function fetchModelIds(baseURL) {
97
+ try {
98
+ const res = await fetch(`${baseURL}/models`, {
99
+ headers: { Accept: "application/json" },
100
+ signal: AbortSignal.timeout(10_000),
101
+ });
102
+ if (!res.ok)
103
+ return [];
104
+ const payload = await res.json();
105
+ if (!Array.isArray(payload.data))
106
+ return [];
107
+ return [...new Set(payload.data.map((e) => e.id).filter(Boolean))];
108
+ }
109
+ catch {
110
+ return [];
111
+ }
112
+ }
@@ -14,6 +14,14 @@ function buildReasoningParams(level, _model) {
14
14
  ? { reasoning: { effort: "none" } }
15
15
  : { reasoning: { effort: level } };
16
16
  }
17
+ /** OpenRouter's input_modalities → the text/image subset; undefined when absent
18
+ * so the fail-closed image guard treats the model as text-only. */
19
+ function toModalities(input) {
20
+ if (!Array.isArray(input))
21
+ return undefined;
22
+ const out = input.filter((v) => v === "text" || v === "image");
23
+ return out.length ? out : undefined;
24
+ }
17
25
  export default function activate(ctx) {
18
26
  const apiKey = resolveApiKey("openrouter").key;
19
27
  ctx.agent.providers.configure("openrouter", { reasoningParams: buildReasoningParams });
@@ -42,6 +50,7 @@ export default function activate(ctx) {
42
50
  reasoning: m.supported_parameters?.includes("reasoning") ?? false,
43
51
  contextWindow: m.context_length,
44
52
  echoReasoning: userOverrides.get(m.id) ?? patterns.some((re) => re.test(m.id)),
53
+ modalities: toModalities(m.architecture?.input_modalities),
45
54
  })),
46
55
  });
47
56
  }).catch(() => { });
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Z.AI Coding Plan — Zhipu AI's subscription GLM models for coding.
3
+ */
4
+ import type { AgentContext } from "../host-types.js";
5
+ export default function activate(ctx: AgentContext): void;
@@ -0,0 +1,26 @@
1
+ import { resolveApiKey } from "../../cli/auth/keys.js";
2
+ const BASE_URL = "https://api.z.ai/api/coding/paas/v4";
3
+ const ID = "zai-coding-plan";
4
+ const DEFAULT_MODELS = [
5
+ { id: "glm-5.1", reasoning: true, contextWindow: 200_000 },
6
+ { id: "glm-5-turbo", reasoning: true, contextWindow: 200_000 },
7
+ { id: "glm-4.7", reasoning: true, contextWindow: 204_800 },
8
+ { id: "glm-4.5-air", reasoning: true, contextWindow: 131_072 },
9
+ ];
10
+ function buildReasoningParams(level, _model) {
11
+ if (level === "off")
12
+ return { thinking: { type: "disabled" } };
13
+ const effort = level === "xhigh" ? "high" : level;
14
+ return { thinking: { type: "enabled" }, reasoning_effort: effort };
15
+ }
16
+ export default function activate(ctx) {
17
+ const { key } = resolveApiKey(ID);
18
+ ctx.agent.providers.configure(ID, { reasoningParams: buildReasoningParams });
19
+ ctx.agent.providers.register({
20
+ id: ID,
21
+ apiKey: key ?? process.env.ZAI_API_KEY ?? process.env.ZHIPU_API_KEY,
22
+ baseURL: BASE_URL,
23
+ defaultModel: DEFAULT_MODELS[0].id,
24
+ models: DEFAULT_MODELS,
25
+ });
26
+ }
@@ -109,7 +109,7 @@ async function streamOnce(llmClient, systemPrompt, conversation, apiTools, model
109
109
  const stream = await llmClient.stream({
110
110
  messages: [
111
111
  { role: "system", content: systemPrompt },
112
- ...wrapTrailingWithDynamicContext(conversation.getMessages(), dynamicContext ?? ""),
112
+ ...wrapTrailingWithDynamicContext(conversation.forLLM(), dynamicContext ?? ""),
113
113
  ],
114
114
  tools: apiTools.length > 0 ? apiTools : undefined,
115
115
  model,
package/dist/cli/args.js CHANGED
@@ -53,8 +53,8 @@ export function parseArgs(argv, env = process.env) {
53
53
  let provider;
54
54
  let backend;
55
55
  let shell = env.SHELL || "/bin/bash";
56
- let apiKey = env.OPENAI_API_KEY;
57
- let baseURL = env.OPENAI_BASE_URL;
56
+ let apiKey;
57
+ let baseURL;
58
58
  for (let i = 0; i < argv.length; i++) {
59
59
  const arg = argv[i];
60
60
  if (arg === "--model" && argv[i + 1]) {