agent-sh 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,16 +1,15 @@
1
1
  # agent-sh
2
2
 
3
- An agent that lives in a shell — not a shell that lives in an agent.
3
+ A real shell with an AI agent one keystroke away.
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/agent-sh.svg)](https://www.npmjs.com/package/agent-sh)
6
6
  [![license](https://img.shields.io/npm/l/agent-sh.svg)](https://github.com/guanyilun/agent-sh/blob/main/LICENSE)
7
- [![website](https://img.shields.io/badge/website-agent--sh.dev-blue)](https://agent-sh.dev)
8
7
 
9
8
  ![demo](assets/demo.gif)
10
9
 
11
- Most AI terminal tools get this backwards: the LLM drives the experience and the shell is bolted on as an afterthought. No real PTY, no job control, no vim, fragile `cd` tracking. The agent is the main character and your terminal is a prop.
10
+ I live in my terminal. A lot of the time I'm not coding — I'm deploying something, poking at a failing `rsync`, figuring out why `docker build` won't start, fixing a one-liner. And very often I need an AI agent to help. Spinning up a full coding agent for this stuff is overkill, and I got tired of copy-pasting errors into a chat window every time.
12
11
 
13
- agent-sh flips this. It's your shell first — full PTY, your rc config, your aliases, everything just works. But type `>` at the start of a line, and you're talking to an agent that has full context of what you've been doing.
12
+ So I built agent-sh. Under the hood it's a normal shell on top of node-pty — your rc config, your aliases, vim and tmux all just work. But at the start of any line, type `>` and you're talking to a small agent that already sees your cwd, your last command, and its output. Nothing to set up, no project to explain.
14
13
 
15
14
  ```
16
15
  ~ $ ls -la # real shell command
@@ -20,9 +19,17 @@ agent-sh flips this. It's your shell first — full PTY, your rc config, your al
20
19
  ~ $ > draft a commit message # agent reads your diff and shell history
21
20
  ```
22
21
 
22
+ I still use Claude Code and pi for serious coding work — this doesn't replace them. But for the quick stuff in the terminal, I reach for agent-sh almost every day now. The built-in agent is lightweight and good enough for most of what I throw at it, and when it isn't, bridge extensions let you plug [Claude Code](examples/extensions/claude-code-bridge/) or [pi](examples/extensions/pi-bridge/) in as the backend.
23
+
23
24
  ## Quick Start
24
25
 
25
- Install and launch:
26
+ Install the latest from GitHub (recommended — development moves faster than npm releases):
27
+
28
+ ```bash
29
+ npm install -g github:guanyilun/agent-sh
30
+ ```
31
+
32
+ Or the last published npm release:
26
33
 
27
34
  ```bash
28
35
  npm install -g agent-sh
@@ -62,7 +69,7 @@ Tip — add a shell alias:
62
69
  alias ash="agent-sh"
63
70
  ```
64
71
 
65
- Requires Node.js 18+.
72
+ Requires Node.js 18+. Currently supports **bash** and **zsh**; other shells (fish, nushell, etc.) are not yet wired up.
66
73
 
67
74
  ## Key Features
68
75
 
@@ -141,10 +141,33 @@ export class AgentLoop {
141
141
  // here in the ctor so late-registered modes aren't dropped.
142
142
  onCtor("config:add-modes", ({ modes: extra }) => {
143
143
  const providers = new Set(extra.map((m) => m.provider).filter(Boolean));
144
+ const prev = this.modes[this.currentModeIndex];
145
+ // Keep the active mode even if the re-registration drops it (persisted
146
+ // model missing from a refreshed catalog) — otherwise currentModeIndex
147
+ // slips to modes[0] and the next stream() call uses a different model
148
+ // mid-turn.
149
+ const activePreserved = prev &&
150
+ prev.provider &&
151
+ providers.has(prev.provider) &&
152
+ !extra.some((m) => m.model === prev.model && m.provider === prev.provider);
144
153
  this.modes = [
145
- ...this.modes.filter((m) => !m.provider || !providers.has(m.provider)),
154
+ ...this.modes.filter((m) => {
155
+ if (activePreserved && m === prev)
156
+ return true;
157
+ return !m.provider || !providers.has(m.provider);
158
+ }),
146
159
  ...extra,
147
160
  ];
161
+ if (prev) {
162
+ const newIdx = this.modes.findIndex((m) => m.model === prev.model && m.provider === prev.provider);
163
+ if (newIdx !== -1)
164
+ this.currentModeIndex = newIdx;
165
+ }
166
+ if (activePreserved && prev) {
167
+ this.bus.emit("ui:info", {
168
+ message: `${prev.provider}:${prev.model} is not in the refreshed catalog — keeping it active until you /model to another.`,
169
+ });
170
+ }
148
171
  this.bus.emit("config:changed", {});
149
172
  });
150
173
  // Fires before wire() too — agent-backend emits this from
@@ -516,8 +539,9 @@ export class AgentLoop {
516
539
  const target = baseURL ?? provider ?? "provider";
517
540
  return `Could not connect to ${target} (${raw}). Check that the API endpoint is reachable.`;
518
541
  }
519
- // Auth errors
520
- if (status === 401 || raw.toLowerCase().includes("auth")) {
542
+ // Explicit signals only — bare "auth" hit "author" in echoed API params.
543
+ if (status === 401 || status === 403 ||
544
+ /\b(unauthorized|authentication|api[-_ ]?key|invalid[-_ ]?token)\b/i.test(raw)) {
521
545
  return `Authentication failed for ${provider ?? "provider"} (model: ${model}). Check your API key.`;
522
546
  }
523
547
  // Model not found
@@ -626,6 +650,9 @@ export class AgentLoop {
626
650
  */
627
651
  registerHandlers() {
628
652
  const h = this.handlers;
653
+ // Advisable so extensions can inject fallback parsers without
654
+ // subclassing the protocol.
655
+ h.define("tool-protocol:extract-calls", (args) => this.toolProtocol.extractToolCalls(args.text, args.streamedCalls));
629
656
  // System prompt: static identity + behavioral instructions.
630
657
  // Extensions can use registerInstruction() for a managed section,
631
658
  // or advise this handler directly for full control.
@@ -894,7 +921,16 @@ export class AgentLoop {
894
921
  const toolCtx = this.compositor
895
922
  ? { ui: createToolUI(this.bus, this.compositor.surface("agent")) }
896
923
  : undefined;
897
- const result = await tool.execute(args, onChunk, toolCtx);
924
+ // Surface thrown errors as tool results so the agent can self-correct
925
+ // instead of the throw killing the whole turn.
926
+ let result;
927
+ try {
928
+ result = await tool.execute(args, onChunk, toolCtx);
929
+ }
930
+ catch (err) {
931
+ const message = err instanceof Error ? err.message : String(err);
932
+ result = { content: message, exitCode: 1, isError: true };
933
+ }
898
934
  // Invalidate read cache when a file is modified
899
935
  if (tool.modifiesFiles && typeof args.path === "string" && !result.isError) {
900
936
  const absPath = path.resolve(process.cwd(), args.path);
@@ -1013,13 +1049,14 @@ export class AgentLoop {
1013
1049
  // tool_call → tool_result chain some providers require.
1014
1050
  // Stream LLM response with retry
1015
1051
  const result = await this.streamWithRetry(systemPrompt, dynamicContext, signal);
1016
- const { text, toolCalls: streamedToolCalls } = result;
1017
- // Extract tool calls via protocol (API mode uses streamed calls,
1018
- // inline mode parses XML from text)
1019
- const toolCalls = this.toolProtocol.extractToolCalls(text, streamedToolCalls);
1052
+ const { text, toolCalls: streamedToolCalls, extras } = result;
1053
+ const toolCalls = this.handlers.call("tool-protocol:extract-calls", {
1054
+ text,
1055
+ streamedCalls: streamedToolCalls,
1056
+ });
1020
1057
  fullResponseText += text;
1021
1058
  // Record the assistant message via protocol
1022
- this.toolProtocol.recordAssistant(this.conversation, text, toolCalls);
1059
+ this.toolProtocol.recordAssistant(this.conversation, text, toolCalls, extras);
1023
1060
  this.bus.emit("conversation:message-appended", {
1024
1061
  role: "assistant",
1025
1062
  content: text,
@@ -1408,6 +1445,11 @@ export class AgentLoop {
1408
1445
  */
1409
1446
  async streamResponse(systemPrompt, dynamicContext, signal) {
1410
1447
  let text = "";
1448
+ // reasoning_details streams as per-chunk fragments keyed by index;
1449
+ // merge .text per index or the provider rejects the fragmented shape.
1450
+ let reasoningField = null;
1451
+ let reasoning = "";
1452
+ const reasoningDetailsByIndex = new Map();
1411
1453
  const pendingToolCalls = [];
1412
1454
  const rawMessages = [
1413
1455
  { role: "system", content: systemPrompt },
@@ -1429,16 +1471,18 @@ export class AgentLoop {
1429
1471
  }
1430
1472
  // Stream filter strips tool tags from display (inline mode only)
1431
1473
  const streamFilter = this.toolProtocol.createStreamFilter(this.toolRegistry.all().map((t) => t.name));
1432
- const stream = await this.llmClient.stream({
1474
+ const requestParams = {
1433
1475
  messages,
1434
1476
  tools: apiTools,
1435
1477
  model: this.currentModel,
1436
1478
  reasoning_effort: this.shouldSendReasoningEffort() ? this.thinkingLevel : undefined,
1437
- signal,
1438
- });
1479
+ };
1480
+ this.bus.emit("llm:request", requestParams);
1481
+ const stream = await this.llmClient.stream({ ...requestParams, signal });
1439
1482
  for await (const chunk of stream) {
1440
1483
  if (signal.aborted)
1441
1484
  break;
1485
+ this.bus.emit("llm:chunk", { chunk });
1442
1486
  // Token usage (may arrive in a chunk with empty choices)
1443
1487
  if (chunk.usage) {
1444
1488
  const u = chunk.usage;
@@ -1470,11 +1514,29 @@ export class AgentLoop {
1470
1514
  });
1471
1515
  }
1472
1516
  }
1473
- // Reasoning/thinking tokens (non-standard, e.g. DeepSeek)
1474
- if (delta?.reasoning_content) {
1475
- this.bus.emit("agent:thinking-chunk", {
1476
- text: delta.reasoning_content,
1477
- });
1517
+ const d = delta;
1518
+ for (const name of ["reasoning", "reasoning_content"]) {
1519
+ if (typeof d?.[name] === "string" && d[name].length > 0) {
1520
+ reasoning += d[name];
1521
+ reasoningField ??= name;
1522
+ this.bus.emit("agent:thinking-chunk", { text: d[name] });
1523
+ }
1524
+ }
1525
+ if (Array.isArray(d?.reasoning_details)) {
1526
+ for (const x of d.reasoning_details) {
1527
+ const idx = typeof x?.index === "number" ? x.index : reasoningDetailsByIndex.size;
1528
+ const prev = reasoningDetailsByIndex.get(idx);
1529
+ if (!prev) {
1530
+ reasoningDetailsByIndex.set(idx, { ...x });
1531
+ }
1532
+ else {
1533
+ if (typeof x.text === "string")
1534
+ prev.text = (prev.text ?? "") + x.text;
1535
+ for (const [k, v] of Object.entries(x))
1536
+ if (k !== "text" && prev[k] === undefined)
1537
+ prev[k] = v;
1538
+ }
1539
+ }
1478
1540
  }
1479
1541
  // Tool calls (streamed incrementally)
1480
1542
  if (delta?.tool_calls) {
@@ -1522,9 +1584,17 @@ export class AgentLoop {
1522
1584
  tc.argumentsJson = "{}";
1523
1585
  }
1524
1586
  }
1587
+ const extras = {};
1588
+ if (reasoning && reasoningField)
1589
+ extras[reasoningField] = reasoning;
1590
+ if (reasoningDetailsByIndex.size > 0) {
1591
+ extras.reasoning_details = [...reasoningDetailsByIndex.entries()]
1592
+ .sort((a, b) => a[0] - b[0]).map(([, v]) => v);
1593
+ }
1525
1594
  return {
1526
1595
  text,
1527
1596
  toolCalls: pendingToolCalls,
1597
+ extras: Object.keys(extras).length > 0 ? extras : undefined,
1528
1598
  };
1529
1599
  }
1530
1600
  }
@@ -49,12 +49,19 @@ export declare class ConversationState {
49
49
  name: string;
50
50
  arguments: string;
51
51
  };
52
- }[]): void;
52
+ }[], extras?: Record<string, unknown>): void;
53
53
  addToolResult(toolCallId: string, content: string, isError?: boolean): void;
54
54
  /** Add tool results as a user message (for inline tool protocol). */
55
55
  addToolResultInline(content: string): void;
56
56
  addSystemNote(text: string): void;
57
57
  getMessages(): ChatCompletionMessageParam[];
58
+ /**
59
+ * DeepSeek 400s if any assistant in a thinking-mode conversation is
60
+ * missing reasoning_content. Cross-alias here (OpenRouter streams as
61
+ * `reasoning`, DeepSeek input expects `reasoning_content`) and stub
62
+ * gaps (text-only turns, pre-fix messages) with empty string.
63
+ */
64
+ private normalizeReasoningConsistency;
58
65
  /**
59
66
  * Replace the messages array wholesale — the write side for custom
60
67
  * compaction strategies. Invalidates API token baseline since the
@@ -78,21 +78,21 @@ export class ConversationState {
78
78
  this.invalidateMessagesCache();
79
79
  this.eagerNucleateUser(text);
80
80
  }
81
- addAssistantMessage(content, toolCalls) {
81
+ addAssistantMessage(content, toolCalls, extras) {
82
+ // extras is opaque provider payload to echo back (reasoning_content,
83
+ // reasoning_details, etc.). Spread verbatim; shape is the stream
84
+ // parser's concern.
85
+ const base = { role: "assistant", content: content ?? (toolCalls?.length ? null : "") };
82
86
  if (toolCalls?.length) {
83
- this.messages.push({
84
- role: "assistant",
85
- content: content ?? null,
86
- tool_calls: toolCalls.map((tc) => ({
87
- id: tc.id,
88
- type: "function",
89
- function: tc.function,
90
- })),
91
- });
92
- }
93
- else {
94
- this.messages.push({ role: "assistant", content: content ?? "" });
87
+ base.tool_calls = toolCalls.map((tc) => ({
88
+ id: tc.id,
89
+ type: "function",
90
+ function: tc.function,
91
+ }));
95
92
  }
93
+ if (extras)
94
+ Object.assign(base, extras);
95
+ this.messages.push(base);
96
96
  this.invalidateMessagesCache();
97
97
  }
98
98
  addToolResult(toolCallId, content, isError = false) {
@@ -111,7 +111,28 @@ export class ConversationState {
111
111
  this.invalidateMessagesCache();
112
112
  }
113
113
  getMessages() {
114
- return this.messages;
114
+ return this.normalizeReasoningConsistency(this.messages);
115
+ }
116
+ /**
117
+ * DeepSeek 400s if any assistant in a thinking-mode conversation is
118
+ * missing reasoning_content. Cross-alias here (OpenRouter streams as
119
+ * `reasoning`, DeepSeek input expects `reasoning_content`) and stub
120
+ * gaps (text-only turns, pre-fix messages) with empty string.
121
+ */
122
+ normalizeReasoningConsistency(messages) {
123
+ const needsNormalize = messages.some((m) => m.role === "assistant" && (m.reasoning !== undefined ||
124
+ m.reasoning_content !== undefined ||
125
+ m.reasoning_details !== undefined));
126
+ if (!needsNormalize)
127
+ return messages;
128
+ return messages.map((m) => {
129
+ if (m.role !== "assistant")
130
+ return m;
131
+ const a = m;
132
+ if (a.reasoning_content !== undefined)
133
+ return m;
134
+ return { ...m, reasoning_content: a.reasoning ?? "" };
135
+ });
115
136
  }
116
137
  /**
117
138
  * Replace the messages array wholesale — the write side for custom
@@ -36,10 +36,14 @@ export interface SubagentOptions {
36
36
  */
37
37
  dynamicContext?: string;
38
38
  /**
39
- * Per-subagent token budget. When total (prompt+completion) tokens
40
- * exceed this, the subagent terminates gracefully on the next
41
- * iteration. The parent's daily budget still counts these tokens
42
- * via onUsage; this is an additional per-call cap.
39
+ * Per-subagent completion-token budget. When the cumulative
40
+ * completion_tokens across iterations exceeds this, the subagent
41
+ * terminates gracefully on the next iteration. We deliberately don't
42
+ * count prompt tokens: the full history is resent each iteration, so
43
+ * prompt-inclusive counting double-charges context and makes a budget
44
+ * of N exhaust after O(log N) tool calls. Completion tokens measure
45
+ * the work the subagent actually produces. The parent's daily budget
46
+ * still sees real prompt+completion via onUsage.
43
47
  */
44
48
  budgetTokens?: number;
45
49
  /**
@@ -28,13 +28,13 @@ export async function runSubagent(opts) {
28
28
  break;
29
29
  }
30
30
  // Stream LLM response
31
- const { text, toolCalls, assistantContent, assistantToolCalls, usage } = await streamOnce(llmClient, systemPrompt, conversation, apiTools, model, signal, dynamicContext);
31
+ const { text, toolCalls, assistantContent, assistantToolCalls, extras, usage } = await streamOnce(llmClient, systemPrompt, conversation, apiTools, model, signal, dynamicContext);
32
32
  if (usage) {
33
- tokensConsumed += usage.total_tokens || 0;
33
+ tokensConsumed += usage.completion_tokens || 0;
34
34
  onUsage?.(usage);
35
35
  }
36
36
  fullResponseText += text;
37
- conversation.addAssistantMessage(assistantContent, assistantToolCalls);
37
+ conversation.addAssistantMessage(assistantContent, assistantToolCalls, extras);
38
38
  // No tool calls → done
39
39
  if (toolCalls.length === 0)
40
40
  break;
@@ -86,7 +86,7 @@ export async function runSubagent(opts) {
86
86
  }
87
87
  }
88
88
  if (budgetExhausted) {
89
- const note = `\n\n[Subagent terminated: token budget (${budgetTokens}) exhausted after ${tokensConsumed} tokens. Returning partial progress.]`;
89
+ const note = `\n\n[Subagent terminated: completion-token budget (${budgetTokens}) exhausted after ${tokensConsumed} completion tokens. Returning partial progress.]`;
90
90
  return fullResponseText + note;
91
91
  }
92
92
  return fullResponseText;
@@ -94,6 +94,9 @@ export async function runSubagent(opts) {
94
94
  /** Stream a single LLM response. */
95
95
  async function streamOnce(llmClient, systemPrompt, conversation, apiTools, model, signal, dynamicContext) {
96
96
  let text = "";
97
+ let reasoning = "";
98
+ let reasoningField = null;
99
+ const reasoningDetailsByIndex = new Map();
97
100
  const pendingToolCalls = [];
98
101
  let usage = null;
99
102
  const messages = [
@@ -127,6 +130,29 @@ async function streamOnce(llmClient, systemPrompt, conversation, apiTools, model
127
130
  if (delta?.content) {
128
131
  text += delta.content;
129
132
  }
133
+ const d = delta;
134
+ for (const name of ["reasoning", "reasoning_content"]) {
135
+ if (typeof d?.[name] === "string" && d[name].length > 0) {
136
+ reasoning += d[name];
137
+ reasoningField ??= name;
138
+ }
139
+ }
140
+ if (Array.isArray(d?.reasoning_details)) {
141
+ for (const x of d.reasoning_details) {
142
+ const idx = typeof x?.index === "number" ? x.index : reasoningDetailsByIndex.size;
143
+ const prev = reasoningDetailsByIndex.get(idx);
144
+ if (!prev) {
145
+ reasoningDetailsByIndex.set(idx, { ...x });
146
+ }
147
+ else {
148
+ if (typeof x.text === "string")
149
+ prev.text = (prev.text ?? "") + x.text;
150
+ for (const [k, v] of Object.entries(x))
151
+ if (k !== "text" && prev[k] === undefined)
152
+ prev[k] = v;
153
+ }
154
+ }
155
+ }
130
156
  if (delta?.tool_calls) {
131
157
  for (const tc of delta.tool_calls) {
132
158
  const idx = tc.index;
@@ -157,5 +183,19 @@ async function streamOnce(llmClient, systemPrompt, conversation, apiTools, model
157
183
  const assistantToolCalls = pendingToolCalls.length
158
184
  ? pendingToolCalls.map(tc => ({ id: tc.id, function: { name: tc.name, arguments: tc.argumentsJson } }))
159
185
  : undefined;
160
- return { text, toolCalls: pendingToolCalls, assistantContent: text || null, assistantToolCalls, usage };
186
+ const extras = {};
187
+ if (reasoning && reasoningField)
188
+ extras[reasoningField] = reasoning;
189
+ if (reasoningDetailsByIndex.size > 0) {
190
+ extras.reasoning_details = [...reasoningDetailsByIndex.entries()]
191
+ .sort((a, b) => a[0] - b[0]).map(([, v]) => v);
192
+ }
193
+ return {
194
+ text,
195
+ toolCalls: pendingToolCalls,
196
+ assistantContent: text || null,
197
+ assistantToolCalls,
198
+ extras: Object.keys(extras).length > 0 ? extras : undefined,
199
+ usage,
200
+ };
161
201
  }
@@ -39,7 +39,7 @@ export interface ToolProtocol {
39
39
  /** Rewrite a tool call before execution (e.g., unwrap meta-tool). */
40
40
  rewriteToolCall(tc: PendingToolCall): PendingToolCall;
41
41
  /** Record the assistant turn in conversation state. */
42
- recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
42
+ recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[], extras?: Record<string, unknown>): void;
43
43
  /** Record all tool results for a batch as conversation messages. */
44
44
  recordResults(conv: ConversationState, results: ToolResult[]): void;
45
45
  /** Create a stream filter for stripping tool calls from display. null = pass-through. */
@@ -57,7 +57,7 @@ export declare class ApiToolProtocol implements ToolProtocol {
57
57
  getToolPrompt(): string;
58
58
  extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
59
59
  rewriteToolCall(tc: PendingToolCall): PendingToolCall;
60
- recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
60
+ recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[], extras?: Record<string, unknown>): void;
61
61
  recordResults(conv: ConversationState, results: ToolResult[]): void;
62
62
  createStreamFilter(): null;
63
63
  }
@@ -68,7 +68,7 @@ export declare class InlineToolProtocol implements ToolProtocol {
68
68
  getToolPrompt(tools: ToolDefinition[]): string;
69
69
  rewriteToolCall(tc: PendingToolCall): PendingToolCall;
70
70
  extractToolCalls(text: string, _streamedCalls: PendingToolCall[]): PendingToolCall[];
71
- recordAssistant(conv: ConversationState, text: string, _toolCalls: PendingToolCall[]): void;
71
+ recordAssistant(conv: ConversationState, text: string, _toolCalls: PendingToolCall[], extras?: Record<string, unknown>): void;
72
72
  recordResults(conv: ConversationState, results: ToolResult[]): void;
73
73
  createStreamFilter(_toolNames: string[]): StreamFilter;
74
74
  }
@@ -82,7 +82,7 @@ export declare class DeferredToolProtocol implements ToolProtocol {
82
82
  getToolPrompt(): string;
83
83
  extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
84
84
  rewriteToolCall(tc: PendingToolCall): PendingToolCall;
85
- recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
85
+ recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[], extras?: Record<string, unknown>): void;
86
86
  recordResults(conv: ConversationState, results: ToolResult[]): void;
87
87
  createStreamFilter(): null;
88
88
  }
@@ -97,7 +97,7 @@ export declare class DeferredLookupProtocol implements ToolProtocol {
97
97
  getToolPrompt(): string;
98
98
  extractToolCalls(_text: string, streamedCalls: PendingToolCall[]): PendingToolCall[];
99
99
  rewriteToolCall(tc: PendingToolCall): PendingToolCall;
100
- recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[]): void;
100
+ recordAssistant(conv: ConversationState, text: string, toolCalls: PendingToolCall[], extras?: Record<string, unknown>): void;
101
101
  recordResults(conv: ConversationState, results: ToolResult[]): void;
102
102
  createStreamFilter(): null;
103
103
  getProtocolTools(): ToolDefinition[];
@@ -22,14 +22,14 @@ export class ApiToolProtocol {
22
22
  rewriteToolCall(tc) {
23
23
  return tc;
24
24
  }
25
- recordAssistant(conv, text, toolCalls) {
25
+ recordAssistant(conv, text, toolCalls, extras) {
26
26
  const calls = toolCalls.length
27
27
  ? toolCalls.map((tc) => ({
28
28
  id: tc.id,
29
29
  function: { name: tc.name, arguments: tc.argumentsJson },
30
30
  }))
31
31
  : undefined;
32
- conv.addAssistantMessage(text || null, calls);
32
+ conv.addAssistantMessage(text || null, calls, extras);
33
33
  }
34
34
  recordResults(conv, results) {
35
35
  for (const r of results) {
@@ -97,8 +97,8 @@ export class InlineToolProtocol {
97
97
  }
98
98
  return calls;
99
99
  }
100
- recordAssistant(conv, text, _toolCalls) {
101
- conv.addAssistantMessage(text || null);
100
+ recordAssistant(conv, text, _toolCalls, extras) {
101
+ conv.addAssistantMessage(text || null, undefined, extras);
102
102
  }
103
103
  recordResults(conv, results) {
104
104
  if (results.length === 0)
@@ -351,14 +351,14 @@ export class DeferredToolProtocol {
351
351
  return tc; // Let it fail naturally downstream
352
352
  }
353
353
  }
354
- recordAssistant(conv, text, toolCalls) {
354
+ recordAssistant(conv, text, toolCalls, extras) {
355
355
  const calls = toolCalls.length
356
356
  ? toolCalls.map((tc) => ({
357
357
  id: tc.id,
358
358
  function: { name: tc.name, arguments: tc.argumentsJson },
359
359
  }))
360
360
  : undefined;
361
- conv.addAssistantMessage(text || null, calls);
361
+ conv.addAssistantMessage(text || null, calls, extras);
362
362
  }
363
363
  recordResults(conv, results) {
364
364
  for (const r of results) {
@@ -444,14 +444,14 @@ export class DeferredLookupProtocol {
444
444
  rewriteToolCall(tc) {
445
445
  return tc; // no dispatching needed — load_tool is a real registered tool
446
446
  }
447
- recordAssistant(conv, text, toolCalls) {
447
+ recordAssistant(conv, text, toolCalls, extras) {
448
448
  const calls = toolCalls.length
449
449
  ? toolCalls.map((tc) => ({
450
450
  id: tc.id,
451
451
  function: { name: tc.name, arguments: tc.argumentsJson },
452
452
  }))
453
453
  : undefined;
454
- conv.addAssistantMessage(text || null, calls);
454
+ conv.addAssistantMessage(text || null, calls, extras);
455
455
  }
456
456
  recordResults(conv, results) {
457
457
  for (const r of results) {
@@ -65,6 +65,15 @@ export interface ShellEvents {
65
65
  completion_tokens: number;
66
66
  total_tokens: number;
67
67
  };
68
+ "llm:request": {
69
+ messages: unknown[];
70
+ tools?: unknown;
71
+ model?: string;
72
+ reasoning_effort?: string;
73
+ };
74
+ "llm:chunk": {
75
+ chunk: unknown;
76
+ };
68
77
  "agent:processing-start": Record<string, never>;
69
78
  "agent:processing-done": Record<string, never>;
70
79
  "agent:cancelled": Record<string, never>;
@@ -67,24 +67,6 @@ export default function agentBackend(ctx) {
67
67
  compositor: ctx.compositor,
68
68
  instanceId: ctx.instanceId,
69
69
  });
70
- bus.emit("agent:register-backend", {
71
- name: "ash",
72
- kill: () => agentLoop.kill(),
73
- start: async () => {
74
- if (!resolved) {
75
- bus.emit("ui:error", { message: "Agent backend not started — no LLM provider available. See earlier messages." });
76
- return;
77
- }
78
- agentLoop.wire();
79
- bus.emit("agent:info", {
80
- name: "ash",
81
- version: PACKAGE_VERSION,
82
- model: llmClient.model,
83
- provider: modes[initialModeIndex]?.provider,
84
- contextWindow: modes[initialModeIndex]?.contextWindow,
85
- });
86
- },
87
- });
88
70
  bus.on("core:extensions-loaded", () => {
89
71
  const settings = getSettings();
90
72
  // If the user didn't pick a default, fall back to the first registered
@@ -99,22 +81,49 @@ export default function agentBackend(ctx) {
99
81
  const effectiveApiKey = config.apiKey ?? activeProvider?.apiKey;
100
82
  const effectiveBaseURL = config.baseURL ?? activeProvider?.baseURL;
101
83
  const effectiveModel = config.model ?? persistedModelFor(providerName) ?? activeProvider?.defaultModel;
102
- if (!effectiveApiKey) {
103
- bus.emit("ui:error", { message: "No LLM provider configured. Export OPENROUTER_API_KEY or OPENAI_API_KEY (built-in providers auto-activate), pass --api-key, or run `agent-sh init` for a settings.json template." });
104
- return;
105
- }
106
- if (!effectiveModel) {
107
- bus.emit("ui:error", { message: "No model specified. Use --model or configure a provider with defaultModel in ~/.agent-sh/settings.json" });
84
+ // No provider → don't register ash at all, so another backend (e.g.
85
+ // claude-code-bridge) can own activation. index.ts hard-fails only
86
+ // when no backend ended up registered.
87
+ if (!effectiveApiKey || !effectiveModel)
108
88
  return;
109
- }
110
89
  modes = buildModes();
111
90
  if (modes.length === 0)
112
91
  modes = [{ model: effectiveModel }];
113
- initialModeIndex = Math.max(0, modes.findIndex((m) => m.model === effectiveModel && (!activeProvider || m.provider === activeProvider.id)));
92
+ let foundIdx = modes.findIndex((m) => m.model === effectiveModel && (!activeProvider || m.provider === activeProvider.id));
93
+ // Persisted default may not be in the provider's curated list yet (e.g.
94
+ // openrouter's async catalog fetch hasn't returned). Prepend a stub so
95
+ // the initial config:set-modes activeIndex points at the real model —
96
+ // otherwise AgentLoop reconfigures llmClient back to modes[0].
97
+ if (foundIdx === -1 && activeProvider) {
98
+ modes = [
99
+ {
100
+ model: effectiveModel,
101
+ provider: activeProvider.id,
102
+ providerConfig: { apiKey: effectiveApiKey, baseURL: effectiveBaseURL },
103
+ supportsReasoningEffort: activeProvider.supportsReasoningEffort,
104
+ },
105
+ ...modes,
106
+ ];
107
+ foundIdx = 0;
108
+ }
109
+ initialModeIndex = Math.max(0, foundIdx);
114
110
  llmClient.reconfigure({ apiKey: effectiveApiKey, baseURL: effectiveBaseURL, model: effectiveModel });
115
111
  bus.emit("config:set-modes", { modes, activeIndex: initialModeIndex });
116
112
  resolved = true;
117
- // start() emits agent:info after wiring.
113
+ bus.emit("agent:register-backend", {
114
+ name: "ash",
115
+ kill: () => agentLoop.kill(),
116
+ start: async () => {
117
+ agentLoop.wire();
118
+ bus.emit("agent:info", {
119
+ name: "ash",
120
+ version: PACKAGE_VERSION,
121
+ model: llmClient.model,
122
+ provider: modes[initialModeIndex]?.provider,
123
+ contextWindow: modes[initialModeIndex]?.contextWindow,
124
+ });
125
+ },
126
+ });
118
127
  });
119
128
  bus.on("provider:register", (p) => {
120
129
  const rawModels = p.models ?? (p.defaultModel ? [p.defaultModel] : []);
@@ -1,10 +1,5 @@
1
1
  const BASE_URL = "https://openrouter.ai/api/v1";
2
- // First entry is the cold-start default — kept cheap so trial users don't
3
- // get a surprise bill. Persisted /model selection overrides this.
4
- const DEFAULT_MODELS = [
5
- "deepseek/deepseek-v3.2",
6
- "anthropic/claude-sonnet-4.6",
7
- ];
2
+ const DEFAULT_MODELS = ["anthropic/claude-sonnet-4.6"];
8
3
  export default function activate(ctx) {
9
4
  const apiKey = process.env.OPENROUTER_API_KEY;
10
5
  if (!apiKey)
@@ -50,6 +50,7 @@ function createRenderState() {
50
50
  spinnerStartTime: 0,
51
51
  openTool: null,
52
52
  pendingToolCompletes: new Map(),
53
+ orphanContHeaderKind: undefined,
53
54
  currentToolKind: undefined,
54
55
  toolStartTime: 0,
55
56
  toolExitCode: null,
@@ -178,11 +179,8 @@ export default function activate(ctx) {
178
179
  stopCurrentSpinner();
179
180
  if (!s.renderer)
180
181
  startAgentResponse();
181
- s.renderer.writeLine(`${p.dim}Thinking (ctrl+t to collapse)${p.reset}`);
182
- drain();
183
182
  }
184
183
  else {
185
- // Restart spinner with ctrl+t hint now that we know thinking is available
186
184
  startThinkingSpinner();
187
185
  }
188
186
  }
@@ -256,6 +254,7 @@ export default function activate(ctx) {
256
254
  return;
257
255
  fencedTransform.flush();
258
256
  finalizeToolGroup();
257
+ s.orphanContHeaderKind = undefined;
259
258
  batchGroups = new Map();
260
259
  for (const group of e.groups) {
261
260
  batchGroups.set(group.kind, {
@@ -272,6 +271,7 @@ export default function activate(ctx) {
272
271
  stopCurrentSpinner();
273
272
  s.currentToolKind = e.kind;
274
273
  s.toolStartTime = Date.now();
274
+ s.orphanContHeaderKind = undefined;
275
275
  if (e.title === "user_shell") {
276
276
  finalizeToolGroup();
277
277
  closeToolLine();
@@ -315,10 +315,12 @@ export default function activate(ctx) {
315
315
  showToolCall(e.title, "", { ...e, groupContinuation: true });
316
316
  s.toolGroupRendered++;
317
317
  }
318
- // Record identity so late completes (after a premature finalize
319
- // from a cross-kind standalone start) can render as labeled ⎿ lines.
320
318
  if (e.toolCallId) {
321
- s.pendingToolCompletes.set(e.toolCallId, { title: e.title });
319
+ s.pendingToolCompletes.set(e.toolCallId, {
320
+ title: e.title,
321
+ kind,
322
+ displayDetail: e.displayDetail ?? extractDetail(e),
323
+ });
322
324
  }
323
325
  }
324
326
  else {
@@ -342,13 +344,25 @@ export default function activate(ctx) {
342
344
  s.pendingToolCompletes.delete(e.toolCallId);
343
345
  s.toolGroupCompletedCount++;
344
346
  s.currentToolKind = undefined;
347
+ // Finalize as soon as all members return so aggregate lands right
348
+ // after its children, not below out-of-band renders from the next tool.
349
+ const batchGroup = batchGroups.get(s.toolGroupKind);
350
+ if (batchGroup && s.toolGroupCompletedCount >= batchGroup.total) {
351
+ finalizeToolGroup();
352
+ }
345
353
  }
346
354
  else {
347
- // Route by callId — tools that lost the inline slot get a labeled line.
355
+ // Tools that lost the inline slot render as a labeled ⎿. Orphans
356
+ // (group finalized before they returned) reroute via showOrphanedComplete.
348
357
  const pending = e.toolCallId ? s.pendingToolCompletes.get(e.toolCallId) : undefined;
349
358
  if (pending)
350
359
  s.pendingToolCompletes.delete(e.toolCallId);
351
- showToolComplete(e.exitCode, e.resultDisplay, pending?.title);
360
+ if (pending?.orphaned) {
361
+ showOrphanedComplete(e.exitCode, e.resultDisplay, pending.title, pending.kind, pending.displayDetail);
362
+ }
363
+ else {
364
+ showToolComplete(e.exitCode, e.resultDisplay, pending?.displayDetail ?? pending?.title);
365
+ }
352
366
  s.currentToolKind = undefined;
353
367
  s.spinnerStartTime = 0;
354
368
  startThinkingSpinner();
@@ -746,8 +760,14 @@ export default function activate(ctx) {
746
760
  }
747
761
  else {
748
762
  out().write(` ${batchPrefix}${lines[lines.length - 1]}`);
749
- if (extra?.toolCallId)
750
- s.openTool = { callId: extra.toolCallId, title };
763
+ if (extra?.toolCallId) {
764
+ s.openTool = {
765
+ callId: extra.toolCallId,
766
+ title,
767
+ kind: extra.kind,
768
+ displayDetail: extra.displayDetail ?? extractDetail(extra),
769
+ };
770
+ }
751
771
  }
752
772
  }
753
773
  s.hadToolCalls = true;
@@ -775,6 +795,26 @@ export default function activate(ctx) {
775
795
  if (resultDisplay?.body)
776
796
  renderResultBody(resultDisplay.body);
777
797
  }
798
/**
 * Renders a tool completion whose group was already finalized before the
 * tool returned.
 *
 * The first orphan of a given kind gets a muted "<icon> <kind> (cont.)"
 * header written ahead of it, so the completion line has a visible parent.
 * Later orphans of that same kind skip the header, because
 * `s.orphanContHeaderKind` remembers which kind was announced last.
 *
 * @param exitCode       Forwarded unchanged to showToolComplete.
 * @param resultDisplay  Forwarded unchanged to showToolComplete.
 * @param title          Label used when `displayDetail` is falsy.
 * @param kind           Tool kind; selects the header icon and text
 *                       ("▶" / "tool" when absent).
 * @param displayDetail  Preferred label for the completion line.
 */
function showOrphanedComplete(exitCode, resultDisplay, title, kind, displayDetail) {
    const headerAlreadyShown = s.orphanContHeaderKind === kind;
    if (!headerAlreadyShown) {
        // Same preparation sequence as the original: spinner, open tool
        // line and buffered command output are dealt with before the
        // header line is written.
        stopCurrentSpinner();
        closeToolLine();
        flushCommandOutput();
        if (!s.renderer) {
            startAgentResponse();
        }
        showCollapsedThinking();
        const glyph = (kind && KIND_ICONS[kind]) ?? "▶";
        const kindName = kind ?? "tool";
        const headerLine = `${p.muted}${glyph} ${kindName} (cont.)${p.reset}`;
        s.renderer.writeLine(headerLine);
        drain();
        // Remember the kind so the next orphan of this kind reuses the header.
        s.orphanContHeaderKind = kind;
    }
    const completionLabel = displayDetail || title;
    showToolComplete(exitCode, resultDisplay, completionLabel);
}
778
818
  function renderResultBody(body) {
779
819
  if (!s.renderer)
780
820
  return;
@@ -796,10 +836,7 @@ export default function activate(ctx) {
796
836
  stopCurrentSpinner();
797
837
  const thinking = hasThinkingMode();
798
838
  s.spinnerLabel = thinking ? "Thinking" : "Working";
799
- const hint = thinking
800
- ? (s.showThinkingText ? "(ctrl+t to collapse)" : "(ctrl+t to expand)")
801
- : "";
802
- s.spinnerOpts = { hint: hint || undefined, startTime: s.spinnerStartTime };
839
+ s.spinnerOpts = { startTime: s.spinnerStartTime };
803
840
  s.spinner = createSpinner({ startTime: s.spinnerStartTime });
804
841
  s.spinnerInterval = setInterval(() => {
805
842
  if (s.spinner) {
@@ -825,13 +862,25 @@ export default function activate(ctx) {
825
862
  if (s.openTool) {
826
863
  out().write("\n");
827
864
  // Stash identity so the completion renders as ⎿ labeled, not orphan ✓.
828
- s.pendingToolCompletes.set(s.openTool.callId, { title: s.openTool.title });
865
+ s.pendingToolCompletes.set(s.openTool.callId, {
866
+ title: s.openTool.title,
867
+ kind: s.openTool.kind,
868
+ displayDetail: s.openTool.displayDetail,
869
+ });
829
870
  s.openTool = null;
830
871
  }
831
872
  }
832
873
  /** Render the group aggregate ⎿ line, or skip if no members have
833
874
  * completed yet (late completes will render individually as ⎿ labeled). */
834
875
  function finalizeToolGroup() {
876
+ // Late completes from this group have lost their inline slot; mark
877
+ // them so showOrphanedComplete re-emits a (cont.) header for their ⎿.
878
+ if (s.toolGroupKind) {
879
+ for (const pending of s.pendingToolCompletes.values()) {
880
+ if (pending.kind === s.toolGroupKind)
881
+ pending.orphaned = true;
882
+ }
883
+ }
835
884
  const skipAggregate = s.toolGroupCount > 1 && s.toolGroupCompletedCount === 0;
836
885
  if (s.toolGroupCount <= 1 || skipAggregate) {
837
886
  s.toolGroupKind = undefined;
@@ -842,6 +891,7 @@ export default function activate(ctx) {
842
891
  s.toolGroupSummaries = [];
843
892
  return;
844
893
  }
894
+ stopCurrentSpinner();
845
895
  closeToolLine();
846
896
  if (!s.renderer)
847
897
  startAgentResponse();
@@ -938,14 +988,10 @@ export default function activate(ctx) {
938
988
  if (s.spinner) {
939
989
  stopCurrentSpinner();
940
990
  if (s.showThinkingText) {
941
- // Expanding: replace spinner with thinking text header
942
991
  if (!s.renderer)
943
992
  startAgentResponse();
944
- s.renderer.writeLine(`${p.dim}Thinking (ctrl+t to collapse)${p.reset}`);
945
- drain();
946
993
  }
947
994
  else {
948
- // Collapsing: restart spinner with updated hint
949
995
  startThinkingSpinner();
950
996
  }
951
997
  return;
package/dist/index.js CHANGED
@@ -270,6 +270,16 @@ async function main() {
270
270
  // ── Activate agent backend ────────────────────────────────────
271
271
  // Extensions had their chance to register via agent:register-backend.
272
272
  // If none did, the built-in AgentLoop gets wired to bus events.
273
+ const { names: backendNames } = core.bus.emitPipe("config:get-backends", { names: [], active: null });
274
+ if (backendNames.length === 0) {
275
+ shell.kill();
276
+ console.error("\nagent-sh: no agent backend available.\n\n" +
277
+ " Export OPENROUTER_API_KEY or OPENAI_API_KEY for zero-config launch, or\n" +
278
+ " pass --api-key on the command line, or\n" +
279
+ " run `agent-sh init` for a settings.json template.\n" +
280
+ " Alternatively, install a bridge extension (claude-code-bridge, pi-bridge).\n");
281
+ process.exit(1);
282
+ }
273
283
  core.activateBackend();
274
284
  // ── Startup banner ───────────────────────────────────────────
275
285
  const settings = getSettings();
@@ -253,6 +253,36 @@ export class InputHandler {
253
253
  seq += data[i];
254
254
  }
255
255
  }
256
+ else if (next === "]" || next === "P" || next === "_" || next === "^") {
257
+ // String sequences terminated by BEL or ST (ESC \):
258
+ // OSC (ESC ]) — OSC 10/11 color-query responses
259
+ // DCS (ESC P) — tmux XTVERSION query response (iTerm2 etc.)
260
+ // APC (ESC _), PM (ESC ^) — rarer, same termination
261
+ // Forward as a unit so the payload doesn't leak into lineBuffer
262
+ // and onto the bash command line after a foreground app exits.
263
+ let j = i + 2;
264
+ let termEnd = -1;
265
+ while (j < data.length) {
266
+ const c = data[j];
267
+ if (c === "\x07") {
268
+ termEnd = j;
269
+ break;
270
+ }
271
+ if (c === "\x1b" && j + 1 < data.length && data[j + 1] === "\\") {
272
+ termEnd = j + 1;
273
+ break;
274
+ }
275
+ j++;
276
+ }
277
+ if (termEnd !== -1) {
278
+ seq = data.slice(i, termEnd + 1);
279
+ i = termEnd;
280
+ }
281
+ else {
282
+ seq += next;
283
+ i++;
284
+ }
285
+ }
256
286
  else {
257
287
  // ESC + single char (alt-key, etc.)
258
288
  seq += next;
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Dumps every LLM request + streamed chunk to $AGENT_SH_WIRE_DIR
3
+ * (default ~/.agent-sh/wire) for offline replay via curl. Paired files
4
+ * per turn: <stamp>.request.json and <stamp>.chunks.jsonl.
5
+ */
6
+ import * as fs from "node:fs";
7
+ import * as path from "node:path";
8
+ import * as os from "node:os";
9
+ import type { ExtensionContext } from "agent-sh/types";
10
+
11
/**
 * Activates the wire-dump extension.
 *
 * Subscribes to `llm:request` and `llm:chunk` on the extension bus and
 * mirrors them to disk as paired files per turn:
 * `<stamp>.request.json` and `<stamp>.chunks.jsonl`.
 *
 * The target directory is `$AGENT_SH_WIRE_DIR`, falling back to
 * `~/.agent-sh/wire` when the variable is unset or empty. The directory is
 * created at activation time; a failure there still throws so a broken
 * configuration is visible immediately.
 *
 * After activation, dumping is best-effort: a failed write or an
 * unserialisable payload is reported once on stderr and never propagates
 * into the bus emitter.
 *
 * @param ctx Extension context; only `ctx.bus.on` is used.
 */
export default function activate(ctx: ExtensionContext): void {
  // `||` rather than `??`: an empty AGENT_SH_WIRE_DIR means "not set".
  // Passing "" to mkdirSync would throw ENOENT.
  const dir = process.env.AGENT_SH_WIRE_DIR
    || path.join(os.homedir(), ".agent-sh", "wire");
  fs.mkdirSync(dir, { recursive: true });

  // llm:chunk has no back-pointer to its request, so both files are
  // anchored on the stamp chosen when llm:request fires.
  let currentStamp: string | null = null;
  let lastBase = "";
  let sameMsCount = 0;
  let warned = false;

  /**
   * Returns a filesystem-safe timestamp that is unique for this activation.
   * Requests landing in the same millisecond get a numeric suffix so they
   * do not overwrite each other's request file or share a chunk log.
   */
  const nextStamp = (): string => {
    const base = new Date().toISOString().replace(/[:.]/g, "-");
    if (base === lastBase) {
      sameMsCount += 1;
      return `${base}-${sameMsCount}`;
    }
    lastBase = base;
    sameMsCount = 0;
    return base;
  };

  /** Runs one dump step; reports the first failure and swallows the rest. */
  const bestEffort = (what: string, step: () => void): void => {
    try {
      step();
    } catch (err: unknown) {
      if (warned) return;
      warned = true;
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`agent-sh wire-dump: failed to write ${what}: ${reason}`);
    }
  };

  ctx.bus.on("llm:request", (req) => {
    // Advance the stamp before writing so later chunks are never appended
    // to the previous turn's log, even if this write fails.
    const stamp = nextStamp();
    currentStamp = stamp;
    bestEffort("request", () => {
      fs.writeFileSync(
        path.join(dir, `${stamp}.request.json`),
        JSON.stringify(req, null, 2),
      );
    });
  });

  ctx.bus.on("llm:chunk", ({ chunk }) => {
    const stamp = currentStamp;
    if (!stamp) return;
    bestEffort("chunk", () => {
      // JSON.stringify(undefined) is undefined; keep every line valid JSON.
      const line = JSON.stringify(chunk) ?? "null";
      fs.appendFileSync(path.join(dir, `${stamp}.chunks.jsonl`), line + "\n");
    });
  });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-sh",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "A shell-first terminal where AI is one keystroke away",
5
5
  "type": "module",
6
6
  "main": "dist/core.js",
@@ -95,7 +95,7 @@
95
95
  "dev": "tsx src/index.ts",
96
96
  "build": "tsc",
97
97
  "start": "node dist/index.js",
98
- "prepublishOnly": "npm run build"
98
+ "prepare": "npm run build"
99
99
  },
100
100
  "keywords": [
101
101
  "terminal",