agent-sh 0.13.3 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87,7 +87,7 @@ export class AgentLoop {
87
87
  // doing X." Addresses Q3 in QUESTIONS.md.
88
88
  lastErrorByTool = new Map(); // tool → error summary
89
89
  lastErrorByFile = new Map(); // file path → error summary
90
- static THINKING_LEVELS = ["off", "low", "medium", "high"];
90
+ static THINKING_LEVELS = ["off", "low", "medium", "high", "xhigh"];
91
91
  bus;
92
92
  llmClient;
93
93
  handlers;
@@ -168,8 +168,13 @@ export class AgentLoop {
168
168
  ];
169
169
  if (prev) {
170
170
  const newIdx = this.modes.findIndex((m) => m.model === prev.model && m.provider === prev.provider);
171
- if (newIdx !== -1)
171
+ if (newIdx !== -1) {
172
172
  this.currentModeIndex = newIdx;
173
+ const next = this.modes[newIdx];
174
+ if (next.providerConfig && next.providerConfig !== prev.providerConfig) {
175
+ this.llmClient.reconfigure({ ...next.providerConfig, model: next.model });
176
+ }
177
+ }
173
178
  }
174
179
  if (activePreserved && prev) {
175
180
  this.bus.emit("ui:info", {
@@ -507,7 +512,8 @@ export class AgentLoop {
507
512
  return mode.buildReasoningParams(this.thinkingLevel);
508
513
  if (this.thinkingLevel === "off")
509
514
  return {};
510
- return { reasoning_effort: this.thinkingLevel };
515
+ const effort = this.thinkingLevel === "xhigh" ? "high" : this.thinkingLevel;
516
+ return { reasoning_effort: effort };
511
517
  }
512
518
  get currentMode() {
513
519
  return this.modes[this.currentModeIndex];
@@ -1076,12 +1082,15 @@ export class AgentLoop {
1076
1082
  streamedCalls: streamedToolCalls,
1077
1083
  });
1078
1084
  fullResponseText += text;
1079
- // Record the assistant message via protocol
1080
- this.toolProtocol.recordAssistant(this.conversation, text, toolCalls, extras);
1081
- this.bus.emit("conversation:message-appended", {
1082
- role: "assistant",
1083
- content: text,
1084
- });
1085
+ if (text || toolCalls.length > 0) {
1086
+ this.toolProtocol.recordAssistant(this.conversation, text, toolCalls, extras);
1087
+ this.bus.emit("conversation:message-appended", {
1088
+ role: "assistant",
1089
+ content: text,
1090
+ });
1091
+ }
1092
+ if (signal.aborted)
1093
+ break;
1085
1094
  // No tool calls → agent is done
1086
1095
  if (toolCalls.length === 0) {
1087
1096
  this.conversation.eagerNucleateAgent(fullResponseText);
@@ -1502,83 +1511,90 @@ export class AgentLoop {
1502
1511
  };
1503
1512
  this.bus.emit("llm:request", requestParams);
1504
1513
  const stream = await this.llmClient.stream({ ...requestParams, signal });
1505
- for await (const chunk of stream) {
1506
- if (signal.aborted)
1507
- break;
1508
- this.bus.emit("llm:chunk", { chunk });
1509
- // Token usage (may arrive in a chunk with empty choices)
1510
- if (chunk.usage) {
1511
- const u = chunk.usage;
1512
- const promptTokens = u.prompt_tokens ?? 0;
1513
- this.bus.emit("agent:usage", {
1514
- prompt_tokens: promptTokens,
1515
- completion_tokens: u.completion_tokens ?? 0,
1516
- total_tokens: u.total_tokens ?? 0,
1517
- });
1518
- // Feed accurate token count back to conversation state
1519
- if (promptTokens > 0) {
1520
- this.conversation.updateApiTokenCount(promptTokens);
1521
- }
1522
- }
1523
- const choice = chunk.choices[0];
1524
- if (!choice)
1525
- continue;
1526
- const delta = choice.delta;
1527
- // Text content
1528
- if (delta?.content) {
1529
- text += delta.content;
1530
- // Filter tool tags from display output (inline mode)
1531
- const displayText = streamFilter
1532
- ? streamFilter.feed(delta.content)
1533
- : delta.content;
1534
- if (displayText) {
1535
- this.bus.emitTransform("agent:response-chunk", {
1536
- blocks: [{ type: "text", text: displayText }],
1514
+ try {
1515
+ for await (const chunk of stream) {
1516
+ if (signal.aborted)
1517
+ break;
1518
+ this.bus.emit("llm:chunk", { chunk });
1519
+ // Token usage (may arrive in a chunk with empty choices)
1520
+ if (chunk.usage) {
1521
+ const u = chunk.usage;
1522
+ const promptTokens = u.prompt_tokens ?? 0;
1523
+ this.bus.emit("agent:usage", {
1524
+ prompt_tokens: promptTokens,
1525
+ completion_tokens: u.completion_tokens ?? 0,
1526
+ total_tokens: u.total_tokens ?? 0,
1537
1527
  });
1528
+ // Feed accurate token count back to conversation state
1529
+ if (promptTokens > 0) {
1530
+ this.conversation.updateApiTokenCount(promptTokens);
1531
+ }
1538
1532
  }
1539
- }
1540
- const d = delta;
1541
- for (const name of ["reasoning", "reasoning_content"]) {
1542
- if (typeof d?.[name] === "string" && d[name].length > 0) {
1543
- reasoning += d[name];
1544
- reasoningField ??= name;
1545
- this.bus.emit("agent:thinking-chunk", { text: d[name] });
1546
- }
1547
- }
1548
- if (Array.isArray(d?.reasoning_details)) {
1549
- for (const x of d.reasoning_details) {
1550
- const idx = typeof x?.index === "number" ? x.index : reasoningDetailsByIndex.size;
1551
- const prev = reasoningDetailsByIndex.get(idx);
1552
- if (!prev) {
1553
- reasoningDetailsByIndex.set(idx, { ...x });
1533
+ const choice = chunk.choices[0];
1534
+ if (!choice)
1535
+ continue;
1536
+ const delta = choice.delta;
1537
+ // Text content
1538
+ if (delta?.content) {
1539
+ text += delta.content;
1540
+ // Filter tool tags from display output (inline mode)
1541
+ const displayText = streamFilter
1542
+ ? streamFilter.feed(delta.content)
1543
+ : delta.content;
1544
+ if (displayText) {
1545
+ this.bus.emitTransform("agent:response-chunk", {
1546
+ blocks: [{ type: "text", text: displayText }],
1547
+ });
1554
1548
  }
1555
- else {
1556
- if (typeof x.text === "string")
1557
- prev.text = (prev.text ?? "") + x.text;
1558
- for (const [k, v] of Object.entries(x))
1559
- if (k !== "text" && prev[k] === undefined)
1560
- prev[k] = v;
1549
+ }
1550
+ const d = delta;
1551
+ for (const name of ["reasoning", "reasoning_content"]) {
1552
+ if (typeof d?.[name] === "string" && d[name].length > 0) {
1553
+ reasoning += d[name];
1554
+ reasoningField ??= name;
1555
+ this.bus.emit("agent:thinking-chunk", { text: d[name] });
1561
1556
  }
1562
1557
  }
1563
- }
1564
- // Tool calls (streamed incrementally)
1565
- if (delta?.tool_calls) {
1566
- for (const tc of delta.tool_calls) {
1567
- const idx = tc.index;
1568
- if (!pendingToolCalls[idx]) {
1569
- pendingToolCalls[idx] = {
1570
- id: tc.id,
1571
- name: tc.function.name,
1572
- argumentsJson: "",
1573
- };
1558
+ if (Array.isArray(d?.reasoning_details)) {
1559
+ for (const x of d.reasoning_details) {
1560
+ const idx = typeof x?.index === "number" ? x.index : reasoningDetailsByIndex.size;
1561
+ const prev = reasoningDetailsByIndex.get(idx);
1562
+ if (!prev) {
1563
+ reasoningDetailsByIndex.set(idx, { ...x });
1564
+ }
1565
+ else {
1566
+ if (typeof x.text === "string")
1567
+ prev.text = (prev.text ?? "") + x.text;
1568
+ for (const [k, v] of Object.entries(x))
1569
+ if (k !== "text" && prev[k] === undefined)
1570
+ prev[k] = v;
1571
+ }
1574
1572
  }
1575
- if (tc.function?.arguments) {
1576
- pendingToolCalls[idx].argumentsJson +=
1577
- tc.function.arguments;
1573
+ }
1574
+ // Tool calls (streamed incrementally)
1575
+ if (delta?.tool_calls) {
1576
+ for (const tc of delta.tool_calls) {
1577
+ const idx = tc.index;
1578
+ if (!pendingToolCalls[idx]) {
1579
+ pendingToolCalls[idx] = {
1580
+ id: tc.id,
1581
+ name: tc.function.name,
1582
+ argumentsJson: "",
1583
+ };
1584
+ }
1585
+ if (tc.function?.arguments) {
1586
+ pendingToolCalls[idx].argumentsJson +=
1587
+ tc.function.arguments;
1588
+ }
1578
1589
  }
1579
1590
  }
1580
1591
  }
1581
1592
  }
1593
+ catch (e) {
1594
+ // On abort, fall through with whatever was accumulated so far.
1595
+ if (!signal.aborted)
1596
+ throw e;
1597
+ }
1582
1598
  // Flush any buffered content from the stream filter
1583
1599
  if (streamFilter) {
1584
1600
  const remaining = streamFilter.flush();
@@ -15,7 +15,9 @@ function persistedModelFor(providerName) {
15
15
  return getSettings().providers?.[providerName]?.defaultModel;
16
16
  }
17
17
  function defaultReasoningBuilder(level) {
18
- return level === "off" ? {} : { reasoning_effort: level };
18
+ if (level === "off")
19
+ return {};
20
+ return { reasoning_effort: level === "xhigh" ? "high" : level };
19
21
  }
20
22
  function mergeCaps(settingsCaps, payloadCaps, modelIds) {
21
23
  if (!settingsCaps)
@@ -118,11 +120,12 @@ export default function agentBackend(ctx) {
118
120
  ctx.define("llm:get-client", () => llmClient);
119
121
  ctx.define("llm:invoke", (messages, opts) => {
120
122
  const effort = opts?.reasoningEffort;
123
+ const clampedEffort = effort === "xhigh" ? "high" : effort;
121
124
  return llmClient.complete({
122
125
  messages: messages,
123
126
  max_tokens: opts?.maxTokens,
124
127
  model: opts?.model,
125
- ...(effort && effort !== "off" ? { reasoning_effort: effort } : {}),
128
+ ...(clampedEffort && clampedEffort !== "off" ? { reasoning_effort: clampedEffort } : {}),
126
129
  });
127
130
  });
128
131
  let modes = [];
@@ -2,6 +2,8 @@
2
2
  * Cloud OpenAI (api.openai.com). reasoning_effort vocabulary diverges per
3
3
  * family: o-series has no off; gpt-5-codex floors at "low"; plain gpt-5
4
4
  * floors at "minimal"; gpt-5.1+ accepts "none" as documented full off.
5
+ * Top tier: only gpt-5.1-codex-max and gpt-5.[4-9]+ accept "xhigh"; others
6
+ * clamp to "high".
5
7
  */
6
8
  import type { AgentContext } from "../host-types.js";
7
9
  export default function activate(ctx: AgentContext): void;
@@ -18,9 +18,16 @@ function offEffortFor(model) {
18
18
  return "minimal";
19
19
  return null;
20
20
  }
21
+ function supportsXhigh(model) {
22
+ if (model.startsWith("gpt-5.1-codex-max"))
23
+ return true;
24
+ return /^gpt-5\.[4-9]/.test(model);
25
+ }
21
26
  function buildReasoningParams(level, model) {
22
- if (level !== "off")
23
- return { reasoning_effort: level };
27
+ if (level !== "off") {
28
+ const effort = level === "xhigh" && !(model && supportsXhigh(model)) ? "high" : level;
29
+ return { reasoning_effort: effort };
30
+ }
24
31
  const off = model ? offEffortFor(model) : null;
25
32
  return off ? { reasoning_effort: off } : {};
26
33
  }
@@ -446,10 +446,13 @@ function getModelsPayload(): Record<string, unknown> | undefined {
446
446
  if (!core) return undefined;
447
447
  const info = core.bus.emitPipe("config:get-models", { models: [], active: null });
448
448
  if (!info.models.length) return undefined;
449
+ const idFor = (m: { model: string; provider: string }) =>
450
+ m.provider ? `${m.model}@${m.provider}` : m.model;
451
+ const current = info.active ?? info.models[0]!;
449
452
  return {
450
- currentModelId: info.active?.model ?? info.models[0]?.model,
453
+ currentModelId: idFor(current),
451
454
  availableModels: info.models.map((m) => ({
452
- modelId: m.model,
455
+ modelId: idFor(m),
453
456
  name: m.provider ? `${m.provider}/${m.model}` : m.model,
454
457
  description: m.provider ? `Provider: ${m.provider}` : "",
455
458
  })),
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@guanyilun/ashi",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Ash in an interactive TUI — agent-sh's built-in agent without the shell underneath",
5
5
  "type": "module",
6
6
  "main": "dist/cli.js",
@@ -48,7 +48,7 @@
48
48
  },
49
49
  "dependencies": {
50
50
  "@earendil-works/pi-tui": "^0.74.0",
51
- "agent-sh": "^0.13.2",
51
+ "agent-sh": "^0.13.3",
52
52
  "chalk": "^5.5.0",
53
53
  "cli-highlight": "^2.1.11"
54
54
  },
@@ -1,44 +1,46 @@
1
1
  /**
2
- * Ollama provider extension — local daemon and Ollama Cloud.
2
+ * Ollama provider extension — local daemon or Ollama Cloud.
3
3
  *
4
- * OLLAMA_API_KEY → Ollama Cloud (https://ollama.com)
5
- * OLLAMA_HOST → local host override (default http://localhost:11434)
4
+ * Cloud auth (any of):
5
+ * agent-sh auth login ollama-cloud # preferred
6
+ * OLLAMA_API_KEY=... # env fallback
7
+ *
8
+ * Local host:
9
+ * OLLAMA_HOST (default http://localhost:11434)
6
10
  *
7
11
  * Catalog comes from /api/tags; per-model context length is fetched
8
12
  * from /api/show (model_info["${arch}.context_length"]). Chat goes
9
13
  * through the OpenAI-compatible /v1/chat/completions shim.
10
14
  *
11
- * Setup (cloud):
12
- * export OLLAMA_API_KEY="your-key"
13
- *
14
- * Setup (local):
15
- * ollama serve # default http://localhost:11434
16
- *
17
15
  * Usage:
18
16
  * agent-sh -e ./examples/extensions/ollama.ts
19
17
  *
20
18
  * # Or add to settings.json:
21
19
  * { "extensions": ["./examples/extensions/ollama.ts"] }
22
20
  */
23
- import type { ExtensionContext } from "agent-sh/types";
21
+ import { resolveApiKey } from "agent-sh/auth";
22
+ import type { AgentContext } from "agent-sh/types";
24
23
 
25
24
  const ECHO_REASONING_PATTERNS: RegExp[] = [/deepseek/i];
26
25
 
27
- export default function activate(ctx: ExtensionContext): void {
28
- const apiKey = process.env.OLLAMA_API_KEY;
29
- const host = apiKey
26
+ export default function activate(ctx: AgentContext): void {
27
+ const cloudKey = resolveApiKey("ollama-cloud").key ?? process.env.OLLAMA_API_KEY;
28
+ const host = cloudKey
30
29
  ? "https://ollama.com"
31
30
  : (process.env.OLLAMA_HOST ?? "http://localhost:11434").replace(/\/$/, "");
32
- const id = apiKey ? "ollama-cloud" : "ollama";
31
+ const id = cloudKey ? "ollama-cloud" : "ollama";
33
32
 
34
33
  // OpenAI SDK rejects an empty apiKey; the local daemon ignores the value.
35
- const sdkKey = apiKey || "no-key";
34
+ const sdkKey = cloudKey || "no-key";
36
35
  const baseURL = `${host}/v1`;
37
36
  const headers: Record<string, string> = {};
38
- if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
37
+ if (cloudKey) headers.Authorization = `Bearer ${cloudKey}`;
39
38
 
40
39
  ctx.agent.providers.configure(id, {
41
- reasoningParams: (level) => ({ reasoning_effort: level === "off" ? "none" : level }),
40
+ reasoningParams: (level) => {
41
+ if (level === "off") return { reasoning_effort: "none" };
42
+ return { reasoning_effort: level === "xhigh" ? "high" : level };
43
+ },
42
44
  });
43
45
 
44
46
  ctx.bus.emit("provider:register", { id, apiKey: sdkKey, baseURL, models: [] });
@@ -19,7 +19,8 @@ const DEFAULT_MODELS = [
19
19
 
20
20
  function buildReasoningParams(level: string, _model?: string): Record<string, unknown> {
21
21
  if (level === "off") return { thinking: { type: "disabled" } };
22
- return { thinking: { type: "enabled" }, reasoning_effort: level };
22
+ const effort = level === "xhigh" ? "high" : level;
23
+ return { thinking: { type: "enabled" }, reasoning_effort: effort };
23
24
  }
24
25
 
25
26
  export default function activate(ctx: AgentContext): void {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-sh",
3
- "version": "0.13.3",
3
+ "version": "0.13.4",
4
4
  "description": "A shell-first terminal where AI is one keystroke away",
5
5
  "type": "module",
6
6
  "main": "dist/core/index.js",
@@ -1,78 +0,0 @@
1
- /**
2
- * Ollama Cloud — hosted Ollama models (https://ollama.com).
3
- *
4
- * Auth: agent-sh auth login ollama-cloud
5
- * Usage: agent-sh -e ./examples/extensions/ollama-cloud.ts
6
- */
7
- import { resolveApiKey } from "agent-sh/auth";
8
- import type { AgentContext } from "agent-sh/types";
9
-
10
- const HOST = "https://ollama.com";
11
- const BASE_URL = `${HOST}/v1`;
12
- const ID = "ollama-cloud";
13
-
14
- function buildReasoningParams(level: string, _model?: string): Record<string, unknown> {
15
- return { reasoning_effort: level === "off" ? "none" : level };
16
- }
17
-
18
- async function fetchModels(apiKey: string) {
19
- const headers: Record<string, string> = { Authorization: `Bearer ${apiKey}` };
20
- const tagsRes = await fetch(`${HOST}/api/tags`, { headers });
21
- if (!tagsRes.ok) return [];
22
- const tagsData = await tagsRes.json() as { models?: { name: string }[] };
23
- const names = (tagsData.models ?? []).map((m) => m.name);
24
- if (!names.length) return [];
25
-
26
- const ctxs = await Promise.all(
27
- names.map((name) =>
28
- fetch(`${HOST}/api/show`, {
29
- method: "POST",
30
- headers: { ...headers, "Content-Type": "application/json" },
31
- body: JSON.stringify({ name }),
32
- })
33
- .then((r) => r.ok ? r.json() as Promise<{ model_info?: Record<string, unknown> }> : null)
34
- .then((d) => {
35
- if (!d?.model_info) return undefined;
36
- const info = d.model_info;
37
- const arch = info["general.architecture"] as string | undefined;
38
- if (arch) {
39
- const ctx = info[`${arch}.context_length`];
40
- if (typeof ctx === "number") return ctx;
41
- }
42
- for (const [k, v] of Object.entries(info)) {
43
- if (k.endsWith(".context_length") && typeof v === "number") return v;
44
- }
45
- return undefined;
46
- })
47
- .catch(() => undefined),
48
- ),
49
- );
50
-
51
- return names.map((name, i) => ({
52
- id: name,
53
- contextWindow: ctxs[i],
54
- echoReasoning: /deepseek/i.test(name),
55
- }));
56
- }
57
-
58
- export default function activate(ctx: AgentContext): void {
59
- const { key } = resolveApiKey(ID);
60
- const apiKey = key ?? process.env.OLLAMA_API_KEY;
61
- if (!apiKey) return;
62
-
63
- ctx.agent.providers.configure(ID, { reasoningParams: buildReasoningParams });
64
-
65
- // Register placeholder while catalog loads
66
- ctx.bus.emit("provider:register", { id: ID, apiKey, baseURL: BASE_URL, models: [] });
67
-
68
- fetchModels(apiKey).then((models) => {
69
- if (!models.length) return;
70
- ctx.bus.emit("provider:register", {
71
- id: ID,
72
- apiKey,
73
- baseURL: BASE_URL,
74
- defaultModel: models[0]!.id,
75
- models,
76
- });
77
- }).catch(() => { /* keep placeholder */ });
78
- }