agent-sh 0.12.19 → 0.12.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -57,14 +57,22 @@ export OPENAI_API_KEY=sk-...
57
57
  agent-sh
58
58
  ```
59
59
 
60
+ **DeepSeek:**
61
+
62
+ ```bash
63
+ export DEEPSEEK_API_KEY=sk-...
64
+ agent-sh
65
+ ```
66
+
60
67
  **Local models** (Ollama, llama.cpp server, LM Studio, vLLM — anything OpenAI-compatible):
61
68
 
62
69
  ```bash
63
- export OPENAI_API_KEY=ollama # any value; dummy is fine
64
70
  export OPENAI_BASE_URL=http://localhost:11434/v1 # point at your server
65
71
  agent-sh
66
72
  ```
67
73
 
74
+ Set `OPENAI_API_KEY` too if your server requires auth.
75
+
68
76
  Once running, switch models at any time with `/model <name>` (tab-completes; selection persists across sessions).
69
77
 
70
78
  For richer configuration (multiple providers, extensions), run `agent-sh init` to scaffold `~/.agent-sh/settings.json` with copy-pasteable examples. See the [Usage Guide](docs/usage.md) for the full list of supported providers.
@@ -4,6 +4,7 @@ import * as path from "node:path";
4
4
  import * as os from "node:os";
5
5
  import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
6
6
  import { ToolRegistry } from "./tool-registry.js";
7
+ import { normalizeToolArgs } from "./normalize-args.js";
7
8
  import { ConversationState } from "./conversation-state.js";
8
9
  import { HistoryFile } from "./history-file.js";
9
10
  import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
@@ -1188,6 +1189,10 @@ export class AgentLoop {
1188
1189
  });
1189
1190
  return;
1190
1191
  }
1192
+ // Normalize against the tool's input_schema: some LLMs stringify
1193
+ // nested object/array args despite the schema. See
1194
+ // normalize-args.ts for the diagnostic that uncovered this.
1195
+ args = normalizeToolArgs(args, tool.input_schema);
1191
1196
  // ── Round-scoped cache for cacheable read-only tools ──
1192
1197
  const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
1193
1198
  const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
@@ -1527,6 +1532,7 @@ export class AgentLoop {
1527
1532
  messages,
1528
1533
  tools: apiTools,
1529
1534
  model: this.currentModel,
1535
+ max_tokens: this.currentMode.maxTokens ?? 65536,
1530
1536
  ...this.reasoningParams(),
1531
1537
  };
1532
1538
  this.bus.emit("llm:request", requestParams);
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Schema-aware tool-arg normalization.
3
+ *
4
+ * Some LLMs (notably Claude) occasionally emit nested object/array
5
+ * tool-call arguments as JSON-encoded strings instead of native
6
+ * objects, despite the schema declaring `type: "object"` /
7
+ * `type: "array"`. The discrepancy was diagnosed by the superash field
8
+ * test (2026-05-03 / commit `b9efd47`):
9
+ *
10
+ * describe_demos: 'task' arrived as a string (length 1267)
11
+ * last char code: 93 (']')
12
+ * truncation suspected: true
13
+ *
14
+ * Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
15
+ * helper centralizes the fix at the kernel boundary: after parsing the
16
+ * outer `argumentsJson`, walk each top-level field; for any field whose
17
+ * schema declares `object` or `array` but whose value is a string, run
18
+ * a single JSON.parse pass. On parse failure (e.g. truncated content),
19
+ * the string is left as-is — the tool can produce a clean error.
20
+ *
21
+ * Top-level only by design. Recursing into nested object schemas would
22
+ * change semantics for tools that legitimately accept stringified
23
+ * payloads as inner fields, and the observed wild cases all stringify
24
+ * at the top level.
25
+ */
26
+ /** Normalize tool-call args against the tool's input_schema. Pure: does
27
+ * not mutate `args`. Returns a new object with stringified-then-decoded
28
+ * fields swapped in where applicable. */
29
+ export declare function normalizeToolArgs(args: Record<string, unknown>, schema: unknown): Record<string, unknown>;
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Schema-aware tool-arg normalization.
3
+ *
4
+ * Some LLMs (notably Claude) occasionally emit nested object/array
5
+ * tool-call arguments as JSON-encoded strings instead of native
6
+ * objects, despite the schema declaring `type: "object"` /
7
+ * `type: "array"`. The discrepancy was diagnosed by the superash field
8
+ * test (2026-05-03 / commit `b9efd47`):
9
+ *
10
+ * describe_demos: 'task' arrived as a string (length 1267)
11
+ * last char code: 93 (']')
12
+ * truncation suspected: true
13
+ *
14
+ * Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
15
+ * helper centralizes the fix at the kernel boundary: after parsing the
16
+ * outer `argumentsJson`, walk each top-level field; for any field whose
17
+ * schema declares `object` or `array` but whose value is a string, run
18
+ * a single JSON.parse pass. On parse failure (e.g. truncated content),
19
+ * the string is left as-is — the tool can produce a clean error.
20
+ *
21
+ * Top-level only by design. Recursing into nested object schemas would
22
+ * change semantics for tools that legitimately accept stringified
23
+ * payloads as inner fields, and the observed wild cases all stringify
24
+ * at the top level.
25
+ */
26
+ /** Normalize tool-call args against the tool's input_schema. Pure: does
27
+ * not mutate `args`. Returns a new object with stringified-then-decoded
28
+ * fields swapped in where applicable. */
29
+ export function normalizeToolArgs(args, schema) {
30
+ if (!schema || typeof schema !== "object")
31
+ return args;
32
+ const properties = schema.properties;
33
+ if (!properties || typeof properties !== "object")
34
+ return args;
35
+ let out = null;
36
+ for (const [field, fieldSchema] of Object.entries(properties)) {
37
+ if (!fieldSchema || typeof fieldSchema !== "object")
38
+ continue;
39
+ const expectedType = fieldSchema.type;
40
+ if (expectedType !== "object" && expectedType !== "array")
41
+ continue;
42
+ const value = args[field];
43
+ if (typeof value !== "string")
44
+ continue;
45
+ try {
46
+ const parsed = JSON.parse(value);
47
+ if (out === null)
48
+ out = { ...args };
49
+ out[field] = parsed;
50
+ }
51
+ catch {
52
+ // Leave as string — downstream tool can produce a useful error.
53
+ }
54
+ }
55
+ return out ?? args;
56
+ }
@@ -1,4 +1,5 @@
1
1
  import { ConversationState } from "./conversation-state.js";
2
+ import { normalizeToolArgs } from "./normalize-args.js";
2
3
  import { wrapTrailingWithDynamicContext } from "../utils/message-utils.js";
3
4
  /**
4
5
  * Run a subagent to completion.
@@ -56,6 +57,7 @@ export async function runSubagent(opts) {
56
57
  conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`, true);
57
58
  continue;
58
59
  }
60
+ args = normalizeToolArgs(args, tool.input_schema);
59
61
  // Emit tool events for TUI (if bus provided)
60
62
  if (bus) {
61
63
  const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
@@ -69,6 +69,7 @@ export interface ShellEvents {
69
69
  messages: unknown[];
70
70
  tools?: unknown;
71
71
  model?: string;
72
+ max_tokens?: number;
72
73
  reasoning_effort?: string;
73
74
  };
74
75
  "llm:chunk": {
@@ -318,6 +319,7 @@ export interface ShellEvents {
318
319
  id: string;
319
320
  reasoning?: boolean;
320
321
  contextWindow?: number;
322
+ maxTokens?: number;
321
323
  echoReasoning?: boolean;
322
324
  })[];
323
325
  /** Provider supports the reasoning_effort parameter. Default: true. */
@@ -325,7 +327,7 @@ export interface ShellEvents {
325
327
  };
326
328
  "provider:configure": {
327
329
  id: string;
328
- reasoningParams?: (level: string) => Record<string, unknown>;
330
+ reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
329
331
  };
330
332
  "agent:register-tool": {
331
333
  tool: import("./agent/types.js").ToolDefinition;
@@ -11,24 +11,50 @@ function persistedModelFor(providerName) {
11
11
  function defaultReasoningBuilder(level) {
12
12
  return level === "off" ? {} : { reasoning_effort: level };
13
13
  }
14
+ function mergeCaps(settingsCaps, payloadCaps, modelIds) {
15
+ if (!settingsCaps)
16
+ return payloadCaps.size > 0 ? payloadCaps : undefined;
17
+ const out = new Map();
18
+ for (const id of modelIds) {
19
+ const s = settingsCaps.get(id);
20
+ const p = payloadCaps.get(id);
21
+ if (!s && !p)
22
+ continue;
23
+ out.set(id, {
24
+ reasoning: s?.reasoning ?? p?.reasoning,
25
+ contextWindow: s?.contextWindow ?? p?.contextWindow,
26
+ maxTokens: s?.maxTokens ?? p?.maxTokens,
27
+ echoReasoning: s?.echoReasoning ?? p?.echoReasoning,
28
+ });
29
+ }
30
+ return out.size > 0 ? out : undefined;
31
+ }
14
32
  export default function agentBackend(ctx) {
15
33
  const { bus } = ctx;
16
34
  const config = ctx.call("config:get-shell-config") ?? {};
17
- // Seed from settings.json; runtime provider:register events add more.
35
+ // Immutable settings snapshot; provider:register payloads merge against it.
18
36
  const providerRegistry = new Map();
37
+ const settingsProviders = new Map();
19
38
  for (const name of getProviderNames()) {
20
39
  const p = resolveProvider(name);
21
- if (p)
40
+ if (p) {
22
41
  providerRegistry.set(name, p);
42
+ settingsProviders.set(name, p);
43
+ }
23
44
  }
24
45
  const providerHooks = new Map();
46
+ // Bakes model id into the hook so AgentMode.buildReasoningParams keeps
47
+ // its (level) signature while the hook can branch on model.
48
+ const bindReasoning = (shapeId, model) => {
49
+ const hook = providerHooks.get(shapeId)?.reasoningParams;
50
+ return hook ? (level) => hook(level, model) : defaultReasoningBuilder;
51
+ };
25
52
  const buildModes = () => {
26
53
  const allModes = [];
27
54
  for (const [id, p] of providerRegistry) {
28
55
  if (!p.apiKey)
29
56
  continue;
30
57
  const shapeId = p.reasoningShape ?? id;
31
- const buildReasoningParams = providerHooks.get(shapeId)?.reasoningParams ?? defaultReasoningBuilder;
32
58
  for (const model of p.models) {
33
59
  const mc = p.modelCapabilities?.get(model);
34
60
  allModes.push({
@@ -36,10 +62,11 @@ export default function agentBackend(ctx) {
36
62
  provider: id,
37
63
  providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
38
64
  contextWindow: mc?.contextWindow ?? p.contextWindow,
65
+ maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
39
66
  reasoning: mc?.reasoning,
40
67
  supportsReasoningEffort: p.supportsReasoningEffort,
41
68
  echoReasoning: mc?.echoReasoning,
42
- buildReasoningParams,
69
+ buildReasoningParams: bindReasoning(shapeId, model),
43
70
  });
44
71
  }
45
72
  }
@@ -54,6 +81,8 @@ export default function agentBackend(ctx) {
54
81
  return llmClient.complete({
55
82
  messages: messages,
56
83
  max_tokens: opts?.maxTokens,
84
+ model: opts?.model,
85
+ reasoning_effort: opts?.reasoningEffort,
57
86
  });
58
87
  });
59
88
  let modes = [];
@@ -141,38 +170,45 @@ export default function agentBackend(ctx) {
141
170
  });
142
171
  bus.on("provider:register", (p) => {
143
172
  const rawModels = p.models ?? (p.defaultModel ? [p.defaultModel] : []);
144
- const modelIds = [];
145
- const caps = new Map();
173
+ const payloadModelIds = [];
174
+ const payloadCaps = new Map();
146
175
  for (const m of rawModels) {
147
176
  if (typeof m === "string") {
148
- modelIds.push(m);
177
+ payloadModelIds.push(m);
149
178
  }
150
179
  else {
151
- modelIds.push(m.id);
152
- caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, echoReasoning: m.echoReasoning });
180
+ payloadModelIds.push(m.id);
181
+ payloadCaps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
153
182
  }
154
183
  }
155
- providerRegistry.set(p.id, {
184
+ const settings = settingsProviders.get(p.id);
185
+ const modelIds = settings?.modelsExplicit && settings.models.length > 0 ? settings.models : payloadModelIds;
186
+ const mergedCaps = mergeCaps(settings?.modelCapabilities, payloadCaps, modelIds);
187
+ const merged = {
156
188
  id: p.id,
157
- apiKey: p.apiKey,
158
- baseURL: p.baseURL,
159
- defaultModel: p.defaultModel,
189
+ apiKey: settings?.apiKey ?? p.apiKey,
190
+ baseURL: settings?.baseURL ?? p.baseURL,
191
+ defaultModel: settings?.defaultModel ?? p.defaultModel,
160
192
  models: modelIds,
161
- supportsReasoningEffort: p.supportsReasoningEffort,
162
- modelCapabilities: caps.size > 0 ? caps : undefined,
163
- });
164
- const buildReasoningParams = providerHooks.get(p.id)?.reasoningParams ?? defaultReasoningBuilder;
193
+ modelsExplicit: settings?.modelsExplicit ?? false,
194
+ contextWindow: settings?.contextWindow,
195
+ supportsReasoningEffort: settings?.supportsReasoningEffort ?? p.supportsReasoningEffort,
196
+ modelCapabilities: mergedCaps,
197
+ reasoningShape: settings?.reasoningShape,
198
+ };
199
+ providerRegistry.set(p.id, merged);
165
200
  const addModes = modelIds.map((m) => {
166
- const mc = caps.get(m);
201
+ const mc = mergedCaps?.get(m);
167
202
  return {
168
203
  model: m,
169
204
  provider: p.id,
170
- providerConfig: { apiKey: p.apiKey ?? "", baseURL: p.baseURL },
205
+ providerConfig: { apiKey: merged.apiKey ?? "", baseURL: merged.baseURL },
171
206
  contextWindow: mc?.contextWindow,
207
+ maxTokens: mc?.maxTokens,
172
208
  reasoning: mc?.reasoning,
173
- supportsReasoningEffort: p.supportsReasoningEffort,
209
+ supportsReasoningEffort: merged.supportsReasoningEffort,
174
210
  echoReasoning: mc?.echoReasoning,
175
- buildReasoningParams,
211
+ buildReasoningParams: bindReasoning(p.id, m),
176
212
  };
177
213
  });
178
214
  bus.emit("config:add-modes", { modes: addModes });
@@ -212,6 +248,7 @@ export default function agentBackend(ctx) {
212
248
  provider: name,
213
249
  providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
214
250
  contextWindow: mc?.contextWindow ?? p.contextWindow,
251
+ maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
215
252
  reasoning: mc?.reasoning,
216
253
  supportsReasoningEffort: p.supportsReasoningEffort,
217
254
  echoReasoning: mc?.echoReasoning,
@@ -3,10 +3,15 @@ export const BUILTIN_EXTENSIONS = [
3
3
  { name: "agent-backend", load: () => import("./agent-backend.js").then(m => m.default) },
4
4
  { name: "openrouter",
5
5
  when: () => !!process.env.OPENROUTER_API_KEY,
6
- load: () => import("./openrouter.js").then(m => m.default) },
6
+ load: () => import("./providers/openrouter.js").then(m => m.default) },
7
7
  { name: "openai",
8
- when: () => !!process.env.OPENAI_API_KEY,
9
- load: () => import("./openai.js").then(m => m.default) },
8
+ when: () => !!process.env.OPENAI_API_KEY && !process.env.OPENAI_BASE_URL,
9
+ load: () => import("./providers/openai.js").then(m => m.default) },
10
+ { name: "openai-compatible",
11
+ when: () => !!process.env.OPENAI_BASE_URL,
12
+ load: () => import("./providers/openai-compatible.js").then(m => m.default) },
13
+ { name: "deepseek",
14
+ load: () => import("./providers/deepseek.js").then(m => m.default) },
10
15
  { name: "tui-renderer", load: () => import("./tui-renderer.js").then(m => m.default) },
11
16
  { name: "slash-commands", load: () => import("./slash-commands.js").then(m => m.default) },
12
17
  { name: "file-autocomplete", load: () => import("./file-autocomplete.js").then(m => m.default) },
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Native DeepSeek (api.deepseek.com). V4 ignores reasoning_effort for
3
+ * on/off — disable lives in a separate `thinking` field that defaults
4
+ * to enabled. The hook always attaches; provider registration via env
5
+ * is opt-in alongside any settings.json entry.
6
+ */
7
+ import type { ExtensionContext } from "../../types.js";
8
+ export default function activate(ctx: ExtensionContext): void;
@@ -0,0 +1,23 @@
1
+ const BASE_URL = "https://api.deepseek.com";
2
+ const DEFAULT_MODELS = [
3
+ { id: "deepseek-v4-flash", reasoning: true, echoReasoning: true },
4
+ { id: "deepseek-v4-pro", reasoning: true, echoReasoning: true },
5
+ ];
6
/**
 * Build the DeepSeek request fields for a thinking level.
 *
 * "off" yields only a disabled `thinking` field; any other level yields
 * an enabled `thinking` field plus `reasoning_effort` set to that level.
 *
 * @param level  Thinking level ("off" or an effort name).
 * @param _model Model id; accepted for hook-signature parity, not used here.
 * @returns Extra request parameters to merge into the LLM call.
 */
function buildReasoningParams(level, _model) {
    if (level === "off") {
        return { thinking: { type: "disabled" } };
    }
    return { thinking: { type: "enabled" }, reasoning_effort: level };
}
11
/**
 * Extension entry point for the native DeepSeek provider.
 *
 * The reasoning-params hook is configured unconditionally. The provider
 * itself is registered on the bus only when DEEPSEEK_API_KEY is set in
 * the environment.
 *
 * @param ctx Extension context; uses `ctx.providers.configure` and `ctx.bus.emit`.
 */
export default function activate(ctx) {
    // Hook first: it attaches whether or not the env key is present.
    ctx.providers.configure("deepseek", { reasoningParams: buildReasoningParams });
    const { DEEPSEEK_API_KEY: apiKey } = process.env;
    if (apiKey) {
        const [firstModel] = DEFAULT_MODELS;
        ctx.bus.emit("provider:register", {
            id: "deepseek",
            apiKey,
            baseURL: BASE_URL,
            defaultModel: firstModel.id,
            models: DEFAULT_MODELS,
        });
    }
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * OpenAI Chat Completions-compatible local/3rd-party server (Ollama, LM
3
+ * Studio, vLLM, llama.cpp, …). No reasoning hook — the right shape depends
4
+ * on which model the server is serving; user extensions can add one.
5
+ */
6
+ import type { ExtensionContext } from "../../types.js";
7
+ export default function activate(ctx: ExtensionContext): void;
@@ -0,0 +1,30 @@
1
/**
 * Extension entry point for an OpenAI-compatible server selected via
 * OPENAI_BASE_URL.
 *
 * Does nothing when OPENAI_BASE_URL is unset. Otherwise it registers the
 * "openai-compatible" provider immediately with an empty model list,
 * then re-registers it with the fetched model ids once `fetchModels`
 * resolves with a non-empty list. Any failure in that follow-up is
 * deliberately ignored.
 *
 * @param ctx Extension context; uses `ctx.bus.emit`.
 */
export default function activate(ctx) {
    const { OPENAI_BASE_URL: baseURL } = process.env;
    if (baseURL) {
        const id = "openai-compatible";
        // Local servers often need no key; SDK still wants a non-empty string.
        const apiKey = process.env.OPENAI_API_KEY || "no-key";
        const register = (extra) => ctx.bus.emit("provider:register", { id, apiKey, baseURL, ...extra });
        register({ models: [] });
        const onModels = (models) => {
            if (models.length > 0) {
                register({ defaultModel: models[0], models });
            }
        };
        // Best-effort catalog refresh: errors are swallowed on purpose.
        const ignore = () => { };
        fetchModels(baseURL, apiKey).then(onModels).catch(ignore);
    }
}
21
+ async function fetchModels(baseURL, apiKey) {
22
+ const headers = {};
23
+ if (apiKey && apiKey !== "no-key")
24
+ headers.Authorization = `Bearer ${apiKey}`;
25
+ const res = await fetch(`${baseURL.replace(/\/$/, "")}/models`, { headers });
26
+ if (!res.ok)
27
+ return [];
28
+ const data = await res.json();
29
+ return (data.data ?? []).map((m) => m.id);
30
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Cloud OpenAI (api.openai.com). reasoning_effort vocabulary diverges per
3
+ * family: o-series has no off; gpt-5-codex floors at "low"; plain gpt-5
4
+ * floors at "minimal"; gpt-5.1+ accepts "none" as documented full off.
5
+ */
6
+ import type { ExtensionContext } from "../../types.js";
7
+ export default function activate(ctx: ExtensionContext): void;
@@ -0,0 +1,39 @@
1
+ const CLOUD_MODELS = [
2
+ { id: "gpt-5", reasoning: true },
3
+ { id: "gpt-4.1", reasoning: false },
4
+ { id: "gpt-4o", reasoning: false },
5
+ { id: "gpt-4o-mini", reasoning: false },
6
+ { id: "o3", reasoning: true },
7
+ { id: "o3-mini", reasoning: true },
8
+ ];
9
/**
 * Pick the `reasoning_effort` value that stands in for "off" on a given
 * OpenAI model id.
 *
 * Rules are checked in order and the first match wins:
 *   - ids starting with "o" + digit      -> null (nothing to send)
 *   - ids starting with "gpt-5-codex"    -> "low"
 *   - ids starting with "gpt-5." + 1..9  -> "none"
 *   - other ids starting with "gpt-5" not followed by "." -> "minimal"
 * Anything else yields null.
 *
 * @param model Model id.
 * @returns The substitute effort value, or null when none applies.
 */
function offEffortFor(model) {
    const rules = [
        { matches: (id) => /^o\d/.test(id), effort: null },
        { matches: (id) => id.startsWith("gpt-5-codex"), effort: "low" },
        { matches: (id) => /^gpt-5\.[1-9]/.test(id), effort: "none" },
        { matches: (id) => /^gpt-5(?!\.)/.test(id), effort: "minimal" },
    ];
    for (const rule of rules) {
        if (rule.matches(model)) {
            return rule.effort;
        }
    }
    return null;
}
20
/**
 * Build the OpenAI request fields for a thinking level.
 *
 * Any level other than "off" is passed straight through as
 * `reasoning_effort`. For "off", the model-specific substitute from
 * `offEffortFor` is sent when there is one; with no model id or no
 * substitute, no reasoning field is sent at all.
 *
 * @param level Thinking level ("off" or an effort name).
 * @param model Optional model id used to choose the "off" substitute.
 * @returns Extra request parameters to merge into the LLM call.
 */
function buildReasoningParams(level, model) {
    if (level === "off") {
        const substitute = model ? offEffortFor(model) : null;
        if (substitute) {
            return { reasoning_effort: substitute };
        }
        return {};
    }
    return { reasoning_effort: level };
}
26
/**
 * Extension entry point for the cloud OpenAI provider.
 *
 * Does nothing unless OPENAI_API_KEY is set and OPENAI_BASE_URL is not
 * (a base URL means the openai-compatible extension handles it).
 * Otherwise it configures the reasoning-params hook and registers the
 * "openai" provider with the built-in model list.
 *
 * @param ctx Extension context; uses `ctx.providers.configure` and `ctx.bus.emit`.
 */
export default function activate(ctx) {
    const { OPENAI_API_KEY: apiKey, OPENAI_BASE_URL: customBase } = process.env;
    // openai-compatible handles the custom-base-URL case.
    if (!apiKey || customBase) {
        return;
    }
    ctx.providers.configure("openai", { reasoningParams: buildReasoningParams });
    const [firstModel] = CLOUD_MODELS;
    ctx.bus.emit("provider:register", {
        id: "openai",
        apiKey,
        defaultModel: firstModel.id,
        models: CLOUD_MODELS,
    });
}
@@ -3,5 +3,5 @@
3
3
  * Registers curated defaults synchronously so the first query works, then
4
4
  * fetches the full catalog to populate /model autocomplete.
5
5
  */
6
- import type { ExtensionContext } from "../types.js";
6
+ import type { ExtensionContext } from "../../types.js";
7
7
  export default function activate(ctx: ExtensionContext): void;
@@ -1,4 +1,4 @@
1
- import { getSettings } from "../settings.js";
1
+ import { getSettings } from "../../settings.js";
2
2
  const BASE_URL = "https://openrouter.ai/api/v1";
3
3
  const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
4
4
  // Built-in defaults for models requiring reasoning_content echoed back
@@ -6,9 +6,11 @@ const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
6
6
  // providers.openrouter.echoReasoningPatterns = ["deepseek", "..."]
7
7
  // providers.openrouter.models[*].echoReasoning = true | false
8
8
  const BUILTIN_ECHO_REASONING_PATTERNS = [/deepseek/i];
9
- function buildReasoningParams(level) {
9
+ // `effort: "none"` is the documented disable; honored by OpenAI/Grok, ignored
10
+ // by Anthropic/Gemini/DeepSeek-via-OpenRouter (use native deepseek for a hard off).
11
+ function buildReasoningParams(level, _model) {
10
12
  return level === "off"
11
- ? { reasoning: { enabled: false } }
13
+ ? { reasoning: { effort: "none" } }
12
14
  : { reasoning: { effort: level } };
13
15
  }
14
16
  export default function activate(ctx) {
@@ -67,6 +67,8 @@ function createRenderState() {
67
67
  isThinking: false,
68
68
  showThinkingText: false,
69
69
  thinkingPending: false,
70
+ previewedDiffPending: false,
71
+ previewedDiffToolIds: new Set(),
70
72
  };
71
73
  }
72
74
  export default function activate(ctx) {
@@ -175,21 +177,20 @@ export default function activate(ctx) {
175
177
  s.thinkingPending = true;
176
178
  if (!s.isThinking) {
177
179
  s.isThinking = true;
178
- if (s.showThinkingText) {
179
- stopCurrentSpinner();
180
- if (!s.renderer)
181
- startAgentResponse();
182
- }
183
- else {
180
+ if (!s.showThinkingText)
184
181
  startThinkingSpinner();
185
- }
186
182
  }
187
- if (s.showThinkingText && e.text) {
188
- s.thinkingPending = false;
183
+ if (s.showThinkingText) {
184
+ stopCurrentSpinner();
189
185
  if (!s.renderer)
190
186
  startAgentResponse();
191
- s.renderer.push(`${p.dim}${e.text}${p.reset}`);
192
- drain();
187
+ if (e.text) {
188
+ s.thinkingPending = false;
189
+ // Wrap each sub-line so dim survives \n boundaries in the renderer.
190
+ const wrapped = `${p.dim}${e.text.replace(/\n/g, `${p.reset}\n${p.dim}`)}${p.reset}`;
191
+ s.renderer.push(wrapped);
192
+ drain();
193
+ }
193
194
  }
194
195
  });
195
196
  bus.on("agent:response-chunk", (e) => {
@@ -272,6 +273,10 @@ export default function activate(ctx) {
272
273
  s.currentToolKind = e.kind;
273
274
  s.toolStartTime = Date.now();
274
275
  s.orphanContHeaderKind = undefined;
276
+ if (s.previewedDiffPending && e.toolCallId) {
277
+ s.previewedDiffToolIds.add(e.toolCallId);
278
+ }
279
+ s.previewedDiffPending = false;
275
280
  if (e.title === "user_shell") {
276
281
  finalizeToolGroup();
277
282
  closeToolLine();
@@ -335,11 +340,18 @@ export default function activate(ctx) {
335
340
  s.toolExitCode = e.exitCode;
336
341
  if (e.exitCode !== 0)
337
342
  s.toolGroupAllOk = false;
343
+ let resultDisplay = e.resultDisplay;
344
+ if (e.toolCallId && s.previewedDiffToolIds.has(e.toolCallId)) {
345
+ s.previewedDiffToolIds.delete(e.toolCallId);
346
+ if (resultDisplay?.body?.kind === "diff") {
347
+ resultDisplay = { ...resultDisplay, body: undefined };
348
+ }
349
+ }
338
350
  if (s.toolGroupKind) {
339
351
  // Grouped tool — track success/failure and summaries, show aggregate on ⎿ line.
340
352
  // Don't restart spinner between grouped tools — it's already running from group start.
341
- if (e.resultDisplay?.summary)
342
- s.toolGroupSummaries.push(e.resultDisplay.summary);
353
+ if (resultDisplay?.summary)
354
+ s.toolGroupSummaries.push(resultDisplay.summary);
343
355
  if (e.toolCallId)
344
356
  s.pendingToolCompletes.delete(e.toolCallId);
345
357
  s.toolGroupCompletedCount++;
@@ -358,10 +370,10 @@ export default function activate(ctx) {
358
370
  if (pending)
359
371
  s.pendingToolCompletes.delete(e.toolCallId);
360
372
  if (pending?.orphaned) {
361
- showOrphanedComplete(e.exitCode, e.resultDisplay, pending.title, pending.kind, pending.displayDetail);
373
+ showOrphanedComplete(e.exitCode, resultDisplay, pending.title, pending.kind, pending.displayDetail);
362
374
  }
363
375
  else {
364
- showToolComplete(e.exitCode, e.resultDisplay, pending?.displayDetail ?? pending?.title);
376
+ showToolComplete(e.exitCode, resultDisplay, pending?.displayDetail ?? pending?.title);
365
377
  }
366
378
  s.currentToolKind = undefined;
367
379
  s.spinnerStartTime = 0;
@@ -432,6 +444,7 @@ export default function activate(ctx) {
432
444
  // Mark lastContentKind as "tool" so the tool call line that follows
433
445
  // doesn't inject an extra gap between the diff box and the checkmark.
434
446
  s.lastContentKind = "tool";
447
+ s.previewedDiffPending = true;
435
448
  }
436
449
  // Don't endAgentResponse() here — permission requests that aren't
437
450
  // file-write diffs are handled inline (auto-approved or by extensions).
@@ -654,26 +667,16 @@ export default function activate(ctx) {
654
667
  return [];
655
668
  const boxW = Math.min(120, width - 2);
656
669
  const contentW = boxW - 4;
657
- let body;
658
- if (diff.isNewFile) {
659
- const lines = diff.hunks.flatMap(h => h.lines.map(l => l.text));
660
- const preview = getSettings().newFilePreviewLines;
661
- const head = lines.slice(0, preview);
662
- const truncated = head.map(l => l.length > contentW ? l.slice(0, contentW - 1) + "…" : l);
663
- const more = lines.length > preview
664
- ? [`${p.dim}… ${lines.length - preview} more lines${p.reset}`]
665
- : [];
666
- body = ["", ...truncated, ...more, ""];
667
- }
668
- else {
669
- const diffLines = renderDiff(diff, {
670
- width: contentW,
671
- filePath,
672
- maxLines: getSettings().diffMaxLines,
673
- trueColor: true,
674
- });
675
- body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
676
- }
670
+ const maxLines = diff.isNewFile
671
+ ? getSettings().newFilePreviewLines
672
+ : getSettings().diffMaxLines;
673
+ const diffLines = renderDiff(diff, {
674
+ width: contentW,
675
+ filePath,
676
+ maxLines,
677
+ trueColor: true,
678
+ });
679
+ const body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
677
680
  return renderBoxFrame(body, {
678
681
  width: boxW,
679
682
  style: "rounded",
@@ -9,6 +9,8 @@ export interface ModelCapabilityConfig {
9
9
  reasoning?: boolean;
10
10
  /** Context window size in tokens for this specific model. */
11
11
  contextWindow?: number;
12
+ /** Max output tokens for this model. */
13
+ maxTokens?: number;
12
14
  /** Echo reasoning_content back on assistant turns. Required by DeepSeek. */
13
15
  echoReasoning?: boolean;
14
16
  }
@@ -141,6 +143,8 @@ export interface ResolvedProvider {
141
143
  baseURL?: string;
142
144
  defaultModel?: string;
143
145
  models: string[];
146
+ /** User explicitly listed `models` (locks the catalog to that list). */
147
+ modelsExplicit: boolean;
144
148
  contextWindow?: number;
145
149
  /** Provider supports the reasoning_effort parameter. Default: true. */
146
150
  supportsReasoningEffort?: boolean;
@@ -148,6 +152,7 @@ export interface ResolvedProvider {
148
152
  modelCapabilities?: Map<string, {
149
153
  reasoning?: boolean;
150
154
  contextWindow?: number;
155
+ maxTokens?: number;
151
156
  echoReasoning?: boolean;
152
157
  }>;
153
158
  /** Borrow another registered provider's reasoning request shape by id. */
package/dist/settings.js CHANGED
@@ -148,8 +148,8 @@ export function resolveProvider(name) {
148
148
  }
149
149
  else {
150
150
  modelIds.push(m.id);
151
- if (m.reasoning !== undefined || m.contextWindow !== undefined || m.echoReasoning !== undefined) {
152
- caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, echoReasoning: m.echoReasoning });
151
+ if (m.reasoning !== undefined || m.contextWindow !== undefined || m.maxTokens !== undefined || m.echoReasoning !== undefined) {
152
+ caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
153
153
  }
154
154
  }
155
155
  }
@@ -160,6 +160,7 @@ export function resolveProvider(name) {
160
160
  baseURL: provider.baseURL,
161
161
  defaultModel,
162
162
  models: modelIds.length ? modelIds : (defaultModel ? [defaultModel] : []),
163
+ modelsExplicit: Array.isArray(provider.models),
163
164
  contextWindow: provider.contextWindow,
164
165
  modelCapabilities: caps.size > 0 ? caps : undefined,
165
166
  reasoningShape: provider.reasoningShape,
package/dist/types.d.ts CHANGED
@@ -41,6 +41,8 @@ export interface AgentMode {
41
41
  };
42
42
  /** Context window size in tokens (for usage display). */
43
43
  contextWindow?: number;
44
+ /** Max output tokens for this mode. */
45
+ maxTokens?: number;
44
46
  /** Model supports reasoning/thinking tokens. */
45
47
  reasoning?: boolean;
46
48
  /** Provider supports the reasoning_effort parameter. */
@@ -65,14 +67,27 @@ export interface LlmSession {
65
67
  }
66
68
  export interface LlmInterface {
67
69
  readonly available: boolean;
70
+ /** `model` overrides the globally-configured model for this call only.
71
+ * Provider-specific identifier (e.g. "claude-haiku-4-5"). When omitted,
72
+ * the active provider's configured default is used.
73
+ *
74
+ * `reasoningEffort` controls thinking-model token allocation between
75
+ * reasoning and final content (e.g. "low", "medium", "high", or
76
+ * provider-specific). For non-reasoning models it is ignored. Set to
77
+ * "low" for cheap structured-output calls so reasoning doesn't exhaust
78
+ * the max-tokens budget and leave content empty. */
68
79
  ask(opts: {
69
80
  query: string;
70
81
  system?: string;
71
82
  maxTokens?: number;
83
+ model?: string;
84
+ reasoningEffort?: string;
72
85
  }): Promise<string>;
73
86
  session(opts?: {
74
87
  system?: string;
75
88
  maxTokens?: number;
89
+ model?: string;
90
+ reasoningEffort?: string;
76
91
  }): LlmSession;
77
92
  }
78
93
  export interface AgentShellConfig {
@@ -156,7 +171,7 @@ export interface ExtensionContext {
156
171
  }) => () => void;
157
172
  providers: {
158
173
  configure: (id: string, opts: {
159
- reasoningParams?: (level: string) => Record<string, unknown>;
174
+ reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
160
175
  }) => void;
161
176
  };
162
177
  llm: LlmInterface;
@@ -5,7 +5,7 @@
5
5
  * never writes to stdout. Supports multiple border styles and
6
6
  * optional title/footer sections with dividers.
7
7
  */
8
- import { visibleLen, truncateToWidth } from "./ansi.js";
8
+ import { visibleLen, truncateToWidth, truncateAnsiToWidth } from "./ansi.js";
9
9
  import { palette as p } from "./palette.js";
10
10
  const BORDERS = {
11
11
  rounded: { tl: "╭", tr: "╮", bl: "╰", br: "╯", h: "─", v: "│", ml: "├", mr: "┤" },
@@ -32,14 +32,20 @@ export function renderBoxFrame(content, opts) {
32
32
  const output = [];
33
33
  // Top border (with optional left/right titles)
34
34
  if (opts.title || opts.titleRight) {
35
- const leftPart = opts.title
36
- ? `${p.reset} ${opts.title} ${bc}`
37
- : "";
38
- const leftVis = opts.title ? visibleLen(opts.title) + 2 : 0; // +2 for spaces
39
- const rightPart = opts.titleRight
40
- ? `${p.reset} ${opts.titleRight} ${bc}`
41
- : "";
35
+ // Budget: 2 corners + 1 minimum dash + space-padding around each title.
36
+ // Truncate the left title first if combined widths overflow — titleRight
37
+ // is typically short metadata (model name, stats) worth preserving.
38
+ let title = opts.title;
42
39
  const rightVis = opts.titleRight ? visibleLen(opts.titleRight) + 2 : 0;
40
+ const leftBudget = width - 2 - 1 - rightVis; // total - corners - min dash - right
41
+ let leftVis = title ? visibleLen(title) + 2 : 0;
42
+ if (title && leftVis > leftBudget) {
43
+ const maxTitleVis = Math.max(1, leftBudget - 2);
44
+ title = truncateAnsiToWidth(title, maxTitleVis);
45
+ leftVis = visibleLen(title) + 2;
46
+ }
47
+ const leftPart = title ? `${p.reset} ${title} ${bc}` : "";
48
+ const rightPart = opts.titleRight ? `${p.reset} ${opts.titleRight} ${bc}` : "";
43
49
  const dashCount = Math.max(1, width - 2 - leftVis - rightVis);
44
50
  output.push(`${bc}${b.tl}${leftPart}${b.h.repeat(dashCount)}${rightPart}${b.tr}${p.reset}`);
45
51
  }
@@ -33,7 +33,8 @@ export declare class LlmClient {
33
33
  tools?: ChatCompletionTool[];
34
34
  model?: string;
35
35
  max_tokens?: number;
36
- /** Reasoning effort level (e.g. "low", "medium", "high"). Provider-dependent. */
36
+ /** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
37
+ * "off" matches agent-loop's thinkingLevel and omits the field. */
37
38
  reasoning_effort?: string;
38
39
  signal?: AbortSignal;
39
40
  }): import("openai").APIPromise<import("openai/core/streaming.mjs").Stream<OpenAI.Chat.Completions.ChatCompletionChunk>>;
@@ -45,5 +46,8 @@ export declare class LlmClient {
45
46
  messages: ChatCompletionMessageParam[];
46
47
  model?: string;
47
48
  max_tokens?: number;
49
+ /** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
50
+ * "off" matches agent-loop's thinkingLevel and omits the field. */
51
+ reasoning_effort?: string;
48
52
  }): Promise<string>;
49
53
  }
@@ -40,14 +40,15 @@ export class LlmClient {
40
40
  * Returns an async iterable of chunks.
41
41
  */
42
42
  stream(opts) {
43
+ const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
43
44
  const body = {
44
45
  model: opts.model ?? this.model,
45
46
  messages: opts.messages,
46
47
  tools: opts.tools?.length ? opts.tools : undefined,
47
- max_tokens: opts.max_tokens ?? 8192,
48
+ max_tokens: opts.max_tokens ?? 65536,
48
49
  stream: true,
49
50
  stream_options: { include_usage: true },
50
- ...(opts.reasoning_effort
51
+ ...(sendEffort
51
52
  ? { reasoning_effort: opts.reasoning_effort }
52
53
  : {}),
53
54
  };
@@ -58,10 +59,14 @@ export class LlmClient {
58
59
  * Returns the text content of the first choice.
59
60
  */
60
61
  async complete(opts) {
62
+ const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
61
63
  const response = await this.client.chat.completions.create({
62
64
  model: opts.model ?? this.model,
63
65
  messages: opts.messages,
64
66
  max_tokens: opts.max_tokens ?? 1024,
67
+ ...(sendEffort
68
+ ? { reasoning_effort: opts.reasoning_effort }
69
+ : {}),
65
70
  });
66
71
  return response.choices[0]?.message?.content ?? "";
67
72
  }
@@ -1,18 +1,18 @@
1
1
  export function createLlmFacade(handlers) {
2
- const invoke = (messages, maxTokens) => {
3
- const result = handlers.call("llm:invoke", messages, { maxTokens });
2
+ const invoke = (messages, maxTokens, model, reasoningEffort) => {
3
+ const result = handlers.call("llm:invoke", messages, { maxTokens, model, reasoningEffort });
4
4
  if (result === undefined)
5
5
  return Promise.reject(new Error("ctx.llm: no LLM backend available"));
6
6
  return result;
7
7
  };
8
8
  return {
9
9
  get available() { return handlers.list().includes("llm:invoke"); },
10
- ask: ({ query, system, maxTokens }) => {
10
+ ask: ({ query, system, maxTokens, model, reasoningEffort }) => {
11
11
  const messages = [];
12
12
  if (system)
13
13
  messages.push({ role: "system", content: system });
14
14
  messages.push({ role: "user", content: query });
15
- return invoke(messages, maxTokens);
15
+ return invoke(messages, maxTokens, model, reasoningEffort);
16
16
  },
17
17
  session: (opts = {}) => {
18
18
  const messages = [];
@@ -21,7 +21,7 @@ export function createLlmFacade(handlers) {
21
21
  const session = {
22
22
  async send(message) {
23
23
  messages.push({ role: "user", content: message });
24
- const reply = await invoke(messages, opts.maxTokens);
24
+ const reply = await invoke(messages, opts.maxTokens, opts.model, opts.reasoningEffort);
25
25
  messages.push({ role: "assistant", content: reply });
26
26
  return reply;
27
27
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-sh",
3
- "version": "0.12.19",
3
+ "version": "0.12.21",
4
4
  "description": "A shell-first terminal where AI is one keystroke away",
5
5
  "type": "module",
6
6
  "main": "dist/core.js",
@@ -1,9 +0,0 @@
1
- /**
2
- * Built-in OpenAI-compatible provider. Two activation paths:
3
- * - OPENAI_API_KEY only → cloud OpenAI, ships a curated catalog.
4
- * - OPENAI_BASE_URL (any key) → local/3rd-party server (Ollama, LM Studio,
5
- * vLLM, llama.cpp); the catalog is fetched
6
- * from the server's /models endpoint.
7
- */
8
- import type { ExtensionContext } from "../types.js";
9
- export default function activate(ctx: ExtensionContext): void;
@@ -1,49 +0,0 @@
1
- const OPENAI_CLOUD_MODELS = [
2
- { id: "gpt-5", reasoning: true },
3
- { id: "gpt-4.1", reasoning: false },
4
- { id: "gpt-4o", reasoning: false },
5
- { id: "gpt-4o-mini", reasoning: false },
6
- { id: "o3", reasoning: true },
7
- { id: "o3-mini", reasoning: true },
8
- ];
9
- export default function activate(ctx) {
10
- const apiKey = process.env.OPENAI_API_KEY ?? "";
11
- const baseURL = process.env.OPENAI_BASE_URL;
12
- if (!baseURL) {
13
- if (!apiKey)
14
- return;
15
- ctx.bus.emit("provider:register", {
16
- id: "openai",
17
- apiKey,
18
- defaultModel: OPENAI_CLOUD_MODELS[0].id,
19
- models: OPENAI_CLOUD_MODELS,
20
- });
21
- return;
22
- }
23
- const id = "openai-compatible";
24
- // Local servers (Ollama, llama.cpp) often need no key; the SDK still
25
- // requires a non-empty string for construction.
26
- const sdkKey = apiKey || "no-key";
27
- ctx.bus.emit("provider:register", { id, apiKey: sdkKey, baseURL, models: [] });
28
- fetchModels(baseURL, apiKey).then((models) => {
29
- if (models.length === 0)
30
- return;
31
- ctx.bus.emit("provider:register", {
32
- id,
33
- apiKey: sdkKey,
34
- baseURL,
35
- defaultModel: models[0],
36
- models,
37
- });
38
- }).catch(() => { });
39
- }
40
- async function fetchModels(baseURL, apiKey) {
41
- const headers = {};
42
- if (apiKey)
43
- headers.Authorization = `Bearer ${apiKey}`;
44
- const res = await fetch(`${baseURL.replace(/\/$/, "")}/models`, { headers });
45
- if (!res.ok)
46
- return [];
47
- const data = await res.json();
48
- return (data.data ?? []).map((m) => m.id);
49
- }