agent-sh 0.12.19 → 0.12.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -1
- package/dist/agent/agent-loop.js +6 -0
- package/dist/agent/normalize-args.d.ts +29 -0
- package/dist/agent/normalize-args.js +56 -0
- package/dist/agent/subagent.js +2 -0
- package/dist/event-bus.d.ts +3 -1
- package/dist/extensions/agent-backend.js +58 -21
- package/dist/extensions/index.js +8 -3
- package/dist/extensions/providers/deepseek.d.ts +8 -0
- package/dist/extensions/providers/deepseek.js +23 -0
- package/dist/extensions/providers/openai-compatible.d.ts +7 -0
- package/dist/extensions/providers/openai-compatible.js +30 -0
- package/dist/extensions/providers/openai.d.ts +7 -0
- package/dist/extensions/providers/openai.js +39 -0
- package/dist/extensions/{openrouter.d.ts → providers/openrouter.d.ts} +1 -1
- package/dist/extensions/{openrouter.js → providers/openrouter.js} +5 -3
- package/dist/extensions/tui-renderer.js +38 -35
- package/dist/settings.d.ts +5 -0
- package/dist/settings.js +3 -2
- package/dist/types.d.ts +16 -1
- package/dist/utils/box-frame.js +14 -8
- package/dist/utils/llm-client.d.ts +5 -1
- package/dist/utils/llm-client.js +7 -2
- package/dist/utils/llm-facade.js +5 -5
- package/package.json +1 -1
- package/dist/extensions/openai.d.ts +0 -9
- package/dist/extensions/openai.js +0 -49
package/README.md
CHANGED
|
@@ -57,14 +57,22 @@ export OPENAI_API_KEY=sk-...
|
|
|
57
57
|
agent-sh
|
|
58
58
|
```
|
|
59
59
|
|
|
60
|
+
**DeepSeek:**
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
export DEEPSEEK_API_KEY=sk-...
|
|
64
|
+
agent-sh
|
|
65
|
+
```
|
|
66
|
+
|
|
60
67
|
**Local models** (Ollama, llama.cpp server, LM Studio, vLLM — anything OpenAI-compatible):
|
|
61
68
|
|
|
62
69
|
```bash
|
|
63
|
-
export OPENAI_API_KEY=ollama # any value; dummy is fine
|
|
64
70
|
export OPENAI_BASE_URL=http://localhost:11434/v1 # point at your server
|
|
65
71
|
agent-sh
|
|
66
72
|
```
|
|
67
73
|
|
|
74
|
+
Set `OPENAI_API_KEY` too if your server requires auth.
|
|
75
|
+
|
|
68
76
|
Once running, switch models at any time with `/model <name>` (tab-completes; selection persists across sessions).
|
|
69
77
|
|
|
70
78
|
For richer configuration (multiple providers, extensions), run `agent-sh init` to scaffold `~/.agent-sh/settings.json` with copy-pasteable examples. See the [Usage Guide](docs/usage.md) for the full list of supported providers.
|
package/dist/agent/agent-loop.js
CHANGED
|
@@ -4,6 +4,7 @@ import * as path from "node:path";
|
|
|
4
4
|
import * as os from "node:os";
|
|
5
5
|
import { computeDiff, computeEditDiff, computeInputDiff } from "../utils/diff.js";
|
|
6
6
|
import { ToolRegistry } from "./tool-registry.js";
|
|
7
|
+
import { normalizeToolArgs } from "./normalize-args.js";
|
|
7
8
|
import { ConversationState } from "./conversation-state.js";
|
|
8
9
|
import { HistoryFile } from "./history-file.js";
|
|
9
10
|
import { nucleate, formatNuclearLine, isReadOnly } from "./nuclear-form.js";
|
|
@@ -1188,6 +1189,10 @@ export class AgentLoop {
|
|
|
1188
1189
|
});
|
|
1189
1190
|
return;
|
|
1190
1191
|
}
|
|
1192
|
+
// Normalize against the tool's input_schema: some LLMs stringify
|
|
1193
|
+
// nested object/array args despite the schema. See
|
|
1194
|
+
// normalize-args.ts for the diagnostic that uncovered this.
|
|
1195
|
+
args = normalizeToolArgs(args, tool.input_schema);
|
|
1191
1196
|
// ── Round-scoped cache for cacheable read-only tools ──
|
|
1192
1197
|
const cacheable = !tool.modifiesFiles && !tool.requiresPermission && tool.showOutput !== true;
|
|
1193
1198
|
const cacheKey = cacheable ? `${tc.name}:${JSON.stringify(args)}` : null;
|
|
@@ -1527,6 +1532,7 @@ export class AgentLoop {
|
|
|
1527
1532
|
messages,
|
|
1528
1533
|
tools: apiTools,
|
|
1529
1534
|
model: this.currentModel,
|
|
1535
|
+
max_tokens: this.currentMode.maxTokens ?? 65536,
|
|
1530
1536
|
...this.reasoningParams(),
|
|
1531
1537
|
};
|
|
1532
1538
|
this.bus.emit("llm:request", requestParams);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema-aware tool-arg normalization.
|
|
3
|
+
*
|
|
4
|
+
* Some LLMs (notably Claude) occasionally emit nested object/array
|
|
5
|
+
* tool-call arguments as JSON-encoded strings instead of native
|
|
6
|
+
* objects, despite the schema declaring `type: "object"` /
|
|
7
|
+
* `type: "array"`. The discrepancy was diagnosed by the superash field
|
|
8
|
+
* test (2026-05-03 / commit `b9efd47`):
|
|
9
|
+
*
|
|
10
|
+
* describe_demos: 'task' arrived as a string (length 1267)
|
|
11
|
+
* last char code: 93 (']')
|
|
12
|
+
* truncation suspected: true
|
|
13
|
+
*
|
|
14
|
+
* Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
|
|
15
|
+
* helper centralizes the fix at the kernel boundary: after parsing the
|
|
16
|
+
* outer `argumentsJson`, walk each top-level field; for any field whose
|
|
17
|
+
* schema declares `object` or `array` but whose value is a string, run
|
|
18
|
+
* a single JSON.parse pass. On parse failure (e.g. truncated content),
|
|
19
|
+
* the string is left as-is — the tool can produce a clean error.
|
|
20
|
+
*
|
|
21
|
+
* Top-level only by design. Recursing into nested object schemas would
|
|
22
|
+
* change semantics for tools that legitimately accept stringified
|
|
23
|
+
* payloads as inner fields, and the observed wild cases all stringify
|
|
24
|
+
* at the top level.
|
|
25
|
+
*/
|
|
26
|
+
/** Normalize tool-call args against the tool's input_schema. Pure: does
|
|
27
|
+
* not mutate `args`. Returns a new object with stringified-then-decoded
|
|
28
|
+
* fields swapped in where applicable. */
|
|
29
|
+
export declare function normalizeToolArgs(args: Record<string, unknown>, schema: unknown): Record<string, unknown>;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema-aware tool-arg normalization.
|
|
3
|
+
*
|
|
4
|
+
* Some LLMs (notably Claude) occasionally emit nested object/array
|
|
5
|
+
* tool-call arguments as JSON-encoded strings instead of native
|
|
6
|
+
* objects, despite the schema declaring `type: "object"` /
|
|
7
|
+
* `type: "array"`. The discrepancy was diagnosed by the superash field
|
|
8
|
+
* test (2026-05-03 / commit `b9efd47`):
|
|
9
|
+
*
|
|
10
|
+
* describe_demos: 'task' arrived as a string (length 1267)
|
|
11
|
+
* last char code: 93 (']')
|
|
12
|
+
* truncation suspected: true
|
|
13
|
+
*
|
|
14
|
+
* Tool handlers downstream had to add ad-hoc JSON.parse fallbacks. This
|
|
15
|
+
* helper centralizes the fix at the kernel boundary: after parsing the
|
|
16
|
+
* outer `argumentsJson`, walk each top-level field; for any field whose
|
|
17
|
+
* schema declares `object` or `array` but whose value is a string, run
|
|
18
|
+
* a single JSON.parse pass. On parse failure (e.g. truncated content),
|
|
19
|
+
* the string is left as-is — the tool can produce a clean error.
|
|
20
|
+
*
|
|
21
|
+
* Top-level only by design. Recursing into nested object schemas would
|
|
22
|
+
* change semantics for tools that legitimately accept stringified
|
|
23
|
+
* payloads as inner fields, and the observed wild cases all stringify
|
|
24
|
+
* at the top level.
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Normalize tool-call args against the tool's input_schema.
 *
 * For every top-level property whose schema `type` is (or, for a union,
 * includes) "object" or "array", a string value is run through a single
 * JSON.parse pass. The decoded value is swapped in only when it really is
 * one of the declared structured types; anything else (parse failure,
 * `"null"`, a bare number, an array where an object was declared, ...)
 * leaves the original string untouched so the tool can report a clean error.
 *
 * Pure: never mutates `args`. Returns `args` itself when nothing was decoded,
 * otherwise a shallow copy with the decoded fields replaced.
 *
 * @param args   Parsed tool-call arguments. Non-object values are returned as-is.
 * @param schema The tool's input_schema; ignored unless it has an object `properties`.
 * @returns The normalized argument object.
 */
export function normalizeToolArgs(args, schema) {
    // Nothing to walk if the outer arguments did not decode to an object.
    if (!args || typeof args !== "object")
        return args;
    if (!schema || typeof schema !== "object")
        return args;
    const properties = schema.properties;
    if (!properties || typeof properties !== "object")
        return args;
    const decoded = [];
    for (const [field, fieldSchema] of Object.entries(properties)) {
        if (!fieldSchema || typeof fieldSchema !== "object")
            continue;
        // JSON Schema allows `type` to be a single name or an array of names.
        const declared = Array.isArray(fieldSchema.type) ? fieldSchema.type : [fieldSchema.type];
        const wantsObject = declared.includes("object");
        const wantsArray = declared.includes("array");
        if (!wantsObject && !wantsArray)
            continue;
        // A union that also permits strings makes the raw string a valid value.
        if (declared.includes("string"))
            continue;
        const value = args[field];
        if (typeof value !== "string")
            continue;
        let parsed;
        try {
            parsed = JSON.parse(value);
        }
        catch {
            // Leave as string — downstream tool can produce a useful error.
            continue;
        }
        const isArray = Array.isArray(parsed);
        const isPlainObject = parsed !== null && typeof parsed === "object" && !isArray;
        if ((wantsArray && isArray) || (wantsObject && isPlainObject))
            decoded.push([field, parsed]);
    }
    // Object.fromEntries + spread define own properties, so a field named
    // "__proto__" cannot rewrite the result's prototype.
    return decoded.length > 0 ? { ...args, ...Object.fromEntries(decoded) } : args;
}
|
package/dist/agent/subagent.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ConversationState } from "./conversation-state.js";
|
|
2
|
+
import { normalizeToolArgs } from "./normalize-args.js";
|
|
2
3
|
import { wrapTrailingWithDynamicContext } from "../utils/message-utils.js";
|
|
3
4
|
/**
|
|
4
5
|
* Run a subagent to completion.
|
|
@@ -56,6 +57,7 @@ export async function runSubagent(opts) {
|
|
|
56
57
|
conversation.addToolResult(tc.id, `Error: Invalid JSON arguments for ${tc.name}`, true);
|
|
57
58
|
continue;
|
|
58
59
|
}
|
|
60
|
+
args = normalizeToolArgs(args, tool.input_schema);
|
|
59
61
|
// Emit tool events for TUI (if bus provided)
|
|
60
62
|
if (bus) {
|
|
61
63
|
const display = tool.getDisplayInfo?.(args) ?? { kind: "execute" };
|
package/dist/event-bus.d.ts
CHANGED
|
@@ -69,6 +69,7 @@ export interface ShellEvents {
|
|
|
69
69
|
messages: unknown[];
|
|
70
70
|
tools?: unknown;
|
|
71
71
|
model?: string;
|
|
72
|
+
max_tokens?: number;
|
|
72
73
|
reasoning_effort?: string;
|
|
73
74
|
};
|
|
74
75
|
"llm:chunk": {
|
|
@@ -318,6 +319,7 @@ export interface ShellEvents {
|
|
|
318
319
|
id: string;
|
|
319
320
|
reasoning?: boolean;
|
|
320
321
|
contextWindow?: number;
|
|
322
|
+
maxTokens?: number;
|
|
321
323
|
echoReasoning?: boolean;
|
|
322
324
|
})[];
|
|
323
325
|
/** Provider supports the reasoning_effort parameter. Default: true. */
|
|
@@ -325,7 +327,7 @@ export interface ShellEvents {
|
|
|
325
327
|
};
|
|
326
328
|
"provider:configure": {
|
|
327
329
|
id: string;
|
|
328
|
-
reasoningParams?: (level: string) => Record<string, unknown>;
|
|
330
|
+
reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
|
|
329
331
|
};
|
|
330
332
|
"agent:register-tool": {
|
|
331
333
|
tool: import("./agent/types.js").ToolDefinition;
|
|
@@ -11,24 +11,50 @@ function persistedModelFor(providerName) {
|
|
|
11
11
|
function defaultReasoningBuilder(level) {
|
|
12
12
|
return level === "off" ? {} : { reasoning_effort: level };
|
|
13
13
|
}
|
|
14
|
+
function mergeCaps(settingsCaps, payloadCaps, modelIds) {
|
|
15
|
+
if (!settingsCaps)
|
|
16
|
+
return payloadCaps.size > 0 ? payloadCaps : undefined;
|
|
17
|
+
const out = new Map();
|
|
18
|
+
for (const id of modelIds) {
|
|
19
|
+
const s = settingsCaps.get(id);
|
|
20
|
+
const p = payloadCaps.get(id);
|
|
21
|
+
if (!s && !p)
|
|
22
|
+
continue;
|
|
23
|
+
out.set(id, {
|
|
24
|
+
reasoning: s?.reasoning ?? p?.reasoning,
|
|
25
|
+
contextWindow: s?.contextWindow ?? p?.contextWindow,
|
|
26
|
+
maxTokens: s?.maxTokens ?? p?.maxTokens,
|
|
27
|
+
echoReasoning: s?.echoReasoning ?? p?.echoReasoning,
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
return out.size > 0 ? out : undefined;
|
|
31
|
+
}
|
|
14
32
|
export default function agentBackend(ctx) {
|
|
15
33
|
const { bus } = ctx;
|
|
16
34
|
const config = ctx.call("config:get-shell-config") ?? {};
|
|
17
|
-
//
|
|
35
|
+
// Immutable settings snapshot; provider:register payloads merge against it.
|
|
18
36
|
const providerRegistry = new Map();
|
|
37
|
+
const settingsProviders = new Map();
|
|
19
38
|
for (const name of getProviderNames()) {
|
|
20
39
|
const p = resolveProvider(name);
|
|
21
|
-
if (p)
|
|
40
|
+
if (p) {
|
|
22
41
|
providerRegistry.set(name, p);
|
|
42
|
+
settingsProviders.set(name, p);
|
|
43
|
+
}
|
|
23
44
|
}
|
|
24
45
|
const providerHooks = new Map();
|
|
46
|
+
// Bakes model id into the hook so AgentMode.buildReasoningParams keeps
|
|
47
|
+
// its (level) signature while the hook can branch on model.
|
|
48
|
+
const bindReasoning = (shapeId, model) => {
|
|
49
|
+
const hook = providerHooks.get(shapeId)?.reasoningParams;
|
|
50
|
+
return hook ? (level) => hook(level, model) : defaultReasoningBuilder;
|
|
51
|
+
};
|
|
25
52
|
const buildModes = () => {
|
|
26
53
|
const allModes = [];
|
|
27
54
|
for (const [id, p] of providerRegistry) {
|
|
28
55
|
if (!p.apiKey)
|
|
29
56
|
continue;
|
|
30
57
|
const shapeId = p.reasoningShape ?? id;
|
|
31
|
-
const buildReasoningParams = providerHooks.get(shapeId)?.reasoningParams ?? defaultReasoningBuilder;
|
|
32
58
|
for (const model of p.models) {
|
|
33
59
|
const mc = p.modelCapabilities?.get(model);
|
|
34
60
|
allModes.push({
|
|
@@ -36,10 +62,11 @@ export default function agentBackend(ctx) {
|
|
|
36
62
|
provider: id,
|
|
37
63
|
providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
|
|
38
64
|
contextWindow: mc?.contextWindow ?? p.contextWindow,
|
|
65
|
+
maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
|
|
39
66
|
reasoning: mc?.reasoning,
|
|
40
67
|
supportsReasoningEffort: p.supportsReasoningEffort,
|
|
41
68
|
echoReasoning: mc?.echoReasoning,
|
|
42
|
-
buildReasoningParams,
|
|
69
|
+
buildReasoningParams: bindReasoning(shapeId, model),
|
|
43
70
|
});
|
|
44
71
|
}
|
|
45
72
|
}
|
|
@@ -54,6 +81,8 @@ export default function agentBackend(ctx) {
|
|
|
54
81
|
return llmClient.complete({
|
|
55
82
|
messages: messages,
|
|
56
83
|
max_tokens: opts?.maxTokens,
|
|
84
|
+
model: opts?.model,
|
|
85
|
+
reasoning_effort: opts?.reasoningEffort,
|
|
57
86
|
});
|
|
58
87
|
});
|
|
59
88
|
let modes = [];
|
|
@@ -141,38 +170,45 @@ export default function agentBackend(ctx) {
|
|
|
141
170
|
});
|
|
142
171
|
bus.on("provider:register", (p) => {
|
|
143
172
|
const rawModels = p.models ?? (p.defaultModel ? [p.defaultModel] : []);
|
|
144
|
-
const
|
|
145
|
-
const
|
|
173
|
+
const payloadModelIds = [];
|
|
174
|
+
const payloadCaps = new Map();
|
|
146
175
|
for (const m of rawModels) {
|
|
147
176
|
if (typeof m === "string") {
|
|
148
|
-
|
|
177
|
+
payloadModelIds.push(m);
|
|
149
178
|
}
|
|
150
179
|
else {
|
|
151
|
-
|
|
152
|
-
|
|
180
|
+
payloadModelIds.push(m.id);
|
|
181
|
+
payloadCaps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
|
|
153
182
|
}
|
|
154
183
|
}
|
|
155
|
-
|
|
184
|
+
const settings = settingsProviders.get(p.id);
|
|
185
|
+
const modelIds = settings?.modelsExplicit && settings.models.length > 0 ? settings.models : payloadModelIds;
|
|
186
|
+
const mergedCaps = mergeCaps(settings?.modelCapabilities, payloadCaps, modelIds);
|
|
187
|
+
const merged = {
|
|
156
188
|
id: p.id,
|
|
157
|
-
apiKey: p.apiKey,
|
|
158
|
-
baseURL: p.baseURL,
|
|
159
|
-
defaultModel: p.defaultModel,
|
|
189
|
+
apiKey: settings?.apiKey ?? p.apiKey,
|
|
190
|
+
baseURL: settings?.baseURL ?? p.baseURL,
|
|
191
|
+
defaultModel: settings?.defaultModel ?? p.defaultModel,
|
|
160
192
|
models: modelIds,
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
193
|
+
modelsExplicit: settings?.modelsExplicit ?? false,
|
|
194
|
+
contextWindow: settings?.contextWindow,
|
|
195
|
+
supportsReasoningEffort: settings?.supportsReasoningEffort ?? p.supportsReasoningEffort,
|
|
196
|
+
modelCapabilities: mergedCaps,
|
|
197
|
+
reasoningShape: settings?.reasoningShape,
|
|
198
|
+
};
|
|
199
|
+
providerRegistry.set(p.id, merged);
|
|
165
200
|
const addModes = modelIds.map((m) => {
|
|
166
|
-
const mc =
|
|
201
|
+
const mc = mergedCaps?.get(m);
|
|
167
202
|
return {
|
|
168
203
|
model: m,
|
|
169
204
|
provider: p.id,
|
|
170
|
-
providerConfig: { apiKey:
|
|
205
|
+
providerConfig: { apiKey: merged.apiKey ?? "", baseURL: merged.baseURL },
|
|
171
206
|
contextWindow: mc?.contextWindow,
|
|
207
|
+
maxTokens: mc?.maxTokens,
|
|
172
208
|
reasoning: mc?.reasoning,
|
|
173
|
-
supportsReasoningEffort:
|
|
209
|
+
supportsReasoningEffort: merged.supportsReasoningEffort,
|
|
174
210
|
echoReasoning: mc?.echoReasoning,
|
|
175
|
-
buildReasoningParams,
|
|
211
|
+
buildReasoningParams: bindReasoning(p.id, m),
|
|
176
212
|
};
|
|
177
213
|
});
|
|
178
214
|
bus.emit("config:add-modes", { modes: addModes });
|
|
@@ -212,6 +248,7 @@ export default function agentBackend(ctx) {
|
|
|
212
248
|
provider: name,
|
|
213
249
|
providerConfig: { apiKey: p.apiKey, baseURL: p.baseURL },
|
|
214
250
|
contextWindow: mc?.contextWindow ?? p.contextWindow,
|
|
251
|
+
maxTokens: mc?.maxTokens ?? (mc?.contextWindow ? Math.min(Math.floor(mc.contextWindow * 0.4), 65536) : undefined),
|
|
215
252
|
reasoning: mc?.reasoning,
|
|
216
253
|
supportsReasoningEffort: p.supportsReasoningEffort,
|
|
217
254
|
echoReasoning: mc?.echoReasoning,
|
package/dist/extensions/index.js
CHANGED
|
@@ -3,10 +3,15 @@ export const BUILTIN_EXTENSIONS = [
|
|
|
3
3
|
{ name: "agent-backend", load: () => import("./agent-backend.js").then(m => m.default) },
|
|
4
4
|
{ name: "openrouter",
|
|
5
5
|
when: () => !!process.env.OPENROUTER_API_KEY,
|
|
6
|
-
load: () => import("./openrouter.js").then(m => m.default) },
|
|
6
|
+
load: () => import("./providers/openrouter.js").then(m => m.default) },
|
|
7
7
|
{ name: "openai",
|
|
8
|
-
when: () => !!process.env.OPENAI_API_KEY,
|
|
9
|
-
load: () => import("./openai.js").then(m => m.default) },
|
|
8
|
+
when: () => !!process.env.OPENAI_API_KEY && !process.env.OPENAI_BASE_URL,
|
|
9
|
+
load: () => import("./providers/openai.js").then(m => m.default) },
|
|
10
|
+
{ name: "openai-compatible",
|
|
11
|
+
when: () => !!process.env.OPENAI_BASE_URL,
|
|
12
|
+
load: () => import("./providers/openai-compatible.js").then(m => m.default) },
|
|
13
|
+
{ name: "deepseek",
|
|
14
|
+
load: () => import("./providers/deepseek.js").then(m => m.default) },
|
|
10
15
|
{ name: "tui-renderer", load: () => import("./tui-renderer.js").then(m => m.default) },
|
|
11
16
|
{ name: "slash-commands", load: () => import("./slash-commands.js").then(m => m.default) },
|
|
12
17
|
{ name: "file-autocomplete", load: () => import("./file-autocomplete.js").then(m => m.default) },
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native DeepSeek (api.deepseek.com). V4 ignores reasoning_effort for
|
|
3
|
+
* on/off — disable lives in a separate `thinking` field that defaults
|
|
4
|
+
* to enabled. The hook always attaches; provider registration via env
|
|
5
|
+
* is opt-in alongside any settings.json entry.
|
|
6
|
+
*/
|
|
7
|
+
import type { ExtensionContext } from "../../types.js";
|
|
8
|
+
export default function activate(ctx: ExtensionContext): void;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
const BASE_URL = "https://api.deepseek.com";
|
|
2
|
+
const DEFAULT_MODELS = [
|
|
3
|
+
{ id: "deepseek-v4-flash", reasoning: true, echoReasoning: true },
|
|
4
|
+
{ id: "deepseek-v4-pro", reasoning: true, echoReasoning: true },
|
|
5
|
+
];
|
|
6
|
+
/**
 * Build the reasoning-related request fields for DeepSeek.
 * "off" sends only a disabled `thinking` field; any other level enables
 * `thinking` and forwards the level as `reasoning_effort`.
 * `_model` is accepted for signature compatibility and not used.
 */
function buildReasoningParams(level, _model) {
    if (level === "off") {
        return { thinking: { type: "disabled" } };
    }
    return { thinking: { type: "enabled" }, reasoning_effort: level };
}
|
|
11
|
+
/**
 * Extension entry point for the native DeepSeek provider.
 * Always installs the DeepSeek reasoning hook; additionally registers the
 * provider with its default model list when DEEPSEEK_API_KEY is set.
 */
export default function activate(ctx) {
    ctx.providers.configure("deepseek", { reasoningParams: buildReasoningParams });
    const { DEEPSEEK_API_KEY: apiKey } = process.env;
    if (apiKey) {
        const [firstModel] = DEFAULT_MODELS;
        ctx.bus.emit("provider:register", {
            id: "deepseek",
            apiKey,
            baseURL: BASE_URL,
            defaultModel: firstModel.id,
            models: DEFAULT_MODELS,
        });
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Chat Completions-compatible local/3rd-party server (Ollama, LM
|
|
3
|
+
* Studio, vLLM, llama.cpp, …). No reasoning hook — the right shape depends
|
|
4
|
+
* on which model the server is serving; user extensions can add one.
|
|
5
|
+
*/
|
|
6
|
+
import type { ExtensionContext } from "../../types.js";
|
|
7
|
+
export default function activate(ctx: ExtensionContext): void;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
 * Extension entry point for an OpenAI-compatible server selected via
 * OPENAI_BASE_URL. Registers the provider immediately with an empty model
 * list, then registers it again with the fetched model ids (first id as the
 * default) once the catalog request succeeds with at least one model.
 * Failures of the catalog request are ignored.
 */
export default function activate(ctx) {
    const baseURL = process.env.OPENAI_BASE_URL;
    if (!baseURL) {
        return;
    }
    // Local servers often need no key; SDK still wants a non-empty string.
    const apiKey = process.env.OPENAI_API_KEY || "no-key";
    const id = "openai-compatible";
    const register = (extra) => {
        ctx.bus.emit("provider:register", { id, apiKey, baseURL, ...extra });
    };
    register({ models: [] });
    fetchModels(baseURL, apiKey)
        .then((models) => {
            if (models.length !== 0) {
                register({ defaultModel: models[0], models });
            }
        })
        .catch(() => { });
}
|
|
21
|
+
async function fetchModels(baseURL, apiKey) {
|
|
22
|
+
const headers = {};
|
|
23
|
+
if (apiKey && apiKey !== "no-key")
|
|
24
|
+
headers.Authorization = `Bearer ${apiKey}`;
|
|
25
|
+
const res = await fetch(`${baseURL.replace(/\/$/, "")}/models`, { headers });
|
|
26
|
+
if (!res.ok)
|
|
27
|
+
return [];
|
|
28
|
+
const data = await res.json();
|
|
29
|
+
return (data.data ?? []).map((m) => m.id);
|
|
30
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cloud OpenAI (api.openai.com). reasoning_effort vocabulary diverges per
|
|
3
|
+
* family: o-series has no off; gpt-5-codex floors at "low"; plain gpt-5
|
|
4
|
+
* floors at "minimal"; gpt-5.1+ accepts "none" as documented full off.
|
|
5
|
+
*/
|
|
6
|
+
import type { ExtensionContext } from "../../types.js";
|
|
7
|
+
export default function activate(ctx: ExtensionContext): void;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
const CLOUD_MODELS = [
|
|
2
|
+
{ id: "gpt-5", reasoning: true },
|
|
3
|
+
{ id: "gpt-4.1", reasoning: false },
|
|
4
|
+
{ id: "gpt-4o", reasoning: false },
|
|
5
|
+
{ id: "gpt-4o-mini", reasoning: false },
|
|
6
|
+
{ id: "o3", reasoning: true },
|
|
7
|
+
{ id: "o3-mini", reasoning: true },
|
|
8
|
+
];
|
|
9
|
+
/**
 * Pick the `reasoning_effort` value that stands in for "off" on a given
 * OpenAI model id, or `null` when no such value applies.
 * Rules are checked in order and the first match wins:
 * ids starting with "o" + digit → null, "gpt-5-codex…" → "low",
 * "gpt-5.<1-9>…" → "none", other "gpt-5…" not followed by "." → "minimal".
 */
function offEffortFor(model) {
    const rules = [
        { applies: (id) => /^o\d/.test(id), effort: null },
        { applies: (id) => id.startsWith("gpt-5-codex"), effort: "low" },
        { applies: (id) => /^gpt-5\.[1-9]/.test(id), effort: "none" },
        { applies: (id) => /^gpt-5(?!\.)/.test(id), effort: "minimal" },
    ];
    const hit = rules.find((rule) => rule.applies(model));
    return hit ? hit.effort : null;
}
|
|
20
|
+
/**
 * Build the reasoning request fields for cloud OpenAI.
 * Any level other than "off" is forwarded as `reasoning_effort`. For "off",
 * the model-specific substitute from `offEffortFor` is sent when a model id
 * is given and a substitute exists; otherwise no field is sent.
 */
function buildReasoningParams(level, model) {
    if (level === "off") {
        const substitute = model ? offEffortFor(model) : null;
        if (substitute) {
            return { reasoning_effort: substitute };
        }
        return {};
    }
    return { reasoning_effort: level };
}
|
|
26
|
+
/**
 * Extension entry point for cloud OpenAI.
 * Does nothing unless OPENAI_API_KEY is set and OPENAI_BASE_URL is not;
 * otherwise installs the OpenAI reasoning hook and registers the "openai"
 * provider with the built-in model list (first entry as the default).
 */
export default function activate(ctx) {
    const apiKey = process.env.OPENAI_API_KEY;
    // A custom base URL means openai-compatible handles this.
    if (!apiKey || process.env.OPENAI_BASE_URL) {
        return;
    }
    ctx.providers.configure("openai", { reasoningParams: buildReasoningParams });
    const [primaryModel] = CLOUD_MODELS;
    ctx.bus.emit("provider:register", {
        id: "openai",
        apiKey,
        defaultModel: primaryModel.id,
        models: CLOUD_MODELS,
    });
}
|
|
@@ -3,5 +3,5 @@
|
|
|
3
3
|
* Registers curated defaults synchronously so the first query works, then
|
|
4
4
|
* fetches the full catalog to populate /model autocomplete.
|
|
5
5
|
*/
|
|
6
|
-
import type { ExtensionContext } from "
|
|
6
|
+
import type { ExtensionContext } from "../../types.js";
|
|
7
7
|
export default function activate(ctx: ExtensionContext): void;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getSettings } from "
|
|
1
|
+
import { getSettings } from "../../settings.js";
|
|
2
2
|
const BASE_URL = "https://openrouter.ai/api/v1";
|
|
3
3
|
const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
|
|
4
4
|
// Built-in defaults for models requiring reasoning_content echoed back
|
|
@@ -6,9 +6,11 @@ const DEFAULT_MODELS = ["deepseek/deepseek-v4-flash"];
|
|
|
6
6
|
// providers.openrouter.echoReasoningPatterns = ["deepseek", "..."]
|
|
7
7
|
// providers.openrouter.models[*].echoReasoning = true | false
|
|
8
8
|
const BUILTIN_ECHO_REASONING_PATTERNS = [/deepseek/i];
|
|
9
|
-
|
|
9
|
+
// `effort: "none"` is the documented disable; honored by OpenAI/Grok, ignored
|
|
10
|
+
// by Anthropic/Gemini/DeepSeek-via-OpenRouter (use native deepseek for a hard off).
|
|
11
|
+
function buildReasoningParams(level, _model) {
|
|
10
12
|
return level === "off"
|
|
11
|
-
? { reasoning: {
|
|
13
|
+
? { reasoning: { effort: "none" } }
|
|
12
14
|
: { reasoning: { effort: level } };
|
|
13
15
|
}
|
|
14
16
|
export default function activate(ctx) {
|
|
@@ -67,6 +67,8 @@ function createRenderState() {
|
|
|
67
67
|
isThinking: false,
|
|
68
68
|
showThinkingText: false,
|
|
69
69
|
thinkingPending: false,
|
|
70
|
+
previewedDiffPending: false,
|
|
71
|
+
previewedDiffToolIds: new Set(),
|
|
70
72
|
};
|
|
71
73
|
}
|
|
72
74
|
export default function activate(ctx) {
|
|
@@ -175,21 +177,20 @@ export default function activate(ctx) {
|
|
|
175
177
|
s.thinkingPending = true;
|
|
176
178
|
if (!s.isThinking) {
|
|
177
179
|
s.isThinking = true;
|
|
178
|
-
if (s.showThinkingText)
|
|
179
|
-
stopCurrentSpinner();
|
|
180
|
-
if (!s.renderer)
|
|
181
|
-
startAgentResponse();
|
|
182
|
-
}
|
|
183
|
-
else {
|
|
180
|
+
if (!s.showThinkingText)
|
|
184
181
|
startThinkingSpinner();
|
|
185
|
-
}
|
|
186
182
|
}
|
|
187
|
-
if (s.showThinkingText
|
|
188
|
-
|
|
183
|
+
if (s.showThinkingText) {
|
|
184
|
+
stopCurrentSpinner();
|
|
189
185
|
if (!s.renderer)
|
|
190
186
|
startAgentResponse();
|
|
191
|
-
|
|
192
|
-
|
|
187
|
+
if (e.text) {
|
|
188
|
+
s.thinkingPending = false;
|
|
189
|
+
// Wrap each sub-line so dim survives \n boundaries in the renderer.
|
|
190
|
+
const wrapped = `${p.dim}${e.text.replace(/\n/g, `${p.reset}\n${p.dim}`)}${p.reset}`;
|
|
191
|
+
s.renderer.push(wrapped);
|
|
192
|
+
drain();
|
|
193
|
+
}
|
|
193
194
|
}
|
|
194
195
|
});
|
|
195
196
|
bus.on("agent:response-chunk", (e) => {
|
|
@@ -272,6 +273,10 @@ export default function activate(ctx) {
|
|
|
272
273
|
s.currentToolKind = e.kind;
|
|
273
274
|
s.toolStartTime = Date.now();
|
|
274
275
|
s.orphanContHeaderKind = undefined;
|
|
276
|
+
if (s.previewedDiffPending && e.toolCallId) {
|
|
277
|
+
s.previewedDiffToolIds.add(e.toolCallId);
|
|
278
|
+
}
|
|
279
|
+
s.previewedDiffPending = false;
|
|
275
280
|
if (e.title === "user_shell") {
|
|
276
281
|
finalizeToolGroup();
|
|
277
282
|
closeToolLine();
|
|
@@ -335,11 +340,18 @@ export default function activate(ctx) {
|
|
|
335
340
|
s.toolExitCode = e.exitCode;
|
|
336
341
|
if (e.exitCode !== 0)
|
|
337
342
|
s.toolGroupAllOk = false;
|
|
343
|
+
let resultDisplay = e.resultDisplay;
|
|
344
|
+
if (e.toolCallId && s.previewedDiffToolIds.has(e.toolCallId)) {
|
|
345
|
+
s.previewedDiffToolIds.delete(e.toolCallId);
|
|
346
|
+
if (resultDisplay?.body?.kind === "diff") {
|
|
347
|
+
resultDisplay = { ...resultDisplay, body: undefined };
|
|
348
|
+
}
|
|
349
|
+
}
|
|
338
350
|
if (s.toolGroupKind) {
|
|
339
351
|
// Grouped tool — track success/failure and summaries, show aggregate on ⎿ line.
|
|
340
352
|
// Don't restart spinner between grouped tools — it's already running from group start.
|
|
341
|
-
if (
|
|
342
|
-
s.toolGroupSummaries.push(
|
|
353
|
+
if (resultDisplay?.summary)
|
|
354
|
+
s.toolGroupSummaries.push(resultDisplay.summary);
|
|
343
355
|
if (e.toolCallId)
|
|
344
356
|
s.pendingToolCompletes.delete(e.toolCallId);
|
|
345
357
|
s.toolGroupCompletedCount++;
|
|
@@ -358,10 +370,10 @@ export default function activate(ctx) {
|
|
|
358
370
|
if (pending)
|
|
359
371
|
s.pendingToolCompletes.delete(e.toolCallId);
|
|
360
372
|
if (pending?.orphaned) {
|
|
361
|
-
showOrphanedComplete(e.exitCode,
|
|
373
|
+
showOrphanedComplete(e.exitCode, resultDisplay, pending.title, pending.kind, pending.displayDetail);
|
|
362
374
|
}
|
|
363
375
|
else {
|
|
364
|
-
showToolComplete(e.exitCode,
|
|
376
|
+
showToolComplete(e.exitCode, resultDisplay, pending?.displayDetail ?? pending?.title);
|
|
365
377
|
}
|
|
366
378
|
s.currentToolKind = undefined;
|
|
367
379
|
s.spinnerStartTime = 0;
|
|
@@ -432,6 +444,7 @@ export default function activate(ctx) {
|
|
|
432
444
|
// Mark lastContentKind as "tool" so the tool call line that follows
|
|
433
445
|
// doesn't inject an extra gap between the diff box and the checkmark.
|
|
434
446
|
s.lastContentKind = "tool";
|
|
447
|
+
s.previewedDiffPending = true;
|
|
435
448
|
}
|
|
436
449
|
// Don't endAgentResponse() here — permission requests that aren't
|
|
437
450
|
// file-write diffs are handled inline (auto-approved or by extensions).
|
|
@@ -654,26 +667,16 @@ export default function activate(ctx) {
|
|
|
654
667
|
return [];
|
|
655
668
|
const boxW = Math.min(120, width - 2);
|
|
656
669
|
const contentW = boxW - 4;
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
}
|
|
668
|
-
else {
|
|
669
|
-
const diffLines = renderDiff(diff, {
|
|
670
|
-
width: contentW,
|
|
671
|
-
filePath,
|
|
672
|
-
maxLines: getSettings().diffMaxLines,
|
|
673
|
-
trueColor: true,
|
|
674
|
-
});
|
|
675
|
-
body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
|
|
676
|
-
}
|
|
670
|
+
const maxLines = diff.isNewFile
|
|
671
|
+
? getSettings().newFilePreviewLines
|
|
672
|
+
: getSettings().diffMaxLines;
|
|
673
|
+
const diffLines = renderDiff(diff, {
|
|
674
|
+
width: contentW,
|
|
675
|
+
filePath,
|
|
676
|
+
maxLines,
|
|
677
|
+
trueColor: true,
|
|
678
|
+
});
|
|
679
|
+
const body = diffLines.length > 1 ? ["", ...diffLines.slice(1), ""] : diffLines;
|
|
677
680
|
return renderBoxFrame(body, {
|
|
678
681
|
width: boxW,
|
|
679
682
|
style: "rounded",
|
package/dist/settings.d.ts
CHANGED
|
@@ -9,6 +9,8 @@ export interface ModelCapabilityConfig {
|
|
|
9
9
|
reasoning?: boolean;
|
|
10
10
|
/** Context window size in tokens for this specific model. */
|
|
11
11
|
contextWindow?: number;
|
|
12
|
+
/** Max output tokens for this model. */
|
|
13
|
+
maxTokens?: number;
|
|
12
14
|
/** Echo reasoning_content back on assistant turns. Required by DeepSeek. */
|
|
13
15
|
echoReasoning?: boolean;
|
|
14
16
|
}
|
|
@@ -141,6 +143,8 @@ export interface ResolvedProvider {
|
|
|
141
143
|
baseURL?: string;
|
|
142
144
|
defaultModel?: string;
|
|
143
145
|
models: string[];
|
|
146
|
+
/** User explicitly listed `models` (locks the catalog to that list). */
|
|
147
|
+
modelsExplicit: boolean;
|
|
144
148
|
contextWindow?: number;
|
|
145
149
|
/** Provider supports the reasoning_effort parameter. Default: true. */
|
|
146
150
|
supportsReasoningEffort?: boolean;
|
|
@@ -148,6 +152,7 @@ export interface ResolvedProvider {
|
|
|
148
152
|
modelCapabilities?: Map<string, {
|
|
149
153
|
reasoning?: boolean;
|
|
150
154
|
contextWindow?: number;
|
|
155
|
+
maxTokens?: number;
|
|
151
156
|
echoReasoning?: boolean;
|
|
152
157
|
}>;
|
|
153
158
|
/** Borrow another registered provider's reasoning request shape by id. */
|
package/dist/settings.js
CHANGED
|
@@ -148,8 +148,8 @@ export function resolveProvider(name) {
|
|
|
148
148
|
}
|
|
149
149
|
else {
|
|
150
150
|
modelIds.push(m.id);
|
|
151
|
-
if (m.reasoning !== undefined || m.contextWindow !== undefined || m.echoReasoning !== undefined) {
|
|
152
|
-
caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, echoReasoning: m.echoReasoning });
|
|
151
|
+
if (m.reasoning !== undefined || m.contextWindow !== undefined || m.maxTokens !== undefined || m.echoReasoning !== undefined) {
|
|
152
|
+
caps.set(m.id, { reasoning: m.reasoning, contextWindow: m.contextWindow, maxTokens: m.maxTokens, echoReasoning: m.echoReasoning });
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
155
|
}
|
|
@@ -160,6 +160,7 @@ export function resolveProvider(name) {
|
|
|
160
160
|
baseURL: provider.baseURL,
|
|
161
161
|
defaultModel,
|
|
162
162
|
models: modelIds.length ? modelIds : (defaultModel ? [defaultModel] : []),
|
|
163
|
+
modelsExplicit: Array.isArray(provider.models),
|
|
163
164
|
contextWindow: provider.contextWindow,
|
|
164
165
|
modelCapabilities: caps.size > 0 ? caps : undefined,
|
|
165
166
|
reasoningShape: provider.reasoningShape,
|
package/dist/types.d.ts
CHANGED
|
@@ -41,6 +41,8 @@ export interface AgentMode {
|
|
|
41
41
|
};
|
|
42
42
|
/** Context window size in tokens (for usage display). */
|
|
43
43
|
contextWindow?: number;
|
|
44
|
+
/** Max output tokens for this mode. */
|
|
45
|
+
maxTokens?: number;
|
|
44
46
|
/** Model supports reasoning/thinking tokens. */
|
|
45
47
|
reasoning?: boolean;
|
|
46
48
|
/** Provider supports the reasoning_effort parameter. */
|
|
@@ -65,14 +67,27 @@ export interface LlmSession {
|
|
|
65
67
|
}
|
|
66
68
|
export interface LlmInterface {
|
|
67
69
|
readonly available: boolean;
|
|
70
|
+
/** `model` overrides the globally-configured model for this call only.
|
|
71
|
+
* Provider-specific identifier (e.g. "claude-haiku-4-5"). When omitted,
|
|
72
|
+
* the active provider's configured default is used.
|
|
73
|
+
*
|
|
74
|
+
* `reasoningEffort` controls thinking-model token allocation between
|
|
75
|
+
* reasoning and final content (e.g. "low", "medium", "high", or
|
|
76
|
+
* provider-specific). For non-reasoning models it is ignored. Set to
|
|
77
|
+
* "low" for cheap structured-output calls so reasoning doesn't exhaust
|
|
78
|
+
* the max-tokens budget and leave content empty. */
|
|
68
79
|
ask(opts: {
|
|
69
80
|
query: string;
|
|
70
81
|
system?: string;
|
|
71
82
|
maxTokens?: number;
|
|
83
|
+
model?: string;
|
|
84
|
+
reasoningEffort?: string;
|
|
72
85
|
}): Promise<string>;
|
|
73
86
|
session(opts?: {
|
|
74
87
|
system?: string;
|
|
75
88
|
maxTokens?: number;
|
|
89
|
+
model?: string;
|
|
90
|
+
reasoningEffort?: string;
|
|
76
91
|
}): LlmSession;
|
|
77
92
|
}
|
|
78
93
|
export interface AgentShellConfig {
|
|
@@ -156,7 +171,7 @@ export interface ExtensionContext {
|
|
|
156
171
|
}) => () => void;
|
|
157
172
|
providers: {
|
|
158
173
|
configure: (id: string, opts: {
|
|
159
|
-
reasoningParams?: (level: string) => Record<string, unknown>;
|
|
174
|
+
reasoningParams?: (level: string, model?: string) => Record<string, unknown>;
|
|
160
175
|
}) => void;
|
|
161
176
|
};
|
|
162
177
|
llm: LlmInterface;
|
package/dist/utils/box-frame.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* never writes to stdout. Supports multiple border styles and
|
|
6
6
|
* optional title/footer sections with dividers.
|
|
7
7
|
*/
|
|
8
|
-
import { visibleLen, truncateToWidth } from "./ansi.js";
|
|
8
|
+
import { visibleLen, truncateToWidth, truncateAnsiToWidth } from "./ansi.js";
|
|
9
9
|
import { palette as p } from "./palette.js";
|
|
10
10
|
const BORDERS = {
|
|
11
11
|
rounded: { tl: "╭", tr: "╮", bl: "╰", br: "╯", h: "─", v: "│", ml: "├", mr: "┤" },
|
|
@@ -32,14 +32,20 @@ export function renderBoxFrame(content, opts) {
|
|
|
32
32
|
const output = [];
|
|
33
33
|
// Top border (with optional left/right titles)
|
|
34
34
|
if (opts.title || opts.titleRight) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
const rightPart = opts.titleRight
|
|
40
|
-
? `${p.reset} ${opts.titleRight} ${bc}`
|
|
41
|
-
: "";
|
|
35
|
+
// Budget: 2 corners + 1 minimum dash + space-padding around each title.
|
|
36
|
+
// Truncate the left title first if combined widths overflow — titleRight
|
|
37
|
+
// is typically short metadata (model name, stats) worth preserving.
|
|
38
|
+
let title = opts.title;
|
|
42
39
|
const rightVis = opts.titleRight ? visibleLen(opts.titleRight) + 2 : 0;
|
|
40
|
+
const leftBudget = width - 2 - 1 - rightVis; // total - corners - min dash - right
|
|
41
|
+
let leftVis = title ? visibleLen(title) + 2 : 0;
|
|
42
|
+
if (title && leftVis > leftBudget) {
|
|
43
|
+
const maxTitleVis = Math.max(1, leftBudget - 2);
|
|
44
|
+
title = truncateAnsiToWidth(title, maxTitleVis);
|
|
45
|
+
leftVis = visibleLen(title) + 2;
|
|
46
|
+
}
|
|
47
|
+
const leftPart = title ? `${p.reset} ${title} ${bc}` : "";
|
|
48
|
+
const rightPart = opts.titleRight ? `${p.reset} ${opts.titleRight} ${bc}` : "";
|
|
43
49
|
const dashCount = Math.max(1, width - 2 - leftVis - rightVis);
|
|
44
50
|
output.push(`${bc}${b.tl}${leftPart}${b.h.repeat(dashCount)}${rightPart}${b.tr}${p.reset}`);
|
|
45
51
|
}
|
package/dist/utils/llm-client.d.ts
CHANGED
|
@@ -33,7 +33,8 @@ export declare class LlmClient {
|
|
|
33
33
|
tools?: ChatCompletionTool[];
|
|
34
34
|
model?: string;
|
|
35
35
|
max_tokens?: number;
|
|
36
|
-
/** Reasoning effort
|
|
36
|
+
/** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
|
|
37
|
+
* "off" matches agent-loop's thinkingLevel and omits the field. */
|
|
37
38
|
reasoning_effort?: string;
|
|
38
39
|
signal?: AbortSignal;
|
|
39
40
|
}): import("openai").APIPromise<import("openai/core/streaming.mjs").Stream<OpenAI.Chat.Completions.ChatCompletionChunk>>;
|
|
@@ -45,5 +46,8 @@ export declare class LlmClient {
|
|
|
45
46
|
messages: ChatCompletionMessageParam[];
|
|
46
47
|
model?: string;
|
|
47
48
|
max_tokens?: number;
|
|
49
|
+
/** Reasoning effort: "off" | "low" | "medium" | "high". Provider-dependent;
|
|
50
|
+
* "off" matches agent-loop's thinkingLevel and omits the field. */
|
|
51
|
+
reasoning_effort?: string;
|
|
48
52
|
}): Promise<string>;
|
|
49
53
|
}
|
package/dist/utils/llm-client.js
CHANGED
|
@@ -40,14 +40,15 @@ export class LlmClient {
|
|
|
40
40
|
* Returns an async iterable of chunks.
|
|
41
41
|
*/
|
|
42
42
|
stream(opts) {
|
|
43
|
+
const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
|
|
43
44
|
const body = {
|
|
44
45
|
model: opts.model ?? this.model,
|
|
45
46
|
messages: opts.messages,
|
|
46
47
|
tools: opts.tools?.length ? opts.tools : undefined,
|
|
47
|
-
max_tokens: opts.max_tokens ??
|
|
48
|
+
max_tokens: opts.max_tokens ?? 65536,
|
|
48
49
|
stream: true,
|
|
49
50
|
stream_options: { include_usage: true },
|
|
50
|
-
...(
|
|
51
|
+
...(sendEffort
|
|
51
52
|
? { reasoning_effort: opts.reasoning_effort }
|
|
52
53
|
: {}),
|
|
53
54
|
};
|
|
@@ -58,10 +59,14 @@ export class LlmClient {
|
|
|
58
59
|
* Returns the text content of the first choice.
|
|
59
60
|
*/
|
|
60
61
|
async complete(opts) {
|
|
62
|
+
const sendEffort = opts.reasoning_effort && opts.reasoning_effort !== "off";
|
|
61
63
|
const response = await this.client.chat.completions.create({
|
|
62
64
|
model: opts.model ?? this.model,
|
|
63
65
|
messages: opts.messages,
|
|
64
66
|
max_tokens: opts.max_tokens ?? 1024,
|
|
67
|
+
...(sendEffort
|
|
68
|
+
? { reasoning_effort: opts.reasoning_effort }
|
|
69
|
+
: {}),
|
|
65
70
|
});
|
|
66
71
|
return response.choices[0]?.message?.content ?? "";
|
|
67
72
|
}
|
package/dist/utils/llm-facade.js
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
export function createLlmFacade(handlers) {
|
|
2
|
-
const invoke = (messages, maxTokens) => {
|
|
3
|
-
const result = handlers.call("llm:invoke", messages, { maxTokens });
|
|
2
|
+
const invoke = (messages, maxTokens, model, reasoningEffort) => {
|
|
3
|
+
const result = handlers.call("llm:invoke", messages, { maxTokens, model, reasoningEffort });
|
|
4
4
|
if (result === undefined)
|
|
5
5
|
return Promise.reject(new Error("ctx.llm: no LLM backend available"));
|
|
6
6
|
return result;
|
|
7
7
|
};
|
|
8
8
|
return {
|
|
9
9
|
get available() { return handlers.list().includes("llm:invoke"); },
|
|
10
|
-
ask: ({ query, system, maxTokens }) => {
|
|
10
|
+
ask: ({ query, system, maxTokens, model, reasoningEffort }) => {
|
|
11
11
|
const messages = [];
|
|
12
12
|
if (system)
|
|
13
13
|
messages.push({ role: "system", content: system });
|
|
14
14
|
messages.push({ role: "user", content: query });
|
|
15
|
-
return invoke(messages, maxTokens);
|
|
15
|
+
return invoke(messages, maxTokens, model, reasoningEffort);
|
|
16
16
|
},
|
|
17
17
|
session: (opts = {}) => {
|
|
18
18
|
const messages = [];
|
|
@@ -21,7 +21,7 @@ export function createLlmFacade(handlers) {
|
|
|
21
21
|
const session = {
|
|
22
22
|
async send(message) {
|
|
23
23
|
messages.push({ role: "user", content: message });
|
|
24
|
-
const reply = await invoke(messages, opts.maxTokens);
|
|
24
|
+
const reply = await invoke(messages, opts.maxTokens, opts.model, opts.reasoningEffort);
|
|
25
25
|
messages.push({ role: "assistant", content: reply });
|
|
26
26
|
return reply;
|
|
27
27
|
},
|
package/package.json
CHANGED
|
package/dist/extensions/openai.d.ts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Built-in OpenAI-compatible provider. Two activation paths:
|
|
3
|
-
* - OPENAI_API_KEY only → cloud OpenAI, ships a curated catalog.
|
|
4
|
-
* - OPENAI_BASE_URL (any key) → local/3rd-party server (Ollama, LM Studio,
|
|
5
|
-
* vLLM, llama.cpp); the catalog is fetched
|
|
6
|
-
* from the server's /models endpoint.
|
|
7
|
-
*/
|
|
8
|
-
import type { ExtensionContext } from "../types.js";
|
|
9
|
-
export default function activate(ctx: ExtensionContext): void;
|
package/dist/extensions/openai.js
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
const OPENAI_CLOUD_MODELS = [
|
|
2
|
-
{ id: "gpt-5", reasoning: true },
|
|
3
|
-
{ id: "gpt-4.1", reasoning: false },
|
|
4
|
-
{ id: "gpt-4o", reasoning: false },
|
|
5
|
-
{ id: "gpt-4o-mini", reasoning: false },
|
|
6
|
-
{ id: "o3", reasoning: true },
|
|
7
|
-
{ id: "o3-mini", reasoning: true },
|
|
8
|
-
];
|
|
9
|
-
export default function activate(ctx) {
|
|
10
|
-
const apiKey = process.env.OPENAI_API_KEY ?? "";
|
|
11
|
-
const baseURL = process.env.OPENAI_BASE_URL;
|
|
12
|
-
if (!baseURL) {
|
|
13
|
-
if (!apiKey)
|
|
14
|
-
return;
|
|
15
|
-
ctx.bus.emit("provider:register", {
|
|
16
|
-
id: "openai",
|
|
17
|
-
apiKey,
|
|
18
|
-
defaultModel: OPENAI_CLOUD_MODELS[0].id,
|
|
19
|
-
models: OPENAI_CLOUD_MODELS,
|
|
20
|
-
});
|
|
21
|
-
return;
|
|
22
|
-
}
|
|
23
|
-
const id = "openai-compatible";
|
|
24
|
-
// Local servers (Ollama, llama.cpp) often need no key; the SDK still
|
|
25
|
-
// requires a non-empty string for construction.
|
|
26
|
-
const sdkKey = apiKey || "no-key";
|
|
27
|
-
ctx.bus.emit("provider:register", { id, apiKey: sdkKey, baseURL, models: [] });
|
|
28
|
-
fetchModels(baseURL, apiKey).then((models) => {
|
|
29
|
-
if (models.length === 0)
|
|
30
|
-
return;
|
|
31
|
-
ctx.bus.emit("provider:register", {
|
|
32
|
-
id,
|
|
33
|
-
apiKey: sdkKey,
|
|
34
|
-
baseURL,
|
|
35
|
-
defaultModel: models[0],
|
|
36
|
-
models,
|
|
37
|
-
});
|
|
38
|
-
}).catch(() => { });
|
|
39
|
-
}
|
|
40
|
-
async function fetchModels(baseURL, apiKey) {
|
|
41
|
-
const headers = {};
|
|
42
|
-
if (apiKey)
|
|
43
|
-
headers.Authorization = `Bearer ${apiKey}`;
|
|
44
|
-
const res = await fetch(`${baseURL.replace(/\/$/, "")}/models`, { headers });
|
|
45
|
-
if (!res.ok)
|
|
46
|
-
return [];
|
|
47
|
-
const data = await res.json();
|
|
48
|
-
return (data.data ?? []).map((m) => m.id);
|
|
49
|
-
}
|