@oh-my-pi/pi-coding-agent 13.9.2 → 13.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -0
- package/examples/sdk/02-custom-model.ts +2 -1
- package/package.json +7 -7
- package/src/cli/args.ts +10 -6
- package/src/cli/list-models.ts +2 -2
- package/src/commands/launch.ts +3 -3
- package/src/config/model-registry.ts +136 -38
- package/src/config/model-resolver.ts +47 -21
- package/src/config/settings-schema.ts +56 -2
- package/src/discovery/helpers.ts +3 -3
- package/src/extensibility/custom-tools/types.ts +2 -0
- package/src/extensibility/extensions/loader.ts +3 -2
- package/src/extensibility/extensions/types.ts +10 -7
- package/src/extensibility/hooks/types.ts +2 -0
- package/src/main.ts +5 -22
- package/src/memories/index.ts +7 -3
- package/src/modes/components/footer.ts +10 -8
- package/src/modes/components/model-selector.ts +33 -38
- package/src/modes/components/settings-defs.ts +32 -3
- package/src/modes/components/settings-selector.ts +16 -5
- package/src/modes/components/status-line/context-thresholds.ts +68 -0
- package/src/modes/components/status-line/segments.ts +11 -12
- package/src/modes/components/status-line.ts +2 -6
- package/src/modes/components/thinking-selector.ts +7 -7
- package/src/modes/components/tree-selector.ts +3 -2
- package/src/modes/controllers/command-controller.ts +11 -26
- package/src/modes/controllers/event-controller.ts +16 -3
- package/src/modes/controllers/input-controller.ts +4 -2
- package/src/modes/controllers/selector-controller.ts +5 -4
- package/src/modes/interactive-mode.ts +2 -2
- package/src/modes/rpc/rpc-client.ts +5 -10
- package/src/modes/rpc/rpc-types.ts +5 -5
- package/src/modes/theme/theme.ts +8 -3
- package/src/priority.json +1 -0
- package/src/prompts/system/auto-handoff-threshold-focus.md +1 -0
- package/src/prompts/system/system-prompt.md +18 -2
- package/src/prompts/tools/hashline.md +139 -83
- package/src/sdk.ts +24 -16
- package/src/session/agent-session.ts +261 -118
- package/src/session/agent-storage.ts +14 -14
- package/src/session/compaction/compaction.ts +500 -13
- package/src/session/messages.ts +12 -1
- package/src/session/session-manager.ts +77 -19
- package/src/slash-commands/builtin-registry.ts +48 -0
- package/src/task/agents.ts +3 -2
- package/src/task/executor.ts +2 -2
- package/src/task/types.ts +2 -1
- package/src/thinking.ts +87 -0
- package/src/tools/browser.ts +15 -6
- package/src/tools/fetch.ts +118 -100
- package/src/tools/index.ts +2 -1
- package/src/web/kagi.ts +62 -7
- package/src/web/search/providers/exa.ts +74 -3
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,70 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [13.9.4] - 2026-03-07
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Automatic detection of Ollama model capabilities including reasoning/thinking support and vision input via the `/api/show` endpoint
|
|
9
|
+
- Improved Kagi API error handling with extraction of detailed error messages from JSON and plain text responses
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Updated Kagi provider description to clarify requirement for Kagi Search API beta access
|
|
14
|
+
|
|
15
|
+
## [13.9.3] - 2026-03-07
|
|
16
|
+
|
|
17
|
+
### Breaking Changes
|
|
18
|
+
|
|
19
|
+
- Changed `ThinkingLevel` type to be imported from `@oh-my-pi/pi-agent-core` instead of `@oh-my-pi/pi-ai`
|
|
20
|
+
- Changed thinking level representation from string literals to `Effort` enum values (e.g., `Effort.High` instead of `"high"`)
|
|
21
|
+
- Changed `getThinkingLevel()` return type to `ThinkingLevel | undefined` to support models without thinking support
|
|
22
|
+
- Changed model `reasoning` property to `thinking` property with `ThinkingConfig` for explicit effort level configuration
|
|
23
|
+
- Changed `thinkingLevel` in session context to be optional (`ThinkingLevel | undefined`) instead of always present
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- Added `thinking.ts` module with `getThinkingLevelMetadata()` and `resolveThinkingLevelForModel()` utilities for thinking level handling
|
|
28
|
+
- Added `ThinkingConfig` support to model definitions for specifying supported thinking effort levels per model
|
|
29
|
+
- Added `enrichModelThinking()` function to apply thinking configuration to models during registry initialization
|
|
30
|
+
- Added `clampThinkingLevelForModel()` function to constrain thinking levels to model-supported ranges
|
|
31
|
+
- Added `getSupportedEfforts()` function to retrieve available thinking efforts for a model
|
|
32
|
+
- Added `Effort` enum import from `@oh-my-pi/pi-ai` for type-safe thinking level representation
|
|
33
|
+
- Added `/fast` slash command to toggle OpenAI service tier priority mode for faster response processing
|
|
34
|
+
- Added `serviceTier` setting to control OpenAI processing priority (none, auto, default, flex, scale, priority)
|
|
35
|
+
- Added `compaction.remoteEnabled` setting to control use of remote compaction endpoints
|
|
36
|
+
- Added remote compaction support for OpenAI and OpenAI Codex models with encrypted reasoning preservation
|
|
37
|
+
- Added fast mode indicator (⚡) to model segment in status line when priority service tier is active
|
|
38
|
+
- Added context usage threshold levels (normal, warning, purple, error) with token-aware thresholds for better context awareness
|
|
39
|
+
- Added `isFastModeEnabled()`, `setFastMode()`, and `toggleFastMode()` methods to AgentSession for fast mode control
|
|
40
|
+
|
|
41
|
+
### Changed
|
|
42
|
+
|
|
43
|
+
- Changed credential deletion to disable credentials and persist the cause, instead of permanently deleting them
|
|
44
|
+
- Added `disabledCause` parameter to credential deletion methods to track reason for disabling
|
|
45
|
+
- Changed thinking level parsing to use `parseEffort()` from local thinking module instead of `parseThinkingLevel()` from pi-ai
|
|
46
|
+
- Changed model list display to show supported thinking efforts (e.g., "low,medium,high") instead of yes/no reasoning indicator
|
|
47
|
+
- Changed footer and status line to check `model.thinking` instead of `model.reasoning` for thinking level display
|
|
48
|
+
- Changed thinking selector to work with `Effort` type instead of `ThinkingLevel` for available levels
|
|
49
|
+
- Changed model resolver to return `undefined` for thinking level instead of `"off"` when no thinking is specified
|
|
50
|
+
- Changed compaction reasoning parameters to use `Effort` enum values instead of string literals
|
|
51
|
+
- Changed RPC types to use `Effort` for cycling thinking levels and `ThinkingLevel | undefined` for session state
|
|
52
|
+
- Changed theme thinking border color function to accept both `ThinkingLevel` and `Effort` types
|
|
53
|
+
- Changed context usage coloring in footer and status line to use token-aware thresholds instead of fixed percentages
|
|
54
|
+
- Changed compaction to preserve OpenAI remote compaction state and encrypted reasoning across sessions
|
|
55
|
+
- Changed compaction to skip emitting kept messages when using OpenAI remote compaction with preserved history
|
|
56
|
+
- Changed session context to include `serviceTier` field for tracking active service tier across session branches
|
|
57
|
+
- Changed `compact()` function to accept `remoteInstructions` option for custom remote compaction prompts
|
|
58
|
+
- Changed model registry to apply hardcoded policies (gpt-5.4 context window) consistently across all model loading paths
|
|
59
|
+
|
|
60
|
+
### Fixed
|
|
61
|
+
|
|
62
|
+
- Fixed OpenAI remote compaction to correctly append incremental responses instead of replacing entire history
|
|
63
|
+
- Fixed thinking level display logic in main.ts to correctly check for undefined instead of "off"
|
|
64
|
+
- Fixed model registry to preserve explicit thinking configuration on runtime-registered models
|
|
65
|
+
- Fixed usage limit reset time calculation to use absolute `resetsAt` timestamps instead of deprecated `resetInMs` field
|
|
66
|
+
- Fixed compaction summary messages being automatically added to chat during compaction (this is now handled by the session manager)
|
|
67
|
+
- Fixed Kagi web search errors to surface the provider's beta-access message and clarified that Kagi search requires Search API beta access
|
|
68
|
+
|
|
5
69
|
## [13.9.2] - 2026-03-05
|
|
6
70
|
|
|
7
71
|
### Added
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Shows how to select a specific model and thinking level.
|
|
5
5
|
*/
|
|
6
|
+
import { ThinkingLevel } from "@oh-my-pi/pi-agent-core";
|
|
6
7
|
import { getModel } from "@oh-my-pi/pi-ai";
|
|
7
8
|
import { createAgentSession, discoverAuthStorage, discoverModels } from "@oh-my-pi/pi-coding-agent";
|
|
8
9
|
|
|
@@ -32,7 +33,7 @@ console.log(
|
|
|
32
33
|
if (available.length > 0) {
|
|
33
34
|
const { session } = await createAgentSession({
|
|
34
35
|
model: available[0],
|
|
35
|
-
thinkingLevel:
|
|
36
|
+
thinkingLevel: ThinkingLevel.Medium, // off, low, medium, high
|
|
36
37
|
authStorage,
|
|
37
38
|
modelRegistry,
|
|
38
39
|
});
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-coding-agent",
|
|
4
|
-
"version": "13.9.2",
|
|
4
|
+
"version": "13.9.4",
|
|
5
5
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -41,12 +41,12 @@
|
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@mozilla/readability": "^0.6",
|
|
44
|
-
"@oh-my-pi/omp-stats": "13.9.2",
|
|
45
|
-
"@oh-my-pi/pi-agent-core": "13.9.2",
|
|
46
|
-
"@oh-my-pi/pi-ai": "13.9.2",
|
|
47
|
-
"@oh-my-pi/pi-natives": "13.9.2",
|
|
48
|
-
"@oh-my-pi/pi-tui": "13.9.2",
|
|
49
|
-
"@oh-my-pi/pi-utils": "13.9.2",
|
|
44
|
+
"@oh-my-pi/omp-stats": "13.9.4",
|
|
45
|
+
"@oh-my-pi/pi-agent-core": "13.9.4",
|
|
46
|
+
"@oh-my-pi/pi-ai": "13.9.4",
|
|
47
|
+
"@oh-my-pi/pi-natives": "13.9.4",
|
|
48
|
+
"@oh-my-pi/pi-tui": "13.9.4",
|
|
49
|
+
"@oh-my-pi/pi-utils": "13.9.4",
|
|
50
50
|
"@sinclair/typebox": "^0.34",
|
|
51
51
|
"@xterm/headless": "^6.0",
|
|
52
52
|
"ajv": "^8.18",
|
package/src/cli/args.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI argument parsing and help display
|
|
3
3
|
*/
|
|
4
|
-
import {
|
|
4
|
+
import { type Effort, THINKING_EFFORTS } from "@oh-my-pi/pi-ai";
|
|
5
5
|
import { APP_NAME, CONFIG_DIR_NAME, logger } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import chalk from "chalk";
|
|
7
|
+
import { parseEffort } from "../thinking";
|
|
7
8
|
import { BUILTIN_TOOLS } from "../tools";
|
|
8
9
|
|
|
9
10
|
export type Mode = "text" | "json" | "rpc";
|
|
@@ -19,7 +20,7 @@ export interface Args {
|
|
|
19
20
|
apiKey?: string;
|
|
20
21
|
systemPrompt?: string;
|
|
21
22
|
appendSystemPrompt?: string;
|
|
22
|
-
thinking?:
|
|
23
|
+
thinking?: Effort;
|
|
23
24
|
continue?: boolean;
|
|
24
25
|
resume?: string | true;
|
|
25
26
|
help?: boolean;
|
|
@@ -107,7 +108,10 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
|
|
|
107
108
|
} else if (arg === "--no-pty") {
|
|
108
109
|
result.noPty = true;
|
|
109
110
|
} else if (arg === "--tools" && i + 1 < args.length) {
|
|
110
|
-
const toolNames = args[++i]
|
|
111
|
+
const toolNames = args[++i]
|
|
112
|
+
.split(",")
|
|
113
|
+
.map(s => s.trim().toLowerCase())
|
|
114
|
+
.filter(Boolean);
|
|
111
115
|
const validTools: string[] = [];
|
|
112
116
|
for (const name of toolNames) {
|
|
113
117
|
if (name in BUILTIN_TOOLS) {
|
|
@@ -122,13 +126,13 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
|
|
|
122
126
|
result.tools = validTools;
|
|
123
127
|
} else if (arg === "--thinking" && i + 1 < args.length) {
|
|
124
128
|
const rawThinking = args[++i];
|
|
125
|
-
const thinking =
|
|
129
|
+
const thinking = parseEffort(rawThinking);
|
|
126
130
|
if (thinking !== undefined) {
|
|
127
131
|
result.thinking = thinking;
|
|
128
132
|
} else {
|
|
129
133
|
logger.warn("Invalid thinking level passed to --thinking", {
|
|
130
134
|
level: rawThinking,
|
|
131
|
-
validThinkingLevels:
|
|
135
|
+
validThinkingLevels: THINKING_EFFORTS,
|
|
132
136
|
});
|
|
133
137
|
}
|
|
134
138
|
} else if (arg === "--print" || arg === "-p") {
|
|
@@ -207,7 +211,7 @@ export function getExtraHelpText(): string {
|
|
|
207
211
|
MISTRAL_API_KEY - Mistral models
|
|
208
212
|
ZAI_API_KEY - z.ai models (ZhipuAI/GLM)
|
|
209
213
|
MINIMAX_API_KEY - MiniMax models
|
|
210
|
-
OPENCODE_API_KEY - OpenCode models
|
|
214
|
+
OPENCODE_API_KEY - OpenCode Zen/OpenCode Go models
|
|
211
215
|
CURSOR_ACCESS_TOKEN - Cursor AI models
|
|
212
216
|
AI_GATEWAY_API_KEY - Vercel AI Gateway
|
|
213
217
|
|
package/src/cli/list-models.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* List available models with optional fuzzy search
|
|
3
3
|
*/
|
|
4
|
-
import type
|
|
4
|
+
import { type Api, getSupportedEfforts, type Model } from "@oh-my-pi/pi-ai";
|
|
5
5
|
import { formatNumber } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import type { ModelRegistry } from "../config/model-registry";
|
|
7
7
|
import { fuzzyFilter } from "../utils/fuzzy";
|
|
@@ -41,7 +41,7 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
|
|
|
41
41
|
model: m.id,
|
|
42
42
|
context: formatNumber(m.contextWindow),
|
|
43
43
|
maxOut: formatNumber(m.maxTokens),
|
|
44
|
-
thinking: m.reasoning ? "yes" : "
|
|
44
|
+
thinking: m.thinking ? getSupportedEfforts(m).join(",") : m.reasoning ? "yes" : "-",
|
|
45
45
|
images: m.input.includes("image") ? "yes" : "no",
|
|
46
46
|
}));
|
|
47
47
|
|
package/src/commands/launch.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Root command for the coding agent CLI.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import { THINKING_EFFORTS } from "@oh-my-pi/pi-ai";
|
|
6
6
|
import { APP_NAME } from "@oh-my-pi/pi-utils";
|
|
7
7
|
import { Args, Command, Flags } from "@oh-my-pi/pi-utils/cli";
|
|
8
8
|
import { parseArgs } from "../cli/args";
|
|
@@ -86,8 +86,8 @@ export default class Index extends Command {
|
|
|
86
86
|
description: "Comma-separated list of tools to enable (default: all)",
|
|
87
87
|
}),
|
|
88
88
|
thinking: Flags.string({
|
|
89
|
-
description: `Set thinking level: ${
|
|
90
|
-
options:
|
|
89
|
+
description: `Set thinking level: ${THINKING_EFFORTS.join(", ")}`,
|
|
90
|
+
options: [...THINKING_EFFORTS],
|
|
91
91
|
}),
|
|
92
92
|
hook: Flags.string({
|
|
93
93
|
description: "Load a hook/extension file (can be used multiple times)",
|
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
type Context,
|
|
5
5
|
createModelManager,
|
|
6
6
|
DEFAULT_LOCAL_TOKEN,
|
|
7
|
+
enrichModelThinking,
|
|
7
8
|
getBundledModels,
|
|
8
9
|
getBundledProviders,
|
|
9
10
|
googleAntigravityModelManagerOptions,
|
|
@@ -18,10 +19,11 @@ import {
|
|
|
18
19
|
registerCustomApi,
|
|
19
20
|
registerOAuthProvider,
|
|
20
21
|
type SimpleStreamOptions,
|
|
22
|
+
type ThinkingConfig,
|
|
21
23
|
unregisterCustomApis,
|
|
22
24
|
unregisterOAuthProviders,
|
|
23
25
|
} from "@oh-my-pi/pi-ai";
|
|
24
|
-
import { logger } from "@oh-my-pi/pi-utils";
|
|
26
|
+
import { isRecord, logger } from "@oh-my-pi/pi-utils";
|
|
25
27
|
import { type Static, Type } from "@sinclair/typebox";
|
|
26
28
|
import { type ConfigError, ConfigFile } from "../config";
|
|
27
29
|
import type { ThemeColor } from "../modes/theme/theme";
|
|
@@ -72,6 +74,28 @@ const OpenAICompatSchema = Type.Object({
|
|
|
72
74
|
vercelGatewayRouting: Type.Optional(VercelGatewayRoutingSchema),
|
|
73
75
|
});
|
|
74
76
|
|
|
77
|
+
const EffortSchema = Type.Union([
|
|
78
|
+
Type.Literal("minimal"),
|
|
79
|
+
Type.Literal("low"),
|
|
80
|
+
Type.Literal("medium"),
|
|
81
|
+
Type.Literal("high"),
|
|
82
|
+
Type.Literal("xhigh"),
|
|
83
|
+
]);
|
|
84
|
+
|
|
85
|
+
const ThinkingControlModeSchema = Type.Union([
|
|
86
|
+
Type.Literal("effort"),
|
|
87
|
+
Type.Literal("budget"),
|
|
88
|
+
Type.Literal("google-level"),
|
|
89
|
+
Type.Literal("anthropic-adaptive"),
|
|
90
|
+
Type.Literal("anthropic-budget-effort"),
|
|
91
|
+
]);
|
|
92
|
+
|
|
93
|
+
const ModelThinkingSchema = Type.Object({
|
|
94
|
+
minLevel: EffortSchema,
|
|
95
|
+
maxLevel: EffortSchema,
|
|
96
|
+
mode: ThinkingControlModeSchema,
|
|
97
|
+
});
|
|
98
|
+
|
|
75
99
|
// Schema for custom model definition
|
|
76
100
|
// Most fields are optional with sensible defaults for local models (Ollama, LM Studio, etc.)
|
|
77
101
|
const ModelDefinitionSchema = Type.Object({
|
|
@@ -88,7 +112,9 @@ const ModelDefinitionSchema = Type.Object({
|
|
|
88
112
|
Type.Literal("google-vertex"),
|
|
89
113
|
]),
|
|
90
114
|
),
|
|
115
|
+
baseUrl: Type.Optional(Type.String({ minLength: 1 })),
|
|
91
116
|
reasoning: Type.Optional(Type.Boolean()),
|
|
117
|
+
thinking: Type.Optional(ModelThinkingSchema),
|
|
92
118
|
input: Type.Optional(Type.Array(Type.Union([Type.Literal("text"), Type.Literal("image")]))),
|
|
93
119
|
cost: Type.Optional(
|
|
94
120
|
Type.Object({
|
|
@@ -110,6 +136,7 @@ const ModelDefinitionSchema = Type.Object({
|
|
|
110
136
|
const ModelOverrideSchema = Type.Object({
|
|
111
137
|
name: Type.Optional(Type.String({ minLength: 1 })),
|
|
112
138
|
reasoning: Type.Optional(Type.Boolean()),
|
|
139
|
+
thinking: Type.Optional(ModelThinkingSchema),
|
|
113
140
|
input: Type.Optional(Type.Array(Type.Union([Type.Literal("text"), Type.Literal("image")]))),
|
|
114
141
|
cost: Type.Optional(
|
|
115
142
|
Type.Object({
|
|
@@ -375,6 +402,7 @@ function applyModelOverride(model: Model<Api>, override: ModelOverride): Model<A
|
|
|
375
402
|
const result = { ...model };
|
|
376
403
|
if (override.name !== undefined) result.name = override.name;
|
|
377
404
|
if (override.reasoning !== undefined) result.reasoning = override.reasoning;
|
|
405
|
+
if (override.thinking !== undefined) result.thinking = override.thinking as ThinkingConfig;
|
|
378
406
|
if (override.input !== undefined) result.input = override.input as ("text" | "image")[];
|
|
379
407
|
if (override.contextWindow !== undefined) result.contextWindow = override.contextWindow;
|
|
380
408
|
if (override.maxTokens !== undefined) result.maxTokens = override.maxTokens;
|
|
@@ -392,14 +420,16 @@ function applyModelOverride(model: Model<Api>, override: ModelOverride): Model<A
|
|
|
392
420
|
result.headers = { ...model.headers, ...override.headers };
|
|
393
421
|
}
|
|
394
422
|
result.compat = mergeCompat(model.compat, override.compat);
|
|
395
|
-
return result;
|
|
423
|
+
return enrichModelThinking(result);
|
|
396
424
|
}
|
|
397
425
|
|
|
398
426
|
interface CustomModelDefinitionLike {
|
|
399
427
|
id: string;
|
|
400
428
|
name?: string;
|
|
401
429
|
api?: Api;
|
|
430
|
+
baseUrl?: string;
|
|
402
431
|
reasoning?: boolean;
|
|
432
|
+
thinking?: ThinkingConfig;
|
|
403
433
|
input?: ("text" | "image")[];
|
|
404
434
|
cost?: { input: number; output: number; cacheRead: number; cacheWrite: number };
|
|
405
435
|
contextWindow?: number;
|
|
@@ -445,13 +475,14 @@ function buildCustomModel(
|
|
|
445
475
|
const withDefaults = options.useDefaults;
|
|
446
476
|
const cost = modelDef.cost ?? (withDefaults ? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } : undefined);
|
|
447
477
|
const input = modelDef.input ?? (withDefaults ? ["text"] : undefined);
|
|
448
|
-
return {
|
|
478
|
+
return enrichModelThinking({
|
|
449
479
|
id: modelDef.id,
|
|
450
480
|
name: modelDef.name ?? (withDefaults ? modelDef.id : undefined),
|
|
451
481
|
api,
|
|
452
482
|
provider: providerName,
|
|
453
|
-
baseUrl: providerBaseUrl,
|
|
483
|
+
baseUrl: modelDef.baseUrl ?? providerBaseUrl,
|
|
454
484
|
reasoning: modelDef.reasoning ?? (withDefaults ? false : undefined),
|
|
485
|
+
thinking: modelDef.thinking as ThinkingConfig | undefined,
|
|
455
486
|
input: input as ("text" | "image")[],
|
|
456
487
|
cost,
|
|
457
488
|
contextWindow: modelDef.contextWindow ?? (withDefaults ? 128000 : undefined),
|
|
@@ -460,7 +491,7 @@ function buildCustomModel(
|
|
|
460
491
|
compat: modelDef.compat,
|
|
461
492
|
contextPromotionTarget: modelDef.contextPromotionTarget,
|
|
462
493
|
premiumMultiplier: modelDef.premiumMultiplier,
|
|
463
|
-
} as Model<Api
|
|
494
|
+
} as Model<Api>);
|
|
464
495
|
}
|
|
465
496
|
|
|
466
497
|
/**
|
|
@@ -537,7 +568,7 @@ export class ModelRegistry {
|
|
|
537
568
|
const builtInModels = this.#loadBuiltInModels(overrides, modelOverrides);
|
|
538
569
|
const combined = this.#mergeCustomModels(builtInModels, customModels);
|
|
539
570
|
|
|
540
|
-
this.#models = combined;
|
|
571
|
+
this.#models = this.#applyHardcodedModelPolicies(combined);
|
|
541
572
|
}
|
|
542
573
|
|
|
543
574
|
/** Load built-in models, applying provider and per-model overrides */
|
|
@@ -716,7 +747,7 @@ export class ModelRegistry {
|
|
|
716
747
|
: model;
|
|
717
748
|
}),
|
|
718
749
|
);
|
|
719
|
-
this.#models = this.#applyModelOverrides(merged, this.#modelOverrides);
|
|
750
|
+
this.#models = this.#applyHardcodedModelPolicies(this.#applyModelOverrides(merged, this.#modelOverrides));
|
|
720
751
|
}
|
|
721
752
|
|
|
722
753
|
async #discoverProviderModels(providerConfig: DiscoveryProviderConfig): Promise<Model<Api>[]> {
|
|
@@ -831,12 +862,57 @@ export class ModelRegistry {
|
|
|
831
862
|
}
|
|
832
863
|
}
|
|
833
864
|
|
|
865
|
+
async #discoverOllamaModelMetadata(
|
|
866
|
+
endpoint: string,
|
|
867
|
+
modelId: string,
|
|
868
|
+
headers: Record<string, string> | undefined,
|
|
869
|
+
): Promise<{ reasoning: boolean; input: ("text" | "image")[] } | null> {
|
|
870
|
+
const showUrl = `${endpoint}/api/show`;
|
|
871
|
+
try {
|
|
872
|
+
const response = await fetch(showUrl, {
|
|
873
|
+
method: "POST",
|
|
874
|
+
headers: { ...(headers ?? {}), "Content-Type": "application/json" },
|
|
875
|
+
body: JSON.stringify({ model: modelId }),
|
|
876
|
+
signal: AbortSignal.timeout(1500),
|
|
877
|
+
});
|
|
878
|
+
if (!response.ok) {
|
|
879
|
+
return null;
|
|
880
|
+
}
|
|
881
|
+
const payload = (await response.json()) as unknown;
|
|
882
|
+
if (!isRecord(payload)) {
|
|
883
|
+
return null;
|
|
884
|
+
}
|
|
885
|
+
const capabilities = payload.capabilities;
|
|
886
|
+
if (Array.isArray(capabilities)) {
|
|
887
|
+
const normalized = new Set(
|
|
888
|
+
capabilities.flatMap(capability => (typeof capability === "string" ? [capability.toLowerCase()] : [])),
|
|
889
|
+
);
|
|
890
|
+
const supportsVision = normalized.has("vision") || normalized.has("image");
|
|
891
|
+
return {
|
|
892
|
+
reasoning: normalized.has("thinking"),
|
|
893
|
+
input: supportsVision ? ["text", "image"] : ["text"],
|
|
894
|
+
};
|
|
895
|
+
}
|
|
896
|
+
if (!isRecord(capabilities)) {
|
|
897
|
+
return null;
|
|
898
|
+
}
|
|
899
|
+
const supportsVision = capabilities.vision === true || capabilities.image === true;
|
|
900
|
+
return {
|
|
901
|
+
reasoning: capabilities.thinking === true,
|
|
902
|
+
input: supportsVision ? ["text", "image"] : ["text"],
|
|
903
|
+
};
|
|
904
|
+
} catch {
|
|
905
|
+
return null;
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
|
|
834
909
|
async #discoverOllamaModels(providerConfig: DiscoveryProviderConfig): Promise<Model<Api>[]> {
|
|
835
910
|
const endpoint = this.#normalizeOllamaBaseUrl(providerConfig.baseUrl);
|
|
836
911
|
const tagsUrl = `${endpoint}/api/tags`;
|
|
912
|
+
const headers = { ...(providerConfig.headers ?? {}) };
|
|
837
913
|
try {
|
|
838
914
|
const response = await fetch(tagsUrl, {
|
|
839
|
-
headers
|
|
915
|
+
headers,
|
|
840
916
|
signal: AbortSignal.timeout(3000),
|
|
841
917
|
});
|
|
842
918
|
if (!response.ok) {
|
|
@@ -848,25 +924,34 @@ export class ModelRegistry {
|
|
|
848
924
|
return [];
|
|
849
925
|
}
|
|
850
926
|
const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
|
|
851
|
-
const
|
|
852
|
-
const discovered: Model<Api>[] = [];
|
|
853
|
-
for (const item of models) {
|
|
927
|
+
const entries = (payload.models ?? []).flatMap(item => {
|
|
854
928
|
const id = item.model || item.name;
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
929
|
+
return id ? [{ id, name: item.name || id }] : [];
|
|
930
|
+
});
|
|
931
|
+
const metadataById = new Map(
|
|
932
|
+
await Promise.all(
|
|
933
|
+
entries.map(
|
|
934
|
+
async entry =>
|
|
935
|
+
[entry.id, await this.#discoverOllamaModelMetadata(endpoint, entry.id, headers)] as const,
|
|
936
|
+
),
|
|
937
|
+
),
|
|
938
|
+
);
|
|
939
|
+
const discovered = entries.map(entry => {
|
|
940
|
+
const metadata = metadataById.get(entry.id);
|
|
941
|
+
return enrichModelThinking({
|
|
942
|
+
id: entry.id,
|
|
943
|
+
name: entry.name,
|
|
859
944
|
api: providerConfig.api,
|
|
860
945
|
provider: providerConfig.provider,
|
|
861
946
|
baseUrl: `${endpoint}/v1`,
|
|
862
|
-
reasoning: false,
|
|
863
|
-
input: ["text"],
|
|
947
|
+
reasoning: metadata?.reasoning ?? false,
|
|
948
|
+
input: metadata?.input ?? ["text"],
|
|
864
949
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
865
950
|
contextWindow: 128000,
|
|
866
951
|
maxTokens: 8192,
|
|
867
952
|
headers: providerConfig.headers,
|
|
868
953
|
});
|
|
869
|
-
}
|
|
954
|
+
});
|
|
870
955
|
return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
|
|
871
956
|
} catch (error) {
|
|
872
957
|
logger.warn("model discovery failed for provider", {
|
|
@@ -907,24 +992,26 @@ export class ModelRegistry {
|
|
|
907
992
|
for (const item of models) {
|
|
908
993
|
const id = item.id;
|
|
909
994
|
if (!id) continue;
|
|
910
|
-
discovered.push(
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
995
|
+
discovered.push(
|
|
996
|
+
enrichModelThinking({
|
|
997
|
+
id,
|
|
998
|
+
name: id,
|
|
999
|
+
api: providerConfig.api,
|
|
1000
|
+
provider: providerConfig.provider,
|
|
1001
|
+
baseUrl,
|
|
1002
|
+
reasoning: false,
|
|
1003
|
+
input: ["text"],
|
|
1004
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
1005
|
+
contextWindow: 128000,
|
|
1006
|
+
maxTokens: 8192,
|
|
1007
|
+
headers,
|
|
1008
|
+
compat: {
|
|
1009
|
+
supportsStore: false,
|
|
1010
|
+
supportsDeveloperRole: false,
|
|
1011
|
+
supportsReasoningEffort: false,
|
|
1012
|
+
},
|
|
1013
|
+
}),
|
|
1014
|
+
);
|
|
928
1015
|
}
|
|
929
1016
|
return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
|
|
930
1017
|
} catch (error) {
|
|
@@ -980,6 +1067,15 @@ export class ModelRegistry {
|
|
|
980
1067
|
});
|
|
981
1068
|
}
|
|
982
1069
|
|
|
1070
|
+
#applyHardcodedModelPolicies(models: Model<Api>[]): Model<Api>[] {
|
|
1071
|
+
return models.map(model => {
|
|
1072
|
+
if (model.id === "gpt-5.4") {
|
|
1073
|
+
return { ...model, contextWindow: 1_000_000 };
|
|
1074
|
+
}
|
|
1075
|
+
return model;
|
|
1076
|
+
});
|
|
1077
|
+
}
|
|
1078
|
+
|
|
983
1079
|
#parseModels(config: ModelsConfig): Model<Api>[] {
|
|
984
1080
|
const models: Model<Api>[] = [];
|
|
985
1081
|
|
|
@@ -997,7 +1093,7 @@ export class ModelRegistry {
|
|
|
997
1093
|
providerConfig.headers,
|
|
998
1094
|
providerConfig.apiKey,
|
|
999
1095
|
providerConfig.authHeader,
|
|
1000
|
-
modelDef,
|
|
1096
|
+
modelDef as CustomModelDefinitionLike,
|
|
1001
1097
|
{ useDefaults: true },
|
|
1002
1098
|
);
|
|
1003
1099
|
if (!model) continue;
|
|
@@ -1150,7 +1246,7 @@ export class ModelRegistry {
|
|
|
1150
1246
|
config.headers,
|
|
1151
1247
|
config.apiKey,
|
|
1152
1248
|
config.authHeader,
|
|
1153
|
-
modelDef,
|
|
1249
|
+
modelDef as CustomModelDefinitionLike,
|
|
1154
1250
|
{ useDefaults: false },
|
|
1155
1251
|
);
|
|
1156
1252
|
if (!model) {
|
|
@@ -1205,7 +1301,9 @@ export interface ProviderConfigInput {
|
|
|
1205
1301
|
id: string;
|
|
1206
1302
|
name: string;
|
|
1207
1303
|
api?: Api;
|
|
1304
|
+
baseUrl?: string;
|
|
1208
1305
|
reasoning: boolean;
|
|
1306
|
+
thinking?: ThinkingConfig;
|
|
1209
1307
|
input: ("text" | "image")[];
|
|
1210
1308
|
cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
|
|
1211
1309
|
contextWindow: number;
|