@oh-my-pi/pi-coding-agent 13.9.2 → 13.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/CHANGELOG.md +64 -0
  2. package/examples/sdk/02-custom-model.ts +2 -1
  3. package/package.json +7 -7
  4. package/src/cli/args.ts +10 -6
  5. package/src/cli/list-models.ts +2 -2
  6. package/src/commands/launch.ts +3 -3
  7. package/src/config/model-registry.ts +136 -38
  8. package/src/config/model-resolver.ts +47 -21
  9. package/src/config/settings-schema.ts +56 -2
  10. package/src/discovery/helpers.ts +3 -3
  11. package/src/extensibility/custom-tools/types.ts +2 -0
  12. package/src/extensibility/extensions/loader.ts +3 -2
  13. package/src/extensibility/extensions/types.ts +10 -7
  14. package/src/extensibility/hooks/types.ts +2 -0
  15. package/src/main.ts +5 -22
  16. package/src/memories/index.ts +7 -3
  17. package/src/modes/components/footer.ts +10 -8
  18. package/src/modes/components/model-selector.ts +33 -38
  19. package/src/modes/components/settings-defs.ts +32 -3
  20. package/src/modes/components/settings-selector.ts +16 -5
  21. package/src/modes/components/status-line/context-thresholds.ts +68 -0
  22. package/src/modes/components/status-line/segments.ts +11 -12
  23. package/src/modes/components/status-line.ts +2 -6
  24. package/src/modes/components/thinking-selector.ts +7 -7
  25. package/src/modes/components/tree-selector.ts +3 -2
  26. package/src/modes/controllers/command-controller.ts +11 -26
  27. package/src/modes/controllers/event-controller.ts +16 -3
  28. package/src/modes/controllers/input-controller.ts +4 -2
  29. package/src/modes/controllers/selector-controller.ts +5 -4
  30. package/src/modes/interactive-mode.ts +2 -2
  31. package/src/modes/rpc/rpc-client.ts +5 -10
  32. package/src/modes/rpc/rpc-types.ts +5 -5
  33. package/src/modes/theme/theme.ts +8 -3
  34. package/src/priority.json +1 -0
  35. package/src/prompts/system/auto-handoff-threshold-focus.md +1 -0
  36. package/src/prompts/system/system-prompt.md +18 -2
  37. package/src/prompts/tools/hashline.md +139 -83
  38. package/src/sdk.ts +24 -16
  39. package/src/session/agent-session.ts +261 -118
  40. package/src/session/agent-storage.ts +14 -14
  41. package/src/session/compaction/compaction.ts +500 -13
  42. package/src/session/messages.ts +12 -1
  43. package/src/session/session-manager.ts +77 -19
  44. package/src/slash-commands/builtin-registry.ts +48 -0
  45. package/src/task/agents.ts +3 -2
  46. package/src/task/executor.ts +2 -2
  47. package/src/task/types.ts +2 -1
  48. package/src/thinking.ts +87 -0
  49. package/src/tools/browser.ts +15 -6
  50. package/src/tools/fetch.ts +118 -100
  51. package/src/tools/index.ts +2 -1
  52. package/src/web/kagi.ts +62 -7
  53. package/src/web/search/providers/exa.ts +74 -3
package/CHANGELOG.md CHANGED
@@ -2,6 +2,70 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [13.9.4] - 2026-03-07
6
+ ### Added
7
+
8
+ - Automatic detection of Ollama model capabilities including reasoning/thinking support and vision input via the `/api/show` endpoint
9
+ - Improved Kagi API error handling with extraction of detailed error messages from JSON and plain text responses
10
+
11
+ ### Changed
12
+
13
+ - Updated Kagi provider description to clarify requirement for Kagi Search API beta access
14
+
15
+ ## [13.9.3] - 2026-03-07
16
+
17
+ ### Breaking Changes
18
+
19
+ - Changed `ThinkingLevel` type to be imported from `@oh-my-pi/pi-agent-core` instead of `@oh-my-pi/pi-ai`
20
+ - Changed thinking level representation from string literals to `Effort` enum values (e.g., `Effort.High` instead of `"high"`)
21
+ - Changed `getThinkingLevel()` return type to `ThinkingLevel | undefined` to support models without thinking capability
22
+ - Changed model `reasoning` property to `thinking` property with `ThinkingConfig` for explicit effort level configuration
23
+ - Changed `thinkingLevel` in session context to be optional (`ThinkingLevel | undefined`) instead of always present
24
+
25
+ ### Added
26
+
27
+ - Added `thinking.ts` module with `getThinkingLevelMetadata()` and `resolveThinkingLevelForModel()` utilities for thinking level handling
28
+ - Added `ThinkingConfig` support to model definitions for specifying supported thinking effort levels per model
29
+ - Added `enrichModelThinking()` function to apply thinking configuration to models during registry initialization
30
+ - Added `clampThinkingLevelForModel()` function to constrain thinking levels to model-supported ranges
31
+ - Added `getSupportedEfforts()` function to retrieve available thinking efforts for a model
32
+ - Added `Effort` enum import from `@oh-my-pi/pi-ai` for type-safe thinking level representation
33
+ - Added `/fast` slash command to toggle OpenAI service tier priority mode for faster response processing
34
+ - Added `serviceTier` setting to control OpenAI processing priority (none, auto, default, flex, scale, priority)
35
+ - Added `compaction.remoteEnabled` setting to control use of remote compaction endpoints
36
+ - Added remote compaction support for OpenAI and OpenAI Codex models with encrypted reasoning preservation
37
+ - Added fast mode indicator (⚡) to model segment in status line when priority service tier is active
38
+ - Added context usage threshold levels (normal, warning, purple, error) with token-aware thresholds for better context awareness
39
+ - Added `isFastModeEnabled()`, `setFastMode()`, and `toggleFastMode()` methods to AgentSession for fast mode control
40
+
41
+ ### Changed
42
+
43
+ - Changed credential deletion to disable credentials with persisted cause instead of permanent deletion
44
+ - Changed credential deletion methods to accept a `disabledCause` parameter that tracks the reason for disabling
45
+ - Changed thinking level parsing to use `parseEffort()` from local thinking module instead of `parseThinkingLevel()` from pi-ai
46
+ - Changed model list display to show supported thinking efforts (e.g., "low,medium,high") instead of yes/no reasoning indicator
47
+ - Changed footer and status line to check `model.thinking` instead of `model.reasoning` for thinking level display
48
+ - Changed thinking selector to work with `Effort` type instead of `ThinkingLevel` for available levels
49
+ - Changed model resolver to return `undefined` for thinking level instead of `"off"` when no thinking is specified
50
+ - Changed compaction reasoning parameters to use `Effort` enum values instead of string literals
51
+ - Changed RPC types to use `Effort` for cycling thinking levels and `ThinkingLevel | undefined` for session state
52
+ - Changed theme thinking border color function to accept both `ThinkingLevel` and `Effort` types
53
+ - Changed context usage coloring in footer and status line to use token-aware thresholds instead of fixed percentages
54
+ - Changed compaction to preserve OpenAI remote compaction state and encrypted reasoning across sessions
55
+ - Changed compaction to skip emitting kept messages when using OpenAI remote compaction with preserved history
56
+ - Changed session context to include `serviceTier` field for tracking active service tier across session branches
57
+ - Changed `compact()` function to accept `remoteInstructions` option for custom remote compaction prompts
58
+ - Changed model registry to apply hardcoded policies (gpt-5.4 context window) consistently across all model loading paths
59
+
60
+ ### Fixed
61
+
62
+ - Fixed OpenAI remote compaction to correctly append incremental responses instead of replacing entire history
63
+ - Fixed thinking level display logic in main.ts to correctly check for undefined instead of "off"
64
+ - Fixed model registry to preserve explicit thinking configuration on runtime-registered models
65
+ - Fixed usage limit reset time calculation to use absolute `resetsAt` timestamps instead of deprecated `resetInMs` field
66
+ - Fixed compaction summary messages so they are no longer automatically added to the chat during compaction (now handled by the session manager)
67
+ - Fixed Kagi web search errors to surface the provider's beta-access message and clarified that Kagi search requires Search API beta access
68
+
5
69
  ## [13.9.2] - 2026-03-05
6
70
 
7
71
  ### Added
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * Shows how to select a specific model and thinking level.
5
5
  */
6
+ import { ThinkingLevel } from "@oh-my-pi/pi-agent-core";
6
7
  import { getModel } from "@oh-my-pi/pi-ai";
7
8
  import { createAgentSession, discoverAuthStorage, discoverModels } from "@oh-my-pi/pi-coding-agent";
8
9
 
@@ -32,7 +33,7 @@ console.log(
32
33
  if (available.length > 0) {
33
34
  const { session } = await createAgentSession({
34
35
  model: available[0],
35
- thinkingLevel: "medium", // off, low, medium, high
36
+ thinkingLevel: ThinkingLevel.Medium, // off, low, medium, high
36
37
  authStorage,
37
38
  modelRegistry,
38
39
  });
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-coding-agent",
4
- "version": "13.9.2",
4
+ "version": "13.9.4",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -41,12 +41,12 @@
41
41
  },
42
42
  "dependencies": {
43
43
  "@mozilla/readability": "^0.6",
44
- "@oh-my-pi/omp-stats": "13.9.2",
45
- "@oh-my-pi/pi-agent-core": "13.9.2",
46
- "@oh-my-pi/pi-ai": "13.9.2",
47
- "@oh-my-pi/pi-natives": "13.9.2",
48
- "@oh-my-pi/pi-tui": "13.9.2",
49
- "@oh-my-pi/pi-utils": "13.9.2",
44
+ "@oh-my-pi/omp-stats": "13.9.4",
45
+ "@oh-my-pi/pi-agent-core": "13.9.4",
46
+ "@oh-my-pi/pi-ai": "13.9.4",
47
+ "@oh-my-pi/pi-natives": "13.9.4",
48
+ "@oh-my-pi/pi-tui": "13.9.4",
49
+ "@oh-my-pi/pi-utils": "13.9.4",
50
50
  "@sinclair/typebox": "^0.34",
51
51
  "@xterm/headless": "^6.0",
52
52
  "ajv": "^8.18",
package/src/cli/args.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  /**
2
2
  * CLI argument parsing and help display
3
3
  */
4
- import { getAvailableThinkingLevels, parseThinkingLevel, type ThinkingLevel } from "@oh-my-pi/pi-ai";
4
+ import { type Effort, THINKING_EFFORTS } from "@oh-my-pi/pi-ai";
5
5
  import { APP_NAME, CONFIG_DIR_NAME, logger } from "@oh-my-pi/pi-utils";
6
6
  import chalk from "chalk";
7
+ import { parseEffort } from "../thinking";
7
8
  import { BUILTIN_TOOLS } from "../tools";
8
9
 
9
10
  export type Mode = "text" | "json" | "rpc";
@@ -19,7 +20,7 @@ export interface Args {
19
20
  apiKey?: string;
20
21
  systemPrompt?: string;
21
22
  appendSystemPrompt?: string;
22
- thinking?: ThinkingLevel;
23
+ thinking?: Effort;
23
24
  continue?: boolean;
24
25
  resume?: string | true;
25
26
  help?: boolean;
@@ -107,7 +108,10 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
107
108
  } else if (arg === "--no-pty") {
108
109
  result.noPty = true;
109
110
  } else if (arg === "--tools" && i + 1 < args.length) {
110
- const toolNames = args[++i].split(",").map(s => s.trim());
111
+ const toolNames = args[++i]
112
+ .split(",")
113
+ .map(s => s.trim().toLowerCase())
114
+ .filter(Boolean);
111
115
  const validTools: string[] = [];
112
116
  for (const name of toolNames) {
113
117
  if (name in BUILTIN_TOOLS) {
@@ -122,13 +126,13 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
122
126
  result.tools = validTools;
123
127
  } else if (arg === "--thinking" && i + 1 < args.length) {
124
128
  const rawThinking = args[++i];
125
- const thinking = parseThinkingLevel(rawThinking);
129
+ const thinking = parseEffort(rawThinking);
126
130
  if (thinking !== undefined) {
127
131
  result.thinking = thinking;
128
132
  } else {
129
133
  logger.warn("Invalid thinking level passed to --thinking", {
130
134
  level: rawThinking,
131
- validThinkingLevels: getAvailableThinkingLevels(),
135
+ validThinkingLevels: THINKING_EFFORTS,
132
136
  });
133
137
  }
134
138
  } else if (arg === "--print" || arg === "-p") {
@@ -207,7 +211,7 @@ export function getExtraHelpText(): string {
207
211
  MISTRAL_API_KEY - Mistral models
208
212
  ZAI_API_KEY - z.ai models (ZhipuAI/GLM)
209
213
  MINIMAX_API_KEY - MiniMax models
210
- OPENCODE_API_KEY - OpenCode models
214
+ OPENCODE_API_KEY - OpenCode Zen/OpenCode Go models
211
215
  CURSOR_ACCESS_TOKEN - Cursor AI models
212
216
  AI_GATEWAY_API_KEY - Vercel AI Gateway
213
217
 
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * List available models with optional fuzzy search
3
3
  */
4
- import type { Api, Model } from "@oh-my-pi/pi-ai";
4
+ import { type Api, getSupportedEfforts, type Model } from "@oh-my-pi/pi-ai";
5
5
  import { formatNumber } from "@oh-my-pi/pi-utils";
6
6
  import type { ModelRegistry } from "../config/model-registry";
7
7
  import { fuzzyFilter } from "../utils/fuzzy";
@@ -41,7 +41,7 @@ export async function listModels(modelRegistry: ModelRegistry, searchPattern?: s
41
41
  model: m.id,
42
42
  context: formatNumber(m.contextWindow),
43
43
  maxOut: formatNumber(m.maxTokens),
44
- thinking: m.reasoning ? "yes" : "no",
44
+ thinking: m.thinking ? getSupportedEfforts(m).join(",") : m.reasoning ? "yes" : "-",
45
45
  images: m.input.includes("image") ? "yes" : "no",
46
46
  }));
47
47
 
@@ -2,7 +2,7 @@
2
2
  * Root command for the coding agent CLI.
3
3
  */
4
4
 
5
- import { getAvailableThinkingLevels } from "@oh-my-pi/pi-ai";
5
+ import { THINKING_EFFORTS } from "@oh-my-pi/pi-ai";
6
6
  import { APP_NAME } from "@oh-my-pi/pi-utils";
7
7
  import { Args, Command, Flags } from "@oh-my-pi/pi-utils/cli";
8
8
  import { parseArgs } from "../cli/args";
@@ -86,8 +86,8 @@ export default class Index extends Command {
86
86
  description: "Comma-separated list of tools to enable (default: all)",
87
87
  }),
88
88
  thinking: Flags.string({
89
- description: `Set thinking level: ${getAvailableThinkingLevels().join(", ")}`,
90
- options: getAvailableThinkingLevels(),
89
+ description: `Set thinking level: ${THINKING_EFFORTS.join(", ")}`,
90
+ options: [...THINKING_EFFORTS],
91
91
  }),
92
92
  hook: Flags.string({
93
93
  description: "Load a hook/extension file (can be used multiple times)",
@@ -4,6 +4,7 @@ import {
4
4
  type Context,
5
5
  createModelManager,
6
6
  DEFAULT_LOCAL_TOKEN,
7
+ enrichModelThinking,
7
8
  getBundledModels,
8
9
  getBundledProviders,
9
10
  googleAntigravityModelManagerOptions,
@@ -18,10 +19,11 @@ import {
18
19
  registerCustomApi,
19
20
  registerOAuthProvider,
20
21
  type SimpleStreamOptions,
22
+ type ThinkingConfig,
21
23
  unregisterCustomApis,
22
24
  unregisterOAuthProviders,
23
25
  } from "@oh-my-pi/pi-ai";
24
- import { logger } from "@oh-my-pi/pi-utils";
26
+ import { isRecord, logger } from "@oh-my-pi/pi-utils";
25
27
  import { type Static, Type } from "@sinclair/typebox";
26
28
  import { type ConfigError, ConfigFile } from "../config";
27
29
  import type { ThemeColor } from "../modes/theme/theme";
@@ -72,6 +74,28 @@ const OpenAICompatSchema = Type.Object({
72
74
  vercelGatewayRouting: Type.Optional(VercelGatewayRoutingSchema),
73
75
  });
74
76
 
77
+ const EffortSchema = Type.Union([
78
+ Type.Literal("minimal"),
79
+ Type.Literal("low"),
80
+ Type.Literal("medium"),
81
+ Type.Literal("high"),
82
+ Type.Literal("xhigh"),
83
+ ]);
84
+
85
+ const ThinkingControlModeSchema = Type.Union([
86
+ Type.Literal("effort"),
87
+ Type.Literal("budget"),
88
+ Type.Literal("google-level"),
89
+ Type.Literal("anthropic-adaptive"),
90
+ Type.Literal("anthropic-budget-effort"),
91
+ ]);
92
+
93
+ const ModelThinkingSchema = Type.Object({
94
+ minLevel: EffortSchema,
95
+ maxLevel: EffortSchema,
96
+ mode: ThinkingControlModeSchema,
97
+ });
98
+
75
99
  // Schema for custom model definition
76
100
  // Most fields are optional with sensible defaults for local models (Ollama, LM Studio, etc.)
77
101
  const ModelDefinitionSchema = Type.Object({
@@ -88,7 +112,9 @@ const ModelDefinitionSchema = Type.Object({
88
112
  Type.Literal("google-vertex"),
89
113
  ]),
90
114
  ),
115
+ baseUrl: Type.Optional(Type.String({ minLength: 1 })),
91
116
  reasoning: Type.Optional(Type.Boolean()),
117
+ thinking: Type.Optional(ModelThinkingSchema),
92
118
  input: Type.Optional(Type.Array(Type.Union([Type.Literal("text"), Type.Literal("image")]))),
93
119
  cost: Type.Optional(
94
120
  Type.Object({
@@ -110,6 +136,7 @@ const ModelDefinitionSchema = Type.Object({
110
136
  const ModelOverrideSchema = Type.Object({
111
137
  name: Type.Optional(Type.String({ minLength: 1 })),
112
138
  reasoning: Type.Optional(Type.Boolean()),
139
+ thinking: Type.Optional(ModelThinkingSchema),
113
140
  input: Type.Optional(Type.Array(Type.Union([Type.Literal("text"), Type.Literal("image")]))),
114
141
  cost: Type.Optional(
115
142
  Type.Object({
@@ -375,6 +402,7 @@ function applyModelOverride(model: Model<Api>, override: ModelOverride): Model<A
375
402
  const result = { ...model };
376
403
  if (override.name !== undefined) result.name = override.name;
377
404
  if (override.reasoning !== undefined) result.reasoning = override.reasoning;
405
+ if (override.thinking !== undefined) result.thinking = override.thinking as ThinkingConfig;
378
406
  if (override.input !== undefined) result.input = override.input as ("text" | "image")[];
379
407
  if (override.contextWindow !== undefined) result.contextWindow = override.contextWindow;
380
408
  if (override.maxTokens !== undefined) result.maxTokens = override.maxTokens;
@@ -392,14 +420,16 @@ function applyModelOverride(model: Model<Api>, override: ModelOverride): Model<A
392
420
  result.headers = { ...model.headers, ...override.headers };
393
421
  }
394
422
  result.compat = mergeCompat(model.compat, override.compat);
395
- return result;
423
+ return enrichModelThinking(result);
396
424
  }
397
425
 
398
426
  interface CustomModelDefinitionLike {
399
427
  id: string;
400
428
  name?: string;
401
429
  api?: Api;
430
+ baseUrl?: string;
402
431
  reasoning?: boolean;
432
+ thinking?: ThinkingConfig;
403
433
  input?: ("text" | "image")[];
404
434
  cost?: { input: number; output: number; cacheRead: number; cacheWrite: number };
405
435
  contextWindow?: number;
@@ -445,13 +475,14 @@ function buildCustomModel(
445
475
  const withDefaults = options.useDefaults;
446
476
  const cost = modelDef.cost ?? (withDefaults ? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } : undefined);
447
477
  const input = modelDef.input ?? (withDefaults ? ["text"] : undefined);
448
- return {
478
+ return enrichModelThinking({
449
479
  id: modelDef.id,
450
480
  name: modelDef.name ?? (withDefaults ? modelDef.id : undefined),
451
481
  api,
452
482
  provider: providerName,
453
- baseUrl: providerBaseUrl,
483
+ baseUrl: modelDef.baseUrl ?? providerBaseUrl,
454
484
  reasoning: modelDef.reasoning ?? (withDefaults ? false : undefined),
485
+ thinking: modelDef.thinking as ThinkingConfig | undefined,
455
486
  input: input as ("text" | "image")[],
456
487
  cost,
457
488
  contextWindow: modelDef.contextWindow ?? (withDefaults ? 128000 : undefined),
@@ -460,7 +491,7 @@ function buildCustomModel(
460
491
  compat: modelDef.compat,
461
492
  contextPromotionTarget: modelDef.contextPromotionTarget,
462
493
  premiumMultiplier: modelDef.premiumMultiplier,
463
- } as Model<Api>;
494
+ } as Model<Api>);
464
495
  }
465
496
 
466
497
  /**
@@ -537,7 +568,7 @@ export class ModelRegistry {
537
568
  const builtInModels = this.#loadBuiltInModels(overrides, modelOverrides);
538
569
  const combined = this.#mergeCustomModels(builtInModels, customModels);
539
570
 
540
- this.#models = combined;
571
+ this.#models = this.#applyHardcodedModelPolicies(combined);
541
572
  }
542
573
 
543
574
  /** Load built-in models, applying provider and per-model overrides */
@@ -716,7 +747,7 @@ export class ModelRegistry {
716
747
  : model;
717
748
  }),
718
749
  );
719
- this.#models = this.#applyModelOverrides(merged, this.#modelOverrides);
750
+ this.#models = this.#applyHardcodedModelPolicies(this.#applyModelOverrides(merged, this.#modelOverrides));
720
751
  }
721
752
 
722
753
  async #discoverProviderModels(providerConfig: DiscoveryProviderConfig): Promise<Model<Api>[]> {
@@ -831,12 +862,57 @@ export class ModelRegistry {
831
862
  }
832
863
  }
833
864
 
865
+ async #discoverOllamaModelMetadata(
866
+ endpoint: string,
867
+ modelId: string,
868
+ headers: Record<string, string> | undefined,
869
+ ): Promise<{ reasoning: boolean; input: ("text" | "image")[] } | null> {
870
+ const showUrl = `${endpoint}/api/show`;
871
+ try {
872
+ const response = await fetch(showUrl, {
873
+ method: "POST",
874
+ headers: { ...(headers ?? {}), "Content-Type": "application/json" },
875
+ body: JSON.stringify({ model: modelId }),
876
+ signal: AbortSignal.timeout(1500),
877
+ });
878
+ if (!response.ok) {
879
+ return null;
880
+ }
881
+ const payload = (await response.json()) as unknown;
882
+ if (!isRecord(payload)) {
883
+ return null;
884
+ }
885
+ const capabilities = payload.capabilities;
886
+ if (Array.isArray(capabilities)) {
887
+ const normalized = new Set(
888
+ capabilities.flatMap(capability => (typeof capability === "string" ? [capability.toLowerCase()] : [])),
889
+ );
890
+ const supportsVision = normalized.has("vision") || normalized.has("image");
891
+ return {
892
+ reasoning: normalized.has("thinking"),
893
+ input: supportsVision ? ["text", "image"] : ["text"],
894
+ };
895
+ }
896
+ if (!isRecord(capabilities)) {
897
+ return null;
898
+ }
899
+ const supportsVision = capabilities.vision === true || capabilities.image === true;
900
+ return {
901
+ reasoning: capabilities.thinking === true,
902
+ input: supportsVision ? ["text", "image"] : ["text"],
903
+ };
904
+ } catch {
905
+ return null;
906
+ }
907
+ }
908
+
834
909
  async #discoverOllamaModels(providerConfig: DiscoveryProviderConfig): Promise<Model<Api>[]> {
835
910
  const endpoint = this.#normalizeOllamaBaseUrl(providerConfig.baseUrl);
836
911
  const tagsUrl = `${endpoint}/api/tags`;
912
+ const headers = { ...(providerConfig.headers ?? {}) };
837
913
  try {
838
914
  const response = await fetch(tagsUrl, {
839
- headers: { ...(providerConfig.headers ?? {}) },
915
+ headers,
840
916
  signal: AbortSignal.timeout(3000),
841
917
  });
842
918
  if (!response.ok) {
@@ -848,25 +924,34 @@ export class ModelRegistry {
848
924
  return [];
849
925
  }
850
926
  const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
851
- const models = payload.models ?? [];
852
- const discovered: Model<Api>[] = [];
853
- for (const item of models) {
927
+ const entries = (payload.models ?? []).flatMap(item => {
854
928
  const id = item.model || item.name;
855
- if (!id) continue;
856
- discovered.push({
857
- id,
858
- name: item.name || id,
929
+ return id ? [{ id, name: item.name || id }] : [];
930
+ });
931
+ const metadataById = new Map(
932
+ await Promise.all(
933
+ entries.map(
934
+ async entry =>
935
+ [entry.id, await this.#discoverOllamaModelMetadata(endpoint, entry.id, headers)] as const,
936
+ ),
937
+ ),
938
+ );
939
+ const discovered = entries.map(entry => {
940
+ const metadata = metadataById.get(entry.id);
941
+ return enrichModelThinking({
942
+ id: entry.id,
943
+ name: entry.name,
859
944
  api: providerConfig.api,
860
945
  provider: providerConfig.provider,
861
946
  baseUrl: `${endpoint}/v1`,
862
- reasoning: false,
863
- input: ["text"],
947
+ reasoning: metadata?.reasoning ?? false,
948
+ input: metadata?.input ?? ["text"],
864
949
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
865
950
  contextWindow: 128000,
866
951
  maxTokens: 8192,
867
952
  headers: providerConfig.headers,
868
953
  });
869
- }
954
+ });
870
955
  return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
871
956
  } catch (error) {
872
957
  logger.warn("model discovery failed for provider", {
@@ -907,24 +992,26 @@ export class ModelRegistry {
907
992
  for (const item of models) {
908
993
  const id = item.id;
909
994
  if (!id) continue;
910
- discovered.push({
911
- id,
912
- name: id,
913
- api: providerConfig.api,
914
- provider: providerConfig.provider,
915
- baseUrl,
916
- reasoning: false,
917
- input: ["text"],
918
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
919
- contextWindow: 128000,
920
- maxTokens: 8192,
921
- headers,
922
- compat: {
923
- supportsStore: false,
924
- supportsDeveloperRole: false,
925
- supportsReasoningEffort: false,
926
- },
927
- });
995
+ discovered.push(
996
+ enrichModelThinking({
997
+ id,
998
+ name: id,
999
+ api: providerConfig.api,
1000
+ provider: providerConfig.provider,
1001
+ baseUrl,
1002
+ reasoning: false,
1003
+ input: ["text"],
1004
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
1005
+ contextWindow: 128000,
1006
+ maxTokens: 8192,
1007
+ headers,
1008
+ compat: {
1009
+ supportsStore: false,
1010
+ supportsDeveloperRole: false,
1011
+ supportsReasoningEffort: false,
1012
+ },
1013
+ }),
1014
+ );
928
1015
  }
929
1016
  return this.#applyProviderModelOverrides(providerConfig.provider, discovered);
930
1017
  } catch (error) {
@@ -980,6 +1067,15 @@ export class ModelRegistry {
980
1067
  });
981
1068
  }
982
1069
 
1070
+ #applyHardcodedModelPolicies(models: Model<Api>[]): Model<Api>[] {
1071
+ return models.map(model => {
1072
+ if (model.id === "gpt-5.4") {
1073
+ return { ...model, contextWindow: 1_000_000 };
1074
+ }
1075
+ return model;
1076
+ });
1077
+ }
1078
+
983
1079
  #parseModels(config: ModelsConfig): Model<Api>[] {
984
1080
  const models: Model<Api>[] = [];
985
1081
 
@@ -997,7 +1093,7 @@ export class ModelRegistry {
997
1093
  providerConfig.headers,
998
1094
  providerConfig.apiKey,
999
1095
  providerConfig.authHeader,
1000
- modelDef,
1096
+ modelDef as CustomModelDefinitionLike,
1001
1097
  { useDefaults: true },
1002
1098
  );
1003
1099
  if (!model) continue;
@@ -1150,7 +1246,7 @@ export class ModelRegistry {
1150
1246
  config.headers,
1151
1247
  config.apiKey,
1152
1248
  config.authHeader,
1153
- modelDef,
1249
+ modelDef as CustomModelDefinitionLike,
1154
1250
  { useDefaults: false },
1155
1251
  );
1156
1252
  if (!model) {
@@ -1205,7 +1301,9 @@ export interface ProviderConfigInput {
1205
1301
  id: string;
1206
1302
  name: string;
1207
1303
  api?: Api;
1304
+ baseUrl?: string;
1208
1305
  reasoning: boolean;
1306
+ thinking?: ThinkingConfig;
1209
1307
  input: ("text" | "image")[];
1210
1308
  cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
1211
1309
  contextWindow: number;