@oh-my-pi/pi-ai 14.2.1 → 14.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -1
- package/package.json +16 -16
- package/src/auth-storage.ts +18 -3
- package/src/model-thinking.ts +27 -8
- package/src/models.json +61 -10
- package/src/provider-models/openai-compat.ts +56 -18
- package/src/providers/anthropic.ts +409 -26
- package/src/providers/cursor.ts +98 -12
- package/src/providers/openai-codex-responses.ts +2 -24
- package/src/providers/openai-completions.ts +99 -4
- package/src/stream.ts +1 -0
- package/src/types.ts +1 -0
- package/src/utils/schema/strict-mode.ts +2 -1
- package/src/utils/validation.ts +84 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,40 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.4.0] - 2026-04-26
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added an `examples` option to `StringEnum` to include example values in the generated schema
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Changed Anthropic tool schema generation to strip unsupported schema fields (including `patternProperties`), add `additionalProperties: false` for object types, and apply Anthropic strict-mode limits when marking tools as strict
|
|
14
|
+
- Changed Anthropic strict tool planning to cap strict `tools` at twenty entries and convert excess optional/union parameters to nullable schemas to stay within provider constraints
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
|
|
18
|
+
- Fixed Anthropic tool schema compilation failures by keeping the `write` tool out of the strict-tool allowlist when the full coding-agent tool set is active
|
|
19
|
+
- Fixed Anthropic 400 `tools.*.custom: For 'object' type, property 'minItems' is not supported` by stripping `minItems` from object-shaped JSON schema nodes (array nodes still keep supported `minItems` values)
|
|
20
|
+
- Fixed Anthropic tool schemas that used tuple-style arrays by stripping unsupported `maxItems` and only preserving provider-supported `minItems` values
|
|
21
|
+
- Fixed Anthropic and OpenRouter Anthropic tool calls that previously failed with `compiled grammar is too large` by retrying automatically without strict tool schemas and reusing non-strict mode for subsequent requests in the same provider session
|
|
22
|
+
- Fixed parsing of JSON tool arguments containing raw control characters inside string values (such as embedded newlines) by escaping them before JSON parsing
|
|
23
|
+
- Fixed `validateToolArguments` to accept stringified objects and arrays that include literal control characters inside string fields
|
|
24
|
+
- Fixed OpenAI Codex Spark OAuth selection to fall back to non-Pro accounts when no ChatGPT Pro account is connected, so users without a Pro account can still attempt Spark requests in case the server permits access.
|
|
25
|
+
|
|
26
|
+
## [14.3.0] - 2026-04-25
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
|
|
30
|
+
- Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/can1357/oh-my-pi/issues/726))
|
|
31
|
+
- Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
|
|
32
|
+
- Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
|
|
37
|
+
- Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/can1357/oh-my-pi/issues/678))
|
|
38
|
+
|
|
5
39
|
## [14.2.1] - 2026-04-24
|
|
6
40
|
|
|
7
41
|
### Fixed
|
|
@@ -2097,4 +2131,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
2097
2131
|
|
|
2098
2132
|
## [0.9.4] - 2025-11-26
|
|
2099
2133
|
|
|
2100
|
-
Initial release with multi-provider LLM support.
|
|
2134
|
+
Initial release with multi-provider LLM support.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.2.1",
|
|
4
|
+
"version": "14.4.0",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -41,24 +41,24 @@
|
|
|
41
41
|
"generate-models": "bun scripts/generate-models.ts"
|
|
42
42
|
},
|
|
43
43
|
"dependencies": {
|
|
44
|
-
"@anthropic-ai/sdk": "^0.
|
|
45
|
-
"@aws-sdk/client-bedrock-runtime": "^3",
|
|
46
|
-
"@aws-sdk/credential-provider-node": "^3",
|
|
47
|
-
"@bufbuild/protobuf": "^2.
|
|
48
|
-
"@google/genai": "^1.
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.
|
|
51
|
-
"@sinclair/typebox": "^0.34",
|
|
52
|
-
"@smithy/node-http-handler": "^4.
|
|
53
|
-
"ajv": "^8.
|
|
54
|
-
"ajv-formats": "^3.0",
|
|
55
|
-
"openai": "^6.
|
|
56
|
-
"partial-json": "^0.1",
|
|
57
|
-
"proxy-agent": "^
|
|
44
|
+
"@anthropic-ai/sdk": "^0.91.1",
|
|
45
|
+
"@aws-sdk/client-bedrock-runtime": "^3.1037.0",
|
|
46
|
+
"@aws-sdk/credential-provider-node": "^3.972.36",
|
|
47
|
+
"@bufbuild/protobuf": "^2.12.0",
|
|
48
|
+
"@google/genai": "^1.50.1",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.4.0",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.4.0",
|
|
51
|
+
"@sinclair/typebox": "^0.34.49",
|
|
52
|
+
"@smithy/node-http-handler": "^4.6.1",
|
|
53
|
+
"ajv": "^8.20.0",
|
|
54
|
+
"ajv-formats": "^3.0.1",
|
|
55
|
+
"openai": "^6.34.0",
|
|
56
|
+
"partial-json": "^0.1.7",
|
|
57
|
+
"proxy-agent": "^8.0.1",
|
|
58
58
|
"zod": "4.3.6"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3"
|
|
61
|
+
"@types/bun": "^1.3.13"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
64
|
"bun": ">=1.3.7"
|
package/src/auth-storage.ts
CHANGED
|
@@ -1711,6 +1711,11 @@ export class AuthStorage {
|
|
|
1711
1711
|
}),
|
|
1712
1712
|
);
|
|
1713
1713
|
|
|
1714
|
+
// Skip the Pro-plan filter when no candidate is confirmed Pro, so users with only
|
|
1715
|
+
// non-Pro accounts can still attempt Spark requests (e.g. trial/grandfathered access).
|
|
1716
|
+
const enforceProRequirement =
|
|
1717
|
+
requiresProModel && candidates.some(candidate => hasOpenAICodexProPlan(candidate.usage));
|
|
1718
|
+
|
|
1714
1719
|
const fallback = candidates[0];
|
|
1715
1720
|
|
|
1716
1721
|
for (const candidate of candidates) {
|
|
@@ -1719,6 +1724,7 @@ export class AuthStorage {
|
|
|
1719
1724
|
allowBlocked: false,
|
|
1720
1725
|
prefetchedUsage: candidate.usage,
|
|
1721
1726
|
usagePrechecked: candidate.usageChecked,
|
|
1727
|
+
enforceProRequirement,
|
|
1722
1728
|
});
|
|
1723
1729
|
if (apiKey) return apiKey;
|
|
1724
1730
|
}
|
|
@@ -1729,6 +1735,7 @@ export class AuthStorage {
|
|
|
1729
1735
|
allowBlocked: true,
|
|
1730
1736
|
prefetchedUsage: fallback.usage,
|
|
1731
1737
|
usagePrechecked: fallback.usageChecked,
|
|
1738
|
+
enforceProRequirement,
|
|
1732
1739
|
});
|
|
1733
1740
|
}
|
|
1734
1741
|
|
|
@@ -1774,14 +1781,22 @@ export class AuthStorage {
|
|
|
1774
1781
|
allowBlocked: boolean;
|
|
1775
1782
|
prefetchedUsage?: UsageReport | null;
|
|
1776
1783
|
usagePrechecked?: boolean;
|
|
1784
|
+
enforceProRequirement?: boolean;
|
|
1777
1785
|
},
|
|
1778
1786
|
): Promise<string | undefined> {
|
|
1779
|
-
const {
|
|
1787
|
+
const {
|
|
1788
|
+
checkUsage,
|
|
1789
|
+
allowBlocked,
|
|
1790
|
+
prefetchedUsage = null,
|
|
1791
|
+
usagePrechecked = false,
|
|
1792
|
+
enforceProRequirement,
|
|
1793
|
+
} = usageOptions;
|
|
1780
1794
|
if (!allowBlocked && this.#isCredentialBlocked(providerKey, selection.index)) {
|
|
1781
1795
|
return undefined;
|
|
1782
1796
|
}
|
|
1783
1797
|
|
|
1784
1798
|
const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
|
|
1799
|
+
const applyProFilter = enforceProRequirement ?? requiresProModel;
|
|
1785
1800
|
let usage: UsageReport | null = null;
|
|
1786
1801
|
let usageChecked = false;
|
|
1787
1802
|
|
|
@@ -1796,7 +1811,7 @@ export class AuthStorage {
|
|
|
1796
1811
|
});
|
|
1797
1812
|
usageChecked = true;
|
|
1798
1813
|
}
|
|
1799
|
-
if (
|
|
1814
|
+
if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
|
|
1800
1815
|
return undefined;
|
|
1801
1816
|
}
|
|
1802
1817
|
if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
|
|
@@ -1846,7 +1861,7 @@ export class AuthStorage {
|
|
|
1846
1861
|
});
|
|
1847
1862
|
usageChecked = true;
|
|
1848
1863
|
}
|
|
1849
|
-
if (
|
|
1864
|
+
if (applyProFilter && !hasOpenAICodexProPlan(usage)) {
|
|
1850
1865
|
return undefined;
|
|
1851
1866
|
}
|
|
1852
1867
|
if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
|
package/src/model-thinking.ts
CHANGED
|
@@ -154,19 +154,27 @@ export function applyGeneratedModelPolicies(models: ApiModel<Api>[]): void {
|
|
|
154
154
|
}
|
|
155
155
|
|
|
156
156
|
/**
|
|
157
|
-
* Link
|
|
157
|
+
* Link OpenAI model variants to their context promotion targets.
|
|
158
158
|
*
|
|
159
|
-
* When a
|
|
160
|
-
*
|
|
161
|
-
* spark
|
|
159
|
+
* When a model's context is exhausted, the agent can promote to a sibling
|
|
160
|
+
* model with a larger context window on the same provider:
|
|
161
|
+
* - `-spark` variants promote to `gpt-5.5`.
|
|
162
|
+
* - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
|
|
162
163
|
*/
|
|
163
|
-
export function
|
|
164
|
+
export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
|
|
164
165
|
for (const candidate of models) {
|
|
165
166
|
const parsedCandidate = parseKnownModel(candidate.id);
|
|
166
|
-
if (parsedCandidate.family !== "openai"
|
|
167
|
-
|
|
167
|
+
if (parsedCandidate.family !== "openai") continue;
|
|
168
|
+
let targetId: string | undefined;
|
|
169
|
+
if (parsedCandidate.variant === "codex-spark") {
|
|
170
|
+
targetId = "gpt-5.5";
|
|
171
|
+
} else if (parsedCandidate.variant === "base" && semverEqual(parsedCandidate.version, "5.5")) {
|
|
172
|
+
targetId = "gpt-5.4";
|
|
173
|
+
} else {
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
168
176
|
const fallback = models.find(
|
|
169
|
-
model => model.provider === candidate.provider && model.api === candidate.api && model.id ===
|
|
177
|
+
model => model.provider === candidate.provider && model.api === candidate.api && model.id === targetId,
|
|
170
178
|
);
|
|
171
179
|
if (!fallback) continue;
|
|
172
180
|
candidate.contextPromotionTarget = `${fallback.provider}/${fallback.id}`;
|
|
@@ -283,6 +291,17 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
|
|
|
283
291
|
}
|
|
284
292
|
}
|
|
285
293
|
|
|
294
|
+
/**
|
|
295
|
+
* Returns true for Anthropic models with Opus 4.7 API restrictions:
|
|
296
|
+
* - Sampling parameters (temperature/top_p/top_k) return 400 error
|
|
297
|
+
* - Thinking content is omitted by default (needs display: "summarized")
|
|
298
|
+
*/
|
|
299
|
+
export function hasOpus47ApiRestrictions(modelId: string): boolean {
|
|
300
|
+
const parsed = parseAnthropicModel(getCanonicalModelId(modelId));
|
|
301
|
+
if (!parsed) return false;
|
|
302
|
+
return semverGte(parsed.version, "4.7") && parsed.kind === "opus";
|
|
303
|
+
}
|
|
304
|
+
|
|
286
305
|
function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
|
|
287
306
|
if (model.api !== "anthropic-messages") return false;
|
|
288
307
|
const parsedModel = parseKnownModel(model.id);
|
package/src/models.json
CHANGED
|
@@ -4751,6 +4751,56 @@
|
|
|
4751
4751
|
}
|
|
4752
4752
|
}
|
|
4753
4753
|
},
|
|
4754
|
+
"deepseek": {
|
|
4755
|
+
"deepseek-v4-flash": {
|
|
4756
|
+
"id": "deepseek-v4-flash",
|
|
4757
|
+
"name": "DeepSeek V4 Flash",
|
|
4758
|
+
"api": "openai-completions",
|
|
4759
|
+
"provider": "deepseek",
|
|
4760
|
+
"baseUrl": "https://api.deepseek.com",
|
|
4761
|
+
"reasoning": true,
|
|
4762
|
+
"input": [
|
|
4763
|
+
"text"
|
|
4764
|
+
],
|
|
4765
|
+
"cost": {
|
|
4766
|
+
"input": 0.14,
|
|
4767
|
+
"output": 0.28,
|
|
4768
|
+
"cacheRead": 0,
|
|
4769
|
+
"cacheWrite": 0
|
|
4770
|
+
},
|
|
4771
|
+
"contextWindow": 1000000,
|
|
4772
|
+
"maxTokens": 384000,
|
|
4773
|
+
"thinking": {
|
|
4774
|
+
"mode": "effort",
|
|
4775
|
+
"minLevel": "minimal",
|
|
4776
|
+
"maxLevel": "high"
|
|
4777
|
+
}
|
|
4778
|
+
},
|
|
4779
|
+
"deepseek-v4-pro": {
|
|
4780
|
+
"id": "deepseek-v4-pro",
|
|
4781
|
+
"name": "DeepSeek V4 Pro",
|
|
4782
|
+
"api": "openai-completions",
|
|
4783
|
+
"provider": "deepseek",
|
|
4784
|
+
"baseUrl": "https://api.deepseek.com",
|
|
4785
|
+
"reasoning": true,
|
|
4786
|
+
"input": [
|
|
4787
|
+
"text"
|
|
4788
|
+
],
|
|
4789
|
+
"cost": {
|
|
4790
|
+
"input": 1.74,
|
|
4791
|
+
"output": 3.48,
|
|
4792
|
+
"cacheRead": 0,
|
|
4793
|
+
"cacheWrite": 0
|
|
4794
|
+
},
|
|
4795
|
+
"contextWindow": 1000000,
|
|
4796
|
+
"maxTokens": 384000,
|
|
4797
|
+
"thinking": {
|
|
4798
|
+
"mode": "effort",
|
|
4799
|
+
"minLevel": "minimal",
|
|
4800
|
+
"maxLevel": "high"
|
|
4801
|
+
}
|
|
4802
|
+
}
|
|
4803
|
+
},
|
|
4754
4804
|
"github-copilot": {
|
|
4755
4805
|
"claude-haiku-4.5": {
|
|
4756
4806
|
"id": "claude-haiku-4.5",
|
|
@@ -16931,7 +16981,7 @@
|
|
|
16931
16981
|
},
|
|
16932
16982
|
"contextWindow": 128000,
|
|
16933
16983
|
"maxTokens": 128000,
|
|
16934
|
-
"contextPromotionTarget": "litellm/gpt-5.
|
|
16984
|
+
"contextPromotionTarget": "litellm/gpt-5.5",
|
|
16935
16985
|
"thinking": {
|
|
16936
16986
|
"mode": "effort",
|
|
16937
16987
|
"minLevel": "low",
|
|
@@ -17011,7 +17061,8 @@
|
|
|
17011
17061
|
"mode": "effort",
|
|
17012
17062
|
"minLevel": "low",
|
|
17013
17063
|
"maxLevel": "xhigh"
|
|
17014
|
-
}
|
|
17064
|
+
},
|
|
17065
|
+
"contextPromotionTarget": "litellm/gpt-5.4"
|
|
17015
17066
|
},
|
|
17016
17067
|
"gpt-image-2": {
|
|
17017
17068
|
"id": "gpt-image-2",
|
|
@@ -32938,7 +32989,7 @@
|
|
|
32938
32989
|
"maxLevel": "xhigh"
|
|
32939
32990
|
},
|
|
32940
32991
|
"applyPatchToolType": "freeform",
|
|
32941
|
-
"contextPromotionTarget": "openai/gpt-5.
|
|
32992
|
+
"contextPromotionTarget": "openai/gpt-5.5"
|
|
32942
32993
|
},
|
|
32943
32994
|
"gpt-5.4": {
|
|
32944
32995
|
"id": "gpt-5.4",
|
|
@@ -33068,7 +33119,8 @@
|
|
|
33068
33119
|
"minLevel": "low",
|
|
33069
33120
|
"maxLevel": "xhigh"
|
|
33070
33121
|
},
|
|
33071
|
-
"applyPatchToolType": "freeform"
|
|
33122
|
+
"applyPatchToolType": "freeform",
|
|
33123
|
+
"contextPromotionTarget": "openai/gpt-5.4"
|
|
33072
33124
|
},
|
|
33073
33125
|
"o1": {
|
|
33074
33126
|
"id": "o1",
|
|
@@ -33597,7 +33649,7 @@
|
|
|
33597
33649
|
},
|
|
33598
33650
|
"contextWindow": 128000,
|
|
33599
33651
|
"maxTokens": 128000,
|
|
33600
|
-
"contextPromotionTarget": "openai-codex/gpt-5.
|
|
33652
|
+
"contextPromotionTarget": "openai-codex/gpt-5.5",
|
|
33601
33653
|
"thinking": {
|
|
33602
33654
|
"mode": "effort",
|
|
33603
33655
|
"minLevel": "low",
|
|
@@ -33715,7 +33767,8 @@
|
|
|
33715
33767
|
"minLevel": "low",
|
|
33716
33768
|
"maxLevel": "xhigh"
|
|
33717
33769
|
},
|
|
33718
|
-
"applyPatchToolType": "freeform"
|
|
33770
|
+
"applyPatchToolType": "freeform",
|
|
33771
|
+
"contextPromotionTarget": "openai-codex/gpt-5.4"
|
|
33719
33772
|
}
|
|
33720
33773
|
},
|
|
33721
33774
|
"opencode": {
|
|
@@ -33765,8 +33818,7 @@
|
|
|
33765
33818
|
"mode": "effort",
|
|
33766
33819
|
"minLevel": "low",
|
|
33767
33820
|
"maxLevel": "xhigh"
|
|
33768
|
-
}
|
|
33769
|
-
"contextPromotionTarget": "opencode/gpt-5.3-codex"
|
|
33821
|
+
}
|
|
33770
33822
|
},
|
|
33771
33823
|
"gpt-5.4": {
|
|
33772
33824
|
"id": "gpt-5.4",
|
|
@@ -34828,8 +34880,7 @@
|
|
|
34828
34880
|
"mode": "effort",
|
|
34829
34881
|
"minLevel": "low",
|
|
34830
34882
|
"maxLevel": "xhigh"
|
|
34831
|
-
}
|
|
34832
|
-
"contextPromotionTarget": "opencode-zen/gpt-5.3-codex"
|
|
34883
|
+
}
|
|
34833
34884
|
},
|
|
34834
34885
|
"gpt-5.4": {
|
|
34835
34886
|
"id": "gpt-5.4",
|
|
package/src/provider-models/openai-compat.ts
CHANGED
|
@@ -246,26 +246,64 @@ async function fetchOllamaNativeModels(baseUrl: string): Promise<Model<"openai-r
|
|
|
246
246
|
}
|
|
247
247
|
const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
|
|
248
248
|
const entries = payload.models ?? [];
|
|
249
|
-
const
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
249
|
+
const resolved = await Promise.all(
|
|
250
|
+
entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
|
|
251
|
+
const id = entry.model ?? entry.name;
|
|
252
|
+
if (!id) return null;
|
|
253
|
+
const { contextWindow, maxTokens } = await fetchOllamaModelLimits(nativeBaseUrl, id);
|
|
254
|
+
return {
|
|
255
|
+
id,
|
|
256
|
+
name: entry.name ?? id,
|
|
257
|
+
api: "openai-responses",
|
|
258
|
+
provider: "ollama",
|
|
259
|
+
baseUrl,
|
|
260
|
+
reasoning: false,
|
|
261
|
+
input: ["text"],
|
|
262
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
263
|
+
contextWindow,
|
|
264
|
+
maxTokens,
|
|
265
|
+
};
|
|
266
|
+
}),
|
|
267
|
+
);
|
|
268
|
+
const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
|
|
269
|
+
return models.sort((left, right) => left.id.localeCompare(right.id));
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/** Ollama's default `num_ctx` when the runtime request does not override it. */
|
|
273
|
+
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 4096;
|
|
274
|
+
/** Cap max output tokens at a value that matches OMP's other openai-responses defaults. */
|
|
275
|
+
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Query Ollama's `/api/show` endpoint for a single model and pull its native
|
|
279
|
+
* context length out of `model_info.<arch>.context_length`. Falls back to
|
|
280
|
+
* Ollama's default context window when the endpoint or field is unavailable
|
|
281
|
+
* so discovery still succeeds against older Ollama builds.
|
|
282
|
+
*/
|
|
283
|
+
async function fetchOllamaModelLimits(
|
|
284
|
+
nativeBaseUrl: string,
|
|
285
|
+
modelId: string,
|
|
286
|
+
): Promise<{ contextWindow: number; maxTokens: number }> {
|
|
287
|
+
try {
|
|
288
|
+
const response = await fetch(`${nativeBaseUrl}/api/show`, {
|
|
289
|
+
method: "POST",
|
|
290
|
+
headers: { "Content-Type": "application/json", Accept: "application/json" },
|
|
291
|
+
body: JSON.stringify({ model: modelId }),
|
|
266
292
|
});
|
|
293
|
+
if (!response.ok) {
|
|
294
|
+
return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
|
|
295
|
+
}
|
|
296
|
+
const payload = (await response.json()) as { model_info?: Record<string, unknown> };
|
|
297
|
+
const info = payload.model_info ?? {};
|
|
298
|
+
for (const [key, value] of Object.entries(info)) {
|
|
299
|
+
if (key.endsWith(".context_length") && typeof value === "number" && value > 0) {
|
|
300
|
+
return { contextWindow: value, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
} catch {
|
|
304
|
+
// fall through to default
|
|
267
305
|
}
|
|
268
|
-
return
|
|
306
|
+
return { contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
|
|
269
307
|
}
|
|
270
308
|
|
|
271
309
|
const OPENAI_NON_RESPONSES_PREFIXES = [
|