@oh-my-pi/pi-ai 14.1.2 → 14.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -2
- package/package.json +5 -3
- package/src/auth-storage.ts +18 -6
- package/src/model-thinking.ts +48 -2
- package/src/models.json +2608 -388
- package/src/provider-models/openai-compat.ts +97 -224
- package/src/providers/amazon-bedrock.ts +103 -34
- package/src/providers/anthropic.ts +44 -22
- package/src/providers/azure-openai-responses.ts +4 -4
- package/src/providers/cursor.ts +18 -12
- package/src/providers/gitlab-duo.ts +2 -21
- package/src/providers/kimi.ts +2 -22
- package/src/providers/openai-codex-responses.ts +194 -23
- package/src/providers/openai-completions.ts +22 -13
- package/src/providers/openai-responses-shared.ts +143 -18
- package/src/providers/openai-responses.ts +91 -15
- package/src/providers/shared/error-message.ts +21 -0
- package/src/providers/synthetic.ts +2 -22
- package/src/stream.ts +1 -7
- package/src/types.ts +34 -0
- package/src/usage/kimi.ts +1 -11
- package/src/usage/openai-codex.ts +1 -11
- package/src/usage/shared.ts +10 -0
- package/src/utils/anthropic-auth.ts +1 -7
- package/src/utils/foundry.ts +8 -0
- package/src/utils/http-inspector.ts +9 -2
- package/src/utils/idle-iterator.ts +29 -54
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/cerebras.ts +15 -58
- package/src/utils/oauth/google-antigravity.ts +11 -86
- package/src/utils/oauth/google-gemini-cli.ts +11 -89
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/moonshot.ts +15 -58
- package/src/utils/oauth/nanogpt.ts +14 -50
- package/src/utils/oauth/openai-codex.ts +3 -3
- package/src/utils/oauth/synthetic.ts +15 -59
- package/src/utils/oauth/together.ts +15 -58
- package/src/utils/oauth/zenmux.ts +14 -50
- package/src/utils/retry.ts +77 -0
- package/src/utils/schema/CONSTRAINTS.md +1 -0
- package/src/utils/schema/strict-mode.ts +10 -0
- package/src/utils/tool-choice.ts +7 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,40 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
### Fixed
|
|
6
|
+
|
|
7
|
+
- Fixed OpenAI Codex Spark OAuth selection to require a verified ChatGPT Pro account instead of falling back to Plus or unknown-plan accounts.
|
|
8
|
+
|
|
9
|
+
## [14.2.0] - 2026-04-23
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- Added `gpt-5.5` to the built-in model catalog for both OpenAI Responses (`openai`) and local `litellm` (`openai-completions`) providers
|
|
14
|
+
- Added `gpt-image-2` to the `litellm` built-in model catalog
|
|
15
|
+
- Added `isCopilotTransientModelError()` and `callWithCopilotModelRetry()` helpers in `utils/retry` that detect GitHub Copilot's intermittent `HTTP 400 model_not_supported` responses for preview models (`gpt-5.3-codex`, `gpt-5.4`, `gpt-5.4-mini`, ...) and retry the request up to three times with backoff. OpenAI Responses, OpenAI Completions, and Anthropic provider paths now participate in this retry when the model is served through Copilot.
|
|
16
|
+
- Added OpenAI Responses custom-tool grammar support for Codex-style `apply_patch` calls, including freeform streaming, history replay, and forced tool-choice mapping to the custom wire name.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- Updated built-in model metadata with revised `contextWindow`, `maxTokens`, and pricing values for existing entries
|
|
21
|
+
- Changed generated model policies to assign `applyPatchToolType: "freeform"` for first-party GPT-5 OpenAI Responses and Codex models, so regenerated `models.json` preserves the `apply_patch` custom-tool metadata.
|
|
22
|
+
- Renamed `rewriteCopilotAuthError` to `rewriteCopilotError` and extended it to rewrite `HTTP 400 model_not_supported` after retries are exhausted with guidance about Copilot's OAuth-client-specific rollout gap (see opencode#13313).
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- Fixed Amazon Bedrock proxy handling to honor lowercase `http_proxy`, `https_proxy`, and `all_proxy` environment variables when using HTTP/1 fallback
|
|
27
|
+
- Fixed Amazon Bedrock streaming behind corporate HTTP proxies by using a proxy-aware HTTP/1 transport when `HTTPS_PROXY`, `HTTP_PROXY`, or `ALL_PROXY` is configured, including AWS SSO credential calls.
|
|
28
|
+
- Fixed Amazon Bedrock requests to retry once with HTTP/1 when the AWS SDK's default HTTP/2 transport fails before streaming begins.
|
|
29
|
+
- Fixed OpenAI Responses streaming to display thinking tokens from local providers (llama.cpp, etc.) that send raw `reasoning_text.delta` events and empty `summary` arrays in `output_item.done`. Previously, thinking content was silently dropped during streaming while non-streaming mode worked correctly.
|
|
30
|
+
- Synced the bundled OpenCode Go catalog with the current docs so `kimi-k2.6`, `mimo-v2.5`, and `mimo-v2.5-pro` appear in offline/default model lists.
|
|
31
|
+
|
|
32
|
+
## [14.1.3] - 2026-04-17
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Preserved user-provided `session_id` and `x-client-request-id` headers in OpenAI Responses requests instead of overriding them with automatic session-derived values
|
|
37
|
+
- Stopped sending `session_id` and `x-client-request-id` headers for OpenAI Responses requests when `cacheRetention` is set to `none`
|
|
38
|
+
- Fixed direct OpenAI Responses requests to send `session_id` and `x-client-request-id` from the same session-derived value as `prompt_cache_key`, improving prompt cache affinity for append-only sessions
|
|
5
39
|
## [14.1.1] - 2026-04-14
|
|
6
40
|
|
|
7
41
|
### Added
|
|
@@ -21,6 +55,7 @@
|
|
|
21
55
|
- Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
|
|
22
56
|
|
|
23
57
|
## [14.1.0] - 2026-04-11
|
|
58
|
+
|
|
24
59
|
### Added
|
|
25
60
|
|
|
26
61
|
- Added `accountId` to usage report metadata
|
|
@@ -37,6 +72,7 @@
|
|
|
37
72
|
## [14.0.5] - 2026-04-11
|
|
38
73
|
|
|
39
74
|
### Changed
|
|
75
|
+
|
|
40
76
|
- Replaced GitHub Copilot authentication from VSCode extension impersonation to the opencode OAuth flow, eliminating TOS concerns. Existing users will need to re-authenticate once with `/login github-copilot`.
|
|
41
77
|
- Simplified Copilot token handling: GitHub OAuth token is used directly for all API requests (no JWT exchange or refresh cycle).
|
|
42
78
|
- Changed GitHub Copilot API base URL from `api.individual.githubcopilot.com` to `api.githubcopilot.com`.
|
|
@@ -48,6 +84,7 @@
|
|
|
48
84
|
- Fixed GitHub Copilot `/models` discovery to unwrap structured OAuth credentials before sending the bearer token, preserving dynamic catalog refresh for OAuth-backed callers.
|
|
49
85
|
|
|
50
86
|
### Removed
|
|
87
|
+
|
|
51
88
|
- Removed Copilot JWT proxy-ep base URL resolution (no longer needed with opencode auth).
|
|
52
89
|
|
|
53
90
|
## [14.0.3] - 2026-04-09
|
|
@@ -57,6 +94,7 @@
|
|
|
57
94
|
- Fixed Ollama discovery cache normalization so cached models upgrade to the OpenAI Responses transport after the provider change
|
|
58
95
|
|
|
59
96
|
## [14.0.0] - 2026-04-08
|
|
97
|
+
|
|
60
98
|
### Breaking Changes
|
|
61
99
|
|
|
62
100
|
- Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing
|
|
@@ -79,6 +117,7 @@
|
|
|
79
117
|
- Fixed Anthropic streaming to suppress transient SDK console errors for malformed SSE keep-alive frames so the TUI only shows surfaced provider errors
|
|
80
118
|
|
|
81
119
|
- Added environment-based credential fallback for the OpenAI Codex provider.
|
|
120
|
+
|
|
82
121
|
## [13.17.6] - 2026-04-01
|
|
83
122
|
|
|
84
123
|
### Fixed
|
|
@@ -86,6 +125,7 @@
|
|
|
86
125
|
- Fixed Anthropic first-event timeouts to exclude stream connection setup from the watchdog, preserve timeout-specific retry classification after local aborts, and reset retry state cleanly between attempts
|
|
87
126
|
|
|
88
127
|
## [13.17.5] - 2026-04-01
|
|
128
|
+
|
|
89
129
|
### Changed
|
|
90
130
|
|
|
91
131
|
- Increased default first-event timeout from 15s to 45s to better accommodate longer request setup times
|
|
@@ -124,6 +164,7 @@
|
|
|
124
164
|
- Added Vercel AI Gateway to `/login` providers for interactive API key setup
|
|
125
165
|
|
|
126
166
|
### Fixed
|
|
167
|
+
|
|
127
168
|
- Fixed `omp commit` failing with HTTP 400 errors when using reasoning-enabled models on OpenAI-compatible endpoints that don't support the `developer` role (e.g., GitHub Copilot, custom proxies). Now falls back to `system` role when `developer` is unsupported.
|
|
128
169
|
|
|
129
170
|
## [13.17.0] - 2026-03-30
|
|
@@ -148,6 +189,7 @@
|
|
|
148
189
|
- Fixed normalizeAnthropicBaseUrl returning empty string instead of undefined when baseUrl is empty
|
|
149
190
|
|
|
150
191
|
## [13.16.4] - 2026-03-28
|
|
192
|
+
|
|
151
193
|
### Added
|
|
152
194
|
|
|
153
195
|
- Added support for Groq Compound and Compound Mini models with extended context window (131K tokens) and configurable thinking levels
|
|
@@ -168,6 +210,7 @@
|
|
|
168
210
|
- Updated OpenRouter Claude 3.5 Sonnet pricing: input from 0.45 to 0.42, cache read from 0.225 to 0.21
|
|
169
211
|
|
|
170
212
|
## [13.16.3] - 2026-03-28
|
|
213
|
+
|
|
171
214
|
### Changed
|
|
172
215
|
|
|
173
216
|
- Modified OAuth credential saving to preserve unrelated identities instead of replacing all credentials for a provider
|
|
@@ -193,6 +236,7 @@
|
|
|
193
236
|
- Fixed `parseRateLimitReason` not recognizing "usage limit" in Codex error messages, causing incorrect fallback to `UNKNOWN` classification instead of `QUOTA_EXHAUSTED`
|
|
194
237
|
|
|
195
238
|
## [13.14.2] - 2026-03-21
|
|
239
|
+
|
|
196
240
|
### Changed
|
|
197
241
|
|
|
198
242
|
- Updated thinking configuration format from `levels` array to `minLevel` and `maxLevel` properties for improved clarity
|
|
@@ -215,13 +259,14 @@
|
|
|
215
259
|
- Added bundled GPT-5.4 mini model metadata for OpenAI, OpenAI Codex, and GitHub Copilot, including low-to-xhigh thinking support and GitHub Copilot premium multiplier metadata
|
|
216
260
|
- Added bundled GPT-5.4 nano model metadata for OpenAI and OpenAI Codex, including low-to-xhigh thinking support
|
|
217
261
|
|
|
218
|
-
|
|
219
262
|
## [13.13.2] - 2026-03-18
|
|
263
|
+
|
|
220
264
|
### Changed
|
|
221
265
|
|
|
222
266
|
- Modified tool result handling for aborted assistant messages to preserve existing tool results when already recorded, instead of always replacing them with synthetic 'aborted' results
|
|
223
267
|
|
|
224
268
|
## [13.13.0] - 2026-03-18
|
|
269
|
+
|
|
225
270
|
### Changed
|
|
226
271
|
|
|
227
272
|
- Changed tool argument validation to always normalize optional null values before type coercion, ensuring consistent handling of LLM-generated 'null' strings
|
|
@@ -232,6 +277,7 @@
|
|
|
232
277
|
- Improved type safety of `validateToolCall` and `validateToolArguments` functions by returning properly typed `ToolCall["arguments"]` instead of `any`
|
|
233
278
|
|
|
234
279
|
## [13.12.9] - 2026-03-17
|
|
280
|
+
|
|
235
281
|
### Changed
|
|
236
282
|
|
|
237
283
|
- Extracted OpenAI compatibility detection and resolution logic into dedicated `openai-completions-compat` module for improved maintainability and reusability
|
|
@@ -281,6 +327,7 @@
|
|
|
281
327
|
- Fixed auth schema V0-to-V1 migration crash when the V0 table lacks a `disabled` column
|
|
282
328
|
|
|
283
329
|
## [13.11.0] - 2026-03-12
|
|
330
|
+
|
|
284
331
|
### Added
|
|
285
332
|
|
|
286
333
|
- Added support for Parallel AI provider with API key authentication
|
|
@@ -296,6 +343,7 @@
|
|
|
296
343
|
- Improved retry logic to handle HTTP/2 stream errors and internal_error responses from Anthropic API
|
|
297
344
|
|
|
298
345
|
## [13.9.16] - 2026-03-10
|
|
346
|
+
|
|
299
347
|
### Added
|
|
300
348
|
|
|
301
349
|
- Support for `onPayload` callback to replace provider request payloads before sending, enabling request interception and modification
|
|
@@ -318,11 +366,13 @@
|
|
|
318
366
|
- Fixed handling of malformed JSON messages in websocket streams to trigger immediate fallback to SSE without retry attempts
|
|
319
367
|
|
|
320
368
|
## [13.9.13] - 2026-03-10
|
|
369
|
+
|
|
321
370
|
### Added
|
|
322
371
|
|
|
323
372
|
- Added `isSpecialServiceTier` utility function to validate OpenAI service tier values
|
|
324
373
|
|
|
325
374
|
## [13.9.12] - 2026-03-09
|
|
375
|
+
|
|
326
376
|
### Added
|
|
327
377
|
|
|
328
378
|
- Added Tavily web search provider support with API key authentication
|
|
@@ -355,11 +405,13 @@
|
|
|
355
405
|
- Fixed auth storage to preserve newer recorded schema versions when opened by older binaries
|
|
356
406
|
|
|
357
407
|
## [13.9.8] - 2026-03-08
|
|
408
|
+
|
|
358
409
|
### Fixed
|
|
359
410
|
|
|
360
411
|
- Fixed WebSocket stream fallback logic to safely replay buffered output over SSE when WebSocket fails after partial content has been streamed
|
|
361
412
|
|
|
362
413
|
## [13.9.4] - 2026-03-07
|
|
414
|
+
|
|
363
415
|
### Changed
|
|
364
416
|
|
|
365
417
|
- Simplified API key credential storage to always replace existing credentials on re-login instead of accumulating multiple keys
|
|
@@ -372,6 +424,7 @@
|
|
|
372
424
|
- Fixed Cerebras model compatibility by preventing `stream_options` usage requests in chat completions
|
|
373
425
|
|
|
374
426
|
## [13.9.3] - 2026-03-07
|
|
427
|
+
|
|
375
428
|
### Breaking Changes
|
|
376
429
|
|
|
377
430
|
- Changed `reasoning` parameter from `ThinkingLevel | undefined` to `Effort | undefined` in `SimpleStreamOptions`; 'off' is no longer valid (omit the field instead)
|
|
@@ -2042,4 +2095,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
2042
2095
|
|
|
2043
2096
|
## [0.9.4] - 2025-11-26
|
|
2044
2097
|
|
|
2045
|
-
Initial release with multi-provider LLM support.
|
|
2098
|
+
Initial release with multi-provider LLM support.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.1.2",
|
|
4
|
+
"version": "14.2.0",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -43,16 +43,18 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@anthropic-ai/sdk": "^0.78",
|
|
45
45
|
"@aws-sdk/client-bedrock-runtime": "^3",
|
|
46
|
+
"@aws-sdk/credential-provider-node": "^3",
|
|
46
47
|
"@bufbuild/protobuf": "^2.11",
|
|
47
48
|
"@google/genai": "^1.43",
|
|
48
|
-
"@oh-my-pi/pi-natives": "14.1.2",
|
|
49
|
-
"@oh-my-pi/pi-utils": "14.1.2",
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.2.0",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.2.0",
|
|
50
51
|
"@sinclair/typebox": "^0.34",
|
|
51
52
|
"@smithy/node-http-handler": "^4.4",
|
|
52
53
|
"ajv": "^8.18",
|
|
53
54
|
"ajv-formats": "^3.0",
|
|
54
55
|
"openai": "^6.25",
|
|
55
56
|
"partial-json": "^0.1",
|
|
57
|
+
"proxy-agent": "^6.5",
|
|
56
58
|
"zod": "4.3.6"
|
|
57
59
|
},
|
|
58
60
|
"devDependencies": {
|
package/src/auth-storage.ts
CHANGED
|
@@ -208,6 +208,10 @@ function getOpenAICodexPlanPriority(report: UsageReport | null): number {
|
|
|
208
208
|
return planType.includes("pro") ? 0 : 2;
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
+
function hasOpenAICodexProPlan(report: UsageReport | null): boolean {
|
|
212
|
+
return getUsagePlanType(report)?.includes("pro") === true;
|
|
213
|
+
}
|
|
214
|
+
|
|
211
215
|
function resolveDefaultUsageProvider(provider: Provider): UsageProvider | undefined {
|
|
212
216
|
return DEFAULT_USAGE_PROVIDER_MAP.get(provider);
|
|
213
217
|
}
|
|
@@ -1656,14 +1660,14 @@ export class AuthStorage {
|
|
|
1656
1660
|
const providerKey = this.#getProviderTypeKey(provider, "oauth");
|
|
1657
1661
|
const order = this.#getCredentialOrder(providerKey, sessionId, credentials.length);
|
|
1658
1662
|
const strategy = this.#rankingStrategyResolver?.(provider);
|
|
1659
|
-
const checkUsage = strategy !== undefined && credentials.length > 1;
|
|
1663
|
+
const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
|
|
1664
|
+
const checkUsage = strategy !== undefined && (credentials.length > 1 || requiresProModel);
|
|
1660
1665
|
const sessionCredential = this.#getSessionCredential(provider, sessionId);
|
|
1661
1666
|
const sessionPreferredIndex = sessionCredential?.type === "oauth" ? sessionCredential.index : undefined;
|
|
1662
1667
|
// Skip ranking only when the session already has a working preferred credential — re-ranking
|
|
1663
1668
|
// mid-session causes account switches that cold-start the server-side prompt cache. New sessions
|
|
1664
1669
|
// (no preference) and sessions whose preferred is blocked still rank, so we pick the account
|
|
1665
1670
|
// with the most headroom proactively and fall back intelligently when rate-limited.
|
|
1666
|
-
const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
|
|
1667
1671
|
const sessionPreferredIsAvailable =
|
|
1668
1672
|
sessionPreferredIndex !== undefined && !this.#isCredentialBlocked(providerKey, sessionPreferredIndex);
|
|
1669
1673
|
const shouldRank = checkUsage && (!sessionPreferredIsAvailable || requiresProModel);
|
|
@@ -1777,10 +1781,11 @@ export class AuthStorage {
|
|
|
1777
1781
|
return undefined;
|
|
1778
1782
|
}
|
|
1779
1783
|
|
|
1784
|
+
const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
|
|
1780
1785
|
let usage: UsageReport | null = null;
|
|
1781
1786
|
let usageChecked = false;
|
|
1782
1787
|
|
|
1783
|
-
if (checkUsage && !allowBlocked) {
|
|
1788
|
+
if ((checkUsage && !allowBlocked) || requiresProModel) {
|
|
1784
1789
|
if (usagePrechecked) {
|
|
1785
1790
|
usage = prefetchedUsage;
|
|
1786
1791
|
usageChecked = true;
|
|
@@ -1791,7 +1796,10 @@ export class AuthStorage {
|
|
|
1791
1796
|
});
|
|
1792
1797
|
usageChecked = true;
|
|
1793
1798
|
}
|
|
1794
|
-
if (usage && this.#isUsageLimitReached(usage)) {
|
|
1799
|
+
if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
|
|
1800
|
+
return undefined;
|
|
1801
|
+
}
|
|
1802
|
+
if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
|
|
1795
1803
|
const resetAtMs = this.#getUsageResetAtMs(usage, Date.now());
|
|
1796
1804
|
this.#markCredentialBlocked(
|
|
1797
1805
|
providerKey,
|
|
@@ -1829,15 +1837,19 @@ export class AuthStorage {
|
|
|
1829
1837
|
enterpriseUrl: result.newCredentials.enterpriseUrl ?? selection.credential.enterpriseUrl,
|
|
1830
1838
|
};
|
|
1831
1839
|
this.#replaceCredentialAt(provider, selection.index, updated);
|
|
1832
|
-
if (checkUsage && !allowBlocked) {
|
|
1840
|
+
if ((checkUsage && !allowBlocked) || requiresProModel) {
|
|
1833
1841
|
const sameAccount = selection.credential.accountId === updated.accountId;
|
|
1834
1842
|
if (!usageChecked || !sameAccount) {
|
|
1835
1843
|
usage = await this.#getUsageReport(provider, updated, {
|
|
1836
1844
|
...options,
|
|
1837
1845
|
timeoutMs: this.#usageRequestTimeoutMs,
|
|
1838
1846
|
});
|
|
1847
|
+
usageChecked = true;
|
|
1848
|
+
}
|
|
1849
|
+
if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
|
|
1850
|
+
return undefined;
|
|
1839
1851
|
}
|
|
1840
|
-
if (usage && this.#isUsageLimitReached(usage)) {
|
|
1852
|
+
if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
|
|
1841
1853
|
const resetAtMs = this.#getUsageResetAtMs(usage, Date.now());
|
|
1842
1854
|
this.#markCredentialBlocked(
|
|
1843
1855
|
providerKey,
|
package/src/model-thinking.ts
CHANGED
|
@@ -48,6 +48,14 @@ const CODEX_GPT_5_4_PRIORITY_BY_VARIANT: Partial<Record<OpenAIVariant, number>>
|
|
|
48
48
|
nano: 2,
|
|
49
49
|
};
|
|
50
50
|
|
|
51
|
+
const COPILOT_GENERATED_LIMITS: Record<string, { contextWindow: number; maxTokens: number }> = {
|
|
52
|
+
"claude-opus-4.6": { contextWindow: 168000, maxTokens: 32000 },
|
|
53
|
+
"gpt-5.2": { contextWindow: 272000, maxTokens: 128000 },
|
|
54
|
+
"gpt-5.4": { contextWindow: 272000, maxTokens: 128000 },
|
|
55
|
+
"gpt-5.4-mini": { contextWindow: 272000, maxTokens: 128000 },
|
|
56
|
+
"grok-code-fast-1": { contextWindow: 192000, maxTokens: 64000 },
|
|
57
|
+
};
|
|
58
|
+
|
|
51
59
|
interface GeminiModel {
|
|
52
60
|
family: "gemini";
|
|
53
61
|
kind: GeminiKind;
|
|
@@ -258,7 +266,7 @@ export function mapEffortToGoogleThinkingLevel<TApi extends Api>(
|
|
|
258
266
|
export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
|
|
259
267
|
model: ApiModel<TApi>,
|
|
260
268
|
effort: Effort,
|
|
261
|
-
): "low" | "medium" | "high" | "max" {
|
|
269
|
+
): "low" | "medium" | "high" | "xhigh" | "max" {
|
|
262
270
|
switch (requireSupportedEffort(model, effort)) {
|
|
263
271
|
case Effort.Minimal:
|
|
264
272
|
case Effort.Low:
|
|
@@ -268,12 +276,34 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
|
|
|
268
276
|
case Effort.High:
|
|
269
277
|
return "high";
|
|
270
278
|
case Effort.XHigh:
|
|
271
|
-
return "max";
|
279
|
+
// Opus 4.7+ introduced a distinct "xhigh" effort level (between "high" and "max").
|
|
280
|
+
// The Anthropic docs scope this to the Messages API only, so Bedrock Converse and
|
|
281
|
+
// older adaptive-thinking Opus 4.6 models keep the legacy "max" alias.
|
|
282
|
+
return anthropicModelHasRealXHighEffort(model) ? "xhigh" : "max";
|
|
272
283
|
}
|
|
273
284
|
}
|
|
274
285
|
|
|
286
|
+
function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
|
|
287
|
+
if (model.api !== "anthropic-messages") return false;
|
|
288
|
+
const parsedModel = parseKnownModel(model.id);
|
|
289
|
+
if (parsedModel.family !== "anthropic" || parsedModel.kind !== "opus") return false;
|
|
290
|
+
return semverGte(parsedModel.version, "4.7");
|
|
291
|
+
}
|
|
292
|
+
|
|
275
293
|
function applyGeneratedModelPolicy(model: ApiModel<Api>): void {
|
|
294
|
+
const copilotLimits = model.provider === "github-copilot" ? COPILOT_GENERATED_LIMITS[model.id] : undefined;
|
|
295
|
+
if (copilotLimits) {
|
|
296
|
+
model.contextWindow = copilotLimits.contextWindow;
|
|
297
|
+
model.maxTokens = copilotLimits.maxTokens;
|
|
298
|
+
}
|
|
299
|
+
|
|
276
300
|
const parsedModel = parseKnownModel(model.id);
|
|
301
|
+
const applyPatchToolType = inferGeneratedApplyPatchToolType(model, parsedModel);
|
|
302
|
+
if (applyPatchToolType) {
|
|
303
|
+
model.applyPatchToolType = applyPatchToolType;
|
|
304
|
+
} else {
|
|
305
|
+
delete model.applyPatchToolType;
|
|
306
|
+
}
|
|
277
307
|
if (parsedModel.family === "anthropic") {
|
|
278
308
|
applyAnthropicCatalogPolicy(model, parsedModel);
|
|
279
309
|
}
|
|
@@ -298,6 +328,22 @@ function applyAnthropicCatalogPolicy(model: ApiModel<Api>, parsedModel: Anthropi
|
|
|
298
328
|
}
|
|
299
329
|
}
|
|
300
330
|
|
|
331
|
+
function inferGeneratedApplyPatchToolType(
|
|
332
|
+
model: ApiModel<Api>,
|
|
333
|
+
parsedModel: ParsedModel,
|
|
334
|
+
): ApiModel<Api>["applyPatchToolType"] {
|
|
335
|
+
if (parsedModel.family !== "openai" || parsedModel.version.major !== 5) {
|
|
336
|
+
return undefined;
|
|
337
|
+
}
|
|
338
|
+
if (model.provider === "openai" && model.api === "openai-responses") {
|
|
339
|
+
return "freeform";
|
|
340
|
+
}
|
|
341
|
+
if (model.provider === "openai-codex" && model.api === "openai-codex-responses") {
|
|
342
|
+
return "freeform";
|
|
343
|
+
}
|
|
344
|
+
return undefined;
|
|
345
|
+
}
|
|
346
|
+
|
|
301
347
|
function applyOpenAICatalogPolicy(model: ApiModel<Api>, parsedModel: OpenAIModel): void {
|
|
302
348
|
// Codex models: 400K figure includes output budget; input window is 272K.
|
|
303
349
|
if (parsedModel.variant.startsWith("codex") && parsedModel.variant !== "codex-spark") {
|