@oh-my-pi/pi-catalog 15.12.3 → 15.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -75
- package/dist/types/identity/classify.d.ts +21 -3
- package/dist/types/identity/family.d.ts +22 -0
- package/dist/types/model-manager.d.ts +2 -0
- package/dist/types/provider-models/descriptors.d.ts +8 -8
- package/dist/types/provider-models/openai-compat.d.ts +2 -1
- package/dist/types/types.d.ts +8 -2
- package/dist/types/utils.d.ts +3 -0
- package/dist/types/wire/gemini-headers.d.ts +1 -0
- package/package.json +4 -4
- package/src/compat/anthropic.ts +7 -1
- package/src/compat/openai.ts +1 -0
- package/src/discovery/antigravity.ts +1 -1
- package/src/discovery/codex.ts +1 -1
- package/src/discovery/cursor.ts +1 -1
- package/src/discovery/gemini.ts +6 -5
- package/src/discovery/openai-compatible.ts +3 -4
- package/src/identity/classify.ts +59 -6
- package/src/identity/equivalence.ts +2 -2
- package/src/identity/family.ts +54 -1
- package/src/identity/reference.ts +2 -2
- package/src/model-cache.ts +9 -5
- package/src/model-manager.ts +14 -9
- package/src/models.json +3263 -2713
- package/src/provider-models/bundled-references.ts +2 -2
- package/src/provider-models/descriptors.ts +8 -8
- package/src/provider-models/openai-compat.ts +181 -75
- package/src/types.ts +8 -2
- package/src/utils.ts +9 -1
- package/src/variant-collapse.ts +7 -2
- package/src/wire/gemini-headers.ts +2 -0
- package/dist/types/provider-models/discovery-constants.d.ts +0 -11
- package/src/provider-models/discovery-constants.ts +0 -11
package/CHANGELOG.md
CHANGED
|
@@ -2,90 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
-
## [15.11.8] - 2026-06-12
|
|
6
|
-
|
|
7
|
-
### Fixed
|
|
8
|
-
|
|
9
|
-
- Fixed Antigravity `gemini-3.1-pro --thinking high` failing with `Cloud Code Assist API error (400): Request contains an invalid argument.` — the upstream `gemini-3.1-pro-high` deployment rejects every `streamGenerateContent` request on both CCA endpoints while discovery still advertises it. High effort now routes to `gemini-pro-agent` (the same "Gemini 3.1 Pro (High)" model, verified accepting the identical request body), and the model-cache fingerprint version was bumped (`merge-v2` → `merge-v3`) so existing fresh caches refetch discovery and pick up the corrected routing immediately.
|
|
10
|
-
|
|
11
|
-
## [15.11.7] - 2026-06-12
|
|
12
5
|
### Added
|
|
13
6
|
|
|
7
|
+
- Added `modelFamilyToken(modelId)` to `@oh-my-pi/pi-catalog/identity`: a coarse vendor-lineage token (`anthropic`/`openai`/`gemini`/`kimi`/…) for "are two models the same family?" comparisons, backed by `parseKnownModel` canonical-id normalization. Opaque and comparison-only; kind/variant collapsed onto the vendor token ([#2406](https://github.com/can1357/oh-my-pi/issues/2406))
|
|
8
|
+
- Added GLM-5.2 to the bundled zai (GLM Coding Plan) catalog as the selectable 1M served model.
|
|
9
|
+
- Added bundled Fireworks models `deepseek-v4-flash`, `kimi-k2.7-code`, `minimax-m2.5`, `minimax-m3`, `nemotron-3-ultra-nvfp4`, `qwen3.6-plus`, and `qwen3.7-plus`
|
|
10
|
+
- Changed
|
|
14
11
|
- Added effort-tier variant collapsing (`variant-collapse`): providers that expose one logical model as several effort/thinking-suffixed upstream ids (Antigravity CCA `gemini-3.5-flash-extra-low`/`-low`/`gemini-3-flash-agent`, `gemini-3[.1]-pro-low|high`, `claude-*[-thinking]` pairs, `gpt-oss-120b-medium`) collapse into one logical entry carrying per-effort upstream routing in `thinking.effortRouting` (plus `thinking.suppressWhenOff` for Cloud Code Assist ids whose baked server default re-applies when `thinkingConfig` is omitted). Request-time code resolves the outbound id via `resolveWireModelId(model, effort)`; selection, caching, and usage attribution key on the logical id.
|
|
15
12
|
- Added the automatic `X`/`X-thinking` pair rule (`deriveThinkingPairFamilies`): any provider's live bare/thinking twin collapses into the bare id, routing thinking-enabled requests to the `-thinking` backing id (trailing or infix token, so `kimi-k2-thinking-turbo` pairs with `kimi-k2-turbo`). Gated on same api and compatible pricing — all-zero cost rows count as unknown, while twins that both carry real, differing prices remain separate SKUs.
|
|
16
13
|
- Added `collapseBuiltModelVariants` and wired collapsing at every materialization point — Antigravity discovery, the catalog generator, and the model-manager merge — so stale sources (old static beside collapsed dynamic results, mixed cache rows) converge on logical entries instead of unioning raw tier ids back into the catalog.
|
|
17
14
|
- Added `thinking.requiresEffort`, baked for reasoning-only upstreams — Gemini 3.x (levels only, no off), Gemini 2.5 Pro (thinkingBudget floors at 128, rejects 0), OpenAI o-series, MiniMax M2, and thinking-variant SKUs (`*-thinking`/`*-reasoner`/`*-reasoning`, with a negation-aware token grammar so `non-thinking` ids never match). Identity derivation bakes it for new entries and `fillThinkingWireDefaults` backfills explicit/cached metadata; `minimumSupportedEffort` exposes the canonical floor. Pair-collapsed twins drop member flags (their off routes to the bare SKU), while identity re-flags pairs whose logical id is itself mandatory
|
|
18
|
-
|
|
19
|
-
### Changed
|
|
20
|
-
|
|
21
|
-
- Changed model display names to drop model-extrinsic decorations: gateway author prefixes (`OpenAI: …`, `Google: …`), `(latest)` alias markers, `(Antigravity)` provider attribution, price tiers (`($$$$)`), and promo/lifecycle tags (`(20% off)`, `(retires …)`). `cleanModelName` is applied in `buildModel` (covers live discovery and stale caches) and as a catalog-generator pass; Antigravity discovery no longer appends `(Antigravity)` to display names. Variant tags that map to distinct wire ids (`(Thinking)`, `(free)`, `(Fast)`, dates, regions) are preserved.
|
|
22
|
-
- Changed the `google-antigravity` default model from `gemini-3-pro-high` to `gemini-3.1-pro`
|
|
23
|
-
- Changed `gemini-2.5-flash-thinking` handling from discovery-denylist to collapsing into `gemini-2.5-flash` (thinking-enabled requests route to the `-thinking` backing id)
|
|
24
|
-
- Bumped the model cache schema to v5 so rows predating effort-tier variant collapsing (raw `-low`/`-high`/`-thinking` member ids) are invalidated
|
|
25
|
-
|
|
26
|
-
### Fixed
|
|
27
|
-
|
|
28
|
-
- Fixed catalog generation to apply effort-tier variant collapsing before provider grouping to ensure collapsed model families are consistently materialized without being impacted by in-loop mutation
|
|
29
|
-
- Fixed Kimi K2.6 OpenAI-compatible compat metadata to use a 300s stream watchdog floor, covering Fire Pass router ids as well as public `kimi-k2.6` ids so long reasoning starts do not hit the generic first-event timeout ([#2366](https://github.com/can1357/oh-my-pi/issues/2366)).
|
|
30
|
-
|
|
31
|
-
## [15.11.4] - 2026-06-12
|
|
32
|
-
|
|
33
|
-
### Fixed
|
|
34
|
-
|
|
35
|
-
- Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and Fireworks' `minimal → none` wire mapping, so `fireworks/minimax-m2.7` as the smol auto-thinking classifier model 400ed on every turn. OpenAI-compatible provider effort maps (`Groq qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) now bake into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. Regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
|
|
36
|
-
### Added
|
|
37
|
-
|
|
38
15
|
- Added `requiresJuiceZeroHack` Responses-API compat flag, resolved by `buildOpenAIResponsesCompat` from GPT-5-family model names and overridable via sparse model `compat` config. Replaces the request-time `model.name.startsWith("gpt-5")` sniff that gated the trailing `# Juice: 0 !important` no-reasoning developer item.
|
|
39
|
-
|
|
40
|
-
## [15.11.3] - 2026-06-11
|
|
41
|
-
### Added
|
|
42
|
-
|
|
43
16
|
- Added `requestModelId` on `Model` to represent the upstream model id used when a catalog entry is a local variant
|
|
44
17
|
- Added synthetic GitHub Copilot long-context model variants with `-1m` suffixes when tiered token pricing is advertised
|
|
45
|
-
|
|
46
|
-
### Changed
|
|
47
|
-
|
|
48
|
-
- Changed GitHub Copilot discovery to request `X-GitHub-Api-Version: 2026-06-01` from `api.githubcopilot.com`
|
|
49
|
-
- Changed GitHub Copilot discovery to cap base model `contextWindow` to the default token tier and keep long-context access as the separate `-1m` model entry
|
|
50
|
-
- Changed Copilot model mapping to omit non-chat `/models` entries and enable image input for models whose capabilities indicate vision support
|
|
51
|
-
|
|
52
|
-
### Fixed
|
|
53
|
-
|
|
54
|
-
- Fixed long-context variant pricing to use `billing.token_prices.long_context` rates instead of default model pricing
|
|
55
|
-
- Fixed `mapModel` handling in OpenAI-compatible discovery so returning `null` now skips a model entry rather than falling back to defaults
|
|
56
|
-
- Fixed model ID precedence so a real upstream Copilot model id is kept when it conflicts with a synthesized `-1m` variant
|
|
57
|
-
|
|
58
|
-
## [15.11.1] - 2026-06-11
|
|
59
|
-
|
|
60
|
-
### Fixed
|
|
61
|
-
|
|
62
|
-
- Fixed NVIDIA NIM Qwen turns failing with `400 Validation: Unsupported parameter(s): enable_thinking`. NIM's chat-completions schema is `additionalProperties: false` and exposes thinking via the vLLM convention `chat_template_kwargs.enable_thinking`; `buildOpenAICompat` was sending top-level `enable_thinking` for every `qwen/*` id regardless of host. Registered `nvidia` as a known host (`integrate.api.nvidia.com`) and routed NVIDIA-hosted Qwen models to `thinkingFormat: "qwen-chat-template"` ([#2299](https://github.com/can1357/oh-my-pi/issues/2299)).
|
|
63
|
-
- Fixed Moonshot/Kimi native OpenAI-compatible request metadata so Kimi K2 uses `max_tokens` and omits OpenAI-only `store`, restoring first-turn output with `MOONSHOT_API_KEY` ([#2289](https://github.com/can1357/oh-my-pi/issues/2289)).
|
|
64
|
-
|
|
65
|
-
## [15.11.0] - 2026-06-10
|
|
66
|
-
|
|
67
|
-
### Fixed
|
|
68
|
-
|
|
69
|
-
- Fixed `buildModel` so malformed explicit thinking metadata without `efforts` is treated as sparse input and inferred instead of crashing during model resolution ([#2251](https://github.com/can1357/oh-my-pi/issues/2251)).
|
|
70
|
-
|
|
71
|
-
## [15.10.12] - 2026-06-10
|
|
72
|
-
|
|
73
|
-
### Added
|
|
74
|
-
|
|
75
18
|
- Added `grok-composer-2.5-fast` (Cursor "Composer 2.5 Fast") to the xAI Grok OAuth (SuperGrok) catalog: non-reasoning, text-only, 200K context.
|
|
76
|
-
|
|
77
|
-
### Changed
|
|
78
|
-
|
|
79
|
-
- Set every xAI Grok OAuth (SuperGrok) curated model's max output tokens to mirror its context window (`grok-build`, `grok-4.3`, `grok-4.20-0309-{reasoning,non-reasoning}`, `grok-4.20-multi-agent-0309`, `grok-composer-2.5-fast`), replacing the `8888` `UNK_MAX_TOKENS` placeholder (and a stale `30000` on three grok-4.x entries). xAI's OAuth `/v1/models` reports no per-request output limit, so the curated catalog now owns `maxTokens` like `contextWindow`, deterministic on both the static-seed and online-overlay paths; the `openai-responses` wire still clamps the actual request to `OPENAI_MAX_OUTPUT_TOKENS` (64k).
|
|
80
|
-
|
|
81
|
-
### Fixed
|
|
82
|
-
|
|
83
|
-
- Excluded zero-cost `xai-oauth` subscription entries from the model reference indexes (`buildModelReferenceIndex`, `createReferenceResolver`), so their zero pricing and context-window-sized `maxTokens` cannot outrank paid/public Grok references when resolving custom-provider model identities.
|
|
84
|
-
|
|
85
|
-
## [15.10.11] - 2026-06-10
|
|
86
|
-
|
|
87
|
-
### Added
|
|
88
|
-
|
|
89
19
|
- Added `hostMatchesUrl`, `modelMatchesHost`, and endpoint-shape helpers in the new `hosts` module for consistent provider/baseUrl matching
|
|
90
20
|
- `buildModel(spec)` (`build.ts`) is now the single Model constructor: it materializes the fully-resolved compat record and canonical thinking metadata exactly once (compat first, thinking derived from identity + resolved compat), so `Model.compat` is a required, complete `CompatOf<TApi>` (`ResolvedOpenAICompat`/`ResolvedOpenAIResponsesCompat`/`ResolvedAnthropicCompat`) and request-path code reads fields with zero URL parsing and zero per-request allocation. Sparse user/config overrides live on the new `ModelSpec<TApi>` input shape and survive on `Model.compatConfig` for introspection.
|
|
91
21
|
- Added `ResolvedAnthropicCompat.supportsSamplingParams` (Opus 4.7+/Fable/Mythos reject `temperature`/`top_p`/`top_k` with a 400), baked at build time from model identity so the request path stops re-parsing model ids.
|
|
@@ -97,6 +27,21 @@
|
|
|
97
27
|
|
|
98
28
|
### Changed
|
|
99
29
|
|
|
30
|
+
- Changed a cataloged model's per-token pricing to input 0.09 and output 0.18
|
|
31
|
+
- Changed the same cataloged model’s maximum token limit from 384000 to 65536
|
|
32
|
+
- Pinned zai `glm-5.2` to 1M context during catalog generation so endpoint discovery and older fallbacks cannot regress it to 200k.
|
|
33
|
+
- Replaced the hand-maintained `zhipu-coding-plan` GLM reasoning allowlist and vision regex with a `parseGlmModel` family classifier in `identity/classify.ts` (variant + vision + version), surfaced as `isReasoningGlmModelId` / `isGlmVisionModelId`. Discovery now derives reasoning/vision capability from the GLM family instead of a per-id list, so newly-bumped integers (`glm-5.3`, `glm-6`, …) are covered automatically while `-flash`/`-preview` and the vision `…v` shape stay correctly classified.
|
|
34
|
+
- Model `contextWindow`/`maxTokens` are now `number | null`; discovery emits `null` when a provider reports no limit, replacing the `222222`/`8888` (`UNK_CONTEXT_WINDOW`/`UNK_MAX_TOKENS`) sentinels (now removed). Bundled `models.json` unknown limits are `null`.
|
|
35
|
+
- Changed the `github-copilot` model context window to `524288` tokens
|
|
36
|
+
- Changed Fireworks model discovery to source the control-plane `List Models` API (`GET /v1/accounts/fireworks/models?filter=supports_serverless=true`) instead of the OpenAI-compatible `/v1/models` inference listing. The inference endpoint returns a sparse, account-specific subset that omits on-demand serverless models (e.g. `kimi-k2.7-code`), so newly published serverless models stayed invisible in the picker until hand-added to the bundled catalog. The control-plane catalog enumerates every serverless model with capability metadata (`supportsServerless`/`supportsTools`/`supportsImageInput`/`contextLength`/`displayName`), paginated and filtered to tool-capable `READY` entries, then merged with bundled/models.dev references — the Kimi K2 max-output clamp and DeepSeek V4 thinking-toggle strip are preserved, and unbundled models default to reasoning so `buildModel` derives the Fireworks effort map. New serverless releases now surface automatically with no catalog edits.
|
|
37
|
+
- Changed model display names to drop model-extrinsic decorations: gateway author prefixes (`OpenAI: …`, `Google: …`), `(latest)` alias markers, `(Antigravity)` provider attribution, price tiers (`($$$$)`), and promo/lifecycle tags (`(20% off)`, `(retires …)`). `cleanModelName` is applied in `buildModel` (covers live discovery and stale caches) and as a catalog-generator pass; Antigravity discovery no longer appends `(Antigravity)` to display names. Variant tags that map to distinct wire ids (`(Thinking)`, `(free)`, `(Fast)`, dates, regions) are preserved.
|
|
38
|
+
- Changed the `google-antigravity` default model from `gemini-3-pro-high` to `gemini-3.1-pro`
|
|
39
|
+
- Changed `gemini-2.5-flash-thinking` handling from discovery-denylist to collapsing into `gemini-2.5-flash` (thinking-enabled requests route to the `-thinking` backing id)
|
|
40
|
+
- Bumped the model cache schema to v5 so rows predating effort-tier variant collapsing (raw `-low`/`-high`/`-thinking` member ids) are invalidated
|
|
41
|
+
- Changed GitHub Copilot discovery to request `X-GitHub-Api-Version: 2026-06-01` from `api.githubcopilot.com`
|
|
42
|
+
- Changed GitHub Copilot discovery to cap base model `contextWindow` to the default token tier and keep long-context access as the separate `-1m` model entry
|
|
43
|
+
- Changed Copilot model mapping to omit non-chat `/models` entries and enable image input for models whose capabilities indicate vision support
|
|
44
|
+
- Set every xAI Grok OAuth (SuperGrok) curated model's max output tokens to mirror its context window (`grok-build`, `grok-4.3`, `grok-4.20-0309-{reasoning,non-reasoning}`, `grok-4.20-multi-agent-0309`, `grok-composer-2.5-fast`), replacing the `8888` `UNK_MAX_TOKENS` placeholder (and a stale `30000` on three grok-4.x entries). xAI's OAuth `/v1/models` reports no per-request output limit, so the curated catalog now owns `maxTokens` like `contextWindow`, deterministic on both the static-seed and online-overlay paths; the `openai-responses` wire still clamps the actual request to `OPENAI_MAX_OUTPUT_TOKENS` (64k).
|
|
100
45
|
- Changed OpenAI compatibility detection to use shared host classifiers (`modelMatchesHost`/`hostMatchesUrl`) with normalized matching instead of raw URL substring checks
|
|
101
46
|
- Changed `hostMatchesUrl`/`modelMatchesHost` usage in compatibility detection to reduce mismatches across case variants and provider alias hosts
|
|
102
47
|
- Provider catalog entries now carry the runtime API-key env fallback as an ordered `envVars` list; `catalogDiscovery.envVars` became an optional generation-time override (only `cursor` and `vercel-ai-gateway` differ) and `PROVIDER_DESCRIPTORS` materializes the resolved list for `generate-models.ts`.
|
|
@@ -107,6 +52,25 @@
|
|
|
107
52
|
|
|
108
53
|
### Fixed
|
|
109
54
|
|
|
55
|
+
- Fixed MiniMax-M3 catalog context for `minimax` and `minimax-cn` to report the documented 1M long-context tier instead of the upstream 512K pricing boundary ([#2576](https://github.com/can1357/oh-my-pi/issues/2576)).
|
|
56
|
+
- Fixed OpenCode Go MiMo catalog metadata so title generation and other tool-enabled calls omit unsupported `tool_choice` instead of triggering provider 400s ([#2509](https://github.com/can1357/oh-my-pi/issues/2509)).
|
|
57
|
+
- Fixed OpenCode Go `kimi-k2.7-code` catalog metadata so resolve-gate requests use automatic tool selection instead of Moonshot-rejected forced `tool_choice` ([#2546](https://github.com/can1357/oh-my-pi/issues/2546)).
|
|
58
|
+
- Fixed Anthropic compat for the `github-copilot` host so `supportsEagerToolInputStreaming` defaults to `false` there, matching the Copilot proxy which rejects the per-tool `eager_input_streaming` field ([#2558](https://github.com/can1357/oh-my-pi/issues/2558)).
|
|
59
|
+
- Scoped vLLM model cache validity to the discovery base URL so changed endpoints refetch immediately, and bounded built-in vLLM discovery requests with a timeout.
|
|
60
|
+
- Filled missing `contextWindow` and `maxTokens` in generated `models.json` for proxy/reseller variants by inheriting limits from canonical-family and segment-reference models
|
|
61
|
+
- Ignored zero-cost `x-ai` subscription entries as reference sources when backfilling limits so inflated values are not propagated
|
|
62
|
+
- Fixed the model cache opening with `PRAGMA journal_mode=WAL` before `PRAGMA busy_timeout`, so concurrent omp startups could crash inside `getDb()` on `SQLITE_BUSY` during WAL recovery instead of waiting through the transient lock. The busy handler is now installed before the first lock-taking statement ([#2421](https://github.com/can1357/oh-my-pi/issues/2421)).
|
|
63
|
+
- Fixed Antigravity `gemini-3.1-pro --thinking high` failing with `Cloud Code Assist API error (400): Request contains an invalid argument.` — the upstream `gemini-3.1-pro-high` deployment rejects every `streamGenerateContent` request on both CCA endpoints while discovery still advertises it. High effort now routes to `gemini-pro-agent` (the same "Gemini 3.1 Pro (High)" model, verified accepting the identical request body), and the model-cache fingerprint version was bumped (`merge-v2` → `merge-v3`) so existing fresh caches refetch discovery and pick up the corrected routing immediately.
|
|
64
|
+
- Fixed catalog generation to apply effort-tier variant collapsing before provider grouping to ensure collapsed model families are consistently materialized without being impacted by in-loop mutation
|
|
65
|
+
- Fixed Kimi K2.6 OpenAI-compatible compat metadata to use a 300s stream watchdog floor, covering Fire Pass router ids as well as public `kimi-k2.6` ids so long reasoning starts do not hit the generic first-event timeout ([#2366](https://github.com/can1357/oh-my-pi/issues/2366)).
|
|
66
|
+
- Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and Fireworks' `minimal → none` wire mapping, so `fireworks/minimax-m2.7` as the smol auto-thinking classifier model 400ed on every turn. OpenAI-compatible provider effort maps (`Groq qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) now bake into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. Regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
|
|
67
|
+
- Fixed long-context variant pricing to use `billing.token_prices.long_context` rates instead of default model pricing
|
|
68
|
+
- Fixed `mapModel` handling in OpenAI-compatible discovery so returning `null` now skips a model entry rather than falling back to defaults
|
|
69
|
+
- Fixed model ID precedence so a real upstream Copilot model id is kept when it conflicts with a synthesized `-1m` variant
|
|
70
|
+
- Fixed NVIDIA NIM Qwen turns failing with `400 Validation: Unsupported parameter(s): enable_thinking`. NIM's chat-completions schema is `additionalProperties: false` and exposes thinking via the vLLM convention `chat_template_kwargs.enable_thinking`; `buildOpenAICompat` was sending top-level `enable_thinking` for every `qwen/*` id regardless of host. Registered `nvidia` as a known host (`integrate.api.nvidia.com`) and routed NVIDIA-hosted Qwen models to `thinkingFormat: "qwen-chat-template"` ([#2299](https://github.com/can1357/oh-my-pi/issues/2299)).
|
|
71
|
+
- Fixed Moonshot/Kimi native OpenAI-compatible request metadata so Kimi K2 uses `max_tokens` and omits OpenAI-only `store`, restoring first-turn output with `MOONSHOT_API_KEY` ([#2289](https://github.com/can1357/oh-my-pi/issues/2289)).
|
|
72
|
+
- Fixed `buildModel` so malformed explicit thinking metadata without `efforts` is treated as sparse input and inferred instead of crashing during model resolution ([#2251](https://github.com/can1357/oh-my-pi/issues/2251)).
|
|
73
|
+
- Excluded zero-cost `xai-oauth` subscription entries from the model reference indexes (`buildModelReferenceIndex`, `createReferenceResolver`), so their zero pricing and context-window-sized `maxTokens` cannot outrank paid/public Grok references when resolving custom-provider model identities.
|
|
110
74
|
- Fixed Anthropic official-endpoint detection to require strict HTTPS hostname matching so non-official or lookalike URLs are no longer treated as official Anthropic hosts
|
|
111
75
|
- Fixed Ollama Cloud dynamic discovery so same-id matches from other providers no longer supply context-window or max-output-token limits for discovered models.
|
|
112
76
|
- Wired `@oh-my-pi/pi-catalog` into the release publish package list, tarball install smoke test, and root `bun generate-models` script.
|
|
@@ -115,4 +79,26 @@
|
|
|
115
79
|
|
|
116
80
|
### Removed
|
|
117
81
|
|
|
118
|
-
- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.
|
|
82
|
+
- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.
|
|
83
|
+
|
|
84
|
+
## [15.13.0] - 2026-06-14
|
|
85
|
+
|
|
86
|
+
## [15.12.6] - 2026-06-14
|
|
87
|
+
|
|
88
|
+
## [15.12.4] - 2026-06-13
|
|
89
|
+
|
|
90
|
+
## [15.11.8] - 2026-06-12
|
|
91
|
+
|
|
92
|
+
## [15.11.7] - 2026-06-12
|
|
93
|
+
|
|
94
|
+
## [15.11.4] - 2026-06-12
|
|
95
|
+
|
|
96
|
+
## [15.11.3] - 2026-06-11
|
|
97
|
+
|
|
98
|
+
## [15.11.1] - 2026-06-11
|
|
99
|
+
|
|
100
|
+
## [15.11.0] - 2026-06-10
|
|
101
|
+
|
|
102
|
+
## [15.10.12] - 2026-06-10
|
|
103
|
+
|
|
104
|
+
## [15.10.11] - 2026-06-10
|
|
@@ -12,6 +12,7 @@ export type SemVer = {
|
|
|
12
12
|
export type GeminiKind = "pro" | "flash";
|
|
13
13
|
export type AnthropicKind = "opus" | "sonnet" | "fable" | "mythos";
|
|
14
14
|
export type OpenAIVariant = "base" | "codex" | "codex-max" | "codex-mini" | "codex-spark" | "mini" | "max" | "nano";
|
|
15
|
+
export type GlmVariant = "base" | "air" | "turbo" | "flash" | "flashx" | "preview";
|
|
15
16
|
export interface GeminiModel {
|
|
16
17
|
family: "gemini";
|
|
17
18
|
kind: GeminiKind;
|
|
@@ -27,6 +28,14 @@ export interface OpenAIModel {
|
|
|
27
28
|
variant: OpenAIVariant;
|
|
28
29
|
version: SemVer;
|
|
29
30
|
}
|
|
31
|
+
export interface GlmModel {
|
|
32
|
+
family: "glm";
|
|
33
|
+
/** Suffix variant (`-air`, `-turbo`, `-flash`, `-flashx`, `-preview`); `base` when none. */
|
|
34
|
+
variant: GlmVariant;
|
|
35
|
+
/** Vision SKU — the `v` that attaches directly to the version (`glm-4v`, `glm-4.5v`). */
|
|
36
|
+
vision: boolean;
|
|
37
|
+
version: SemVer;
|
|
38
|
+
}
|
|
30
39
|
export interface UnknownModel {
|
|
31
40
|
family: "unknown";
|
|
32
41
|
id: string;
|
|
@@ -35,9 +44,18 @@ export type ParsedModel = GeminiModel | AnthropicModel | OpenAIModel | UnknownMo
|
|
|
35
44
|
/** Strip a provider namespace prefix (`openai/gpt-5.4` → `gpt-5.4`). */
|
|
36
45
|
export declare function bareModelId(modelId: string): string;
|
|
37
46
|
export declare function parseKnownModel(modelId: string): ParsedModel;
|
|
38
|
-
export declare
|
|
39
|
-
export declare
|
|
40
|
-
export declare
|
|
47
|
+
export declare const parseGeminiModel: (modelId: string) => GeminiModel | null;
|
|
48
|
+
export declare const parseAnthropicModel: (modelId: string) => AnthropicModel | null;
|
|
49
|
+
export declare const parseOpenAIModel: (modelId: string) => OpenAIModel | null;
|
|
50
|
+
/**
|
|
51
|
+
* Parse a GLM (Zhipu / Z.AI) model id into family + variant + vision + version.
|
|
52
|
+
* Shape: `glm-<version>[v][-<variant>]` — e.g. `glm-4.5`, `glm-4.5-air`,
|
|
53
|
+
* `glm-5-turbo`, `glm-4.5v`, `glm-5-preview`. The `v` (vision) attaches to the
|
|
54
|
+
* version; other variants are `-` suffixes. Standalone like `parseAnthropicModel`
|
|
55
|
+
* as used in family.ts — GLM needs no global thinking policy, so it stays out of
|
|
56
|
+
* `parseKnownModel`.
|
|
57
|
+
*/
|
|
58
|
+
export declare const parseGlmModel: (modelId: string) => GlmModel | null;
|
|
41
59
|
export declare function isFableOrMythos(kind: AnthropicKind): boolean;
|
|
42
60
|
export declare function parseSemVer(version: string): SemVer | null;
|
|
43
61
|
export declare function semverGte(left: SemVer | string, right: SemVer | string): boolean;
|
|
@@ -37,6 +37,28 @@ export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
|
|
|
37
37
|
* and `none`.
|
|
38
38
|
*/
|
|
39
39
|
export declare function isOpenAIGptOssModelId(modelId: string): boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Reasoning-capable GLM coding SKUs: glm-4.5 and up on the base / `-air` /
|
|
42
|
+
* `-turbo` lines. Excludes the vision (`…v`) shape, the non-reasoning
|
|
43
|
+
* `-flash`/`-flashx`/`-preview` variants, and pre-4.5 ids. Matching the family
|
|
44
|
+
* keeps newly-bumped integers (`glm-5.3`, `glm-6`, …) covered without a per-id
|
|
45
|
+
* allowlist.
|
|
46
|
+
*/
|
|
47
|
+
export declare function isReasoningGlmModelId(modelId: string): boolean;
|
|
48
|
+
/** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
|
|
49
|
+
export declare function isGlmVisionModelId(modelId: string): boolean;
|
|
50
|
+
/**
|
|
51
|
+
* Coarse vendor-lineage token for "are two models the same family?" checks
|
|
52
|
+
* (e.g. picking a cross-family reviewer). All Claude point releases share a token,
|
|
53
|
+
* Claude and GPT differ; namespace prefixes and aggregator mirrors fold onto the
|
|
54
|
+
* lineage via {@link parseKnownModel}'s `bareModelId` normalization. Opaque and
|
|
55
|
+
* comparison-only — not a stable key to persist, since the vocabulary tracks new
|
|
56
|
+
* releases. Returns `""` for ids it cannot classify; callers fall back to the provider.
|
|
57
|
+
*
|
|
58
|
+
* Vendor-only by design: a model's kind/variant (opus vs sonnet, codex vs base) is
|
|
59
|
+
* collapsed onto the single vendor token; use {@link parseKnownModel} for finer breakdowns.
|
|
60
|
+
*/
|
|
61
|
+
export declare function modelFamilyToken(modelId: string): string;
|
|
40
62
|
/**
|
|
41
63
|
* Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
|
|
42
64
|
* the Claude Fable/Mythos 5 generation. Older adaptive-thinking models
|
|
@@ -22,6 +22,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
|
|
|
22
22
|
staticModels?: readonly ModelSpec<TApi>[];
|
|
23
23
|
/** Optional override for the cache database path. Default: <agent-dir>/models.db. */
|
|
24
24
|
cacheDbPath?: string;
|
|
25
|
+
/** Optional provider id override for cache namespacing. Defaults to providerId. */
|
|
26
|
+
cacheProviderId?: string;
|
|
25
27
|
/** Maximum cache age in milliseconds before considered stale. Default: 24h. */
|
|
26
28
|
cacheTtlMs?: number;
|
|
27
29
|
/** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
|
|
@@ -25,10 +25,10 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
25
25
|
};
|
|
26
26
|
}, {
|
|
27
27
|
readonly id: "amazon-bedrock";
|
|
28
|
-
readonly defaultModel: "us.anthropic.claude-opus-4-
|
|
28
|
+
readonly defaultModel: "us.anthropic.claude-opus-4-8";
|
|
29
29
|
}, {
|
|
30
30
|
readonly id: "anthropic";
|
|
31
|
-
readonly defaultModel: "claude-opus-4-
|
|
31
|
+
readonly defaultModel: "claude-opus-4-8";
|
|
32
32
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"anthropic-messages", unknown>;
|
|
33
33
|
}, {
|
|
34
34
|
readonly id: "cerebras";
|
|
@@ -136,7 +136,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
136
136
|
};
|
|
137
137
|
}, {
|
|
138
138
|
readonly id: "litellm";
|
|
139
|
-
readonly defaultModel: "claude-opus-4-
|
|
139
|
+
readonly defaultModel: "claude-opus-4-8";
|
|
140
140
|
readonly envVars: readonly ["LITELLM_API_KEY"];
|
|
141
141
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
142
142
|
readonly catalogDiscovery: {
|
|
@@ -176,7 +176,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
176
176
|
};
|
|
177
177
|
}, {
|
|
178
178
|
readonly id: "nanogpt";
|
|
179
|
-
readonly defaultModel: "openai/gpt-5.
|
|
179
|
+
readonly defaultModel: "openai/gpt-5.5";
|
|
180
180
|
readonly envVars: readonly ["NANO_GPT_API_KEY"];
|
|
181
181
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
182
182
|
readonly catalogDiscovery: {
|
|
@@ -207,12 +207,12 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
207
207
|
};
|
|
208
208
|
}, {
|
|
209
209
|
readonly id: "openai";
|
|
210
|
-
readonly defaultModel: "gpt-5.
|
|
210
|
+
readonly defaultModel: "gpt-5.5";
|
|
211
211
|
readonly envVars: readonly ["OPENAI_API_KEY"];
|
|
212
212
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-responses", unknown>;
|
|
213
213
|
}, {
|
|
214
214
|
readonly id: "openai-codex";
|
|
215
|
-
readonly defaultModel: "gpt-5.
|
|
215
|
+
readonly defaultModel: "gpt-5.5";
|
|
216
216
|
readonly envVars: readonly ["OPENAI_CODEX_OAUTH_TOKEN"];
|
|
217
217
|
readonly specialModelManager: true;
|
|
218
218
|
}, {
|
|
@@ -227,7 +227,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
227
227
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<import("..").Api, unknown>;
|
|
228
228
|
}, {
|
|
229
229
|
readonly id: "openrouter";
|
|
230
|
-
readonly defaultModel: "openai/gpt-5.
|
|
230
|
+
readonly defaultModel: "openai/gpt-5.5";
|
|
231
231
|
readonly envVars: readonly ["OPENROUTER_API_KEY"];
|
|
232
232
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
233
233
|
readonly catalogDiscovery: {
|
|
@@ -361,7 +361,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
361
361
|
};
|
|
362
362
|
}, {
|
|
363
363
|
readonly id: "zenmux";
|
|
364
|
-
readonly defaultModel: "anthropic/claude-opus-4.
|
|
364
|
+
readonly defaultModel: "anthropic/claude-opus-4.8";
|
|
365
365
|
readonly envVars: readonly ["ZENMUX_API_KEY"];
|
|
366
366
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<import("..").Api, unknown>;
|
|
367
367
|
readonly catalogDiscovery: {
|
|
@@ -165,6 +165,7 @@ export declare function isFireworksKimiK2ModelId(modelId: string): boolean;
|
|
|
165
165
|
* on Fireworks-backed providers, leaving every other model untouched.
|
|
166
166
|
*/
|
|
167
167
|
export declare function clampFireworksKimiMaxTokens(modelId: string, candidate: number): number;
|
|
168
|
+
export declare function clampFireworksKimiMaxTokens(modelId: string, candidate: number | null): number | null;
|
|
168
169
|
/**
|
|
169
170
|
* Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
|
|
170
171
|
* DeepSeek-native binary `thinking` toggle when both are present.
|
|
@@ -343,7 +344,6 @@ export interface AnthropicModelManagerConfig {
|
|
|
343
344
|
fetch?: FetchImpl;
|
|
344
345
|
}
|
|
345
346
|
export declare function anthropicModelManagerOptions(config?: AnthropicModelManagerConfig): ModelManagerOptions<"anthropic-messages">;
|
|
346
|
-
export { UNK_CONTEXT_WINDOW, UNK_MAX_TOKENS } from "./discovery-constants";
|
|
347
347
|
/** Describes how to map models.dev API data for a single provider. */
|
|
348
348
|
export interface ModelsDevProviderDescriptor {
|
|
349
349
|
/** Key in the models.dev API response JSON (e.g., "anthropic", "amazon-bedrock") */
|
|
@@ -386,3 +386,4 @@ export interface ModelsDevProviderDescriptor {
|
|
|
386
386
|
export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): ModelSpec<Api>[];
|
|
387
387
|
/** All provider descriptors for models.dev data mapping in generate-models.ts. */
|
|
388
388
|
export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
|
|
389
|
+
export {};
|
package/dist/types/types.d.ts
CHANGED
|
@@ -161,6 +161,12 @@ export interface OpenAICompat {
|
|
|
161
161
|
requiresAssistantContentForToolCalls?: boolean;
|
|
162
162
|
/** Whether the provider supports the `tool_choice` parameter. Default: true. */
|
|
163
163
|
supportsToolChoice?: boolean;
|
|
164
|
+
/**
|
|
165
|
+
* Whether forced `tool_choice` values (`"required"` or named tools) are accepted.
|
|
166
|
+
* When false, request builders keep tools available but downgrade forced choices
|
|
167
|
+
* to provider-default auto selection. Default: true.
|
|
168
|
+
*/
|
|
169
|
+
supportsForcedToolChoice?: boolean;
|
|
164
170
|
/**
|
|
165
171
|
* Drop reasoning fields (`reasoning_effort`, OpenRouter `reasoning`) for
|
|
166
172
|
* the request when `tool_choice` forces a tool call. Mirrors the Anthropic
|
|
@@ -373,8 +379,8 @@ export interface Model<TApi extends Api = Api> {
|
|
|
373
379
|
};
|
|
374
380
|
/** Premium Copilot requests charged per user-initiated request (defaults to 1). */
|
|
375
381
|
premiumMultiplier?: number;
|
|
376
|
-
contextWindow: number;
|
|
377
|
-
maxTokens: number;
|
|
382
|
+
contextWindow: number | null;
|
|
383
|
+
maxTokens: number | null;
|
|
378
384
|
/**
|
|
379
385
|
* When `true`, providers MUST omit `max_output_tokens` (Responses) /
|
|
380
386
|
* `max_tokens` / `max_completion_tokens` (Completions) from the outbound
|
package/dist/types/utils.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
export { isRecord } from "@oh-my-pi/pi-utils";
|
|
2
2
|
export declare function toNumber(value: unknown): number | undefined;
|
|
3
3
|
export declare function toPositiveNumber(value: unknown, fallback: number): number;
|
|
4
|
+
export declare function toPositiveNumber(value: unknown, fallback: number | null): number | null;
|
|
5
|
+
/** Positive finite number, or `null` when the value is missing/non-positive. */
|
|
6
|
+
export declare function toPositiveNumberOrNull(value: unknown): number | null;
|
|
4
7
|
export declare function toBoolean(value: unknown): boolean | undefined;
|
|
5
8
|
export declare function isAnthropicOAuthToken(key: string): boolean;
|
|
6
9
|
/**
|
|
@@ -9,6 +9,7 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
|
|
|
9
9
|
"Client-Metadata": string;
|
|
10
10
|
};
|
|
11
11
|
export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
|
|
12
|
+
export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
|
|
12
13
|
/**
|
|
13
14
|
* Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
|
|
14
15
|
* and usage code can read it without pulling the heavy google-gemini-cli provider
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "15.12.3",
|
|
4
|
+
"version": "15.13.0",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,11 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "15.
|
|
38
|
-
"zod": "4
|
|
37
|
+
"@oh-my-pi/pi-utils": "15.13.0",
|
|
38
|
+
"zod": "^4"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "15.
|
|
41
|
+
"@oh-my-pi/pi-ai": "15.13.0",
|
|
42
42
|
"@types/bun": "^1.3.14"
|
|
43
43
|
},
|
|
44
44
|
"engines": {
|
package/src/compat/anthropic.ts
CHANGED
|
@@ -34,11 +34,17 @@ export function buildAnthropicCompat(spec: ModelSpec<"anthropic-messages">): Res
|
|
|
34
34
|
const official = isOfficialAnthropicApiUrl(baseUrl);
|
|
35
35
|
// Z.AI's Anthropic-compatible proxy lives at `api.z.ai/api/anthropic`.
|
|
36
36
|
const isZai = modelMatchesHost(spec, "zai");
|
|
37
|
+
// GitHub Copilot's Anthropic-compatible proxy (api.githubcopilot.com/v1/messages)
|
|
38
|
+
// rejects the per-tool `eager_input_streaming` field with
|
|
39
|
+
// `tools.0.custom.eager_input_streaming: Extra inputs are not permitted` and
|
|
40
|
+
// doesn't whitelist the `fine-grained-tool-streaming-2025-05-14` beta either
|
|
41
|
+
// (issue #2558), so eager tool-input streaming is unavailable on this host.
|
|
42
|
+
const isCopilot = modelMatchesHost(spec, "githubCopilot");
|
|
37
43
|
const compat: ResolvedAnthropicCompat = {
|
|
38
44
|
officialEndpoint: official,
|
|
39
45
|
disableStrictTools: false,
|
|
40
46
|
disableAdaptiveThinking: false,
|
|
41
|
-
supportsEagerToolInputStreaming:
|
|
47
|
+
supportsEagerToolInputStreaming: !isCopilot,
|
|
42
48
|
// Long cache retention is only sent to the official API by default;
|
|
43
49
|
// proxies opt in explicitly via `compat.supportsLongCacheRetention: true`.
|
|
44
50
|
supportsLongCacheRetention: official,
|
package/src/compat/openai.ts
CHANGED
|
@@ -217,6 +217,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
217
217
|
disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
|
|
218
218
|
disableReasoningOnToolChoice: isDeepseekFamily && Boolean(spec.reasoning) && !isOpenRouter,
|
|
219
219
|
supportsToolChoice: !isDirectDeepseekReasoning,
|
|
220
|
+
supportsForcedToolChoice: true,
|
|
220
221
|
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
|
221
222
|
requiresToolResultName: isMistral,
|
|
222
223
|
requiresAssistantAfterToolResult: false,
|
package/src/discovery/codex.ts
CHANGED
package/src/discovery/cursor.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as http2 from "node:http2";
|
|
2
2
|
import { create, fromBinary, toBinary } from "@bufbuild/protobuf";
|
|
3
|
-
import
|
|
3
|
+
import { z } from "zod/v4";
|
|
4
4
|
import { getBundledModels } from "../models";
|
|
5
5
|
import { toModelSpec } from "../provider-models/bundled-references";
|
|
6
6
|
import type { Model, ModelSpec } from "../types";
|
package/src/discovery/gemini.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { z } from "zod/v4";
|
|
2
2
|
import { getBundledModels } from "../models";
|
|
3
3
|
import { toModelSpec } from "../provider-models/bundled-references";
|
|
4
|
-
import { UNK_CONTEXT_WINDOW, UNK_MAX_TOKENS } from "../provider-models/discovery-constants";
|
|
5
4
|
import type { FetchImpl, Model, ModelSpec } from "../types";
|
|
6
5
|
|
|
7
6
|
const GOOGLE_GENERATIVE_AI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
|
|
@@ -148,7 +147,9 @@ function normalizeBaseUrl(baseUrl?: string): string {
|
|
|
148
147
|
return value.replace(/\/+$/, "");
|
|
149
148
|
}
|
|
150
149
|
|
|
151
|
-
function normalizePositiveInt(value: number | undefined, fallback: number): number {
|
|
150
|
+
function normalizePositiveInt(value: number | undefined, fallback: number): number;
|
|
151
|
+
function normalizePositiveInt(value: number | undefined, fallback: number | null): number | null;
|
|
152
|
+
function normalizePositiveInt(value: number | undefined, fallback: number | null): number | null {
|
|
152
153
|
if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
|
|
153
154
|
return fallback;
|
|
154
155
|
}
|
|
@@ -178,8 +179,8 @@ function normalizeModel(
|
|
|
178
179
|
}
|
|
179
180
|
|
|
180
181
|
const reference = bundledById.get(id);
|
|
181
|
-
const contextWindow = normalizePositiveInt(item.inputTokenLimit, reference?.contextWindow ?? UNK_CONTEXT_WINDOW);
|
|
182
|
-
const maxTokens = normalizePositiveInt(item.outputTokenLimit, reference?.maxTokens ?? UNK_MAX_TOKENS);
|
|
182
|
+
const contextWindow = normalizePositiveInt(item.inputTokenLimit, reference?.contextWindow ?? null);
|
|
183
|
+
const maxTokens = normalizePositiveInt(item.outputTokenLimit, reference?.maxTokens ?? null);
|
|
183
184
|
const name = normalizeModelName(item.displayName, reference?.name ?? id);
|
|
184
185
|
|
|
185
186
|
if (reference) {
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { UNK_CONTEXT_WINDOW, UNK_MAX_TOKENS } from "../provider-models/discovery-constants";
|
|
1
|
+
import { z } from "zod/v4";
|
|
3
2
|
import type { Api, FetchImpl, ModelSpec, Provider } from "../types";
|
|
4
3
|
|
|
5
4
|
const MODELS_PATH = "/models";
|
|
@@ -165,8 +164,8 @@ export async function fetchOpenAICompatibleModels<TApi extends Api>(
|
|
|
165
164
|
reasoning: false,
|
|
166
165
|
input: ["text"],
|
|
167
166
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
168
|
-
contextWindow: UNK_CONTEXT_WINDOW,
|
|
169
|
-
maxTokens: UNK_MAX_TOKENS,
|
|
167
|
+
contextWindow: null,
|
|
168
|
+
maxTokens: null,
|
|
170
169
|
};
|
|
171
170
|
|
|
172
171
|
// `mapModel` returning null skips the entry (documented contract); only a
|
package/src/identity/classify.ts
CHANGED
|
@@ -14,6 +14,7 @@ export type SemVer = {
|
|
|
14
14
|
export type GeminiKind = "pro" | "flash";
|
|
15
15
|
export type AnthropicKind = "opus" | "sonnet" | "fable" | "mythos";
|
|
16
16
|
export type OpenAIVariant = "base" | "codex" | "codex-max" | "codex-mini" | "codex-spark" | "mini" | "max" | "nano";
|
|
17
|
+
export type GlmVariant = "base" | "air" | "turbo" | "flash" | "flashx" | "preview";
|
|
17
18
|
|
|
18
19
|
export interface GeminiModel {
|
|
19
20
|
family: "gemini";
|
|
@@ -33,6 +34,15 @@ export interface OpenAIModel {
|
|
|
33
34
|
version: SemVer;
|
|
34
35
|
}
|
|
35
36
|
|
|
37
|
+
export interface GlmModel {
|
|
38
|
+
family: "glm";
|
|
39
|
+
/** Suffix variant (`-air`, `-turbo`, `-flash`, `-flashx`, `-preview`); `base` when none. */
|
|
40
|
+
variant: GlmVariant;
|
|
41
|
+
/** Vision SKU — the `v` that attaches directly to the version (`glm-4v`, `glm-4.5v`). */
|
|
42
|
+
vision: boolean;
|
|
43
|
+
version: SemVer;
|
|
44
|
+
}
|
|
45
|
+
|
|
36
46
|
export interface UnknownModel {
|
|
37
47
|
family: "unknown";
|
|
38
48
|
id: string;
|
|
@@ -55,8 +65,26 @@ export function parseKnownModel(modelId: string): ParsedModel {
|
|
|
55
65
|
);
|
|
56
66
|
}
|
|
57
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Wrap a parse function in a per-id memo cache. Caches the `null` result too, so
|
|
70
|
+
* repeated misses (the common case — ids of other families) stay O(1) and never
|
|
71
|
+
* re-run the regex/semver work.
|
|
72
|
+
*/
|
|
73
|
+
function parser<T>(parse: (modelId: string) => T | null): (modelId: string) => T | null {
|
|
74
|
+
const cache = new Map<string, T | null>();
|
|
75
|
+
return modelId => {
|
|
76
|
+
const hit = cache.get(modelId);
|
|
77
|
+
if (hit !== undefined || cache.has(modelId)) {
|
|
78
|
+
return hit ?? null;
|
|
79
|
+
}
|
|
80
|
+
const result = parse(modelId);
|
|
81
|
+
cache.set(modelId, result);
|
|
82
|
+
return result;
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
|
|
58
86
|
const GEMINI_SUFFIX = "-preview";
|
|
59
|
-
export function parseGeminiModel(modelId: string): GeminiModel | null {
|
|
87
|
+
export const parseGeminiModel = parser((modelId): GeminiModel | null => {
|
|
60
88
|
if (modelId.endsWith(GEMINI_SUFFIX)) {
|
|
61
89
|
modelId = modelId.slice(0, -GEMINI_SUFFIX.length);
|
|
62
90
|
}
|
|
@@ -69,9 +97,9 @@ export function parseGeminiModel(modelId: string): GeminiModel | null {
|
|
|
69
97
|
return null;
|
|
70
98
|
}
|
|
71
99
|
return { family: "gemini", kind: match[2] as GeminiKind, version };
|
|
72
|
-
}
|
|
100
|
+
});
|
|
73
101
|
|
|
74
|
-
export function parseAnthropicModel(modelId: string): AnthropicModel | null {
|
|
102
|
+
export const parseAnthropicModel = parser((modelId): AnthropicModel | null => {
|
|
75
103
|
const match = /claude-(opus|sonnet|fable|mythos)-(\d{1,2}(?:[.-]\d{1,2}){0,2})\b/.exec(modelId);
|
|
76
104
|
if (!match) {
|
|
77
105
|
return null;
|
|
@@ -81,9 +109,9 @@ export function parseAnthropicModel(modelId: string): AnthropicModel | null {
|
|
|
81
109
|
return null;
|
|
82
110
|
}
|
|
83
111
|
return { family: "anthropic", kind: match[1] as AnthropicKind, version };
|
|
84
|
-
}
|
|
112
|
+
});
|
|
85
113
|
|
|
86
|
-
export function parseOpenAIModel(modelId: string): OpenAIModel | null {
|
|
114
|
+
export const parseOpenAIModel = parser((modelId): OpenAIModel | null => {
|
|
87
115
|
const match = /gpt-(\d+(?:\.\d+){0,2})(?:-(codex-spark|codex-mini|codex-max|codex|mini|max|nano))?\b/.exec(modelId);
|
|
88
116
|
if (!match) {
|
|
89
117
|
return null;
|
|
@@ -93,7 +121,32 @@ export function parseOpenAIModel(modelId: string): OpenAIModel | null {
|
|
|
93
121
|
return null;
|
|
94
122
|
}
|
|
95
123
|
return { family: "openai", variant: (match[2] as OpenAIVariant | undefined) ?? "base", version };
|
|
96
|
-
}
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Parse a GLM (Zhipu / Z.AI) model id into family + variant + vision + version.
|
|
128
|
+
* Shape: `glm-<version>[v][-<variant>]` — e.g. `glm-4.5`, `glm-4.5-air`,
|
|
129
|
+
* `glm-5-turbo`, `glm-4.5v`, `glm-5-preview`. The `v` (vision) attaches to the
|
|
130
|
+
* version; other variants are `-` suffixes. Standalone like `parseAnthropicModel`
|
|
131
|
+
* is used in family.ts — GLM needs no global thinking policy, so it stays out of
|
|
132
|
+
* `parseKnownModel`.
|
|
133
|
+
*/
|
|
134
|
+
export const parseGlmModel = parser((modelId): GlmModel | null => {
|
|
135
|
+
const match = /glm-(\d{1,2}(?:\.\d+)?)(v)?(?:-(air|turbo|flashx|flash|preview))?\b/.exec(modelId);
|
|
136
|
+
if (!match) {
|
|
137
|
+
return null;
|
|
138
|
+
}
|
|
139
|
+
const version = parseSemVer(match[1]);
|
|
140
|
+
if (!version) {
|
|
141
|
+
return null;
|
|
142
|
+
}
|
|
143
|
+
return {
|
|
144
|
+
family: "glm",
|
|
145
|
+
variant: (match[3] as GlmVariant | undefined) ?? "base",
|
|
146
|
+
vision: match[2] === "v",
|
|
147
|
+
version,
|
|
148
|
+
};
|
|
149
|
+
});
|
|
97
150
|
|
|
98
151
|
export function isFableOrMythos(kind: AnthropicKind): boolean {
|
|
99
152
|
return kind === "fable" || kind === "mythos";
|