@oh-my-pi/pi-catalog 15.11.6 → 15.11.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +43 -0
- package/dist/types/identity/family.d.ts +21 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/model-thinking.d.ts +12 -0
- package/dist/types/provider-models/descriptors.d.ts +1 -1
- package/dist/types/types.d.ts +22 -0
- package/dist/types/utils.d.ts +6 -0
- package/dist/types/variant-collapse.d.ts +126 -0
- package/package.json +3 -3
- package/src/build.ts +2 -0
- package/src/compat/openai.ts +10 -6
- package/src/discovery/antigravity.ts +9 -10
- package/src/identity/family.ts +43 -2
- package/src/index.ts +1 -0
- package/src/model-cache.ts +4 -3
- package/src/model-manager.ts +19 -8
- package/src/model-thinking.ts +56 -1
- package/src/models.json +2426 -1809
- package/src/provider-models/descriptors.ts +1 -1
- package/src/types.ts +22 -0
- package/src/utils.ts +24 -0
- package/src/variant-collapse.ts +622 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,32 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.11.8] - 2026-06-12
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed Antigravity `gemini-3.1-pro --thinking high` failing with `Cloud Code Assist API error (400): Request contains an invalid argument.` — the upstream `gemini-3.1-pro-high` deployment rejects every `streamGenerateContent` request on both CCA endpoints while discovery still advertises it. High effort now routes to `gemini-pro-agent` (the same "Gemini 3.1 Pro (High)" model, verified accepting the identical request body), and the model-cache fingerprint version was bumped (`merge-v2` → `merge-v3`) so existing fresh caches refetch discovery and pick up the corrected routing immediately.
|
|
10
|
+
|
|
11
|
+
## [15.11.7] - 2026-06-12
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added effort-tier variant collapsing (`variant-collapse`): providers that expose one logical model as several effort/thinking-suffixed upstream ids (Antigravity CCA `gemini-3.5-flash-extra-low`/`-low`/`gemini-3-flash-agent`, `gemini-3[.1]-pro-low|high`, `claude-*[-thinking]` pairs, `gpt-oss-120b-medium`) collapse into one logical entry carrying per-effort upstream routing in `thinking.effortRouting` (plus `thinking.suppressWhenOff` for Cloud Code Assist ids whose baked server default re-applies when `thinkingConfig` is omitted). Request-time code resolves the outbound id via `resolveWireModelId(model, effort)`; selection, caching, and usage attribution key on the logical id.
|
|
15
|
+
- Added the automatic `X`/`X-thinking` pair rule (`deriveThinkingPairFamilies`): any provider's live bare/thinking twin collapses into the bare id, routing thinking-enabled requests to the `-thinking` backing id (trailing or infix token, so `kimi-k2-thinking-turbo` pairs with `kimi-k2-turbo`). Gated on same api and compatible pricing — all-zero cost rows count as unknown, while twins that both carry real, differing prices remain separate SKUs.
|
|
16
|
+
- Added `collapseBuiltModelVariants` and wired collapsing at every materialization point — Antigravity discovery, the catalog generator, and the model-manager merge — so stale sources (old static beside collapsed dynamic results, mixed cache rows) converge on logical entries instead of unioning raw tier ids back into the catalog.
|
|
17
|
+
- Added `thinking.requiresEffort`, baked for reasoning-only upstreams — Gemini 3.x (levels only, no off), Gemini 2.5 Pro (thinkingBudget floors at 128, rejects 0), OpenAI o-series, MiniMax M2, and thinking-variant SKUs (`*-thinking`/`*-reasoner`/`*-reasoning`, with a negation-aware token grammar so `non-thinking` ids never match). Identity derivation bakes it for new entries and `fillThinkingWireDefaults` backfills explicit/cached metadata; `minimumSupportedEffort` exposes the canonical floor. Pair-collapsed twins drop member flags (their `off` routes to the bare SKU), while identity re-flags pairs whose logical id is itself reasoning-mandatory.
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Changed model display names to drop model-extrinsic decorations: gateway author prefixes (`OpenAI: …`, `Google: …`), `(latest)` alias markers, `(Antigravity)` provider attribution, price tiers (`($$$$)`), and promo/lifecycle tags (`(20% off)`, `(retires …)`). `cleanModelName` is applied in `buildModel` (covers live discovery and stale caches) and as a catalog-generator pass; Antigravity discovery no longer appends `(Antigravity)` to display names. Variant tags that map to distinct wire ids (`(Thinking)`, `(free)`, `(Fast)`, dates, regions) are preserved.
|
|
22
|
+
- Changed the `google-antigravity` default model from `gemini-3-pro-high` to `gemini-3.1-pro`
|
|
23
|
+
- Changed `gemini-2.5-flash-thinking` handling from discovery-denylist to collapsing into `gemini-2.5-flash` (thinking-enabled requests route to the `-thinking` backing id)
|
|
24
|
+
- Bumped the model cache schema to v5 so rows predating effort-tier variant collapsing (raw `-low`/`-high`/`-thinking` member ids) are invalidated
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Fixed catalog generation to apply effort-tier variant collapsing before provider grouping, so collapsed model families are materialized consistently and are no longer affected by in-loop mutation.
|
|
29
|
+
- Fixed Kimi K2.6 OpenAI-compatible compat metadata to use a 300s stream watchdog floor, covering Fire Pass router ids as well as public `kimi-k2.6` ids so long reasoning starts do not hit the generic first-event timeout ([#2366](https://github.com/can1357/oh-my-pi/issues/2366)).
|
|
30
|
+
|
|
5
31
|
## [15.11.4] - 2026-06-12
|
|
6
32
|
|
|
7
33
|
### Fixed
|
package/README.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# @oh-my-pi/pi-catalog
|
|
2
|
+
|
|
3
|
+
Model catalog for [oh-my-pi](https://github.com/can1357/oh-my-pi): bundled model database, provider discovery, model identity, classification, and equivalence.
|
|
4
|
+
|
|
5
|
+
## What's inside
|
|
6
|
+
|
|
7
|
+
| Module | Purpose |
|
|
8
|
+
| --- | --- |
|
|
9
|
+
| `models.json` + `models` | Bundled model database (pricing, context windows, modalities, thinking support) |
|
|
10
|
+
| `provider-models` | Provider catalog descriptors (`CATALOG_PROVIDERS`), per-provider model resolution rules |
|
|
11
|
+
| `discovery` | Runtime model discovery for OpenAI-compatible endpoints, Gemini, Codex, Cursor, Antigravity, Ollama |
|
|
12
|
+
| `identity` | Model id parsing and classification (family/version), reference resolution, equivalence, selection priority |
|
|
13
|
+
| `model-thinking` | Thinking/reasoning metadata and generated per-model policies |
|
|
14
|
+
| `model-manager` / `model-cache` | Runtime model registry with discovery refresh and on-disk caching |
|
|
15
|
+
| `variant-collapse` | Collapsing provider-specific variants of the same underlying model |
|
|
16
|
+
| `compat` | Request/response compatibility fixups for OpenAI- and Anthropic-shaped APIs |
|
|
17
|
+
| `wire` | Wire-level helpers: Codex, Gemini headers, GitHub Copilot |
|
|
18
|
+
| `effort` | Reasoning-effort level definitions |
|
|
19
|
+
|
|
20
|
+
Import from subpaths (`@oh-my-pi/pi-catalog/<module>`) or the root barrel.
|
|
21
|
+
|
|
22
|
+
## models.json is generated
|
|
23
|
+
|
|
24
|
+
Never edit `src/models.json` by hand — it is produced from upstream sources (models.dev, provider catalog discovery, OpenCode docs) by `scripts/generate-models.ts` and the resolvers in `src/provider-models/`. Regenerate with:
|
|
25
|
+
|
|
26
|
+
```sh
|
|
27
|
+
bun --cwd=packages/catalog run generate-models
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
To change an entry, fix the source: resolver overrides in `provider-models/openai-compat.ts`, provider entries in `provider-models/descriptors.ts`, generator fixups in `scripts/generate-models.ts`, or thinking policies in `model-thinking.ts`.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```sh
|
|
35
|
+
bun add @oh-my-pi/pi-catalog
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Ships TypeScript source directly (no build step); requires Bun ≥ 1.3.14.
|
|
39
|
+
|
|
40
|
+
## References
|
|
41
|
+
|
|
42
|
+
- [Monorepo README](https://github.com/can1357/oh-my-pi#readme)
|
|
43
|
+
- [CHANGELOG](./CHANGELOG.md)
|
|
package/dist/types/identity/family.d.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
/** Kimi family ids in any namespace form (`moonshotai/kimi-*`, `kimi-k2.6`, `vendor/kimi.x`). */
|
|
10
10
|
export declare function isKimiModelId(modelId: string): boolean;
|
|
11
|
-
/** Kimi K2.6 specifically
|
|
11
|
+
/** Kimi K2.6 specifically, including router ids that spell the version `k2p6`. */
|
|
12
12
|
export declare function isKimiK26ModelId(modelId: string): boolean;
|
|
13
13
|
/** Claude ids in any namespace form (`claude-*`, `vendor/claude.x`). */
|
|
14
14
|
export declare function isClaudeModelId(modelId: string): boolean;
|
|
@@ -60,3 +60,23 @@ export declare function hasOpus47ApiRestrictions(modelId: string): boolean;
|
|
|
60
60
|
*/
|
|
61
61
|
export declare function supportsMidConversationSystemMessages(modelId: string): boolean;
|
|
62
62
|
export declare function isAnthropicFableOrMythosModel(modelId: string): boolean;
|
|
63
|
+
/** Thinking-variant token location inside a model id. */
|
|
64
|
+
export interface ThinkingVariantToken {
|
|
65
|
+
index: number;
|
|
66
|
+
length: number;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Locates the first thinking-variant token (`-thinking`, `-reasoner`,
|
|
70
|
+
* `-reasoning`; trailing or infix) in a model id. The token ends at the id
|
|
71
|
+
* end or any non-alphanumeric boundary, and negated forms (`non-thinking`,
|
|
72
|
+
* `no-thinking`) never match — those name the NON-thinking SKU.
|
|
73
|
+
*/
|
|
74
|
+
export declare function findThinkingVariantToken(modelId: string): ThinkingVariantToken | undefined;
|
|
75
|
+
/**
|
|
76
|
+
* Removes the located thinking-variant token: `kimi-k2-thinking` → `kimi-k2`,
|
|
77
|
+
* `mimo-v2-flash-thinking-original` → `mimo-v2-flash-original`,
|
|
78
|
+
* `grok-4.1-fast-reasoning` → `grok-4.1-fast`. Returns `undefined` when no
|
|
79
|
+
* token exists or nothing would remain. Callers MUST verify the result names
|
|
80
|
+
* a live model.
|
|
81
|
+
*/
|
|
82
|
+
export declare function stripThinkingVariantToken(modelId: string): string | undefined;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -10,6 +10,7 @@ export * from "./models";
|
|
|
10
10
|
export * from "./provider-models";
|
|
11
11
|
export * from "./types";
|
|
12
12
|
export * from "./utils";
|
|
13
|
+
export * from "./variant-collapse";
|
|
13
14
|
export * from "./wire/codex";
|
|
14
15
|
export * from "./wire/gemini-headers";
|
|
15
16
|
export * from "./wire/github-copilot";
|
|
package/dist/types/model-thinking.d.ts
CHANGED
|
@@ -64,4 +64,16 @@ export declare function mapEffortToGoogleThinkingLevel(effort: Effort): "MINIMAL
|
|
|
64
64
|
* the model's baked `thinking.effortMap` (identity for unmapped efforts).
|
|
65
65
|
*/
|
|
66
66
|
export declare function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(model: ApiModel<TApi>, effort: Effort): "low" | "medium" | "high" | "xhigh" | "max";
|
|
67
|
+
/**
|
|
68
|
+
* Resolves the upstream wire model id for a request at the given effort
|
|
69
|
+
* (`undefined` = thinking off). Collapsed effort-tier variants route through
|
|
70
|
+
* `thinking.effortRouting`; everything else falls back to
|
|
71
|
+
* `requestModelId ?? id`.
|
|
72
|
+
*/
|
|
73
|
+
export declare function resolveWireModelId<TApi extends Api>(model: ApiModel<TApi>, effort: Effort | undefined): string;
|
|
74
|
+
/**
|
|
75
|
+
* Lowest supported effort in canonical order — the clamp target for
|
|
76
|
+
* thinking-off requests on `thinking.requiresEffort` models.
|
|
77
|
+
*/
|
|
78
|
+
export declare function minimumSupportedEffort<TApi extends Api>(model: ApiModel<TApi>): Effort | undefined;
|
|
67
79
|
export {};
|
|
package/dist/types/provider-models/descriptors.d.ts
CHANGED
|
@@ -93,7 +93,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
93
93
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"google-generative-ai", unknown>;
|
|
94
94
|
}, {
|
|
95
95
|
readonly id: "google-antigravity";
|
|
96
|
-
readonly defaultModel: "gemini-3-pro-high";
|
|
96
|
+
readonly defaultModel: "gemini-3.1-pro";
|
|
97
97
|
readonly specialModelManager: true;
|
|
98
98
|
}, {
|
|
99
99
|
readonly id: "google-gemini-cli";
|
package/dist/types/types.d.ts
CHANGED
|
@@ -27,6 +27,28 @@ export interface ThinkingConfig {
|
|
|
27
27
|
* 5). Also implies native interleaved thinking — no beta header needed.
|
|
28
28
|
*/
|
|
29
29
|
supportsDisplay?: boolean;
|
|
30
|
+
/**
|
|
31
|
+
* Per-effort upstream wire-id routing for collapsed effort-tier variants
|
|
32
|
+
* (`variant-collapse.ts`). Keyed by pi effort; `"off"` applies when
|
|
33
|
+
* thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
|
|
34
|
+
*/
|
|
35
|
+
effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
|
|
36
|
+
/**
|
|
37
|
+
* When true, a thinking-off request MUST explicitly suppress thinking on
|
|
38
|
+
* the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
|
|
39
|
+
* false`; budget: `thinkingBudget: 0`) instead of omitting thinkingConfig —
|
|
40
|
+
* Cloud Code Assist re-applies the per-id baked server default when the
|
|
41
|
+
* config is absent.
|
|
42
|
+
*/
|
|
43
|
+
suppressWhenOff?: boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Reasoning is mandatory upstream: the endpoint rejects disabled or
|
|
46
|
+
* omitted thinking (e.g. OpenRouter Gemini 3.x — "Reasoning is mandatory
|
|
47
|
+
* for this endpoint and cannot be disabled"). Request mapping clamps
|
|
48
|
+
* thinking-off to the lowest supported effort unless `suppressWhenOff`
|
|
49
|
+
* provides an explicit wire off-path.
|
|
50
|
+
*/
|
|
51
|
+
requiresEffort?: boolean;
|
|
30
52
|
}
|
|
31
53
|
export type Provider = string;
|
|
32
54
|
/** Token budgets for each thinking level (token-based providers only) */
|
package/dist/types/utils.d.ts
CHANGED
|
@@ -3,3 +3,9 @@ export declare function toNumber(value: unknown): number | undefined;
|
|
|
3
3
|
export declare function toPositiveNumber(value: unknown, fallback: number): number;
|
|
4
4
|
export declare function toBoolean(value: unknown): boolean | undefined;
|
|
5
5
|
export declare function isAnthropicOAuthToken(key: string): boolean;
|
|
6
|
+
/**
|
|
7
|
+
* Normalize a model display name: drop the gateway author prefix and
|
|
8
|
+
* model-extrinsic decorations. Returns the input verbatim when nothing
|
|
9
|
+
* matches (or when stripping would leave an empty name).
|
|
10
|
+
*/
|
|
11
|
+
export declare function cleanModelName(name: string): string;
|
|
package/dist/types/variant-collapse.d.ts
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { Effort } from "./effort";
|
|
2
|
+
import type { Api, Model, ModelSpec, Provider, ThinkingConfig } from "./types";
|
|
3
|
+
/**
|
|
4
|
+
* Structural bound for collapse inputs: both raw `ModelSpec`s and built
|
|
5
|
+
* `Model`s qualify. (`Model.compat` is the resolved record, not the sparse
|
|
6
|
+
* config, so the two are not mutually assignable — collapsing never touches
|
|
7
|
+
* `compat`.)
|
|
8
|
+
*/
|
|
9
|
+
export type VariantSpecLike = Omit<ModelSpec<Api>, "compat"> & {
|
|
10
|
+
compat?: unknown;
|
|
11
|
+
};
|
|
12
|
+
/** One collapsed family: logical id + member wire ids + per-effort routing. */
|
|
13
|
+
export interface EffortVariantFamily {
|
|
14
|
+
/** Collapsed logical id (may equal a member id — e.g. bare/thinking pairs). */
|
|
15
|
+
id: string;
|
|
16
|
+
/** Final display name, no tier marker. */
|
|
17
|
+
name: string;
|
|
18
|
+
/**
|
|
19
|
+
* Member wire ids in priority order. The first member present in the input
|
|
20
|
+
* becomes the collapsed spec's default wire id (`requestModelId`; omitted
|
|
21
|
+
* when it equals the logical id).
|
|
22
|
+
*/
|
|
23
|
+
members: readonly string[];
|
|
24
|
+
/**
|
|
25
|
+
* Wire ids upstream no longer serves (e.g. a deployment killed while
|
|
26
|
+
* discovery still advertises it). Fresh collapsing never routes to them,
|
|
27
|
+
* and stale collapsed snapshots (bundled catalog, cache rows,
|
|
28
|
+
* previous-generation fallbacks) get routing/`requestModelId` entries that
|
|
29
|
+
* target them re-pointed through `routing`. Keep retired ids in `members`
|
|
30
|
+
* so the raw upstream spec is still consumed and aliased.
|
|
31
|
+
*/
|
|
32
|
+
retiredMembers?: readonly string[];
|
|
33
|
+
/**
|
|
34
|
+
* Per-effort upstream wire id; `"off"` applies when thinking is disabled.
|
|
35
|
+
* Entries whose target member is absent from the input are dropped — those
|
|
36
|
+
* efforts fall back to `requestModelId ?? id`.
|
|
37
|
+
*/
|
|
38
|
+
routing: Readonly<Partial<Record<Effort | "off", string>>>;
|
|
39
|
+
/** Explicit capability surface for the collapsed spec — no inference. */
|
|
40
|
+
thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
|
|
41
|
+
/** Thinking-off requests must explicitly suppress thinking on the wire. */
|
|
42
|
+
suppressWhenOff?: boolean;
|
|
43
|
+
/** Retired/recycled selector ids that alias to this family without being members. */
|
|
44
|
+
extraAliases?: readonly string[];
|
|
45
|
+
}
|
|
46
|
+
export interface VariantCollapseTable {
|
|
47
|
+
families: readonly EffortVariantFamily[];
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Shared by `google-antigravity` and `google-gemini-cli` — both serve the
|
|
51
|
+
* Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
|
|
52
|
+
*/
|
|
53
|
+
export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
|
|
54
|
+
/** Provider id → hand collapse table. Both CCA providers share one table. */
|
|
55
|
+
export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
|
|
56
|
+
/**
|
|
57
|
+
* The global automatic rule: derive an `X` + `X-thinking` family for every
|
|
58
|
+
* pair where both ids are live in `specs` (trailing or infix token). Gates:
|
|
59
|
+
* - both members share the same `api`,
|
|
60
|
+
* - known pricing must match — all-zero cost rows count as unknown
|
|
61
|
+
* (aggregators routinely ship them), but twins that BOTH carry real,
|
|
62
|
+
* differing prices are distinct SKUs and never merge,
|
|
63
|
+
* - ids claimed by the provider's hand `table` are skipped (curation wins).
|
|
64
|
+
* The capability surface prefers the thinking member's metadata, then the
|
|
65
|
+
* bare member's, then the canonical deriver (aggregators often ship
|
|
66
|
+
* `reasoning: false` and no thinking config on the twin), then a budget
|
|
67
|
+
* default. `off` routes to the bare id; every supported effort routes to the
|
|
68
|
+
* thinking id.
|
|
69
|
+
*/
|
|
70
|
+
export declare function deriveThinkingPairFamilies<TSpec extends VariantSpecLike>(specs: readonly TSpec[], table?: VariantCollapseTable): EffortVariantFamily[];
|
|
71
|
+
/**
|
|
72
|
+
* True when `spec` is the output of collapsing rather than a raw upstream
|
|
73
|
+
* member. `thinking.effortRouting` is written only by collapsing; the
|
|
74
|
+
* `requestModelId` arm is scoped to the provider's hand-table family ids so
|
|
75
|
+
* unrelated carriers (GitHub Copilot `-1m` context variants) never match.
|
|
76
|
+
*/
|
|
77
|
+
export declare function isVariantCollapsedSpec(spec: VariantSpecLike): boolean;
|
|
78
|
+
/**
|
|
79
|
+
* Collapse every family in `table` found in `specs`. Non-member specs pass
|
|
80
|
+
* through verbatim (by reference), order preserved; the collapsed spec
|
|
81
|
+
* replaces the first occurrence of its family.
|
|
82
|
+
*/
|
|
83
|
+
export declare function collapseEffortVariants<TSpec extends VariantSpecLike>(specs: readonly TSpec[], table: VariantCollapseTable): TSpec[];
|
|
84
|
+
/**
|
|
85
|
+
* Collapse a full mixed-provider list: per provider, the hand table (when
|
|
86
|
+
* registered) plus the automatic `X`/`X-thinking` pair rule. Used by the
|
|
87
|
+
* catalog generator; the runtime equivalent lives at the model-manager merge
|
|
88
|
+
* point. Output is regrouped by provider — callers re-sort.
|
|
89
|
+
*/
|
|
90
|
+
export declare function collapseEffortVariantsAcrossProviders<TSpec extends VariantSpecLike>(specs: readonly TSpec[]): TSpec[];
|
|
91
|
+
/**
|
|
92
|
+
* Runtime entry point for already-built `Model` lists (the model-manager
|
|
93
|
+
* merge point, coding-agent registry custom providers): collapses hand
|
|
94
|
+
* tables plus derived pairs, then re-runs `buildModel` on freshly created
|
|
95
|
+
* logical specs so thinking wire defaults stay resolved. Untouched entries
|
|
96
|
+
* pass through by reference.
|
|
97
|
+
*/
|
|
98
|
+
export declare function collapseBuiltModelVariants<TApi extends Api>(models: readonly Model<TApi>[]): Model<TApi>[];
|
|
99
|
+
/**
|
|
100
|
+
* Resolve a retired effort-tier variant id (collapsed member, recycled id) to
|
|
101
|
+
* its replacement model id for `provider` via the hand table. Returns
|
|
102
|
+
* `undefined` when the id is not a known alias; derived `X-thinking` members
|
|
103
|
+
* resolve through `stripThinkingVariantToken` instead. Callers must try an
|
|
104
|
+
* exact model lookup first — a live model always wins over an alias.
|
|
105
|
+
*/
|
|
106
|
+
export declare function resolveVariantAlias(provider: Provider, modelId: string): string | undefined;
|
|
107
|
+
/** Bare-id alias hit: replacement id plus the providers declaring it. */
|
|
108
|
+
export interface BareVariantAliasHit {
|
|
109
|
+
id: string;
|
|
110
|
+
/** Providers whose table declares the alias — candidates from these win ties. */
|
|
111
|
+
providers: readonly Provider[];
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Provider-agnostic hand-table alias lookup for bare-id selectors. Returns
|
|
115
|
+
* the declaring providers so callers can prefer their models when the
|
|
116
|
+
* replacement id exists on unrelated providers too (e.g. a retired Cursor
|
|
117
|
+
* tier id must not resolve to `openai/gpt-5.4`).
|
|
118
|
+
*/
|
|
119
|
+
export declare function resolveBareVariantAlias(modelId: string): BareVariantAliasHit | undefined;
|
|
120
|
+
/**
|
|
121
|
+
* Reverse alias lookup: the retired ids that resolve to `modelId` for
|
|
122
|
+
* `provider` via the hand table. Used to re-key config keyed by raw member
|
|
123
|
+
* ids (models.yml `modelOverrides`, suppressed selectors) onto the collapsed
|
|
124
|
+
* model. Empty for providers without a table.
|
|
125
|
+
*/
|
|
126
|
+
export declare function getVariantAliasSources(provider: Provider, modelId: string): readonly string[];
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "15.11.6",
|
|
4
|
+
"version": "15.11.8",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,11 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "15.11.6",
|
|
37
|
+
"@oh-my-pi/pi-utils": "15.11.8",
|
|
38
38
|
"zod": "4.4.3"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "15.11.6",
|
|
41
|
+
"@oh-my-pi/pi-ai": "15.11.8",
|
|
42
42
|
"@types/bun": "^1.3.14"
|
|
43
43
|
},
|
|
44
44
|
"engines": {
|
package/src/build.ts
CHANGED
|
@@ -13,11 +13,13 @@ import { buildAnthropicCompat } from "./compat/anthropic";
|
|
|
13
13
|
import { buildOpenAICompat, buildOpenAIResponsesCompat } from "./compat/openai";
|
|
14
14
|
import { resolveModelThinking } from "./model-thinking";
|
|
15
15
|
import type { Api, CompatOf, Model, ModelSpec } from "./types";
|
|
16
|
+
import { cleanModelName } from "./utils";
|
|
16
17
|
|
|
17
18
|
export function buildModel<TApi extends Api>(spec: ModelSpec<TApi>): Model<TApi> {
|
|
18
19
|
const compat = buildCompat(spec) as CompatOf<TApi>;
|
|
19
20
|
return {
|
|
20
21
|
...spec,
|
|
22
|
+
name: cleanModelName(spec.name),
|
|
21
23
|
thinking: resolveModelThinking(spec, compat),
|
|
22
24
|
compat,
|
|
23
25
|
compatConfig: spec.compat,
|
package/src/compat/openai.ts
CHANGED
|
@@ -25,6 +25,8 @@ const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
|
|
|
25
25
|
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
|
|
26
26
|
/** Direct DeepSeek reasoning models stall between thinking and answer phases. */
|
|
27
27
|
const DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS = 300_000;
|
|
28
|
+
/** Kimi K2.6 can spend several minutes reasoning before the first visible token. */
|
|
29
|
+
const KIMI_K26_REASONING_STREAM_IDLE_TIMEOUT_MS = 300_000;
|
|
28
30
|
|
|
29
31
|
/**
|
|
30
32
|
* OpenCode's gateways (https://opencode.ai/zen|go) gate `reasoning_content`
|
|
@@ -178,15 +180,17 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
178
180
|
isCopilotHost ||
|
|
179
181
|
isZenmuxHost);
|
|
180
182
|
|
|
181
|
-
// Stream-watchdog floor: GLM coding-plan SKUs and direct
|
|
182
|
-
// models idle for minutes
|
|
183
|
-
// stop aborting and retrying.
|
|
183
|
+
// Stream-watchdog floor: GLM coding-plan SKUs, Kimi K2.6, and direct
|
|
184
|
+
// DeepSeek reasoning models can idle for minutes while reasoning; widen the
|
|
185
|
+
// idle timeout so warm-ups stop aborting and retrying.
|
|
184
186
|
const streamIdleTimeoutMs =
|
|
185
187
|
GLM_CODING_PLAN_MODEL_PATTERN.test(spec.id) && (isZai || isZhipu)
|
|
186
188
|
? GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS
|
|
187
|
-
: spec.reasoning && isDirectDeepseekApi
|
|
188
|
-
? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
189
|
-
: undefined;
|
|
189
|
+
: spec.reasoning && isKimiK26ModelId(spec.id)
|
|
190
|
+
? KIMI_K26_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
191
|
+
: spec.reasoning && isDirectDeepseekApi
|
|
192
|
+
? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
193
|
+
: undefined;
|
|
190
194
|
|
|
191
195
|
const compat: ResolvedOpenAICompat = {
|
|
192
196
|
supportsStore: !isNonStandard,
|
|
package/src/discovery/antigravity.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as z from "zod/v4";
|
|
2
2
|
import type { ModelSpec } from "../types";
|
|
3
3
|
import { toPositiveNumber } from "../utils";
|
|
4
|
+
import { ANTIGRAVITY_VARIANT_COLLAPSE_TABLE, collapseEffortVariants } from "../variant-collapse";
|
|
4
5
|
import { getAntigravityUserAgent } from "../wire/gemini-headers";
|
|
5
6
|
|
|
6
7
|
const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [
|
|
@@ -11,13 +12,7 @@ const FETCH_AVAILABLE_MODELS_PATH = "/v1internal:fetchAvailableModels";
|
|
|
11
12
|
|
|
12
13
|
const DEFAULT_CONTEXT_WINDOW = 200_000;
|
|
13
14
|
const DEFAULT_MAX_TOKENS = 64_000;
|
|
14
|
-
const ANTIGRAVITY_DISCOVERY_DENYLIST = new Set([
|
|
15
|
-
"chat_20706",
|
|
16
|
-
"chat_23310",
|
|
17
|
-
"gemini-2.5-flash-thinking",
|
|
18
|
-
"gemini-3-pro-low",
|
|
19
|
-
"gemini-2.5-pro",
|
|
20
|
-
]);
|
|
15
|
+
const ANTIGRAVITY_DISCOVERY_DENYLIST = new Set(["chat_20706", "chat_23310", "gemini-2.5-pro"]);
|
|
21
16
|
|
|
22
17
|
/**
|
|
23
18
|
* Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
|
|
@@ -224,7 +219,7 @@ export async function fetchAntigravityDiscoveryModels(
|
|
|
224
219
|
const supportsImages = model.supportsImages === true;
|
|
225
220
|
models.push({
|
|
226
221
|
id: modelId,
|
|
227
|
-
name: model.displayName
|
|
222
|
+
name: model.displayName || modelId,
|
|
228
223
|
api: "google-gemini-cli",
|
|
229
224
|
provider: "google-antigravity",
|
|
230
225
|
baseUrl: endpoint,
|
|
@@ -241,8 +236,12 @@ export async function fetchAntigravityDiscoveryModels(
|
|
|
241
236
|
});
|
|
242
237
|
}
|
|
243
238
|
|
|
244
|
-
|
|
245
|
-
|
|
239
|
+
// Collapse effort-tier variants at the source so runtime discovery,
|
|
240
|
+
// the gemini-cli re-provision, and the catalog generator all see
|
|
241
|
+
// logical ids only.
|
|
242
|
+
const collapsed = collapseEffortVariants(models, ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
|
|
243
|
+
collapsed.sort((a, b) => a.name.localeCompare(b.name) || a.id.localeCompare(b.id));
|
|
244
|
+
return collapsed;
|
|
246
245
|
}
|
|
247
246
|
|
|
248
247
|
return null;
|
package/src/identity/family.ts
CHANGED
|
@@ -14,9 +14,9 @@ export function isKimiModelId(modelId: string): boolean {
|
|
|
14
14
|
return modelId.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(modelId);
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
/** Kimi K2.6 specifically
|
|
17
|
+
/** Kimi K2.6 specifically, including router ids that spell the version `k2p6`. */
|
|
18
18
|
export function isKimiK26ModelId(modelId: string): boolean {
|
|
19
|
-
return /(^|\/)kimi-k2
|
|
19
|
+
return /(^|\/)kimi-k2(?:\.6|p6)(?:[-:]|$)/i.test(modelId);
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
/** Claude ids in any namespace form (`claude-*`, `vendor/claude.x`). */
|
|
@@ -113,3 +113,44 @@ export function isAnthropicFableOrMythosModel(modelId: string): boolean {
|
|
|
113
113
|
const parsed = parseAnthropicModel(bareModelId(modelId));
|
|
114
114
|
return parsed !== null && isFableOrMythos(parsed.kind);
|
|
115
115
|
}
|
|
116
|
+
|
|
117
|
+
/** Thinking-variant token location inside a model id. */
|
|
118
|
+
export interface ThinkingVariantToken {
|
|
119
|
+
index: number;
|
|
120
|
+
length: number;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const THINKING_VARIANT_TOKEN_RE = /-(?:thinking|reasoner|reasoning)(?=$|[^a-z0-9])/gi;
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Locates the first thinking-variant token (`-thinking`, `-reasoner`,
|
|
127
|
+
* `-reasoning`; trailing or infix) in a model id. The token ends at the id
|
|
128
|
+
* end or any non-alphanumeric boundary, and negated forms (`non-thinking`,
|
|
129
|
+
* `no-thinking`) never match — those name the NON-thinking SKU.
|
|
130
|
+
*/
|
|
131
|
+
export function findThinkingVariantToken(modelId: string): ThinkingVariantToken | undefined {
|
|
132
|
+
THINKING_VARIANT_TOKEN_RE.lastIndex = 0;
|
|
133
|
+
let match = THINKING_VARIANT_TOKEN_RE.exec(modelId);
|
|
134
|
+
while (match !== null) {
|
|
135
|
+
const preceding = /([a-z0-9]+)$/i.exec(modelId.slice(0, match.index))?.[1]?.toLowerCase();
|
|
136
|
+
if (preceding !== "non" && preceding !== "no") {
|
|
137
|
+
return { index: match.index, length: match[0].length };
|
|
138
|
+
}
|
|
139
|
+
match = THINKING_VARIANT_TOKEN_RE.exec(modelId);
|
|
140
|
+
}
|
|
141
|
+
return undefined;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Removes the located thinking-variant token: `kimi-k2-thinking` → `kimi-k2`,
|
|
146
|
+
* `mimo-v2-flash-thinking-original` → `mimo-v2-flash-original`,
|
|
147
|
+
* `grok-4.1-fast-reasoning` → `grok-4.1-fast`. Returns `undefined` when no
|
|
148
|
+
* token exists or nothing would remain. Callers MUST verify the result names
|
|
149
|
+
* a live model.
|
|
150
|
+
*/
|
|
151
|
+
export function stripThinkingVariantToken(modelId: string): string | undefined {
|
|
152
|
+
const token = findThinkingVariantToken(modelId);
|
|
153
|
+
if (!token) return undefined;
|
|
154
|
+
const stripped = modelId.slice(0, token.index) + modelId.slice(token.index + token.length);
|
|
155
|
+
return stripped.length > 0 ? stripped : undefined;
|
|
156
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -10,6 +10,7 @@ export * from "./models";
|
|
|
10
10
|
export * from "./provider-models";
|
|
11
11
|
export * from "./types";
|
|
12
12
|
export * from "./utils";
|
|
13
|
+
export * from "./variant-collapse";
|
|
13
14
|
export * from "./wire/codex";
|
|
14
15
|
export * from "./wire/gemini-headers";
|
|
15
16
|
export * from "./wire/github-copilot";
|
package/src/model-cache.ts
CHANGED
|
@@ -7,9 +7,10 @@ import { getModelDbPath } from "@oh-my-pi/pi-utils";
|
|
|
7
7
|
import type { Api, Model, ModelSpec } from "./types";
|
|
8
8
|
|
|
9
9
|
// Rows persist ModelSpec JSON (sparse `compat`, never the resolved record);
|
|
10
|
-
// the model manager rebuilds via `buildModel` on load.
|
|
11
|
-
//
|
|
12
|
-
|
|
10
|
+
// the model manager rebuilds via `buildModel` on load. v5 invalidates rows
|
|
11
|
+
// predating effort-tier variant collapsing (raw `-low`/`-high`/`-thinking`
|
|
12
|
+
// member ids); v4 dropped the pre-efforts ThinkingConfig shape.
|
|
13
|
+
const CACHE_SCHEMA_VERSION = 5;
|
|
13
14
|
|
|
14
15
|
interface CacheRow {
|
|
15
16
|
provider_id: string;
|
package/src/model-manager.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { readModelCache, writeModelCache } from "./model-cache";
|
|
|
3
3
|
import { type GeneratedProvider, getBundledModels } from "./models";
|
|
4
4
|
import type { Api, Model, ModelSpec, Provider } from "./types";
|
|
5
5
|
import { isRecord } from "./utils";
|
|
6
|
+
import { collapseBuiltModelVariants } from "./variant-collapse";
|
|
6
7
|
|
|
7
8
|
const DEFAULT_CACHE_TTL_MS = 2 * 60 * 60 * 1000;
|
|
8
9
|
const NON_AUTHORITATIVE_RETRY_MS = 5 * 60 * 1000;
|
|
@@ -134,7 +135,7 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
134
135
|
// Re-running `mergeDynamicModels(static, cache)` would just rebuild the same
|
|
135
136
|
// objects (~800ms in the steady-state cold-start profile for `omp -p hi`).
|
|
136
137
|
if (!shouldFetchFromNetwork && cache?.fresh && hasAuthoritativeCache && cacheFingerprintMatches) {
|
|
137
|
-
return { models: passModelList<TApi>(cache.models), stale: false };
|
|
138
|
+
return { models: collapseBuiltModelVariants(passModelList<TApi>(cache.models)), stale: false };
|
|
138
139
|
}
|
|
139
140
|
|
|
140
141
|
const [fetchedModelsDevModels, fetchedDynamicModels] = shouldFetchFromNetwork
|
|
@@ -148,8 +149,9 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
148
149
|
const dynamicModels = fetchedDynamicModels ?? [];
|
|
149
150
|
const mergedWithCache = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), cacheModels);
|
|
150
151
|
const mergedModels = mergeDynamicModels(mergedWithCache, dynamicModels);
|
|
151
|
-
const models =
|
|
152
|
-
dynamicModelsAuthoritative && dynamicFetchSucceeded ? retainModelIds(mergedModels, dynamicModels) : mergedModels
|
|
152
|
+
const models = collapseBuiltModelVariants(
|
|
153
|
+
dynamicModelsAuthoritative && dynamicFetchSucceeded ? retainModelIds(mergedModels, dynamicModels) : mergedModels,
|
|
154
|
+
);
|
|
153
155
|
const dynamicAuthoritative = !hasDynamicFetcher || dynamicFetchSucceeded || shouldUseFreshCacheAsAuthoritative;
|
|
154
156
|
if (shouldFetchFromNetwork) {
|
|
155
157
|
if (dynamicFetchSucceeded) {
|
|
@@ -157,7 +159,14 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
157
159
|
const snapshotModels = dynamicModelsAuthoritative
|
|
158
160
|
? retainModelIds(mergedSnapshot, dynamicModels)
|
|
159
161
|
: mergedSnapshot;
|
|
160
|
-
writeModelCache(
|
|
162
|
+
writeModelCache(
|
|
163
|
+
options.providerId,
|
|
164
|
+
now(),
|
|
165
|
+
collapseBuiltModelVariants(snapshotModels),
|
|
166
|
+
true,
|
|
167
|
+
staticFingerprint,
|
|
168
|
+
dbPath,
|
|
169
|
+
);
|
|
161
170
|
} else {
|
|
162
171
|
// Dynamic fetch failed — update cache with a non-authoritative snapshot so
|
|
163
172
|
// stale state remains visible while retry backoff still applies.
|
|
@@ -165,9 +174,11 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
165
174
|
writeModelCache(
|
|
166
175
|
options.providerId,
|
|
167
176
|
now(),
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
177
|
+
collapseBuiltModelVariants(
|
|
178
|
+
mergeDynamicModels(
|
|
179
|
+
mergeModelSources(staticModels, modelsDevModels),
|
|
180
|
+
normalizeModelList<TApi>(latestCache?.models ?? cache?.models ?? []),
|
|
181
|
+
),
|
|
171
182
|
),
|
|
172
183
|
false,
|
|
173
184
|
staticFingerprint,
|
|
@@ -290,7 +301,7 @@ function retainModelIds<TApi extends Api>(
|
|
|
290
301
|
* arms calling `resolveProviderModels` with the same `staticModels` array)
|
|
291
302
|
* skip the JSON+hash work after the first call.
|
|
292
303
|
*/
|
|
293
|
-
const MODEL_CACHE_FINGERPRINT_VERSION = "merge-v2";
|
|
304
|
+
const MODEL_CACHE_FINGERPRINT_VERSION = "merge-v3";
|
|
294
305
|
const kStaticFingerprint = Symbol("model-manager.staticFingerprint");
|
|
295
306
|
type ModelArrayWithFingerprint = readonly Model<Api>[] & { [kStaticFingerprint]?: string };
|
|
296
307
|
function fingerprintStatic<TApi extends Api>(
|