@oh-my-pi/pi-catalog 16.0.3 → 16.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/dist/types/compat/openai.d.ts +1 -0
- package/dist/types/discovery/antigravity.d.ts +9 -0
- package/dist/types/identity/dialect.d.ts +1 -1
- package/dist/types/identity/family.d.ts +4 -0
- package/dist/types/types.d.ts +20 -1
- package/dist/types/variant-collapse.d.ts +4 -5
- package/dist/types/wire/gemini-headers.d.ts +16 -1
- package/dist/types/wire/github-copilot.d.ts +2 -0
- package/package.json +3 -3
- package/src/compat/openai.ts +12 -1
- package/src/discovery/antigravity.ts +15 -6
- package/src/identity/dialect.ts +4 -1
- package/src/identity/family.ts +19 -1
- package/src/model-cache.ts +8 -6
- package/src/model-thinking.ts +19 -0
- package/src/models.json +518 -762
- package/src/provider-models/google.ts +2 -0
- package/src/provider-models/openai-compat.ts +7 -4
- package/src/types.ts +20 -0
- package/src/variant-collapse.ts +198 -72
- package/src/wire/gemini-headers.ts +28 -5
- package/src/wire/github-copilot.ts +18 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,36 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.0.5] - 2026-06-17
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `enableGeminiThinkingLoopGuard` to OpenAI compatibility options to allow explicit opt-in or opt-out of the Gemini thinking-loop guard for OpenAI-compatible model aliases
|
|
10
|
+
- Added `LITELLM_BASE_URL` as the LiteLLM provider discovery base URL fallback, with discovery caches scoped by the resolved proxy URL and explicit provider `baseUrl` config kept at higher precedence. ([#2726](https://github.com/can1357/oh-my-pi/issues/2726))
|
|
11
|
+
- Added `ThinkingConfig.effortBudgets` (per-effort thinking-budget contract baked into collapsed variants) and `ANTIGRAVITY_MODEL_WIRE_PROFILES` (`maxOutputTokens` + `model_enum` per Antigravity wire id) to mirror the captured Antigravity Cloud Code Assist client request shape.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Defaulted `enableGeminiThinkingLoopGuard` from Gemini family detection for both OpenAI completions and responses compatibility specs so Gemini models now enable the thinking-loop guard automatically
|
|
16
|
+
- Updated the default Gemini CLI user-agent version fallback to 0.46.0.
|
|
17
|
+
- Changed the Antigravity (`google-antigravity`, daily-cloudcode-pa) gemini-3.x collapse families to the `budget` thinking transport with the client's per-tier `thinkingBudget` (3.5 Flash low/medium/high = 1000/4000/10000, 3.1 Pro low/high = 1001/10001) and corrected 3.5 Flash effort→wire routing (medium → `gemini-3.5-flash-low`, high → `gemini-3-flash-agent`). Split the shared CCA collapse table so `google-gemini-cli` (cloudcode-pa) keeps the `google-level` `thinkingLevel` transport for official Gemini CLI parity. Stale collapsed snapshots (bundled catalog, recycled `gemini-3-flash` alias) self-heal from the hand table at collapse time, and the model cache schema is bumped to v7 to invalidate pre-budget Antigravity rows.
|
|
18
|
+
- Changed the Antigravity user-agent to the `antigravity/hub/<version>` format (default `2.1.4`) to match the captured client.
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
|
|
22
|
+
- Fixed `off` effort routing for `claude-opus-4-5` and `claude-opus-4-6` to use their base model IDs when thinking is disabled
|
|
23
|
+
- Fixed `gemini-2.5-flash` effort routing so all non-off effort levels resolve to `gemini-2.5-flash-thinking`
|
|
24
|
+
- Fixed shared variant alias provider resolution so `resolveBareVariantAlias` reports all matching providers when model aliases are present in both CCA collapse tables
|
|
25
|
+
- Routed google-antigravity default baseUrl to the stable primary daily endpoint in the catalog generator and all fallback snapshots, resolving connection drops on heavy queries.
|
|
26
|
+
- Fixed MiniMax M3 dialect selection so MiniMax-family OpenAI-compatible models use the MiniMax tool-call dialect instead of generic XML. ([#2759](https://github.com/can1357/oh-my-pi/issues/2759))
|
|
27
|
+
- Fixed GitHub Copilot dynamic discovery to honor plan-specific API endpoints stored in structured OAuth credentials. ([#2876](https://github.com/can1357/oh-my-pi/issues/2876))
|
|
28
|
+
|
|
29
|
+
## [16.0.4] - 2026-06-17
|
|
30
|
+
|
|
31
|
+
### Fixed
|
|
32
|
+
|
|
33
|
+
- Fixed GLM-5.2 catalog thinking metadata for Zhipu/BigModel so the top effort is exposed as `xhigh` and maps to provider-native `max`. ([#2833](https://github.com/can1357/oh-my-pi/issues/2833))
|
|
34
|
+
|
|
5
35
|
## [16.0.2] - 2026-06-16
|
|
6
36
|
|
|
7
37
|
### Fixed
|
|
package/dist/types/compat/openai.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
|
|
|
5
5
|
*/
|
|
6
6
|
export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
|
|
7
7
|
interface OpenAIResponsesSpecLike {
|
|
8
|
+
id?: string;
|
|
8
9
|
provider: string;
|
|
9
10
|
name: string;
|
|
10
11
|
baseUrl: string;
|
|
package/dist/types/discovery/antigravity.d.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import type { ModelSpec } from "../types";
|
|
2
|
+
import { type VariantCollapseTable } from "../variant-collapse";
|
|
3
|
+
export declare const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
|
|
4
|
+
export declare const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
|
2
5
|
/**
|
|
3
6
|
* Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
|
|
4
7
|
*/
|
|
@@ -51,6 +54,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
|
|
|
51
54
|
signal?: AbortSignal;
|
|
52
55
|
/** Optional fetch implementation override for tests. */
|
|
53
56
|
fetcher?: typeof fetch;
|
|
57
|
+
/**
|
|
58
|
+
* Hand collapse table to apply to the discovered list. Defaults to the
|
|
59
|
+
* Antigravity (budget-transport) table; `googleGeminiCli` passes the
|
|
60
|
+
* level-transport table so cloudcode-pa keeps `thinkingLevel`.
|
|
61
|
+
*/
|
|
62
|
+
collapseTable?: VariantCollapseTable;
|
|
54
63
|
}
|
|
55
64
|
/**
|
|
56
65
|
* Fetches discoverable Antigravity models and normalizes them into canonical model entries.
|
|
package/dist/types/identity/dialect.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma";
|
|
1
|
+
export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma" | "minimax";
|
|
2
2
|
export declare const FALLBACK_DIALECT: Dialect;
|
|
3
3
|
export declare function preferredDialect(modelId: string): Dialect;
|
|
package/dist/types/identity/family.d.ts
CHANGED
|
@@ -32,6 +32,8 @@ export declare function isMimoModelIdOrName(value: string): boolean;
|
|
|
32
32
|
* clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
|
|
33
33
|
*/
|
|
34
34
|
export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
|
|
35
|
+
/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
|
|
36
|
+
export declare function isMinimaxM3FamilyModelId(modelId: string): boolean;
|
|
35
37
|
/**
|
|
36
38
|
* OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
|
|
37
39
|
* `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
|
|
@@ -49,6 +51,8 @@ export declare function isOpenAIModelId(modelId: string): boolean;
|
|
|
49
51
|
* allowlist.
|
|
50
52
|
*/
|
|
51
53
|
export declare function isReasoningGlmModelId(modelId: string): boolean;
|
|
54
|
+
/** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
|
|
55
|
+
export declare function isGlm52ReasoningEffortModelId(modelId: string): boolean;
|
|
52
56
|
/** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
|
|
53
57
|
export declare function isGlmVisionModelId(modelId: string): boolean;
|
|
54
58
|
/**
|
package/dist/types/types.d.ts
CHANGED
|
@@ -33,6 +33,14 @@ export interface ThinkingConfig {
|
|
|
33
33
|
* thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
|
|
34
34
|
*/
|
|
35
35
|
effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
|
|
36
|
+
/**
|
|
37
|
+
* Per-effort thinking budget in tokens, baked at build time for collapsed
|
|
38
|
+
* variants whose upstream expects an explicit `thinkingBudget` instead of a
|
|
39
|
+
* value derived from the generic ladder (Antigravity Cloud Code Assist
|
|
40
|
+
* gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
|
|
41
|
+
* map, then the provider default ladder. Only meaningful for `mode: "budget"`.
|
|
42
|
+
*/
|
|
43
|
+
effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
|
|
36
44
|
/**
|
|
37
45
|
* When true, a thinking-off request MUST explicitly suppress thinking on
|
|
38
46
|
* the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
|
|
@@ -137,6 +145,13 @@ export interface OpenAICompat {
|
|
|
137
145
|
reasoningEffortMap?: Partial<Record<Effort, string>>;
|
|
138
146
|
/** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
|
|
139
147
|
supportsUsageInStreaming?: boolean;
|
|
148
|
+
/**
|
|
149
|
+
* Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
|
|
150
|
+
* Defaults to true when the model id classifies as the gemini family. Set
|
|
151
|
+
* explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
|
|
152
|
+
* that routes to Gemini, or to false to opt a gemini-family id out.
|
|
153
|
+
*/
|
|
154
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
140
155
|
/** Which field to use for max tokens. Default: auto-detected from URL. */
|
|
141
156
|
maxTokensField?: "max_completion_tokens" | "max_tokens";
|
|
142
157
|
/** Whether tool results require the `name` field. Default: auto-detected from URL. */
|
|
@@ -322,7 +337,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
|
|
|
322
337
|
* `buildModel`; request handlers read fields and never detect, resolve, or
|
|
323
338
|
* allocate.
|
|
324
339
|
*/
|
|
325
|
-
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
|
|
340
|
+
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
|
|
326
341
|
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
327
342
|
vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
|
|
328
343
|
extraBody?: OpenAICompat["extraBody"];
|
|
@@ -334,6 +349,8 @@ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRoutin
|
|
|
334
349
|
isOpenRouterHost: boolean;
|
|
335
350
|
/** The model sits behind Vercel AI Gateway. */
|
|
336
351
|
isVercelGatewayHost: boolean;
|
|
352
|
+
/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
|
|
353
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
337
354
|
/** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
|
|
338
355
|
whenThinking?: ResolvedOpenAICompat;
|
|
339
356
|
};
|
|
@@ -346,6 +363,8 @@ export interface ResolvedOpenAIResponsesCompat {
|
|
|
346
363
|
strictResponsesPairing: boolean;
|
|
347
364
|
requiresJuiceZeroHack: boolean;
|
|
348
365
|
reasoningEffortMap: Partial<Record<Effort, string>>;
|
|
366
|
+
/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
|
|
367
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
349
368
|
}
|
|
350
369
|
/** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
|
|
351
370
|
export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
|
|
package/dist/types/variant-collapse.d.ts
CHANGED
|
@@ -46,12 +46,11 @@ export interface EffortVariantFamily {
|
|
|
46
46
|
export interface VariantCollapseTable {
|
|
47
47
|
families: readonly EffortVariantFamily[];
|
|
48
48
|
}
|
|
49
|
-
/**
|
|
50
|
-
* Shared by `google-antigravity` and `google-gemini-cli` — both serve the
|
|
51
|
-
* Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
|
|
52
|
-
*/
|
|
49
|
+
/** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
|
|
53
50
|
export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
|
|
54
|
-
/**
|
|
51
|
+
/** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
|
|
52
|
+
export declare const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
|
|
53
|
+
/** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
|
|
55
54
|
export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
|
|
56
55
|
/**
|
|
57
56
|
* The global automatic rule: derive an `X` + `X-thinking` family for every
|
|
package/dist/types/wire/gemini-headers.d.ts
CHANGED
|
@@ -9,7 +9,6 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
|
|
|
9
9
|
"Client-Metadata": string;
|
|
10
10
|
};
|
|
11
11
|
export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
|
|
12
|
-
export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
|
|
13
12
|
/**
|
|
14
13
|
* Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
|
|
15
14
|
* and usage code can read it without pulling the heavy google-gemini-cli provider
|
|
@@ -17,3 +16,19 @@ export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER outp
|
|
|
17
16
|
* parse graph.
|
|
18
17
|
*/
|
|
19
18
|
export declare let getAntigravityUserAgent: () => string;
|
|
19
|
+
/**
|
|
20
|
+
* Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
|
|
21
|
+
* real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
|
|
22
|
+
* opaque `labels.model_enum` token the client tags each request with;
|
|
23
|
+
* `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
|
|
24
|
+
* regardless of the thinking budget. Keyed by the routed upstream wire id
|
|
25
|
+
* (post effort-routing), not the collapsed logical id. Checkpoint-only ids
|
|
26
|
+
* (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
|
|
27
|
+
* emits agent requests.
|
|
28
|
+
*/
|
|
29
|
+
export interface AntigravityModelWireProfile {
|
|
30
|
+
modelEnum: string;
|
|
31
|
+
maxOutputTokens: number;
|
|
32
|
+
}
|
|
33
|
+
export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
|
|
34
|
+
export declare function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined;
|
|
package/dist/types/wire/github-copilot.d.ts
CHANGED
|
@@ -25,9 +25,11 @@ export declare const COPILOT_API_HEADERS: {
|
|
|
25
25
|
export type ParsedGitHubCopilotApiKey = {
|
|
26
26
|
accessToken: string;
|
|
27
27
|
enterpriseUrl?: string;
|
|
28
|
+
apiEndpoint?: string;
|
|
28
29
|
};
|
|
29
30
|
export declare function isPublicGitHubHost(host: string): boolean;
|
|
30
31
|
export declare function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined): string | undefined;
|
|
32
|
+
export declare function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined;
|
|
31
33
|
export declare function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey;
|
|
32
34
|
export declare function normalizeDomain(input: string): string | null;
|
|
33
35
|
export declare function getGitHubCopilotBaseUrl(enterpriseDomain?: string): string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.0.3",
|
|
4
|
+
"version": "16.0.5",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,11 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.0.3",
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.0.5",
|
|
38
38
|
"zod": "^4"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "16.0.3",
|
|
41
|
+
"@oh-my-pi/pi-ai": "16.0.5",
|
|
42
42
|
"@types/bun": "^1.3.14"
|
|
43
43
|
},
|
|
44
44
|
"engines": {
|
package/src/compat/openai.ts
CHANGED
|
@@ -12,10 +12,12 @@ import {
|
|
|
12
12
|
isAnthropicNamespacedModelId,
|
|
13
13
|
isClaudeModelId,
|
|
14
14
|
isDeepseekModelIdOrName,
|
|
15
|
+
isGlm52ReasoningEffortModelId,
|
|
15
16
|
isKimiK26ModelId,
|
|
16
17
|
isKimiModelId,
|
|
17
18
|
isMimoModelIdOrName,
|
|
18
19
|
isQwenModelId,
|
|
20
|
+
modelFamilyToken,
|
|
19
21
|
} from "../identity/family";
|
|
20
22
|
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
21
23
|
import { applyCompatOverrides } from "./apply";
|
|
@@ -82,6 +84,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
82
84
|
const isCerebras = modelMatchesHost(hostModel, "cerebras");
|
|
83
85
|
const isZai = modelMatchesHost(hostModel, "zai");
|
|
84
86
|
const isZhipu = modelMatchesHost(hostModel, "zhipu");
|
|
87
|
+
const supportsZaiReasoningEffort = (isZai || isZhipu) && isGlm52ReasoningEffortModelId(spec.id);
|
|
85
88
|
const isKilo = modelMatchesHost(hostModel, "kilo");
|
|
86
89
|
const isKimiModel = isKimiModelId(spec.id);
|
|
87
90
|
const isMoonshotNative = modelMatchesHost(hostModel, "moonshotNative");
|
|
@@ -136,6 +139,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
136
139
|
const useMaxTokens =
|
|
137
140
|
isMistral ||
|
|
138
141
|
isMoonshotNative ||
|
|
142
|
+
isZai ||
|
|
143
|
+
isZhipu ||
|
|
139
144
|
hostMatchesUrl(baseUrl, "chutes") ||
|
|
140
145
|
hostMatchesUrl(baseUrl, "fireworks") ||
|
|
141
146
|
isDirectDeepseekApi;
|
|
@@ -202,11 +207,15 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
202
207
|
// OpenAI's reasoning-API surface.
|
|
203
208
|
supportsDeveloperRole: isOpenAIHost || isAzureHost,
|
|
204
209
|
supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
|
|
205
|
-
supportsReasoningEffort: !isGrok && !
|
|
210
|
+
supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
|
|
206
211
|
// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
|
|
207
212
|
supportsReasoningParams: provider !== "github-copilot",
|
|
208
213
|
reasoningEffortMap: {},
|
|
209
214
|
supportsUsageInStreaming: !isCerebras,
|
|
215
|
+
// pi-ai's thinking-loop guard is gemini-only; default the flag from the
|
|
216
|
+
// family classifier so OpenAI-compat proxies serving Gemini are covered.
|
|
217
|
+
// An opaque alias can opt in via `compat.enableGeminiThinkingLoopGuard`.
|
|
218
|
+
enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id) === "gemini",
|
|
210
219
|
// Kimi (including via OpenRouter and Fireworks router-form IDs such as
|
|
211
220
|
// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
|
|
212
221
|
// max_tokens, not actual output. The official Kimi K2 model guidance
|
|
@@ -291,6 +300,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
291
300
|
}
|
|
292
301
|
|
|
293
302
|
interface OpenAIResponsesSpecLike {
|
|
303
|
+
id?: string;
|
|
294
304
|
provider: string;
|
|
295
305
|
name: string;
|
|
296
306
|
baseUrl: string;
|
|
@@ -325,6 +335,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
|
|
|
325
335
|
strictResponsesPairing: isAzure || spec.provider === "github-copilot",
|
|
326
336
|
requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
|
|
327
337
|
reasoningEffortMap: {},
|
|
338
|
+
enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id ?? "") === "gemini",
|
|
328
339
|
};
|
|
329
340
|
applyCompatOverrides(compat, spec.compat);
|
|
330
341
|
return compat;
|
|
package/src/discovery/antigravity.ts
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { z } from "zod/v4";
|
|
2
2
|
import type { ModelSpec } from "../types";
|
|
3
3
|
import { toPositiveNumber } from "../utils";
|
|
4
|
-
import { ANTIGRAVITY_VARIANT_COLLAPSE_TABLE, collapseEffortVariants } from "../variant-collapse";
|
|
4
|
+
import {
|
|
5
|
+
ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
|
|
6
|
+
collapseEffortVariants,
|
|
7
|
+
type VariantCollapseTable,
|
|
8
|
+
} from "../variant-collapse";
|
|
5
9
|
import { getAntigravityUserAgent } from "../wire/gemini-headers";
|
|
6
10
|
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
] as const;
|
|
11
|
+
export const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
|
|
12
|
+
export const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
|
13
|
+
const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [ANTIGRAVITY_PRIMARY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
|
|
11
14
|
const FETCH_AVAILABLE_MODELS_PATH = "/v1internal:fetchAvailableModels";
|
|
12
15
|
|
|
13
16
|
const DEFAULT_CONTEXT_WINDOW = 200_000;
|
|
@@ -157,6 +160,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
|
|
|
157
160
|
signal?: AbortSignal;
|
|
158
161
|
/** Optional fetch implementation override for tests. */
|
|
159
162
|
fetcher?: typeof fetch;
|
|
163
|
+
/**
|
|
164
|
+
* Hand collapse table to apply to the discovered list. Defaults to the
|
|
165
|
+
* Antigravity (budget-transport) table; `googleGeminiCli` passes the
|
|
166
|
+
* level-transport table so cloudcode-pa keeps `thinkingLevel`.
|
|
167
|
+
*/
|
|
168
|
+
collapseTable?: VariantCollapseTable;
|
|
160
169
|
}
|
|
161
170
|
|
|
162
171
|
/**
|
|
@@ -239,7 +248,7 @@ export async function fetchAntigravityDiscoveryModels(
|
|
|
239
248
|
// Collapse effort-tier variants at the source so runtime discovery,
|
|
240
249
|
// the gemini-cli re-provision, and the catalog generator all see
|
|
241
250
|
// logical ids only.
|
|
242
|
-
const collapsed = collapseEffortVariants(models, ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
|
|
251
|
+
const collapsed = collapseEffortVariants(models, options.collapseTable ?? ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
|
|
243
252
|
collapsed.sort((a, b) => a.name.localeCompare(b.name) || a.id.localeCompare(b.id));
|
|
244
253
|
return collapsed;
|
|
245
254
|
}
|
package/src/identity/dialect.ts
CHANGED
|
@@ -11,7 +11,8 @@ export type Dialect =
|
|
|
11
11
|
| "pi"
|
|
12
12
|
| "qwen3"
|
|
13
13
|
| "gemini"
|
|
14
|
-
| "gemma"
|
|
14
|
+
| "gemma"
|
|
15
|
+
| "minimax";
|
|
15
16
|
|
|
16
17
|
export const FALLBACK_DIALECT: Dialect = "xml";
|
|
17
18
|
|
|
@@ -31,6 +32,8 @@ export function preferredDialect(modelId: string): Dialect {
|
|
|
31
32
|
return "qwen3";
|
|
32
33
|
case "deepseek":
|
|
33
34
|
return "deepseek";
|
|
35
|
+
case "minimax":
|
|
36
|
+
return "minimax";
|
|
34
37
|
case "openai":
|
|
35
38
|
case "gpt-oss":
|
|
36
39
|
return "harmony";
|
package/src/identity/family.ts
CHANGED
|
@@ -73,6 +73,13 @@ export function isMinimaxM2FamilyModelId(modelId: string): boolean {
|
|
|
73
73
|
return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
|
|
77
|
+
export function isMinimaxM3FamilyModelId(modelId: string): boolean {
|
|
78
|
+
const lower = modelId.toLowerCase();
|
|
79
|
+
if (!lower.includes("minimax")) return false;
|
|
80
|
+
return /(?:^|[/._-])(?:minimax[/._-])?m3(?:[-.:_]|$)/i.test(lower);
|
|
81
|
+
}
|
|
82
|
+
|
|
76
83
|
/**
|
|
77
84
|
* OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
|
|
78
85
|
* `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
|
|
@@ -105,6 +112,17 @@ export function isReasoningGlmModelId(modelId: string): boolean {
|
|
|
105
112
|
}
|
|
106
113
|
return semverGte(glm.version, "4.5");
|
|
107
114
|
}
|
|
115
|
+
/** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
|
|
116
|
+
export function isGlm52ReasoningEffortModelId(modelId: string): boolean {
|
|
117
|
+
const glm = parseGlmModel(bareModelId(modelId));
|
|
118
|
+
if (!glm || glm.vision) {
|
|
119
|
+
return false;
|
|
120
|
+
}
|
|
121
|
+
if (glm.variant !== "base" && glm.variant !== "air" && glm.variant !== "turbo") {
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
return semverGte(glm.version, "5.2");
|
|
125
|
+
}
|
|
108
126
|
|
|
109
127
|
/** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
|
|
110
128
|
export function isGlmVisionModelId(modelId: string): boolean {
|
|
@@ -128,7 +146,7 @@ export function modelFamilyToken(modelId: string): string {
|
|
|
128
146
|
if (isOpenAIModelId(modelId)) return "openai";
|
|
129
147
|
if (isKimiModelId(modelId)) return "kimi";
|
|
130
148
|
if (isQwenModelId(modelId)) return "qwen";
|
|
131
|
-
if (isMinimaxM2FamilyModelId(modelId)) return "minimax";
|
|
149
|
+
if (isMinimaxM2FamilyModelId(modelId) || isMinimaxM3FamilyModelId(modelId)) return "minimax";
|
|
132
150
|
if (isOpenAIGptOssModelId(modelId)) return "gpt-oss";
|
|
133
151
|
if (isDeepseekModelIdOrName(modelId)) return "deepseek";
|
|
134
152
|
if (isMimoModelIdOrName(modelId)) return "mimo";
|
package/src/model-cache.ts
CHANGED
|
@@ -7,12 +7,14 @@ import { getModelDbPath } from "@oh-my-pi/pi-utils";
|
|
|
7
7
|
import type { Api, Model, ModelSpec } from "./types";
|
|
8
8
|
|
|
9
9
|
// Rows persist ModelSpec JSON (sparse `compat`, never the resolved record);
|
|
10
|
-
// the model manager rebuilds via `buildModel` on load.
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
|
|
10
|
+
// the model manager rebuilds via `buildModel` on load. v7 invalidates rows
|
|
11
|
+
// predating the Antigravity Gemini budget-mode migration (cached specs still
|
|
12
|
+
// carrying `thinking.mode: "google-level"` and the old 3.5-flash effort
|
|
13
|
+
// routing); v6 invalidates rows that may contain the retired unknown-limit
|
|
14
|
+
// sentinels (222222/8888); v5 invalidated rows predating effort-tier variant
|
|
15
|
+
// collapsing (raw `-low`/`-high`/`-thinking` member ids); v4 dropped the
|
|
16
|
+
// pre-efforts ThinkingConfig shape.
|
|
17
|
+
const CACHE_SCHEMA_VERSION = 7;
|
|
16
18
|
|
|
17
19
|
interface CacheRow {
|
|
18
20
|
provider_id: string;
|
package/src/model-thinking.ts
CHANGED
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
import {
|
|
24
24
|
findThinkingVariantToken,
|
|
25
25
|
isDeepseekModelIdOrName,
|
|
26
|
+
isGlm52ReasoningEffortModelId,
|
|
26
27
|
isMinimaxM2FamilyModelId,
|
|
27
28
|
isOpenAIGptOssModelId,
|
|
28
29
|
supportsAdaptiveThinkingDisplay,
|
|
@@ -76,6 +77,13 @@ const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
|
76
77
|
const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
77
78
|
[Effort.Minimal]: "none",
|
|
78
79
|
};
|
|
80
|
+
const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
81
|
+
[Effort.Minimal]: "none",
|
|
82
|
+
[Effort.Low]: "high",
|
|
83
|
+
[Effort.Medium]: "high",
|
|
84
|
+
[Effort.High]: "high",
|
|
85
|
+
[Effort.XHigh]: "max",
|
|
86
|
+
};
|
|
79
87
|
|
|
80
88
|
/**
|
|
81
89
|
* Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
|
|
@@ -259,11 +267,19 @@ function sameEffortList(left: readonly Effort[], right: readonly Effort[]): bool
|
|
|
259
267
|
}
|
|
260
268
|
|
|
261
269
|
function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
|
|
270
|
+
if (spec.api === "openai-completions" && isZaiGlm52ReasoningEffortModel(spec)) {
|
|
271
|
+
return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
|
|
272
|
+
}
|
|
262
273
|
return spec.api === "openai-completions" && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
|
|
263
274
|
? LOW_MEDIUM_HIGH_REASONING_EFFORTS
|
|
264
275
|
: undefined;
|
|
265
276
|
}
|
|
266
277
|
|
|
278
|
+
function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
|
|
279
|
+
if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
|
|
280
|
+
return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
|
|
281
|
+
}
|
|
282
|
+
|
|
267
283
|
function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
|
|
268
284
|
if (compat === undefined || !("reasoningEffortMap" in compat)) {
|
|
269
285
|
return undefined;
|
|
@@ -288,6 +304,9 @@ function inferDetectedEffortMap<TApi extends Api>(
|
|
|
288
304
|
if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
|
|
289
305
|
return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
|
|
290
306
|
}
|
|
307
|
+
if (isZaiGlm52ReasoningEffortModel(spec)) {
|
|
308
|
+
return ZAI_GLM_52_REASONING_EFFORT_MAP;
|
|
309
|
+
}
|
|
291
310
|
if (isDeepseekReasoningModel(spec)) {
|
|
292
311
|
return DEEPSEEK_REASONING_EFFORT_MAP;
|
|
293
312
|
}
|