@oh-my-pi/pi-catalog 16.0.3 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,36 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.5] - 2026-06-17
6
+
7
+ ### Added
8
+
9
+ - Added `enableGeminiThinkingLoopGuard` to OpenAI compatibility options to allow explicit opt-in or opt-out of the Gemini thinking-loop guard for OpenAI-compatible model aliases
10
+ - Added `LITELLM_BASE_URL` as the LiteLLM provider discovery base URL fallback, with discovery caches scoped by the resolved proxy URL and explicit provider `baseUrl` config kept at higher precedence. ([#2726](https://github.com/can1357/oh-my-pi/issues/2726))
11
+ - Added `ThinkingConfig.effortBudgets` (per-effort thinking-budget contract baked into collapsed variants) and `ANTIGRAVITY_MODEL_WIRE_PROFILES` (`maxOutputTokens` + `model_enum` per Antigravity wire id) to mirror the captured Antigravity Cloud Code Assist client request shape.
12
+
13
+ ### Changed
14
+
15
+ - Changed `enableGeminiThinkingLoopGuard` to default from Gemini family detection in both the OpenAI completions and responses compatibility specs, so Gemini models now enable the thinking-loop guard automatically.
16
+ - Updated the default Gemini CLI user-agent version fallback to 0.46.0.
17
+ - Changed the Antigravity (`google-antigravity`, daily-cloudcode-pa) gemini-3.x collapse families to the `budget` thinking transport with the client's per-tier `thinkingBudget` (3.5 Flash low/medium/high = 1000/4000/10000, 3.1 Pro low/high = 1001/10001) and corrected 3.5 Flash effort→wire routing (medium → `gemini-3.5-flash-low`, high → `gemini-3-flash-agent`). Split the shared CCA collapse table so `google-gemini-cli` (cloudcode-pa) keeps the `google-level` `thinkingLevel` transport for official Gemini CLI parity. Stale collapsed snapshots (bundled catalog, recycled `gemini-3-flash` alias) self-heal from the hand table at collapse time, and the model cache schema is bumped to v7 to invalidate pre-budget Antigravity rows.
18
+ - Changed the Antigravity user-agent to the `antigravity/hub/<version>` format (default `2.1.4`) to match the captured client.
19
+
20
+ ### Fixed
21
+
22
+ - Fixed `off` effort routing for `claude-opus-4-5` and `claude-opus-4-6` to use their base model IDs when thinking is disabled
23
+ - Fixed `gemini-2.5-flash` effort routing so all non-off effort levels resolve to `gemini-2.5-flash-thinking`
24
+ - Fixed shared variant alias provider resolution so `resolveBareVariantAlias` reports all matching providers when model aliases are present in both CCA collapse tables
25
+ - Routed google-antigravity default baseUrl to the stable primary daily endpoint in the catalog generator and all fallback snapshots, resolving connection drops on heavy queries.
26
+ - Fixed MiniMax M3 dialect selection so MiniMax-family OpenAI-compatible models use the MiniMax tool-call dialect instead of generic XML. ([#2759](https://github.com/can1357/oh-my-pi/issues/2759))
27
+ - Fixed GitHub Copilot dynamic discovery to honor plan-specific API endpoints stored in structured OAuth credentials. ([#2876](https://github.com/can1357/oh-my-pi/issues/2876))
28
+
29
+ ## [16.0.4] - 2026-06-17
30
+
31
+ ### Fixed
32
+
33
+ - Fixed GLM-5.2 catalog thinking metadata for Zhipu/BigModel so the top effort is exposed as `xhigh` and maps to provider-native `max`. ([#2833](https://github.com/can1357/oh-my-pi/issues/2833))
34
+
5
35
  ## [16.0.2] - 2026-06-16
6
36
 
7
37
  ### Fixed
@@ -5,6 +5,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
5
5
  */
6
6
  export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
7
7
  interface OpenAIResponsesSpecLike {
8
+ id?: string;
8
9
  provider: string;
9
10
  name: string;
10
11
  baseUrl: string;
@@ -1,4 +1,7 @@
1
1
  import type { ModelSpec } from "../types";
2
+ import { type VariantCollapseTable } from "../variant-collapse";
3
+ export declare const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
4
+ export declare const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
2
5
  /**
3
6
  * Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
4
7
  */
@@ -51,6 +54,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
51
54
  signal?: AbortSignal;
52
55
  /** Optional fetch implementation override for tests. */
53
56
  fetcher?: typeof fetch;
57
+ /**
58
+ * Hand collapse table to apply to the discovered list. Defaults to the
59
+ * Antigravity (budget-transport) table; `googleGeminiCli` passes the
60
+ * level-transport table so cloudcode-pa keeps `thinkingLevel`.
61
+ */
62
+ collapseTable?: VariantCollapseTable;
54
63
  }
55
64
  /**
56
65
  * Fetches discoverable Antigravity models and normalizes them into canonical model entries.
@@ -1,3 +1,3 @@
1
- export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma";
1
+ export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma" | "minimax";
2
2
  export declare const FALLBACK_DIALECT: Dialect;
3
3
  export declare function preferredDialect(modelId: string): Dialect;
@@ -32,6 +32,8 @@ export declare function isMimoModelIdOrName(value: string): boolean;
32
32
  * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
33
33
  */
34
34
  export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
35
+ /** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
36
+ export declare function isMinimaxM3FamilyModelId(modelId: string): boolean;
35
37
  /**
36
38
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
37
39
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
@@ -49,6 +51,8 @@ export declare function isOpenAIModelId(modelId: string): boolean;
49
51
  * allowlist.
50
52
  */
51
53
  export declare function isReasoningGlmModelId(modelId: string): boolean;
54
+ /** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
55
+ export declare function isGlm52ReasoningEffortModelId(modelId: string): boolean;
52
56
  /** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
53
57
  export declare function isGlmVisionModelId(modelId: string): boolean;
54
58
  /**
@@ -33,6 +33,14 @@ export interface ThinkingConfig {
33
33
  * thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
34
34
  */
35
35
  effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
36
+ /**
37
+ * Per-effort thinking budget in tokens, baked at build time for collapsed
38
+ * variants whose upstream expects an explicit `thinkingBudget` instead of a
39
+ * value derived from the generic ladder (Antigravity Cloud Code Assist
40
+ * gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
41
+ * map, then the provider default ladder. Only meaningful for `mode: "budget"`.
42
+ */
43
+ effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
36
44
  /**
37
45
  * When true, a thinking-off request MUST explicitly suppress thinking on
38
46
  * the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
@@ -137,6 +145,13 @@ export interface OpenAICompat {
137
145
  reasoningEffortMap?: Partial<Record<Effort, string>>;
138
146
  /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
139
147
  supportsUsageInStreaming?: boolean;
148
+ /**
149
+ * Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
150
+ * Defaults to true when the model id classifies as the gemini family. Set
151
+ * explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
152
+ * that routes to Gemini, or to false to opt a gemini-family id out.
153
+ */
154
+ enableGeminiThinkingLoopGuard?: boolean;
140
155
  /** Which field to use for max tokens. Default: auto-detected from URL. */
141
156
  maxTokensField?: "max_completion_tokens" | "max_tokens";
142
157
  /** Whether tool results require the `name` field. Default: auto-detected from URL. */
@@ -322,7 +337,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
322
337
  * `buildModel`; request handlers read fields and never detect, resolve, or
323
338
  * allocate.
324
339
  */
325
- export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
340
+ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
326
341
  openRouterRouting?: OpenAICompat["openRouterRouting"];
327
342
  vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
328
343
  extraBody?: OpenAICompat["extraBody"];
@@ -334,6 +349,8 @@ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRoutin
334
349
  isOpenRouterHost: boolean;
335
350
  /** The model sits behind Vercel AI Gateway. */
336
351
  isVercelGatewayHost: boolean;
352
+ /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
353
+ enableGeminiThinkingLoopGuard?: boolean;
337
354
  /** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
338
355
  whenThinking?: ResolvedOpenAICompat;
339
356
  };
@@ -346,6 +363,8 @@ export interface ResolvedOpenAIResponsesCompat {
346
363
  strictResponsesPairing: boolean;
347
364
  requiresJuiceZeroHack: boolean;
348
365
  reasoningEffortMap: Partial<Record<Effort, string>>;
366
+ /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
367
+ enableGeminiThinkingLoopGuard?: boolean;
349
368
  }
350
369
  /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
351
370
  export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
@@ -46,12 +46,11 @@ export interface EffortVariantFamily {
46
46
  export interface VariantCollapseTable {
47
47
  families: readonly EffortVariantFamily[];
48
48
  }
49
- /**
50
- * Shared by `google-antigravity` and `google-gemini-cli` — both serve the
51
- * Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
52
- */
49
+ /** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
53
50
  export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
54
- /** Provider id hand collapse table. Both CCA providers share one table. */
51
+ /** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
52
+ export declare const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
53
+ /** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
55
54
  export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
56
55
  /**
57
56
  * The global automatic rule: derive an `X` + `X-thinking` family for every
@@ -9,7 +9,6 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
9
9
  "Client-Metadata": string;
10
10
  };
11
11
  export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
12
- export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
13
12
  /**
14
13
  * Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
15
14
  * and usage code can read it without pulling the heavy google-gemini-cli provider
@@ -17,3 +16,19 @@ export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER outp
17
16
  * parse graph.
18
17
  */
19
18
  export declare let getAntigravityUserAgent: () => string;
19
+ /**
20
+ * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
21
+ * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
22
+ * opaque `labels.model_enum` token the client tags each request with;
23
+ * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
24
+ * regardless of the thinking budget. Keyed by the routed upstream wire id
25
+ * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
26
+ * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
27
+ * emits agent requests.
28
+ */
29
+ export interface AntigravityModelWireProfile {
30
+ modelEnum: string;
31
+ maxOutputTokens: number;
32
+ }
33
+ export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
34
+ export declare function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined;
@@ -25,9 +25,11 @@ export declare const COPILOT_API_HEADERS: {
25
25
  export type ParsedGitHubCopilotApiKey = {
26
26
  accessToken: string;
27
27
  enterpriseUrl?: string;
28
+ apiEndpoint?: string;
28
29
  };
29
30
  export declare function isPublicGitHubHost(host: string): boolean;
30
31
  export declare function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined): string | undefined;
32
+ export declare function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined;
31
33
  export declare function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey;
32
34
  export declare function normalizeDomain(input: string): string | null;
33
35
  export declare function getGitHubCopilotBaseUrl(enterpriseDomain?: string): string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.0.3",
4
+ "version": "16.0.5",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,11 +34,11 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.0.3",
37
+ "@oh-my-pi/pi-utils": "16.0.5",
38
38
  "zod": "^4"
39
39
  },
40
40
  "devDependencies": {
41
- "@oh-my-pi/pi-ai": "16.0.3",
41
+ "@oh-my-pi/pi-ai": "16.0.5",
42
42
  "@types/bun": "^1.3.14"
43
43
  },
44
44
  "engines": {
@@ -12,10 +12,12 @@ import {
12
12
  isAnthropicNamespacedModelId,
13
13
  isClaudeModelId,
14
14
  isDeepseekModelIdOrName,
15
+ isGlm52ReasoningEffortModelId,
15
16
  isKimiK26ModelId,
16
17
  isKimiModelId,
17
18
  isMimoModelIdOrName,
18
19
  isQwenModelId,
20
+ modelFamilyToken,
19
21
  } from "../identity/family";
20
22
  import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
21
23
  import { applyCompatOverrides } from "./apply";
@@ -82,6 +84,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
82
84
  const isCerebras = modelMatchesHost(hostModel, "cerebras");
83
85
  const isZai = modelMatchesHost(hostModel, "zai");
84
86
  const isZhipu = modelMatchesHost(hostModel, "zhipu");
87
+ const supportsZaiReasoningEffort = (isZai || isZhipu) && isGlm52ReasoningEffortModelId(spec.id);
85
88
  const isKilo = modelMatchesHost(hostModel, "kilo");
86
89
  const isKimiModel = isKimiModelId(spec.id);
87
90
  const isMoonshotNative = modelMatchesHost(hostModel, "moonshotNative");
@@ -136,6 +139,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
136
139
  const useMaxTokens =
137
140
  isMistral ||
138
141
  isMoonshotNative ||
142
+ isZai ||
143
+ isZhipu ||
139
144
  hostMatchesUrl(baseUrl, "chutes") ||
140
145
  hostMatchesUrl(baseUrl, "fireworks") ||
141
146
  isDirectDeepseekApi;
@@ -202,11 +207,15 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
202
207
  // OpenAI's reasoning-API surface.
203
208
  supportsDeveloperRole: isOpenAIHost || isAzureHost,
204
209
  supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
205
- supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
210
+ supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
206
211
  // GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
207
212
  supportsReasoningParams: provider !== "github-copilot",
208
213
  reasoningEffortMap: {},
209
214
  supportsUsageInStreaming: !isCerebras,
215
+ // pi-ai's thinking-loop guard is gemini-only; default the flag from the
216
+ // family classifier so OpenAI-compat proxies serving Gemini are covered.
217
+ // An opaque alias can opt in via `compat.enableGeminiThinkingLoopGuard`.
218
+ enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id) === "gemini",
210
219
  // Kimi (including via OpenRouter and Fireworks router-form IDs such as
211
220
  // `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
212
221
  // max_tokens, not actual output. The official Kimi K2 model guidance
@@ -291,6 +300,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
291
300
  }
292
301
 
293
302
  interface OpenAIResponsesSpecLike {
303
+ id?: string;
294
304
  provider: string;
295
305
  name: string;
296
306
  baseUrl: string;
@@ -325,6 +335,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
325
335
  strictResponsesPairing: isAzure || spec.provider === "github-copilot",
326
336
  requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
327
337
  reasoningEffortMap: {},
338
+ enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id ?? "") === "gemini",
328
339
  };
329
340
  applyCompatOverrides(compat, spec.compat);
330
341
  return compat;
@@ -1,13 +1,16 @@
1
1
  import { z } from "zod/v4";
2
2
  import type { ModelSpec } from "../types";
3
3
  import { toPositiveNumber } from "../utils";
4
- import { ANTIGRAVITY_VARIANT_COLLAPSE_TABLE, collapseEffortVariants } from "../variant-collapse";
4
+ import {
5
+ ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
6
+ collapseEffortVariants,
7
+ type VariantCollapseTable,
8
+ } from "../variant-collapse";
5
9
  import { getAntigravityUserAgent } from "../wire/gemini-headers";
6
10
 
7
- const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [
8
- "https://daily-cloudcode-pa.googleapis.com",
9
- "https://daily-cloudcode-pa.sandbox.googleapis.com",
10
- ] as const;
11
+ export const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
12
+ export const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
13
+ const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [ANTIGRAVITY_PRIMARY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
11
14
  const FETCH_AVAILABLE_MODELS_PATH = "/v1internal:fetchAvailableModels";
12
15
 
13
16
  const DEFAULT_CONTEXT_WINDOW = 200_000;
@@ -157,6 +160,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
157
160
  signal?: AbortSignal;
158
161
  /** Optional fetch implementation override for tests. */
159
162
  fetcher?: typeof fetch;
163
+ /**
164
+ * Hand collapse table to apply to the discovered list. Defaults to the
165
+ * Antigravity (budget-transport) table; `googleGeminiCli` passes the
166
+ * level-transport table so cloudcode-pa keeps `thinkingLevel`.
167
+ */
168
+ collapseTable?: VariantCollapseTable;
160
169
  }
161
170
 
162
171
  /**
@@ -239,7 +248,7 @@ export async function fetchAntigravityDiscoveryModels(
239
248
  // Collapse effort-tier variants at the source so runtime discovery,
240
249
  // the gemini-cli re-provision, and the catalog generator all see
241
250
  // logical ids only.
242
- const collapsed = collapseEffortVariants(models, ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
251
+ const collapsed = collapseEffortVariants(models, options.collapseTable ?? ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
243
252
  collapsed.sort((a, b) => a.name.localeCompare(b.name) || a.id.localeCompare(b.id));
244
253
  return collapsed;
245
254
  }
@@ -11,7 +11,8 @@ export type Dialect =
11
11
  | "pi"
12
12
  | "qwen3"
13
13
  | "gemini"
14
- | "gemma";
14
+ | "gemma"
15
+ | "minimax";
15
16
 
16
17
  export const FALLBACK_DIALECT: Dialect = "xml";
17
18
 
@@ -31,6 +32,8 @@ export function preferredDialect(modelId: string): Dialect {
31
32
  return "qwen3";
32
33
  case "deepseek":
33
34
  return "deepseek";
35
+ case "minimax":
36
+ return "minimax";
34
37
  case "openai":
35
38
  case "gpt-oss":
36
39
  return "harmony";
@@ -73,6 +73,13 @@ export function isMinimaxM2FamilyModelId(modelId: string): boolean {
73
73
  return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
74
74
  }
75
75
 
76
+ /** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
77
+ export function isMinimaxM3FamilyModelId(modelId: string): boolean {
78
+ const lower = modelId.toLowerCase();
79
+ if (!lower.includes("minimax")) return false;
80
+ return /(?:^|[/._-])(?:minimax[/._-])?m3(?:[-.:_]|$)/i.test(lower);
81
+ }
82
+
76
83
  /**
77
84
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
78
85
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
@@ -105,6 +112,17 @@ export function isReasoningGlmModelId(modelId: string): boolean {
105
112
  }
106
113
  return semverGte(glm.version, "4.5");
107
114
  }
115
+ /** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
116
+ export function isGlm52ReasoningEffortModelId(modelId: string): boolean {
117
+ const glm = parseGlmModel(bareModelId(modelId));
118
+ if (!glm || glm.vision) {
119
+ return false;
120
+ }
121
+ if (glm.variant !== "base" && glm.variant !== "air" && glm.variant !== "turbo") {
122
+ return false;
123
+ }
124
+ return semverGte(glm.version, "5.2");
125
+ }
108
126
 
109
127
  /** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
110
128
  export function isGlmVisionModelId(modelId: string): boolean {
@@ -128,7 +146,7 @@ export function modelFamilyToken(modelId: string): string {
128
146
  if (isOpenAIModelId(modelId)) return "openai";
129
147
  if (isKimiModelId(modelId)) return "kimi";
130
148
  if (isQwenModelId(modelId)) return "qwen";
131
- if (isMinimaxM2FamilyModelId(modelId)) return "minimax";
149
+ if (isMinimaxM2FamilyModelId(modelId) || isMinimaxM3FamilyModelId(modelId)) return "minimax";
132
150
  if (isOpenAIGptOssModelId(modelId)) return "gpt-oss";
133
151
  if (isDeepseekModelIdOrName(modelId)) return "deepseek";
134
152
  if (isMimoModelIdOrName(modelId)) return "mimo";
@@ -7,12 +7,14 @@ import { getModelDbPath } from "@oh-my-pi/pi-utils";
7
7
  import type { Api, Model, ModelSpec } from "./types";
8
8
 
9
9
  // Rows persist ModelSpec JSON (sparse `compat`, never the resolved record);
10
- // the model manager rebuilds via `buildModel` on load. v6 invalidates rows
11
- // that may contain the retired unknown-limit sentinels (222222/8888); v5
12
- // invalidated rows predating effort-tier variant collapsing (raw
13
- // `-low`/`-high`/`-thinking` member ids); v4 dropped the pre-efforts
14
- // ThinkingConfig shape.
15
- const CACHE_SCHEMA_VERSION = 6;
10
+ // the model manager rebuilds via `buildModel` on load. v7 invalidates rows
11
+ // predating the Antigravity Gemini budget-mode migration (cached specs still
12
+ // carrying `thinking.mode: "google-level"` and the old 3.5-flash effort
13
+ // routing); v6 invalidated rows that may contain the retired unknown-limit
14
+ // sentinels (222222/8888); v5 invalidated rows predating effort-tier variant
15
+ // collapsing (raw `-low`/`-high`/`-thinking` member ids); v4 dropped the
16
+ // pre-efforts ThinkingConfig shape.
17
+ const CACHE_SCHEMA_VERSION = 7;
16
18
 
17
19
  interface CacheRow {
18
20
  provider_id: string;
@@ -23,6 +23,7 @@ import {
23
23
  import {
24
24
  findThinkingVariantToken,
25
25
  isDeepseekModelIdOrName,
26
+ isGlm52ReasoningEffortModelId,
26
27
  isMinimaxM2FamilyModelId,
27
28
  isOpenAIGptOssModelId,
28
29
  supportsAdaptiveThinkingDisplay,
@@ -76,6 +77,13 @@ const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
76
77
  const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
77
78
  [Effort.Minimal]: "none",
78
79
  };
80
+ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
81
+ [Effort.Minimal]: "none",
82
+ [Effort.Low]: "high",
83
+ [Effort.Medium]: "high",
84
+ [Effort.High]: "high",
85
+ [Effort.XHigh]: "max",
86
+ };
79
87
 
80
88
  /**
81
89
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -259,11 +267,19 @@ function sameEffortList(left: readonly Effort[], right: readonly Effort[]): bool
259
267
  }
260
268
 
261
269
  function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
270
+ if (spec.api === "openai-completions" && isZaiGlm52ReasoningEffortModel(spec)) {
271
+ return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
272
+ }
262
273
  return spec.api === "openai-completions" && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
263
274
  ? LOW_MEDIUM_HIGH_REASONING_EFFORTS
264
275
  : undefined;
265
276
  }
266
277
 
278
+ function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
279
+ if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
280
+ return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
281
+ }
282
+
267
283
  function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
268
284
  if (compat === undefined || !("reasoningEffortMap" in compat)) {
269
285
  return undefined;
@@ -288,6 +304,9 @@ function inferDetectedEffortMap<TApi extends Api>(
288
304
  if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
289
305
  return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
290
306
  }
307
+ if (isZaiGlm52ReasoningEffortModel(spec)) {
308
+ return ZAI_GLM_52_REASONING_EFFORT_MAP;
309
+ }
291
310
  if (isDeepseekReasoningModel(spec)) {
292
311
  return DEEPSEEK_REASONING_EFFORT_MAP;
293
312
  }