npm - @oh-my-pi/pi-catalog - Versions diffs - 16.0.3 → 16.0.5 - Mend

@oh-my-pi/pi-catalog 16.0.3 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/CHANGELOG.md +30 -0
package/dist/types/compat/openai.d.ts +1 -0
package/dist/types/discovery/antigravity.d.ts +9 -0
package/dist/types/identity/dialect.d.ts +1 -1
package/dist/types/identity/family.d.ts +4 -0
package/dist/types/types.d.ts +20 -1
package/dist/types/variant-collapse.d.ts +4 -5
package/dist/types/wire/gemini-headers.d.ts +16 -1
package/dist/types/wire/github-copilot.d.ts +2 -0
package/package.json +3 -3
package/src/compat/openai.ts +12 -1
package/src/discovery/antigravity.ts +15 -6
package/src/identity/dialect.ts +4 -1
package/src/identity/family.ts +19 -1
package/src/model-cache.ts +8 -6
package/src/model-thinking.ts +19 -0
package/src/models.json +518 -762
package/src/provider-models/google.ts +2 -0
package/src/provider-models/openai-compat.ts +7 -4
package/src/types.ts +20 -0
package/src/variant-collapse.ts +198 -72
package/src/wire/gemini-headers.ts +28 -5
package/src/wire/github-copilot.ts +18 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,36 @@
 ## [Unreleased]
+## [16.0.5] - 2026-06-17
+### Added
+- Added `enableGeminiThinkingLoopGuard` to OpenAI compatibility options to allow explicit opt-in or opt-out of the Gemini thinking-loop guard for OpenAI-compatible model aliases
+- Added `LITELLM_BASE_URL` as the LiteLLM provider discovery base URL fallback, with discovery caches scoped by the resolved proxy URL and explicit provider `baseUrl` config kept at higher precedence. ([#2726](https://github.com/can1357/oh-my-pi/issues/2726))
+- Added `ThinkingConfig.effortBudgets` (per-effort thinking-budget contract baked into collapsed variants) and `ANTIGRAVITY_MODEL_WIRE_PROFILES` (`maxOutputTokens` + `model_enum` per Antigravity wire id) to mirror the captured Antigravity Cloud Code Assist client request shape.
+### Changed
+- Defaulted `enableGeminiThinkingLoopGuard` from Gemini family detection for both OpenAI completions and responses compatibility specs so Gemini models now enable the thinking-loop guard automatically
+- Updated the default Gemini CLI user-agent version fallback to 0.46.0.
+- Changed the Antigravity (`google-antigravity`, daily-cloudcode-pa) gemini-3.x collapse families to the `budget` thinking transport with the client's per-tier `thinkingBudget` (3.5 Flash low/medium/high = 1000/4000/10000, 3.1 Pro low/high = 1001/10001) and corrected 3.5 Flash effort→wire routing (medium → `gemini-3.5-flash-low`, high → `gemini-3-flash-agent`). Split the shared CCA collapse table so `google-gemini-cli` (cloudcode-pa) keeps the `google-level` `thinkingLevel` transport for official Gemini CLI parity. Stale collapsed snapshots (bundled catalog, recycled `gemini-3-flash` alias) self-heal from the hand table at collapse time, and the model cache schema is bumped to v7 to invalidate pre-budget Antigravity rows.
+- Changed the Antigravity user-agent to the `antigravity/hub/<version>` format (default `2.1.4`) to match the captured client.
+### Fixed
+- Fixed `off` effort routing for `claude-opus-4-5` and `claude-opus-4-6` to use their base model IDs when thinking is disabled
+- Fixed `gemini-2.5-flash` effort routing so all non-off effort levels resolve to `gemini-2.5-flash-thinking`
+- Fixed shared variant alias provider resolution so `resolveBareVariantAlias` reports all matching providers when model aliases are present in both CCA collapse tables
+- Routed google-antigravity default baseUrl to the stable primary daily endpoint in the catalog generator and all fallback snapshots, resolving connection drops on heavy queries.
+- Fixed MiniMax M3 dialect selection so MiniMax-family OpenAI-compatible models use the MiniMax tool-call dialect instead of generic XML. ([#2759](https://github.com/can1357/oh-my-pi/issues/2759))
+- Fixed GitHub Copilot dynamic discovery to honor plan-specific API endpoints stored in structured OAuth credentials. ([#2876](https://github.com/can1357/oh-my-pi/issues/2876))
+## [16.0.4] - 2026-06-17
+### Fixed
+- Fixed GLM-5.2 catalog thinking metadata for Zhipu/BigModel so the top effort is exposed as `xhigh` and maps to provider-native `max`. ([#2833](https://github.com/can1357/oh-my-pi/issues/2833))
 ## [16.0.2] - 2026-06-16
 ### Fixed

package/dist/types/compat/openai.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
  */
 export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
 interface OpenAIResponsesSpecLike {
+    id?: string;
     provider: string;
     name: string;
     baseUrl: string;

package/dist/types/discovery/antigravity.d.ts CHANGED Viewed

@@ -1,4 +1,7 @@
 import type { ModelSpec } from "../types";
+import { type VariantCollapseTable } from "../variant-collapse";
+export declare const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
+export declare const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
 /**
  * Raw model metadata returned by Antigravity's `fetchAvailableModels` endpoint.
  */
@@ -51,6 +54,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
     signal?: AbortSignal;
     /** Optional fetch implementation override for tests. */
     fetcher?: typeof fetch;
+    /**
+     * Hand collapse table to apply to the discovered list. Defaults to the
+     * Antigravity (budget-transport) table; `googleGeminiCli` passes the
+     * level-transport table so cloudcode-pa keeps `thinkingLevel`.
+     */
+    collapseTable?: VariantCollapseTable;
 }
 /**
  * Fetches discoverable Antigravity models and normalizes them into canonical model entries.

package/dist/types/identity/dialect.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma";
+export type Dialect = "glm" | "hermes" | "kimi" | "xml" | "anthropic" | "deepseek" | "harmony" | "pi" | "qwen3" | "gemini" | "gemma" | "minimax";
 export declare const FALLBACK_DIALECT: Dialect;
 export declare function preferredDialect(modelId: string): Dialect;

package/dist/types/identity/family.d.ts CHANGED Viewed

@@ -32,6 +32,8 @@ export declare function isMimoModelIdOrName(value: string): boolean;
  * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
  */
 export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
+/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
+export declare function isMinimaxM3FamilyModelId(modelId: string): boolean;
 /**
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
@@ -49,6 +51,8 @@ export declare function isOpenAIModelId(modelId: string): boolean;
  * allowlist.
  */
 export declare function isReasoningGlmModelId(modelId: string): boolean;
+/** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
+export declare function isGlm52ReasoningEffortModelId(modelId: string): boolean;
 /** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
 export declare function isGlmVisionModelId(modelId: string): boolean;
 /**

package/dist/types/types.d.ts CHANGED Viewed

@@ -33,6 +33,14 @@ export interface ThinkingConfig {
      * thinking is disabled. Missing keys fall back to `requestModelId ?? id`.
      */
     effortRouting?: Readonly<Partial<Record<Effort | "off", string>>>;
+    /**
+     * Per-effort thinking budget in tokens, baked at build time for collapsed
+     * variants whose upstream expects an explicit `thinkingBudget` instead of a
+     * value derived from the generic ladder (Antigravity Cloud Code Assist
+     * gemini-3.x). Request mapping prefers caller `thinkingBudgets`, then this
+     * map, then the provider default ladder. Only meaningful for `mode: "budget"`.
+     */
+    effortBudgets?: Readonly<Partial<Record<Effort, number>>>;
     /**
      * When true, a thinking-off request MUST explicitly suppress thinking on
      * the wire (google-level: `thinkingLevel: "MINIMAL"` + `includeThoughts:
@@ -137,6 +145,13 @@ export interface OpenAICompat {
     reasoningEffortMap?: Partial<Record<Effort, string>>;
     /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
     supportsUsageInStreaming?: boolean;
+    /**
+     * Enable the Gemini thinking-loop guard (pi-ai stream layer) for this model.
+     * Defaults to true when the model id classifies as the gemini family. Set
+     * explicitly to cover an opaque OpenAI-compat proxy alias (e.g. `my-model`)
+     * that routes to Gemini, or to false to opt a gemini-family id out.
+     */
+    enableGeminiThinkingLoopGuard?: boolean;
     /** Which field to use for max tokens. Default: auto-detected from URL. */
     maxTokensField?: "max_completion_tokens" | "max_tokens";
     /** Whether tool results require the `name` field. Default: auto-detected from URL. */
@@ -322,7 +337,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
  * `buildModel`; request handlers read fields and never detect, resolve, or
  * allocate.
  */
-export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
+export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
     openRouterRouting?: OpenAICompat["openRouterRouting"];
     vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
     extraBody?: OpenAICompat["extraBody"];
@@ -334,6 +349,8 @@ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRoutin
     isOpenRouterHost: boolean;
     /** The model sits behind Vercel AI Gateway. */
     isVercelGatewayHost: boolean;
+    /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
+    enableGeminiThinkingLoopGuard?: boolean;
     /** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
     whenThinking?: ResolvedOpenAICompat;
 };
@@ -346,6 +363,8 @@ export interface ResolvedOpenAIResponsesCompat {
     strictResponsesPairing: boolean;
     requiresJuiceZeroHack: boolean;
     reasoningEffortMap: Partial<Record<Effort, string>>;
+    /** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
+    enableGeminiThinkingLoopGuard?: boolean;
 }
 /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
 export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {

package/dist/types/variant-collapse.d.ts CHANGED Viewed

@@ -46,12 +46,11 @@ export interface EffortVariantFamily {
 export interface VariantCollapseTable {
     families: readonly EffortVariantFamily[];
 }
-/**
- * Shared by `google-antigravity` and `google-gemini-cli` — both serve the
- * Antigravity discovery list (`fetchAntigravityDiscoveryModels`).
- */
+/** `google-antigravity` (daily-cloudcode-pa): Gemini 3.x on the budget transport. */
 export declare const ANTIGRAVITY_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
-/** Provider id → hand collapse table. Both CCA providers share one table. */
+/** `google-gemini-cli` (cloudcode-pa): Gemini 3.x on the level transport (official CLI parity). */
+export declare const GEMINI_CLI_VARIANT_COLLAPSE_TABLE: VariantCollapseTable;
+/** Provider id → hand collapse table. The CCA providers diverge on thinking transport. */
 export declare const VARIANT_COLLAPSE_TABLES: Readonly<Record<string, VariantCollapseTable>>;
 /**
  * The global automatic rule: derive an `X` + `X-thinking` family for every

package/dist/types/wire/gemini-headers.d.ts CHANGED Viewed

@@ -9,7 +9,6 @@ export declare const getGeminiCliHeaders: (modelId?: string) => {
     "Client-Metadata": string;
 };
 export declare const ANTIGRAVITY_SYSTEM_INSTRUCTION: string;
-export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER output rule checks, formatting guidelines, constraint checklists (e.g. \"No emdashes\"), or your thinking/personality preambles in the final response. Output only the final response.";
 /**
  * Antigravity / Cloud Code Assist user agent. Lives in its own file so discovery
  * and usage code can read it without pulling the heavy google-gemini-cli provider
@@ -17,3 +16,19 @@ export declare const ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION = "CRITICAL: NEVER outp
  * parse graph.
  */
 export declare let getAntigravityUserAgent: () => string;
+/**
+ * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
+ * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
+ * opaque `labels.model_enum` token the client tags each request with;
+ * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
+ * regardless of the thinking budget. Keyed by the routed upstream wire id
+ * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
+ * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
+ * emits agent requests.
+ */
+export interface AntigravityModelWireProfile {
+    modelEnum: string;
+    maxOutputTokens: number;
+}
+export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
+export declare function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined;

package/dist/types/wire/github-copilot.d.ts CHANGED Viewed

@@ -25,9 +25,11 @@ export declare const COPILOT_API_HEADERS: {
 export type ParsedGitHubCopilotApiKey = {
     accessToken: string;
     enterpriseUrl?: string;
+    apiEndpoint?: string;
 };
 export declare function isPublicGitHubHost(host: string): boolean;
 export declare function normalizeGitHubCopilotEnterpriseDomain(input: string | undefined): string | undefined;
+export declare function normalizeGitHubCopilotApiEndpoint(input: string | undefined): string | undefined;
 export declare function parseGitHubCopilotApiKey(apiKeyRaw: string): ParsedGitHubCopilotApiKey;
 export declare function normalizeDomain(input: string): string | null;
 export declare function getGitHubCopilotBaseUrl(enterpriseDomain?: string): string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "16.0.3",
+	"version": "16.0.5",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,11 +34,11 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "16.0.3",
+		"@oh-my-pi/pi-utils": "16.0.5",
 		"zod": "^4"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "16.0.3",
+		"@oh-my-pi/pi-ai": "16.0.5",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/compat/openai.ts CHANGED Viewed

@@ -12,10 +12,12 @@ import {
 	isAnthropicNamespacedModelId,
 	isClaudeModelId,
 	isDeepseekModelIdOrName,
+	isGlm52ReasoningEffortModelId,
 	isKimiK26ModelId,
 	isKimiModelId,
 	isMimoModelIdOrName,
 	isQwenModelId,
+	modelFamilyToken,
 } from "../identity/family";
 import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
 import { applyCompatOverrides } from "./apply";
@@ -82,6 +84,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	const isCerebras = modelMatchesHost(hostModel, "cerebras");
 	const isZai = modelMatchesHost(hostModel, "zai");
 	const isZhipu = modelMatchesHost(hostModel, "zhipu");
+	const supportsZaiReasoningEffort = (isZai || isZhipu) && isGlm52ReasoningEffortModelId(spec.id);
 	const isKilo = modelMatchesHost(hostModel, "kilo");
 	const isKimiModel = isKimiModelId(spec.id);
 	const isMoonshotNative = modelMatchesHost(hostModel, "moonshotNative");
@@ -136,6 +139,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	const useMaxTokens =
 		isMistral ||
 		isMoonshotNative ||
+		isZai ||
+		isZhipu ||
 		hostMatchesUrl(baseUrl, "chutes") ||
 		hostMatchesUrl(baseUrl, "fireworks") ||
 		isDirectDeepseekApi;
@@ -202,11 +207,15 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		// OpenAI's reasoning-API surface.
 		supportsDeveloperRole: isOpenAIHost || isAzureHost,
 		supportsMultipleSystemMessages: supportsMultipleSystemMessagesDefault,
-		supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
+		supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
 		// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
 		supportsReasoningParams: provider !== "github-copilot",
 		reasoningEffortMap: {},
 		supportsUsageInStreaming: !isCerebras,
+		// pi-ai's thinking-loop guard is gemini-only; default the flag from the
+		// family classifier so OpenAI-compat proxies serving Gemini are covered.
+		// An opaque alias can opt in via `compat.enableGeminiThinkingLoopGuard`.
+		enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id) === "gemini",
 		// Kimi (including via OpenRouter and Fireworks router-form IDs such as
 		// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
 		// max_tokens, not actual output. The official Kimi K2 model guidance
@@ -291,6 +300,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 }
 interface OpenAIResponsesSpecLike {
+	id?: string;
 	provider: string;
 	name: string;
 	baseUrl: string;
@@ -325,6 +335,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
 		strictResponsesPairing: isAzure || spec.provider === "github-copilot",
 		requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
 		reasoningEffortMap: {},
+		enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id ?? "") === "gemini",
 	};
 	applyCompatOverrides(compat, spec.compat);
 	return compat;

package/src/discovery/antigravity.ts CHANGED Viewed

@@ -1,13 +1,16 @@
 import { z } from "zod/v4";
 import type { ModelSpec } from "../types";
 import { toPositiveNumber } from "../utils";
-import { ANTIGRAVITY_VARIANT_COLLAPSE_TABLE, collapseEffortVariants } from "../variant-collapse";
+import {
+	ANTIGRAVITY_VARIANT_COLLAPSE_TABLE,
+	collapseEffortVariants,
+	type VariantCollapseTable,
+} from "../variant-collapse";
 import { getAntigravityUserAgent } from "../wire/gemini-headers";
-const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [
-	"https://daily-cloudcode-pa.googleapis.com",
-	"https://daily-cloudcode-pa.sandbox.googleapis.com",
-] as const;
+export const ANTIGRAVITY_PRIMARY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
+export const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
+const DEFAULT_ANTIGRAVITY_DISCOVERY_ENDPOINTS = [ANTIGRAVITY_PRIMARY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
 const FETCH_AVAILABLE_MODELS_PATH = "/v1internal:fetchAvailableModels";
 const DEFAULT_CONTEXT_WINDOW = 200_000;
@@ -157,6 +160,12 @@ export interface FetchAntigravityDiscoveryModelsOptions {
 	signal?: AbortSignal;
 	/** Optional fetch implementation override for tests. */
 	fetcher?: typeof fetch;
+	/**
+	 * Hand collapse table to apply to the discovered list. Defaults to the
+	 * Antigravity (budget-transport) table; `googleGeminiCli` passes the
+	 * level-transport table so cloudcode-pa keeps `thinkingLevel`.
+	 */
+	collapseTable?: VariantCollapseTable;
 }
 /**
@@ -239,7 +248,7 @@ export async function fetchAntigravityDiscoveryModels(
 		// Collapse effort-tier variants at the source so runtime discovery,
 		// the gemini-cli re-provision, and the catalog generator all see
 		// logical ids only.
-		const collapsed = collapseEffortVariants(models, ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
+		const collapsed = collapseEffortVariants(models, options.collapseTable ?? ANTIGRAVITY_VARIANT_COLLAPSE_TABLE);
 		collapsed.sort((a, b) => a.name.localeCompare(b.name) || a.id.localeCompare(b.id));
 		return collapsed;
 	}

package/src/identity/dialect.ts CHANGED Viewed

@@ -11,7 +11,8 @@ export type Dialect =
 	| "pi"
 	| "qwen3"
 	| "gemini"
-	| "gemma";
+	| "gemma"
+	| "minimax";
 export const FALLBACK_DIALECT: Dialect = "xml";
@@ -31,6 +32,8 @@ export function preferredDialect(modelId: string): Dialect {
 			return "qwen3";
 		case "deepseek":
 			return "deepseek";
+		case "minimax":
+			return "minimax";
 		case "openai":
 		case "gpt-oss":
 			return "harmony";

package/src/identity/family.ts CHANGED Viewed

@@ -73,6 +73,13 @@ export function isMinimaxM2FamilyModelId(modelId: string): boolean {
 	return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
 }
+/** MiniMax M3 family ids in bundled/default and aggregator namespace forms. */
+export function isMinimaxM3FamilyModelId(modelId: string): boolean {
+	const lower = modelId.toLowerCase();
+	if (!lower.includes("minimax")) return false;
+	return /(?:^|[/._-])(?:minimax[/._-])?m3(?:[-.:_]|$)/i.test(lower);
+}
 /**
  * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
  * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
@@ -105,6 +112,17 @@ export function isReasoningGlmModelId(modelId: string): boolean {
 	}
 	return semverGte(glm.version, "4.5");
 }
+/** GLM-5.2+ coding SKUs accept `reasoning_effort` in addition to binary thinking. */
+export function isGlm52ReasoningEffortModelId(modelId: string): boolean {
+	const glm = parseGlmModel(bareModelId(modelId));
+	if (!glm || glm.vision) {
+		return false;
+	}
+	if (glm.variant !== "base" && glm.variant !== "air" && glm.variant !== "turbo") {
+		return false;
+	}
+	return semverGte(glm.version, "5.2");
+}
 /** GLM vision SKUs — the `v` that attaches to the version (`glm-4v`, `glm-4.5v`). */
 export function isGlmVisionModelId(modelId: string): boolean {
@@ -128,7 +146,7 @@ export function modelFamilyToken(modelId: string): string {
 	if (isOpenAIModelId(modelId)) return "openai";
 	if (isKimiModelId(modelId)) return "kimi";
 	if (isQwenModelId(modelId)) return "qwen";
-	if (isMinimaxM2FamilyModelId(modelId)) return "minimax";
+	if (isMinimaxM2FamilyModelId(modelId) || isMinimaxM3FamilyModelId(modelId)) return "minimax";
 	if (isOpenAIGptOssModelId(modelId)) return "gpt-oss";
 	if (isDeepseekModelIdOrName(modelId)) return "deepseek";
 	if (isMimoModelIdOrName(modelId)) return "mimo";

package/src/model-cache.ts CHANGED Viewed

@@ -7,12 +7,14 @@ import { getModelDbPath } from "@oh-my-pi/pi-utils";
 import type { Api, Model, ModelSpec } from "./types";
 // Rows persist ModelSpec JSON (sparse `compat`, never the resolved record);
-// the model manager rebuilds via `buildModel` on load. v6 invalidates rows
-// that may contain the retired unknown-limit sentinels (222222/8888); v5
-// invalidated rows predating effort-tier variant collapsing (raw
-// `-low`/`-high`/`-thinking` member ids); v4 dropped the pre-efforts
-// ThinkingConfig shape.
-const CACHE_SCHEMA_VERSION = 6;
+// the model manager rebuilds via `buildModel` on load. v7 invalidates rows
+// predating the Antigravity Gemini budget-mode migration (cached specs still
+// carrying `thinking.mode: "google-level"` and the old 3.5-flash effort
+// routing); v6 invalidates rows that may contain the retired unknown-limit
+// sentinels (222222/8888); v5 invalidated rows predating effort-tier variant
+// collapsing (raw `-low`/`-high`/`-thinking` member ids); v4 dropped the
+// pre-efforts ThinkingConfig shape.
+const CACHE_SCHEMA_VERSION = 7;
 interface CacheRow {
 	provider_id: string;

package/src/model-thinking.ts CHANGED Viewed

@@ -23,6 +23,7 @@ import {
 import {
 	findThinkingVariantToken,
 	isDeepseekModelIdOrName,
+	isGlm52ReasoningEffortModelId,
 	isMinimaxM2FamilyModelId,
 	isOpenAIGptOssModelId,
 	supportsAdaptiveThinkingDisplay,
@@ -76,6 +77,13 @@ const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
 const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.Minimal]: "none",
 };
+const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+	[Effort.Minimal]: "none",
+	[Effort.Low]: "high",
+	[Effort.Medium]: "high",
+	[Effort.High]: "high",
+	[Effort.XHigh]: "max",
+};
 /**
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -259,11 +267,19 @@ function sameEffortList(left: readonly Effort[], right: readonly Effort[]): bool
 }
 function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
+	if (spec.api === "openai-completions" && isZaiGlm52ReasoningEffortModel(spec)) {
+		return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
+	}
 	return spec.api === "openai-completions" && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
 		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
 		: undefined;
 }
+function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
+	if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
+	return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
+}
 function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
 	if (compat === undefined || !("reasoningEffortMap" in compat)) {
 		return undefined;
@@ -288,6 +304,9 @@ function inferDetectedEffortMap<TApi extends Api>(
 	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
 		return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
 	}
+	if (isZaiGlm52ReasoningEffortModel(spec)) {
+		return ZAI_GLM_52_REASONING_EFFORT_MAP;
+	}
 	if (isDeepseekReasoningModel(spec)) {
 		return DEEPSEEK_REASONING_EFFORT_MAP;
 	}