npm - @oh-my-pi/pi-catalog - Versions diffs - 16.1.1 → 16.1.2 - Mend

@oh-my-pi/pi-catalog 16.1.1 → 16.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md +18 -0
package/dist/types/variant-collapse.d.ts +7 -0
package/package.json +3 -3
package/src/model-thinking.ts +55 -30
package/src/models.json +571 -775
package/src/provider-models/openai-compat.ts +14 -2
package/src/variant-collapse.ts +21 -3

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,24 @@
 ## [Unreleased]
+## [16.1.2] - 2026-06-19
+### Added
+- Added support for Gemini 2.5 Flash-Lite, 3.1 Flash-Lite, and 3.5 Flash models
+- Added support for Moonshot V1 model family
+### Changed
+- Updated context window and token limits for various Claude, Gemini, and GPT-OSS models
+- Refined thinking mode behaviors and routing for supported LLM families
+### Fixed
+- Fixed GLM-5.2 `reasoning_effort` so the top thinking tier reaches each host's genuine maximum instead of 400ing, mapping the internal `xhigh` tier per host dialect (verified against live endpoints): Z.ai/Zhipu collapse onto the model's `none`/`high`/`max` scale (`xhigh → max`); Fireworks, resellers, and Ollama Cloud keep their distinct lower tiers and remap only the top `xhigh → max` (merged over host quirks such as Fireworks' `minimal → none`); and OpenRouter — whose API rejects `max` and treats `xhigh` as its own max tier — now exposes the `xhigh` tier and forwards it verbatim. Dialect detection keys off resolved `compat.thinkingFormat`, so custom OpenRouter/Z.ai-format providers are covered too.
+- Maintained thinking effort routing when discovery only returns the base model ID
+- Improved credential retrieval logic for Antigravity and Codex providers via auth discovery
 ## [16.0.9] - 2026-06-18
 ### Fixed

package/dist/types/variant-collapse.d.ts CHANGED Viewed

@@ -40,6 +40,13 @@ export interface EffortVariantFamily {
     thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
     /** Thinking-off requests must explicitly suppress thinking on the wire. */
     suppressWhenOff?: boolean;
+    /**
+     * Preserve non-off effort routes even when discovery omits the backing member.
+     * Used for Cloud Code Assist `X`/`X-thinking` pairs where upstream accepts
+     * the `-thinking` wire id but the model-list endpoint may advertise only the
+     * bare id.
+     */
+    preserveAbsentEffortRoutes?: boolean;
     /** Retired/recycled selector ids that alias to this family without being members. */
     extraAliases?: readonly string[];
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "16.1.1",
+	"version": "16.1.2",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,12 +34,12 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "16.1.1",
+		"@oh-my-pi/pi-utils": "16.1.2",
 		"arktype": "^2.2.0",
 		"zod": "^4"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "16.1.1",
+		"@oh-my-pi/pi-ai": "16.1.2",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/model-thinking.ts CHANGED Viewed

@@ -86,7 +86,7 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.High]: "high",
 	[Effort.XHigh]: "max",
 };
-const OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.XHigh]: "max",
 };
@@ -164,7 +164,7 @@ function fillThinkingWireDefaults<TApi extends Api>(
 	thinking: ThinkingConfig,
 ): ThinkingConfig {
 	const parsed = parseKnownModel(spec.id);
-	const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
+	const normalizedEfforts = getModelDefinedEfforts(spec, compat) ?? thinking.efforts;
 	const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
 	const effortMap =
 		thinking.effortMap === undefined
@@ -251,7 +251,7 @@ function inferEffortMap<TApi extends Api>(
 	mode: ThinkingConfig["mode"],
 	efforts: readonly Effort[],
 ): EffortMap | undefined {
-	const detected = inferDetectedEffortMap(spec, parsedModel, mode);
+	const detected = inferDetectedEffortMap(spec, compat, parsedModel, mode);
 	const configured = readCompatEffortMap(compat);
 	const merged =
 		detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
@@ -281,23 +281,26 @@ function isOpenAICompatReasoningApi(api: Api): boolean {
 	return api === "openai-completions" || api === "openrouter";
 }
-function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
-	if (isOpenAICompatReasoningApi(spec.api) && isZaiGlm52ReasoningEffortModel(spec)) {
-		return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
-	}
-	if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
-		return GLM_52_HIGH_MAX_REASONING_EFFORTS;
+function getModelDefinedEfforts<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+): readonly Effort[] | undefined {
+	if (isGlm52ReasoningEffortModelId(spec.id)) {
+		// Z.ai/Zhipu and OpenRouter both surface GLM-5.2's full effort ladder,
+		// including the top `xhigh` tier (sent as "max" on Z.ai/Zhipu, verbatim
+		// as `xhigh` on OpenRouter); Ollama Cloud exposes only high/xhigh.
+		if (isZaiThinkingFormat(compat) || isOpenRouterThinkingFormat(compat)) {
+			return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
+		}
+		if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
+			return GLM_52_HIGH_MAX_REASONING_EFFORTS;
+		}
 	}
 	return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
 		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
 		: undefined;
 }
-function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
-	if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
-	return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
-}
 function isOllamaCloudGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
 	return spec.api === "ollama-chat" && spec.provider === "ollama-cloud" && isGlm52ReasoningEffortModelId(spec.id);
 }
@@ -314,8 +317,17 @@ function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
 	return map && Object.keys(map).length > 0 ? map : undefined;
 }
+function isOpenRouterThinkingFormat(compat: CompatOf<Api>): boolean {
+	return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "openrouter";
+}
+function isZaiThinkingFormat(compat: CompatOf<Api>): boolean {
+	return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "zai";
+}
 function inferDetectedEffortMap<TApi extends Api>(
 	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
 	parsedModel: ParsedModel,
 	mode: ThinkingConfig["mode"],
 ): EffortMap | undefined {
@@ -327,29 +339,42 @@ function inferDetectedEffortMap<TApi extends Api>(
 			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
 			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
 	}
+	// GLM-5.2 coding SKUs accept `reasoning_effort`, but the effort dialect is
+	// host-specific (verified against live endpoints):
+	//   - Z.ai/Zhipu ("zai" dialect): the model exposes only none/high/max, so
+	//     `xhigh` 400s — collapse minimal->none, low/medium/high->high, xhigh->max.
+	//   - OpenRouter: `max` 400s and `xhigh` IS its max tier, so it passes `xhigh`
+	//     through literally (no map; the tier is exposed via getModelDefinedEfforts).
+	//   - Other openai-compat hosts (Fireworks, resellers) and Ollama Cloud keep
+	//     their distinct lower tiers and host quirks (e.g. Fireworks rejects
+	//     `minimal`, so `minimal->none` stays) and only remap the top `xhigh` UI
+	//     tier onto the genuine `max` budget. Filtered to supported efforts later.
+	const isGlm52 = isGlm52ReasoningEffortModelId(spec.id);
+	if (isGlm52 && isZaiThinkingFormat(compat)) {
+		return ZAI_GLM_52_REASONING_EFFORT_MAP;
+	}
 	if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
-		return OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP;
+		return GLM_52_XHIGH_MAX_EFFORT_MAP;
 	}
 	if (!isOpenAICompatReasoningApi(spec.api)) {
 		return undefined;
 	}
+	let map: EffortMap | undefined;
 	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
-		return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
-	}
-	if (isZaiGlm52ReasoningEffortModel(spec)) {
-		return ZAI_GLM_52_REASONING_EFFORT_MAP;
-	}
-	if (isDeepseekReasoningModel(spec)) {
-		return DEEPSEEK_REASONING_EFFORT_MAP;
-	}
-	if (modelMatchesHost(spec, "openrouter")) {
-		const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
-		if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
+		map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
+	} else if (isDeepseekReasoningModel(spec)) {
+		map = DEEPSEEK_REASONING_EFFORT_MAP;
+	} else if (modelMatchesHost(spec, "openrouter")) {
+		map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
+	} else if (modelMatchesHost(spec, "fireworks")) {
+		map = FIREWORKS_REASONING_EFFORT_MAP;
 	}
-	if (modelMatchesHost(spec, "fireworks")) {
-		return FIREWORKS_REASONING_EFFORT_MAP;
+	// Overlay GLM-5.2's top-tier `xhigh -> max` on the host base map, except on
+	// OpenRouter (xhigh IS its max tier; `max` 400s there).
+	if (isGlm52 && !isOpenRouterThinkingFormat(compat)) {
+		map = { ...map, ...GLM_52_XHIGH_MAX_EFFORT_MAP };
 	}
-	return undefined;
+	return map;
 }
 function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
@@ -383,7 +408,7 @@ function inferSupportedEfforts<TApi extends Api>(
 	spec: ModelSpec<TApi>,
 	compat: CompatOf<TApi>,
 ): readonly Effort[] {
-	const modelDefinedEfforts = getModelDefinedEfforts(spec);
+	const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
 	if (modelDefinedEfforts !== undefined) {
 		return modelDefinedEfforts;
 	}