npm - @oh-my-pi/pi-catalog - Versions diffs - 16.1.1 → 16.1.3 - Mend

@oh-my-pi/pi-catalog 16.1.1 → 16.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/CHANGELOG.md +25 -0
package/dist/types/types.d.ts +5 -0
package/dist/types/variant-collapse.d.ts +7 -0
package/package.json +3 -3
package/src/model-cache.ts +65 -39
package/src/model-thinking.ts +55 -30
package/src/models.json +623 -787
package/src/provider-models/ollama.ts +1 -0
package/src/provider-models/openai-compat.ts +14 -2
package/src/types.ts +5 -0
package/src/variant-collapse.ts +21 -3

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 ## [Unreleased]
+## [16.1.3] - 2026-06-19
+### Fixed
+- Marked Ollama Cloud catalog models to omit on-the-wire output-token caps, preventing context-window-sized `num_predict` values from causing HTTP 400s for models whose true output cap is not discoverable. ([#2984](https://github.com/can1357/oh-my-pi/issues/2984))
+- Fixed `readModelCache`/`writeModelCache` using a process-global shared database even when a custom `dbPath` was provided. Custom-path cache operations now open and close a per-call database via `withModelCacheDb`, preventing leaked SQLite handles on Windows.
+## [16.1.2] - 2026-06-19
+### Added
+- Added support for Gemini 2.5 Flash-Lite, 3.1 Flash-Lite, and 3.5 Flash models
+- Added support for Moonshot V1 model family
+### Changed
+- Updated context window and token limits for various Claude, Gemini, and GPT-OSS models
+- Refined thinking mode behaviors and routing for supported LLM families
+### Fixed
+- Fixed GLM-5.2 `reasoning_effort` so the top thinking tier reaches each host's genuine maximum instead of 400ing, mapping the internal `xhigh` tier per host dialect (verified against live endpoints): Z.ai/Zhipu collapse onto the model's `none`/`high`/`max` scale (`xhigh → max`); Fireworks, resellers, and Ollama Cloud keep their distinct lower tiers and remap only the top `xhigh → max` (merged over host quirks such as Fireworks' `minimal → none`); and OpenRouter — whose API rejects `max` and treats `xhigh` as its own max tier — now exposes the `xhigh` tier and forwards it verbatim. Dialect detection keys off resolved `compat.thinkingFormat`, so custom OpenRouter/Z.ai-format providers are covered too.
+- Maintained thinking effort routing when discovery only returns the base model ID
+- Improved credential retrieval logic for Antigravity and Codex providers via auth discovery
 ## [16.0.9] - 2026-06-18
 ### Fixed

package/dist/types/types.d.ts CHANGED Viewed

@@ -477,6 +477,11 @@ export interface Model<TApi extends Api = Api> {
     baseUrl: string;
     reasoning: boolean;
     input: ("text" | "image")[];
+    /**
+     * Decoder family used for image inputs when it has narrower format support
+     * than OMP's general image pipeline. `stb` local backends reject WebP.
+     */
+    imageInputDecoder?: "stb";
     /**
      * Native provider tool-call support. `false` is the only unsupported signal:
      * `true` and `undefined` both mean callers may use native tools. Catalog and

package/dist/types/variant-collapse.d.ts CHANGED Viewed

@@ -40,6 +40,13 @@ export interface EffortVariantFamily {
     thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
     /** Thinking-off requests must explicitly suppress thinking on the wire. */
     suppressWhenOff?: boolean;
+    /**
+     * Preserve non-off effort routes even when discovery omits the backing member.
+     * Used for Cloud Code Assist `X`/`X-thinking` pairs where upstream accepts
+     * the `-thinking` wire id but the model-list endpoint may advertise only the
+     * bare id.
+     */
+    preserveAbsentEffortRoutes?: boolean;
     /** Retired/recycled selector ids that alias to this family without being members. */
     extraAliases?: readonly string[];
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "16.1.1",
+	"version": "16.1.3",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,12 +34,12 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "16.1.1",
+		"@oh-my-pi/pi-utils": "16.1.3",
 		"arktype": "^2.2.0",
 		"zod": "^4"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "16.1.1",
+		"@oh-my-pi/pi-ai": "16.1.3",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/model-cache.ts CHANGED Viewed

@@ -46,14 +46,7 @@ interface CacheEntry<TApi extends Api = Api> {
 let sharedDb: Database | null = null;
 let sharedDbPath: string | null = null;
-function getDb(dbPath?: string): Database {
-	const resolvedPath = dbPath ?? getModelDbPath();
-	if (sharedDb && sharedDbPath === resolvedPath) {
-		return sharedDb;
-	}
-	if (sharedDb) {
-		sharedDb.close();
-	}
+function openDb(resolvedPath: string): Database {
 	const db = new Database(resolvedPath, { create: true });
 	// Install the busy handler BEFORE any lock-taking statement. See
 	// https://github.com/can1357/oh-my-pi/issues/2421.
@@ -70,16 +63,42 @@ function getDb(dbPath?: string): Database {
 		)
 	`);
 	migrateCacheSchema(db);
+	return db;
+}
+function getSharedDb(): Database {
+	const resolvedPath = getModelDbPath();
+	if (sharedDb && sharedDbPath === resolvedPath) {
+		return sharedDb;
+	}
+	if (sharedDb) {
+		sharedDb.close();
+	}
+	const db = openDb(resolvedPath);
 	sharedDb = db;
 	sharedDbPath = resolvedPath;
 	return db;
 }
+function withModelCacheDb<T>(dbPath: string | undefined, useDb: (db: Database) => T): T {
+	if (!dbPath) return useDb(getSharedDb());
+	const db = openDb(dbPath);
+	try {
+		return useDb(db);
+	} finally {
+		db.close();
+	}
+}
 function migrateCacheSchema(db: Database): void {
-	const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
-	if (!columns.some(column => column.name === "static_fingerprint")) {
-		db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
+	const stmt = db.prepare("PRAGMA table_info(model_cache)");
+	try {
+		const columns = stmt.all() as TableInfoRow[];
+		if (!columns.some(column => column.name === "static_fingerprint")) {
+			db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
+		}
+	} finally {
+		stmt.finalize();
 	}
 	db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
 }
@@ -91,21 +110,27 @@ export function readModelCache<TApi extends Api>(
 	dbPath?: string,
 ): CacheEntry<TApi> | null {
 	try {
-		const db = getDb(dbPath);
-		const row = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?").get(providerId);
-		if (!row || row.version !== CACHE_SCHEMA_VERSION) {
-			return null;
-		}
-		const models = JSON.parse(row.models) as ModelSpec<TApi>[];
-		const ageMs = now() - row.updated_at;
-		const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
-		return {
-			models,
-			fresh,
-			authoritative: row.authoritative === 1,
-			updatedAt: row.updated_at,
-			staticFingerprint: row.static_fingerprint ?? "",
-		};
+		return withModelCacheDb(dbPath, db => {
+			const stmt = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?");
+			try {
+				const row = stmt.get(providerId);
+				if (!row || row.version !== CACHE_SCHEMA_VERSION) {
+					return null;
+				}
+				const models = JSON.parse(row.models) as ModelSpec<TApi>[];
+				const ageMs = now() - row.updated_at;
+				const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
+				return {
+					models,
+					fresh,
+					authoritative: row.authoritative === 1,
+					updatedAt: row.updated_at,
+					staticFingerprint: row.static_fingerprint ?? "",
+				};
+			} finally {
+				stmt.finalize();
+			}
+		});
 	} catch {
 		return null;
 	}
@@ -120,19 +145,20 @@ export function writeModelCache<TApi extends Api>(
 	dbPath?: string,
 ): void {
 	try {
-		const db = getDb(dbPath);
-		db.run(
-			`INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
-			 VALUES (?, ?, ?, ?, ?, ?)`,
-			[
-				providerId,
-				CACHE_SCHEMA_VERSION,
-				updatedAt,
-				authoritative ? 1 : 0,
-				staticFingerprint,
-				JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
-			],
-		);
+		withModelCacheDb(dbPath, db => {
+			db.run(
+				`INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
+				 VALUES (?, ?, ?, ?, ?, ?)`,
+				[
+					providerId,
+					CACHE_SCHEMA_VERSION,
+					updatedAt,
+					authoritative ? 1 : 0,
+					staticFingerprint,
+					JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
+				],
+			);
+		});
 	} catch {
 		// Cache writes are best-effort; failures should not break model resolution.
 	}

package/src/model-thinking.ts CHANGED Viewed

@@ -86,7 +86,7 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.High]: "high",
 	[Effort.XHigh]: "max",
 };
-const OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.XHigh]: "max",
 };
@@ -164,7 +164,7 @@ function fillThinkingWireDefaults<TApi extends Api>(
 	thinking: ThinkingConfig,
 ): ThinkingConfig {
 	const parsed = parseKnownModel(spec.id);
-	const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
+	const normalizedEfforts = getModelDefinedEfforts(spec, compat) ?? thinking.efforts;
 	const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
 	const effortMap =
 		thinking.effortMap === undefined
@@ -251,7 +251,7 @@ function inferEffortMap<TApi extends Api>(
 	mode: ThinkingConfig["mode"],
 	efforts: readonly Effort[],
 ): EffortMap | undefined {
-	const detected = inferDetectedEffortMap(spec, parsedModel, mode);
+	const detected = inferDetectedEffortMap(spec, compat, parsedModel, mode);
 	const configured = readCompatEffortMap(compat);
 	const merged =
 		detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
@@ -281,23 +281,26 @@ function isOpenAICompatReasoningApi(api: Api): boolean {
 	return api === "openai-completions" || api === "openrouter";
 }
-function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
-	if (isOpenAICompatReasoningApi(spec.api) && isZaiGlm52ReasoningEffortModel(spec)) {
-		return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
-	}
-	if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
-		return GLM_52_HIGH_MAX_REASONING_EFFORTS;
+function getModelDefinedEfforts<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+): readonly Effort[] | undefined {
+	if (isGlm52ReasoningEffortModelId(spec.id)) {
+		// Z.ai/Zhipu and OpenRouter both surface GLM-5.2's full effort ladder,
+		// including the top `xhigh` (= "max") tier; Ollama Cloud exposes only
+		// high/xhigh.
+		if (isZaiThinkingFormat(compat) || isOpenRouterThinkingFormat(compat)) {
+			return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
+		}
+		if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
+			return GLM_52_HIGH_MAX_REASONING_EFFORTS;
+		}
 	}
 	return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
 		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
 		: undefined;
 }
-function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
-	if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
-	return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
-}
 function isOllamaCloudGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
 	return spec.api === "ollama-chat" && spec.provider === "ollama-cloud" && isGlm52ReasoningEffortModelId(spec.id);
 }
@@ -314,8 +317,17 @@ function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
 	return map && Object.keys(map).length > 0 ? map : undefined;
 }
+function isOpenRouterThinkingFormat(compat: CompatOf<Api>): boolean {
+	return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "openrouter";
+}
+function isZaiThinkingFormat(compat: CompatOf<Api>): boolean {
+	return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "zai";
+}
 function inferDetectedEffortMap<TApi extends Api>(
 	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
 	parsedModel: ParsedModel,
 	mode: ThinkingConfig["mode"],
 ): EffortMap | undefined {
@@ -327,29 +339,42 @@ function inferDetectedEffortMap<TApi extends Api>(
 			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
 			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
 	}
+	// GLM-5.2 coding SKUs accept `reasoning_effort`, but the effort dialect is
+	// host-specific (verified against live endpoints):
+	//   - Z.ai/Zhipu ("zai" dialect): the model exposes only none/high/max, so
+	//     `xhigh` 400s — collapse minimal->none, low/medium/high->high, xhigh->max.
+	//   - OpenRouter: `max` 400s and `xhigh` IS its max tier, so it passes `xhigh`
+	//     through literally (no map; the tier is exposed via getModelDefinedEfforts).
+	//   - Other openai-compat hosts (Fireworks, resellers) and Ollama Cloud keep
+	//     their distinct lower tiers and host quirks (e.g. Fireworks rejects
+	//     `minimal`, so `minimal->none` stays) and only remap the top `xhigh` UI
+	//     tier onto the genuine `max` budget. Filtered to supported efforts later.
+	const isGlm52 = isGlm52ReasoningEffortModelId(spec.id);
+	if (isGlm52 && isZaiThinkingFormat(compat)) {
+		return ZAI_GLM_52_REASONING_EFFORT_MAP;
+	}
 	if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
-		return OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP;
+		return GLM_52_XHIGH_MAX_EFFORT_MAP;
 	}
 	if (!isOpenAICompatReasoningApi(spec.api)) {
 		return undefined;
 	}
+	let map: EffortMap | undefined;
 	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
-		return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
-	}
-	if (isZaiGlm52ReasoningEffortModel(spec)) {
-		return ZAI_GLM_52_REASONING_EFFORT_MAP;
-	}
-	if (isDeepseekReasoningModel(spec)) {
-		return DEEPSEEK_REASONING_EFFORT_MAP;
-	}
-	if (modelMatchesHost(spec, "openrouter")) {
-		const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
-		if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
+		map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
+	} else if (isDeepseekReasoningModel(spec)) {
+		map = DEEPSEEK_REASONING_EFFORT_MAP;
+	} else if (modelMatchesHost(spec, "openrouter")) {
+		map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
+	} else if (modelMatchesHost(spec, "fireworks")) {
+		map = FIREWORKS_REASONING_EFFORT_MAP;
 	}
-	if (modelMatchesHost(spec, "fireworks")) {
-		return FIREWORKS_REASONING_EFFORT_MAP;
+	// Overlay GLM-5.2's top-tier `xhigh -> max` on the host base map, except on
+	// OpenRouter (xhigh IS its max tier; `max` 400s there).
+	if (isGlm52 && !isOpenRouterThinkingFormat(compat)) {
+		map = { ...map, ...GLM_52_XHIGH_MAX_EFFORT_MAP };
 	}
-	return undefined;
+	return map;
 }
 function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
@@ -383,7 +408,7 @@ function inferSupportedEfforts<TApi extends Api>(
 	spec: ModelSpec<TApi>,
 	compat: CompatOf<TApi>,
 ): readonly Effort[] {
-	const modelDefinedEfforts = getModelDefinedEfforts(spec);
+	const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
 	if (modelDefinedEfforts !== undefined) {
 		return modelDefinedEfforts;
 	}