@oh-my-pi/pi-catalog 16.1.1 → 16.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.1.3] - 2026-06-19
6
+
7
+ ### Fixed
8
+
9
+ - Marked Ollama Cloud catalog models to omit on-the-wire output-token caps, preventing context-window-sized `num_predict` values from causing HTTP 400s for models whose true output cap is not discoverable. ([#2984](https://github.com/can1357/oh-my-pi/issues/2984))
10
+ - Fixed `readModelCache`/`writeModelCache` using a process-global shared database even when a custom `dbPath` was provided. Custom-path cache operations now open and close a per-call database via `withModelCacheDb`, preventing leaked SQLite handles on Windows.
11
+
12
+ ## [16.1.2] - 2026-06-19
13
+
14
+ ### Added
15
+
16
+ - Added support for Gemini 2.5 Flash-Lite, 3.1 Flash-Lite, and 3.5 Flash models
17
+ - Added support for Moonshot V1 model family
18
+
19
+ ### Changed
20
+
21
+ - Updated context window and token limits for various Claude, Gemini, and GPT-OSS models
22
+ - Refined thinking mode behaviors and routing for supported LLM families
23
+
24
+ ### Fixed
25
+
26
+ - Fixed GLM-5.2 `reasoning_effort` so the top thinking tier reaches each host's genuine maximum instead of failing with HTTP 400. The internal `xhigh` tier is now mapped per host dialect (verified against live endpoints): Z.ai/Zhipu collapse onto the model's `none`/`high`/`max` scale (`xhigh → max`); Fireworks, resellers, and Ollama Cloud keep their distinct lower tiers and remap only the top `xhigh → max` (merged over host quirks such as Fireworks' `minimal → none`); and OpenRouter — whose API rejects `max` and treats `xhigh` as its own max tier — now exposes the `xhigh` tier and forwards it verbatim. Dialect detection keys off the resolved `compat.thinkingFormat`, so custom OpenRouter/Z.ai-format providers are covered too.
27
+ - Maintained thinking effort routing when discovery only returns the base model ID
28
+ - Improved credential retrieval logic for Antigravity and Codex providers via auth discovery
29
+
5
30
  ## [16.0.9] - 2026-06-18
6
31
 
7
32
  ### Fixed
@@ -477,6 +477,11 @@ export interface Model<TApi extends Api = Api> {
477
477
  baseUrl: string;
478
478
  reasoning: boolean;
479
479
  input: ("text" | "image")[];
480
+ /**
481
+ * Decoder family used for image inputs when it has narrower format support
482
+ * than OMP's general image pipeline. `stb` local backends reject WebP.
483
+ */
484
+ imageInputDecoder?: "stb";
480
485
  /**
481
486
  * Native provider tool-call support. `false` is the only unsupported signal:
482
487
  * `true` and `undefined` both mean callers may use native tools. Catalog and
@@ -40,6 +40,13 @@ export interface EffortVariantFamily {
40
40
  thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
41
41
  /** Thinking-off requests must explicitly suppress thinking on the wire. */
42
42
  suppressWhenOff?: boolean;
43
+ /**
44
+ * Preserve non-off effort routes even when discovery omits the backing member.
45
+ * Used for Cloud Code Assist `X`/`X-thinking` pairs where upstream accepts
46
+ * the `-thinking` wire id but the model-list endpoint may advertise only the
47
+ * bare id.
48
+ */
49
+ preserveAbsentEffortRoutes?: boolean;
43
50
  /** Retired/recycled selector ids that alias to this family without being members. */
44
51
  extraAliases?: readonly string[];
45
52
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.1.1",
4
+ "version": "16.1.3",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,12 +34,12 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.1.1",
37
+ "@oh-my-pi/pi-utils": "16.1.3",
38
38
  "arktype": "^2.2.0",
39
39
  "zod": "^4"
40
40
  },
41
41
  "devDependencies": {
42
- "@oh-my-pi/pi-ai": "16.1.1",
42
+ "@oh-my-pi/pi-ai": "16.1.3",
43
43
  "@types/bun": "^1.3.14"
44
44
  },
45
45
  "engines": {
@@ -46,14 +46,7 @@ interface CacheEntry<TApi extends Api = Api> {
46
46
  let sharedDb: Database | null = null;
47
47
  let sharedDbPath: string | null = null;
48
48
 
49
- function getDb(dbPath?: string): Database {
50
- const resolvedPath = dbPath ?? getModelDbPath();
51
- if (sharedDb && sharedDbPath === resolvedPath) {
52
- return sharedDb;
53
- }
54
- if (sharedDb) {
55
- sharedDb.close();
56
- }
49
+ function openDb(resolvedPath: string): Database {
57
50
  const db = new Database(resolvedPath, { create: true });
58
51
  // Install the busy handler BEFORE any lock-taking statement. See
59
52
  // https://github.com/can1357/oh-my-pi/issues/2421.
@@ -70,16 +63,42 @@ function getDb(dbPath?: string): Database {
70
63
  )
71
64
  `);
72
65
  migrateCacheSchema(db);
66
+ return db;
67
+ }
73
68
 
69
+ function getSharedDb(): Database {
70
+ const resolvedPath = getModelDbPath();
71
+ if (sharedDb && sharedDbPath === resolvedPath) {
72
+ return sharedDb;
73
+ }
74
+ if (sharedDb) {
75
+ sharedDb.close();
76
+ }
77
+ const db = openDb(resolvedPath);
74
78
  sharedDb = db;
75
79
  sharedDbPath = resolvedPath;
76
80
  return db;
77
81
  }
78
82
 
83
+ function withModelCacheDb<T>(dbPath: string | undefined, useDb: (db: Database) => T): T {
84
+ if (!dbPath) return useDb(getSharedDb());
85
+ const db = openDb(dbPath);
86
+ try {
87
+ return useDb(db);
88
+ } finally {
89
+ db.close();
90
+ }
91
+ }
92
+
79
93
  function migrateCacheSchema(db: Database): void {
80
- const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
81
- if (!columns.some(column => column.name === "static_fingerprint")) {
82
- db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
94
+ const stmt = db.prepare("PRAGMA table_info(model_cache)");
95
+ try {
96
+ const columns = stmt.all() as TableInfoRow[];
97
+ if (!columns.some(column => column.name === "static_fingerprint")) {
98
+ db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
99
+ }
100
+ } finally {
101
+ stmt.finalize();
83
102
  }
84
103
  db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
85
104
  }
@@ -91,21 +110,27 @@ export function readModelCache<TApi extends Api>(
91
110
  dbPath?: string,
92
111
  ): CacheEntry<TApi> | null {
93
112
  try {
94
- const db = getDb(dbPath);
95
- const row = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?").get(providerId);
96
- if (!row || row.version !== CACHE_SCHEMA_VERSION) {
97
- return null;
98
- }
99
- const models = JSON.parse(row.models) as ModelSpec<TApi>[];
100
- const ageMs = now() - row.updated_at;
101
- const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
102
- return {
103
- models,
104
- fresh,
105
- authoritative: row.authoritative === 1,
106
- updatedAt: row.updated_at,
107
- staticFingerprint: row.static_fingerprint ?? "",
108
- };
113
+ return withModelCacheDb(dbPath, db => {
114
+ const stmt = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?");
115
+ try {
116
+ const row = stmt.get(providerId);
117
+ if (!row || row.version !== CACHE_SCHEMA_VERSION) {
118
+ return null;
119
+ }
120
+ const models = JSON.parse(row.models) as ModelSpec<TApi>[];
121
+ const ageMs = now() - row.updated_at;
122
+ const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
123
+ return {
124
+ models,
125
+ fresh,
126
+ authoritative: row.authoritative === 1,
127
+ updatedAt: row.updated_at,
128
+ staticFingerprint: row.static_fingerprint ?? "",
129
+ };
130
+ } finally {
131
+ stmt.finalize();
132
+ }
133
+ });
109
134
  } catch {
110
135
  return null;
111
136
  }
@@ -120,19 +145,20 @@ export function writeModelCache<TApi extends Api>(
120
145
  dbPath?: string,
121
146
  ): void {
122
147
  try {
123
- const db = getDb(dbPath);
124
- db.run(
125
- `INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
126
- VALUES (?, ?, ?, ?, ?, ?)`,
127
- [
128
- providerId,
129
- CACHE_SCHEMA_VERSION,
130
- updatedAt,
131
- authoritative ? 1 : 0,
132
- staticFingerprint,
133
- JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
134
- ],
135
- );
148
+ withModelCacheDb(dbPath, db => {
149
+ db.run(
150
+ `INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
151
+ VALUES (?, ?, ?, ?, ?, ?)`,
152
+ [
153
+ providerId,
154
+ CACHE_SCHEMA_VERSION,
155
+ updatedAt,
156
+ authoritative ? 1 : 0,
157
+ staticFingerprint,
158
+ JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
159
+ ],
160
+ );
161
+ });
136
162
  } catch {
137
163
  // Cache writes are best-effort; failures should not break model resolution.
138
164
  }
@@ -86,7 +86,7 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
86
86
  [Effort.High]: "high",
87
87
  [Effort.XHigh]: "max",
88
88
  };
89
- const OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
89
+ const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
90
90
  [Effort.XHigh]: "max",
91
91
  };
92
92
 
@@ -164,7 +164,7 @@ function fillThinkingWireDefaults<TApi extends Api>(
164
164
  thinking: ThinkingConfig,
165
165
  ): ThinkingConfig {
166
166
  const parsed = parseKnownModel(spec.id);
167
- const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
167
+ const normalizedEfforts = getModelDefinedEfforts(spec, compat) ?? thinking.efforts;
168
168
  const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
169
169
  const effortMap =
170
170
  thinking.effortMap === undefined
@@ -251,7 +251,7 @@ function inferEffortMap<TApi extends Api>(
251
251
  mode: ThinkingConfig["mode"],
252
252
  efforts: readonly Effort[],
253
253
  ): EffortMap | undefined {
254
- const detected = inferDetectedEffortMap(spec, parsedModel, mode);
254
+ const detected = inferDetectedEffortMap(spec, compat, parsedModel, mode);
255
255
  const configured = readCompatEffortMap(compat);
256
256
  const merged =
257
257
  detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
@@ -281,23 +281,26 @@ function isOpenAICompatReasoningApi(api: Api): boolean {
281
281
  return api === "openai-completions" || api === "openrouter";
282
282
  }
283
283
 
284
- function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
285
- if (isOpenAICompatReasoningApi(spec.api) && isZaiGlm52ReasoningEffortModel(spec)) {
286
- return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
287
- }
288
- if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
289
- return GLM_52_HIGH_MAX_REASONING_EFFORTS;
284
+ function getModelDefinedEfforts<TApi extends Api>(
285
+ spec: ModelSpec<TApi>,
286
+ compat: CompatOf<TApi>,
287
+ ): readonly Effort[] | undefined {
288
+ if (isGlm52ReasoningEffortModelId(spec.id)) {
289
+ // Z.ai/Zhipu and OpenRouter both surface GLM-5.2's full effort ladder,
290
+ // including the top `xhigh` (= "max") tier; Ollama Cloud exposes only
291
+ // high/xhigh.
292
+ if (isZaiThinkingFormat(compat) || isOpenRouterThinkingFormat(compat)) {
293
+ return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
294
+ }
295
+ if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
296
+ return GLM_52_HIGH_MAX_REASONING_EFFORTS;
297
+ }
290
298
  }
291
299
  return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
292
300
  ? LOW_MEDIUM_HIGH_REASONING_EFFORTS
293
301
  : undefined;
294
302
  }
295
303
 
296
- function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
297
- if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
298
- return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
299
- }
300
-
301
304
  function isOllamaCloudGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
302
305
  return spec.api === "ollama-chat" && spec.provider === "ollama-cloud" && isGlm52ReasoningEffortModelId(spec.id);
303
306
  }
@@ -314,8 +317,17 @@ function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
314
317
  return map && Object.keys(map).length > 0 ? map : undefined;
315
318
  }
316
319
 
320
+ function isOpenRouterThinkingFormat(compat: CompatOf<Api>): boolean {
321
+ return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "openrouter";
322
+ }
323
+
324
+ function isZaiThinkingFormat(compat: CompatOf<Api>): boolean {
325
+ return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "zai";
326
+ }
327
+
317
328
  function inferDetectedEffortMap<TApi extends Api>(
318
329
  spec: ModelSpec<TApi>,
330
+ compat: CompatOf<TApi>,
319
331
  parsedModel: ParsedModel,
320
332
  mode: ThinkingConfig["mode"],
321
333
  ): EffortMap | undefined {
@@ -327,29 +339,42 @@ function inferDetectedEffortMap<TApi extends Api>(
327
339
  ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
328
340
  : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
329
341
  }
342
+ // GLM-5.2 coding SKUs accept `reasoning_effort`, but the effort dialect is
343
+ // host-specific (verified against live endpoints):
344
+ // - Z.ai/Zhipu ("zai" dialect): the model exposes only none/high/max, so
345
+ // `xhigh` 400s — collapse minimal->none, low/medium/high->high, xhigh->max.
346
+ // - OpenRouter: `max` 400s and `xhigh` IS its max tier, so it passes `xhigh`
347
+ // through literally (no map; the tier is exposed via getModelDefinedEfforts).
348
+ // - Other openai-compat hosts (Fireworks, resellers) and Ollama Cloud keep
349
+ // their distinct lower tiers and host quirks (e.g. Fireworks rejects
350
+ // `minimal`, so `minimal->none` stays) and only remap the top `xhigh` UI
351
+ // tier onto the genuine `max` budget. Filtered to supported efforts later.
352
+ const isGlm52 = isGlm52ReasoningEffortModelId(spec.id);
353
+ if (isGlm52 && isZaiThinkingFormat(compat)) {
354
+ return ZAI_GLM_52_REASONING_EFFORT_MAP;
355
+ }
330
356
  if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
331
- return OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP;
357
+ return GLM_52_XHIGH_MAX_EFFORT_MAP;
332
358
  }
333
359
  if (!isOpenAICompatReasoningApi(spec.api)) {
334
360
  return undefined;
335
361
  }
362
+ let map: EffortMap | undefined;
336
363
  if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
337
- return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
338
- }
339
- if (isZaiGlm52ReasoningEffortModel(spec)) {
340
- return ZAI_GLM_52_REASONING_EFFORT_MAP;
341
- }
342
- if (isDeepseekReasoningModel(spec)) {
343
- return DEEPSEEK_REASONING_EFFORT_MAP;
344
- }
345
- if (modelMatchesHost(spec, "openrouter")) {
346
- const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
347
- if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
364
+ map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
365
+ } else if (isDeepseekReasoningModel(spec)) {
366
+ map = DEEPSEEK_REASONING_EFFORT_MAP;
367
+ } else if (modelMatchesHost(spec, "openrouter")) {
368
+ map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
369
+ } else if (modelMatchesHost(spec, "fireworks")) {
370
+ map = FIREWORKS_REASONING_EFFORT_MAP;
348
371
  }
349
- if (modelMatchesHost(spec, "fireworks")) {
350
- return FIREWORKS_REASONING_EFFORT_MAP;
372
+ // Overlay GLM-5.2's top-tier `xhigh -> max` on the host base map, except on
373
+ // OpenRouter (xhigh IS its max tier; `max` 400s there).
374
+ if (isGlm52 && !isOpenRouterThinkingFormat(compat)) {
375
+ map = { ...map, ...GLM_52_XHIGH_MAX_EFFORT_MAP };
351
376
  }
352
- return undefined;
377
+ return map;
353
378
  }
354
379
 
355
380
  function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
@@ -383,7 +408,7 @@ function inferSupportedEfforts<TApi extends Api>(
383
408
  spec: ModelSpec<TApi>,
384
409
  compat: CompatOf<TApi>,
385
410
  ): readonly Effort[] {
386
- const modelDefinedEfforts = getModelDefinedEfforts(spec);
411
+ const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
387
412
  if (modelDefinedEfforts !== undefined) {
388
413
  return modelDefinedEfforts;
389
414
  }