@oh-my-pi/pi-catalog 16.1.1 → 16.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.1.2] - 2026-06-19
6
+
7
+ ### Added
8
+
9
+ - Added support for Gemini 2.5 Flash-Lite, 3.1 Flash-Lite, and 3.5 Flash models
10
+ - Added support for Moonshot V1 model family
11
+
12
+ ### Changed
13
+
14
+ - Updated context window and token limits for various Claude, Gemini, and GPT-OSS models
15
+ - Refined thinking mode behaviors and routing for supported LLM families
16
+
17
+ ### Fixed
18
+
19
+ - Fixed GLM-5.2 `reasoning_effort` so the top thinking tier reaches each host's genuine maximum instead of failing with HTTP 400, mapping the internal `xhigh` tier per host dialect (verified against live endpoints): Z.ai/Zhipu collapse onto the model's `none`/`high`/`max` scale (`xhigh → max`); Fireworks, resellers, and Ollama Cloud keep their distinct lower tiers and remap only the top `xhigh → max` (merged over host quirks such as Fireworks' `minimal → none`); and OpenRouter — whose API rejects `max` and treats `xhigh` as its own max tier — now exposes the `xhigh` tier and forwards it verbatim. Dialect detection keys off resolved `compat.thinkingFormat`, so custom OpenRouter/Z.ai-format providers are covered too.
20
+ - Maintained thinking effort routing when discovery only returns the base model ID
21
+ - Improved credential retrieval logic for Antigravity and Codex providers via auth discovery
22
+
5
23
  ## [16.0.9] - 2026-06-18
6
24
 
7
25
  ### Fixed
@@ -40,6 +40,13 @@ export interface EffortVariantFamily {
40
40
  thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
41
41
  /** Thinking-off requests must explicitly suppress thinking on the wire. */
42
42
  suppressWhenOff?: boolean;
43
+ /**
44
+ * Preserve non-off effort routes even when discovery omits the backing member.
45
+ * Used for Cloud Code Assist `X`/`X-thinking` pairs where upstream accepts
46
+ * the `-thinking` wire id but the model-list endpoint may advertise only the
47
+ * bare id.
48
+ */
49
+ preserveAbsentEffortRoutes?: boolean;
43
50
  /** Retired/recycled selector ids that alias to this family without being members. */
44
51
  extraAliases?: readonly string[];
45
52
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.1.1",
4
+ "version": "16.1.2",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,12 +34,12 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.1.1",
37
+ "@oh-my-pi/pi-utils": "16.1.2",
38
38
  "arktype": "^2.2.0",
39
39
  "zod": "^4"
40
40
  },
41
41
  "devDependencies": {
42
- "@oh-my-pi/pi-ai": "16.1.1",
42
+ "@oh-my-pi/pi-ai": "16.1.2",
43
43
  "@types/bun": "^1.3.14"
44
44
  },
45
45
  "engines": {
@@ -86,7 +86,7 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
86
86
  [Effort.High]: "high",
87
87
  [Effort.XHigh]: "max",
88
88
  };
89
- const OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
89
+ const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
90
90
  [Effort.XHigh]: "max",
91
91
  };
92
92
 
@@ -164,7 +164,7 @@ function fillThinkingWireDefaults<TApi extends Api>(
164
164
  thinking: ThinkingConfig,
165
165
  ): ThinkingConfig {
166
166
  const parsed = parseKnownModel(spec.id);
167
- const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
167
+ const normalizedEfforts = getModelDefinedEfforts(spec, compat) ?? thinking.efforts;
168
168
  const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
169
169
  const effortMap =
170
170
  thinking.effortMap === undefined
@@ -251,7 +251,7 @@ function inferEffortMap<TApi extends Api>(
251
251
  mode: ThinkingConfig["mode"],
252
252
  efforts: readonly Effort[],
253
253
  ): EffortMap | undefined {
254
- const detected = inferDetectedEffortMap(spec, parsedModel, mode);
254
+ const detected = inferDetectedEffortMap(spec, compat, parsedModel, mode);
255
255
  const configured = readCompatEffortMap(compat);
256
256
  const merged =
257
257
  detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
@@ -281,23 +281,26 @@ function isOpenAICompatReasoningApi(api: Api): boolean {
281
281
  return api === "openai-completions" || api === "openrouter";
282
282
  }
283
283
 
284
- function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
285
- if (isOpenAICompatReasoningApi(spec.api) && isZaiGlm52ReasoningEffortModel(spec)) {
286
- return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
287
- }
288
- if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
289
- return GLM_52_HIGH_MAX_REASONING_EFFORTS;
284
+ function getModelDefinedEfforts<TApi extends Api>(
285
+ spec: ModelSpec<TApi>,
286
+ compat: CompatOf<TApi>,
287
+ ): readonly Effort[] | undefined {
288
+ if (isGlm52ReasoningEffortModelId(spec.id)) {
289
+ // Z.ai/Zhipu and OpenRouter both surface GLM-5.2's full effort ladder,
290
+ // including the top `xhigh` tier (sent as "max" on Z.ai/Zhipu, verbatim as `xhigh` on OpenRouter); Ollama Cloud exposes only
291
+ // high/xhigh.
292
+ if (isZaiThinkingFormat(compat) || isOpenRouterThinkingFormat(compat)) {
293
+ return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
294
+ }
295
+ if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
296
+ return GLM_52_HIGH_MAX_REASONING_EFFORTS;
297
+ }
290
298
  }
291
299
  return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
292
300
  ? LOW_MEDIUM_HIGH_REASONING_EFFORTS
293
301
  : undefined;
294
302
  }
295
303
 
296
- function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
297
- if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
298
- return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
299
- }
300
-
301
304
  function isOllamaCloudGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
302
305
  return spec.api === "ollama-chat" && spec.provider === "ollama-cloud" && isGlm52ReasoningEffortModelId(spec.id);
303
306
  }
@@ -314,8 +317,17 @@ function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
314
317
  return map && Object.keys(map).length > 0 ? map : undefined;
315
318
  }
316
319
 
320
+ function isOpenRouterThinkingFormat(compat: CompatOf<Api>): boolean {
321
+ return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "openrouter";
322
+ }
323
+
324
+ function isZaiThinkingFormat(compat: CompatOf<Api>): boolean {
325
+ return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "zai";
326
+ }
327
+
317
328
  function inferDetectedEffortMap<TApi extends Api>(
318
329
  spec: ModelSpec<TApi>,
330
+ compat: CompatOf<TApi>,
319
331
  parsedModel: ParsedModel,
320
332
  mode: ThinkingConfig["mode"],
321
333
  ): EffortMap | undefined {
@@ -327,29 +339,42 @@ function inferDetectedEffortMap<TApi extends Api>(
327
339
  ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
328
340
  : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
329
341
  }
342
+ // GLM-5.2 coding SKUs accept `reasoning_effort`, but the effort dialect is
343
+ // host-specific (verified against live endpoints):
344
+ // - Z.ai/Zhipu ("zai" dialect): the model exposes only none/high/max, so
345
+ // `xhigh` 400s — collapse minimal->none, low/medium/high->high, xhigh->max.
346
+ // - OpenRouter: `max` 400s and `xhigh` IS its max tier, so it passes `xhigh`
347
+ // through literally (no map; the tier is exposed via getModelDefinedEfforts).
348
+ // - Other openai-compat hosts (Fireworks, resellers) and Ollama Cloud keep
349
+ // their distinct lower tiers and host quirks (e.g. Fireworks rejects
350
+ // `minimal`, so `minimal->none` stays) and only remap the top `xhigh` UI
351
+ // tier onto the genuine `max` budget. Filtered to supported efforts later.
352
+ const isGlm52 = isGlm52ReasoningEffortModelId(spec.id);
353
+ if (isGlm52 && isZaiThinkingFormat(compat)) {
354
+ return ZAI_GLM_52_REASONING_EFFORT_MAP;
355
+ }
330
356
  if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
331
- return OLLAMA_CLOUD_GLM_52_REASONING_EFFORT_MAP;
357
+ return GLM_52_XHIGH_MAX_EFFORT_MAP;
332
358
  }
333
359
  if (!isOpenAICompatReasoningApi(spec.api)) {
334
360
  return undefined;
335
361
  }
362
+ let map: EffortMap | undefined;
336
363
  if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
337
- return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
338
- }
339
- if (isZaiGlm52ReasoningEffortModel(spec)) {
340
- return ZAI_GLM_52_REASONING_EFFORT_MAP;
341
- }
342
- if (isDeepseekReasoningModel(spec)) {
343
- return DEEPSEEK_REASONING_EFFORT_MAP;
344
- }
345
- if (modelMatchesHost(spec, "openrouter")) {
346
- const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
347
- if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
364
+ map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
365
+ } else if (isDeepseekReasoningModel(spec)) {
366
+ map = DEEPSEEK_REASONING_EFFORT_MAP;
367
+ } else if (modelMatchesHost(spec, "openrouter")) {
368
+ map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
369
+ } else if (modelMatchesHost(spec, "fireworks")) {
370
+ map = FIREWORKS_REASONING_EFFORT_MAP;
348
371
  }
349
- if (modelMatchesHost(spec, "fireworks")) {
350
- return FIREWORKS_REASONING_EFFORT_MAP;
372
+ // Overlay GLM-5.2's top-tier `xhigh -> max` on the host base map, except on
373
+ // OpenRouter (xhigh IS its max tier; `max` 400s there).
374
+ if (isGlm52 && !isOpenRouterThinkingFormat(compat)) {
375
+ map = { ...map, ...GLM_52_XHIGH_MAX_EFFORT_MAP };
351
376
  }
352
- return undefined;
377
+ return map;
353
378
  }
354
379
 
355
380
  function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
@@ -383,7 +408,7 @@ function inferSupportedEfforts<TApi extends Api>(
383
408
  spec: ModelSpec<TApi>,
384
409
  compat: CompatOf<TApi>,
385
410
  ): readonly Effort[] {
386
- const modelDefinedEfforts = getModelDefinedEfforts(spec);
411
+ const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
387
412
  if (modelDefinedEfforts !== undefined) {
388
413
  return modelDefinedEfforts;
389
414
  }