@oh-my-pi/pi-catalog 16.1.1 → 16.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/types/types.d.ts +5 -0
- package/dist/types/variant-collapse.d.ts +7 -0
- package/package.json +3 -3
- package/src/model-cache.ts +65 -39
- package/src/model-thinking.ts +55 -30
- package/src/models.json +623 -787
- package/src/provider-models/ollama.ts +1 -0
- package/src/provider-models/openai-compat.ts +14 -2
- package/src/types.ts +5 -0
- package/src/variant-collapse.ts +21 -3
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.1.3] - 2026-06-19
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Marked Ollama Cloud catalog models to omit on-the-wire output-token caps, preventing context-window-sized `num_predict` values from causing HTTP 400s for models whose true output cap is not discoverable. ([#2984](https://github.com/can1357/oh-my-pi/issues/2984))
|
|
10
|
+
- Fixed `readModelCache`/`writeModelCache` using a process-global shared database even when a custom `dbPath` was provided. Custom-path cache operations now open and close a per-call database via `withModelCacheDb`, preventing leaked SQLite handles on Windows.
|
|
11
|
+
|
|
12
|
+
## [16.1.2] - 2026-06-19
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
|
|
16
|
+
- Added support for Gemini 2.5 Flash-Lite, 3.1 Flash-Lite, and 3.5 Flash models
|
|
17
|
+
- Added support for Moonshot V1 model family
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Updated context window and token limits for various Claude, Gemini, and GPT-OSS models
|
|
22
|
+
- Refined thinking mode behaviors and routing for supported LLM families
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- Fixed GLM-5.2 `reasoning_effort` so the top thinking tier reaches each host's genuine maximum instead of 400ing, mapping the internal `xhigh` tier per host dialect (verified against live endpoints): Z.ai/Zhipu collapse onto the model's `none`/`high`/`max` scale (`xhigh → max`); Fireworks, resellers, and Ollama Cloud keep their distinct lower tiers and remap only the top `xhigh → max` (merged over host quirks such as Fireworks' `minimal → none`); and OpenRouter — whose API rejects `max` and treats `xhigh` as its own max tier — now exposes the `xhigh` tier and forwards it verbatim. Dialect detection keys off resolved `compat.thinkingFormat`, so custom OpenRouter/Z.ai-format providers are covered too.
|
|
27
|
+
- Maintained thinking effort routing when discovery only returns the base model ID
|
|
28
|
+
- Improved credential retrieval logic for Antigravity and Codex providers via auth discovery
|
|
29
|
+
|
|
5
30
|
## [16.0.9] - 2026-06-18
|
|
6
31
|
|
|
7
32
|
### Fixed
|
package/dist/types/types.d.ts
CHANGED
|
@@ -477,6 +477,11 @@ export interface Model<TApi extends Api = Api> {
|
|
|
477
477
|
baseUrl: string;
|
|
478
478
|
reasoning: boolean;
|
|
479
479
|
input: ("text" | "image")[];
|
|
480
|
+
/**
|
|
481
|
+
* Decoder family used for image inputs when it has narrower format support
|
|
482
|
+
* than OMP's general image pipeline. `stb` local backends reject WebP.
|
|
483
|
+
*/
|
|
484
|
+
imageInputDecoder?: "stb";
|
|
480
485
|
/**
|
|
481
486
|
* Native provider tool-call support. `false` is the only unsupported signal:
|
|
482
487
|
* `true` and `undefined` both mean callers may use native tools. Catalog and
|
|
@@ -40,6 +40,13 @@ export interface EffortVariantFamily {
|
|
|
40
40
|
thinking: Readonly<Omit<ThinkingConfig, "effortRouting" | "suppressWhenOff">>;
|
|
41
41
|
/** Thinking-off requests must explicitly suppress thinking on the wire. */
|
|
42
42
|
suppressWhenOff?: boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Preserve non-off effort routes even when discovery omits the backing member.
|
|
45
|
+
* Used for Cloud Code Assist `X`/`X-thinking` pairs where upstream accepts
|
|
46
|
+
* the `-thinking` wire id but the model-list endpoint may advertise only the
|
|
47
|
+
* bare id.
|
|
48
|
+
*/
|
|
49
|
+
preserveAbsentEffortRoutes?: boolean;
|
|
43
50
|
/** Retired/recycled selector ids that alias to this family without being members. */
|
|
44
51
|
extraAliases?: readonly string[];
|
|
45
52
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.1.
|
|
4
|
+
"version": "16.1.3",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,12 +34,12 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.1.
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.1.3",
|
|
38
38
|
"arktype": "^2.2.0",
|
|
39
39
|
"zod": "^4"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
|
-
"@oh-my-pi/pi-ai": "16.1.
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.1.3",
|
|
43
43
|
"@types/bun": "^1.3.14"
|
|
44
44
|
},
|
|
45
45
|
"engines": {
|
package/src/model-cache.ts
CHANGED
|
@@ -46,14 +46,7 @@ interface CacheEntry<TApi extends Api = Api> {
|
|
|
46
46
|
let sharedDb: Database | null = null;
|
|
47
47
|
let sharedDbPath: string | null = null;
|
|
48
48
|
|
|
49
|
-
function getDb(dbPath?: string): Database {
|
|
50
|
-
const resolvedPath = dbPath ?? getModelDbPath();
|
|
51
|
-
if (sharedDb && sharedDbPath === resolvedPath) {
|
|
52
|
-
return sharedDb;
|
|
53
|
-
}
|
|
54
|
-
if (sharedDb) {
|
|
55
|
-
sharedDb.close();
|
|
56
|
-
}
|
|
49
|
+
function openDb(resolvedPath: string): Database {
|
|
57
50
|
const db = new Database(resolvedPath, { create: true });
|
|
58
51
|
// Install the busy handler BEFORE any lock-taking statement. See
|
|
59
52
|
// https://github.com/can1357/oh-my-pi/issues/2421.
|
|
@@ -70,16 +63,42 @@ function getDb(dbPath?: string): Database {
|
|
|
70
63
|
)
|
|
71
64
|
`);
|
|
72
65
|
migrateCacheSchema(db);
|
|
66
|
+
return db;
|
|
67
|
+
}
|
|
73
68
|
|
|
69
|
+
function getSharedDb(): Database {
|
|
70
|
+
const resolvedPath = getModelDbPath();
|
|
71
|
+
if (sharedDb && sharedDbPath === resolvedPath) {
|
|
72
|
+
return sharedDb;
|
|
73
|
+
}
|
|
74
|
+
if (sharedDb) {
|
|
75
|
+
sharedDb.close();
|
|
76
|
+
}
|
|
77
|
+
const db = openDb(resolvedPath);
|
|
74
78
|
sharedDb = db;
|
|
75
79
|
sharedDbPath = resolvedPath;
|
|
76
80
|
return db;
|
|
77
81
|
}
|
|
78
82
|
|
|
83
|
+
function withModelCacheDb<T>(dbPath: string | undefined, useDb: (db: Database) => T): T {
|
|
84
|
+
if (!dbPath) return useDb(getSharedDb());
|
|
85
|
+
const db = openDb(dbPath);
|
|
86
|
+
try {
|
|
87
|
+
return useDb(db);
|
|
88
|
+
} finally {
|
|
89
|
+
db.close();
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
79
93
|
function migrateCacheSchema(db: Database): void {
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
const stmt = db.prepare("PRAGMA table_info(model_cache)");
|
|
95
|
+
try {
|
|
96
|
+
const columns = stmt.all() as TableInfoRow[];
|
|
97
|
+
if (!columns.some(column => column.name === "static_fingerprint")) {
|
|
98
|
+
db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
|
|
99
|
+
}
|
|
100
|
+
} finally {
|
|
101
|
+
stmt.finalize();
|
|
83
102
|
}
|
|
84
103
|
db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
|
|
85
104
|
}
|
|
@@ -91,21 +110,27 @@ export function readModelCache<TApi extends Api>(
|
|
|
91
110
|
dbPath?: string,
|
|
92
111
|
): CacheEntry<TApi> | null {
|
|
93
112
|
try {
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
113
|
+
return withModelCacheDb(dbPath, db => {
|
|
114
|
+
const stmt = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?");
|
|
115
|
+
try {
|
|
116
|
+
const row = stmt.get(providerId);
|
|
117
|
+
if (!row || row.version !== CACHE_SCHEMA_VERSION) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
120
|
+
const models = JSON.parse(row.models) as ModelSpec<TApi>[];
|
|
121
|
+
const ageMs = now() - row.updated_at;
|
|
122
|
+
const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
|
|
123
|
+
return {
|
|
124
|
+
models,
|
|
125
|
+
fresh,
|
|
126
|
+
authoritative: row.authoritative === 1,
|
|
127
|
+
updatedAt: row.updated_at,
|
|
128
|
+
staticFingerprint: row.static_fingerprint ?? "",
|
|
129
|
+
};
|
|
130
|
+
} finally {
|
|
131
|
+
stmt.finalize();
|
|
132
|
+
}
|
|
133
|
+
});
|
|
109
134
|
} catch {
|
|
110
135
|
return null;
|
|
111
136
|
}
|
|
@@ -120,19 +145,20 @@ export function writeModelCache<TApi extends Api>(
|
|
|
120
145
|
dbPath?: string,
|
|
121
146
|
): void {
|
|
122
147
|
try {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
148
|
+
withModelCacheDb(dbPath, db => {
|
|
149
|
+
db.run(
|
|
150
|
+
`INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
|
|
151
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
152
|
+
[
|
|
153
|
+
providerId,
|
|
154
|
+
CACHE_SCHEMA_VERSION,
|
|
155
|
+
updatedAt,
|
|
156
|
+
authoritative ? 1 : 0,
|
|
157
|
+
staticFingerprint,
|
|
158
|
+
JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
|
|
159
|
+
],
|
|
160
|
+
);
|
|
161
|
+
});
|
|
136
162
|
} catch {
|
|
137
163
|
// Cache writes are best-effort; failures should not break model resolution.
|
|
138
164
|
}
|
package/src/model-thinking.ts
CHANGED
|
@@ -86,7 +86,7 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
|
86
86
|
[Effort.High]: "high",
|
|
87
87
|
[Effort.XHigh]: "max",
|
|
88
88
|
};
|
|
89
|
-
const
|
|
89
|
+
const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
|
|
90
90
|
[Effort.XHigh]: "max",
|
|
91
91
|
};
|
|
92
92
|
|
|
@@ -164,7 +164,7 @@ function fillThinkingWireDefaults<TApi extends Api>(
|
|
|
164
164
|
thinking: ThinkingConfig,
|
|
165
165
|
): ThinkingConfig {
|
|
166
166
|
const parsed = parseKnownModel(spec.id);
|
|
167
|
-
const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
|
|
167
|
+
const normalizedEfforts = getModelDefinedEfforts(spec, compat) ?? thinking.efforts;
|
|
168
168
|
const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
|
|
169
169
|
const effortMap =
|
|
170
170
|
thinking.effortMap === undefined
|
|
@@ -251,7 +251,7 @@ function inferEffortMap<TApi extends Api>(
|
|
|
251
251
|
mode: ThinkingConfig["mode"],
|
|
252
252
|
efforts: readonly Effort[],
|
|
253
253
|
): EffortMap | undefined {
|
|
254
|
-
const detected = inferDetectedEffortMap(spec, parsedModel, mode);
|
|
254
|
+
const detected = inferDetectedEffortMap(spec, compat, parsedModel, mode);
|
|
255
255
|
const configured = readCompatEffortMap(compat);
|
|
256
256
|
const merged =
|
|
257
257
|
detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
|
|
@@ -281,23 +281,26 @@ function isOpenAICompatReasoningApi(api: Api): boolean {
|
|
|
281
281
|
return api === "openai-completions" || api === "openrouter";
|
|
282
282
|
}
|
|
283
283
|
|
|
284
|
-
function getModelDefinedEfforts<TApi extends Api>(
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
if (
|
|
289
|
-
|
|
284
|
+
function getModelDefinedEfforts<TApi extends Api>(
|
|
285
|
+
spec: ModelSpec<TApi>,
|
|
286
|
+
compat: CompatOf<TApi>,
|
|
287
|
+
): readonly Effort[] | undefined {
|
|
288
|
+
if (isGlm52ReasoningEffortModelId(spec.id)) {
|
|
289
|
+
// Z.ai/Zhipu and OpenRouter both surface GLM-5.2's full effort ladder,
|
|
290
|
+
// including the top `xhigh` (= "max") tier; Ollama Cloud exposes only
|
|
291
|
+
// high/xhigh.
|
|
292
|
+
if (isZaiThinkingFormat(compat) || isOpenRouterThinkingFormat(compat)) {
|
|
293
|
+
return DEFAULT_REASONING_EFFORTS_WITH_XHIGH;
|
|
294
|
+
}
|
|
295
|
+
if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
|
|
296
|
+
return GLM_52_HIGH_MAX_REASONING_EFFORTS;
|
|
297
|
+
}
|
|
290
298
|
}
|
|
291
299
|
return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
|
|
292
300
|
? LOW_MEDIUM_HIGH_REASONING_EFFORTS
|
|
293
301
|
: undefined;
|
|
294
302
|
}
|
|
295
303
|
|
|
296
|
-
function isZaiGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
|
|
297
|
-
if (!isGlm52ReasoningEffortModelId(spec.id)) return false;
|
|
298
|
-
return modelMatchesHost(spec, "zai") || modelMatchesHost(spec, "zhipu");
|
|
299
|
-
}
|
|
300
|
-
|
|
301
304
|
function isOllamaCloudGlm52ReasoningEffortModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
|
|
302
305
|
return spec.api === "ollama-chat" && spec.provider === "ollama-cloud" && isGlm52ReasoningEffortModelId(spec.id);
|
|
303
306
|
}
|
|
@@ -314,8 +317,17 @@ function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
|
|
|
314
317
|
return map && Object.keys(map).length > 0 ? map : undefined;
|
|
315
318
|
}
|
|
316
319
|
|
|
320
|
+
function isOpenRouterThinkingFormat(compat: CompatOf<Api>): boolean {
|
|
321
|
+
return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "openrouter";
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
function isZaiThinkingFormat(compat: CompatOf<Api>): boolean {
|
|
325
|
+
return compat !== undefined && "thinkingFormat" in compat && compat.thinkingFormat === "zai";
|
|
326
|
+
}
|
|
327
|
+
|
|
317
328
|
function inferDetectedEffortMap<TApi extends Api>(
|
|
318
329
|
spec: ModelSpec<TApi>,
|
|
330
|
+
compat: CompatOf<TApi>,
|
|
319
331
|
parsedModel: ParsedModel,
|
|
320
332
|
mode: ThinkingConfig["mode"],
|
|
321
333
|
): EffortMap | undefined {
|
|
@@ -327,29 +339,42 @@ function inferDetectedEffortMap<TApi extends Api>(
|
|
|
327
339
|
? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
|
|
328
340
|
: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
|
|
329
341
|
}
|
|
342
|
+
// GLM-5.2 coding SKUs accept `reasoning_effort`, but the effort dialect is
|
|
343
|
+
// host-specific (verified against live endpoints):
|
|
344
|
+
// - Z.ai/Zhipu ("zai" dialect): the model exposes only none/high/max, so
|
|
345
|
+
// `xhigh` 400s — collapse minimal->none, low/medium/high->high, xhigh->max.
|
|
346
|
+
// - OpenRouter: `max` 400s and `xhigh` IS its max tier, so it passes `xhigh`
|
|
347
|
+
// through literally (no map; the tier is exposed via getModelDefinedEfforts).
|
|
348
|
+
// - Other openai-compat hosts (Fireworks, resellers) and Ollama Cloud keep
|
|
349
|
+
// their distinct lower tiers and host quirks (e.g. Fireworks rejects
|
|
350
|
+
// `minimal`, so `minimal->none` stays) and only remap the top `xhigh` UI
|
|
351
|
+
// tier onto the genuine `max` budget. Filtered to supported efforts later.
|
|
352
|
+
const isGlm52 = isGlm52ReasoningEffortModelId(spec.id);
|
|
353
|
+
if (isGlm52 && isZaiThinkingFormat(compat)) {
|
|
354
|
+
return ZAI_GLM_52_REASONING_EFFORT_MAP;
|
|
355
|
+
}
|
|
330
356
|
if (isOllamaCloudGlm52ReasoningEffortModel(spec)) {
|
|
331
|
-
return
|
|
357
|
+
return GLM_52_XHIGH_MAX_EFFORT_MAP;
|
|
332
358
|
}
|
|
333
359
|
if (!isOpenAICompatReasoningApi(spec.api)) {
|
|
334
360
|
return undefined;
|
|
335
361
|
}
|
|
362
|
+
let map: EffortMap | undefined;
|
|
336
363
|
if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
|
|
337
|
-
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
if (
|
|
343
|
-
|
|
344
|
-
}
|
|
345
|
-
if (modelMatchesHost(spec, "openrouter")) {
|
|
346
|
-
const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
|
|
347
|
-
if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
|
|
364
|
+
map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
|
|
365
|
+
} else if (isDeepseekReasoningModel(spec)) {
|
|
366
|
+
map = DEEPSEEK_REASONING_EFFORT_MAP;
|
|
367
|
+
} else if (modelMatchesHost(spec, "openrouter")) {
|
|
368
|
+
map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
|
|
369
|
+
} else if (modelMatchesHost(spec, "fireworks")) {
|
|
370
|
+
map = FIREWORKS_REASONING_EFFORT_MAP;
|
|
348
371
|
}
|
|
349
|
-
|
|
350
|
-
|
|
372
|
+
// Overlay GLM-5.2's top-tier `xhigh -> max` on the host base map, except on
|
|
373
|
+
// OpenRouter (xhigh IS its max tier; `max` 400s there).
|
|
374
|
+
if (isGlm52 && !isOpenRouterThinkingFormat(compat)) {
|
|
375
|
+
map = { ...map, ...GLM_52_XHIGH_MAX_EFFORT_MAP };
|
|
351
376
|
}
|
|
352
|
-
return
|
|
377
|
+
return map;
|
|
353
378
|
}
|
|
354
379
|
|
|
355
380
|
function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
|
|
@@ -383,7 +408,7 @@ function inferSupportedEfforts<TApi extends Api>(
|
|
|
383
408
|
spec: ModelSpec<TApi>,
|
|
384
409
|
compat: CompatOf<TApi>,
|
|
385
410
|
): readonly Effort[] {
|
|
386
|
-
const modelDefinedEfforts = getModelDefinedEfforts(spec);
|
|
411
|
+
const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
|
|
387
412
|
if (modelDefinedEfforts !== undefined) {
|
|
388
413
|
return modelDefinedEfforts;
|
|
389
414
|
}
|