@oh-my-pi/pi-catalog 16.1.2 → 16.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.1.4] - 2026-06-19
6
+
7
+ ### Fixed
8
+
9
+ - Fixed Claude 4.6 routing on the `google-antigravity` (and `google-gemini-cli`) Cloud Code Assist providers, whose backend exposes the models asymmetrically: `claude-sonnet-4-6` has no `-thinking` twin and `claude-opus-4-6` has only the `-thinking` twin. The shared `thinkingPair` family was routing thinking efforts on `claude-sonnet-4-6` to a non-existent `claude-sonnet-4-6-thinking` wire id (404 `Requested entity was not found`); replaced both 4.6 entries with bespoke single-wire families so that every effort level, as well as thinking off, resolves to the live wire id. Added `claude-sonnet-4-6` and `claude-opus-4-6-thinking` entries to `ANTIGRAVITY_MODEL_WIRE_PROFILES` capped at the backend's 64000-output-token limit (over-cap requests were rejected with HTTP 400 `Request contains an invalid argument`); `modelEnum` is now optional on `AntigravityModelWireProfile` since the Claude wire ids are accepted without a captured `labels.model_enum`. ([#3067](https://github.com/can1357/oh-my-pi/issues/3067))
10
+
11
+ ## [16.1.3] - 2026-06-19
12
+
13
+ ### Fixed
14
+
15
+ - Marked Ollama Cloud catalog models to omit on-the-wire output-token caps, preventing context-window-sized `num_predict` values from causing HTTP 400s for models whose true output cap is not discoverable. ([#2984](https://github.com/can1357/oh-my-pi/issues/2984))
16
+ - Fixed `readModelCache`/`writeModelCache` using a process-global shared database even when a custom `dbPath` was provided. Custom-path cache operations now open and close a per-call database via `withModelCacheDb`, preventing leaked SQLite handles on Windows.
17
+
5
18
  ## [16.1.2] - 2026-06-19
6
19
 
7
20
  ### Added
@@ -477,6 +477,11 @@ export interface Model<TApi extends Api = Api> {
477
477
  baseUrl: string;
478
478
  reasoning: boolean;
479
479
  input: ("text" | "image")[];
480
+ /**
481
+ * Decoder family used for image inputs when it has narrower format support
482
+ * than OMP's general image pipeline. `stb` local backends reject WebP.
483
+ */
484
+ imageInputDecoder?: "stb";
480
485
  /**
481
486
  * Native provider tool-call support. `false` is the only unsupported signal:
482
487
  * `true` and `undefined` both mean callers may use native tools. Catalog and
@@ -19,15 +19,18 @@ export declare let getAntigravityUserAgent: () => string;
19
19
  /**
20
20
  * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
21
21
  * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
22
- * opaque `labels.model_enum` token the client tags each request with;
23
- * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
24
- * regardless of the thinking budget. Keyed by the routed upstream wire id
25
- * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
22
+ * opaque `labels.model_enum` token the client tags each request with — optional
23
+ * because Anthropic-backed wire ids (e.g. `claude-sonnet-4-6`,
24
+ * `claude-opus-4-6-thinking`) are accepted without one; the label is purely
25
+ * telemetry. `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens`
26
+ * the backend enforces regardless of the thinking budget (Claude caps at
27
+ * 64000, Gemini accepts the discovered cap). Keyed by the routed upstream wire
28
+ * id (post effort-routing), not the collapsed logical id. Checkpoint-only ids
26
29
  * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
27
30
  * emits agent requests.
28
31
  */
29
32
  export interface AntigravityModelWireProfile {
30
- modelEnum: string;
33
+ modelEnum?: string;
31
34
  maxOutputTokens: number;
32
35
  }
33
36
  export declare const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.1.2",
4
+ "version": "16.1.4",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,12 +34,12 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.1.2",
37
+ "@oh-my-pi/pi-utils": "16.1.4",
38
38
  "arktype": "^2.2.0",
39
39
  "zod": "^4"
40
40
  },
41
41
  "devDependencies": {
42
- "@oh-my-pi/pi-ai": "16.1.2",
42
+ "@oh-my-pi/pi-ai": "16.1.4",
43
43
  "@types/bun": "^1.3.14"
44
44
  },
45
45
  "engines": {
@@ -46,14 +46,7 @@ interface CacheEntry<TApi extends Api = Api> {
46
46
  let sharedDb: Database | null = null;
47
47
  let sharedDbPath: string | null = null;
48
48
 
49
- function getDb(dbPath?: string): Database {
50
- const resolvedPath = dbPath ?? getModelDbPath();
51
- if (sharedDb && sharedDbPath === resolvedPath) {
52
- return sharedDb;
53
- }
54
- if (sharedDb) {
55
- sharedDb.close();
56
- }
49
+ function openDb(resolvedPath: string): Database {
57
50
  const db = new Database(resolvedPath, { create: true });
58
51
  // Install the busy handler BEFORE any lock-taking statement. See
59
52
  // https://github.com/can1357/oh-my-pi/issues/2421.
@@ -70,16 +63,42 @@ function getDb(dbPath?: string): Database {
70
63
  )
71
64
  `);
72
65
  migrateCacheSchema(db);
66
+ return db;
67
+ }
73
68
 
69
+ function getSharedDb(): Database {
70
+ const resolvedPath = getModelDbPath();
71
+ if (sharedDb && sharedDbPath === resolvedPath) {
72
+ return sharedDb;
73
+ }
74
+ if (sharedDb) {
75
+ sharedDb.close();
76
+ }
77
+ const db = openDb(resolvedPath);
74
78
  sharedDb = db;
75
79
  sharedDbPath = resolvedPath;
76
80
  return db;
77
81
  }
78
82
 
83
+ function withModelCacheDb<T>(dbPath: string | undefined, useDb: (db: Database) => T): T {
84
+ if (!dbPath) return useDb(getSharedDb());
85
+ const db = openDb(dbPath);
86
+ try {
87
+ return useDb(db);
88
+ } finally {
89
+ db.close();
90
+ }
91
+ }
92
+
79
93
  function migrateCacheSchema(db: Database): void {
80
- const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
81
- if (!columns.some(column => column.name === "static_fingerprint")) {
82
- db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
94
+ const stmt = db.prepare("PRAGMA table_info(model_cache)");
95
+ try {
96
+ const columns = stmt.all() as TableInfoRow[];
97
+ if (!columns.some(column => column.name === "static_fingerprint")) {
98
+ db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
99
+ }
100
+ } finally {
101
+ stmt.finalize();
83
102
  }
84
103
  db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
85
104
  }
@@ -91,21 +110,27 @@ export function readModelCache<TApi extends Api>(
91
110
  dbPath?: string,
92
111
  ): CacheEntry<TApi> | null {
93
112
  try {
94
- const db = getDb(dbPath);
95
- const row = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?").get(providerId);
96
- if (!row || row.version !== CACHE_SCHEMA_VERSION) {
97
- return null;
98
- }
99
- const models = JSON.parse(row.models) as ModelSpec<TApi>[];
100
- const ageMs = now() - row.updated_at;
101
- const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
102
- return {
103
- models,
104
- fresh,
105
- authoritative: row.authoritative === 1,
106
- updatedAt: row.updated_at,
107
- staticFingerprint: row.static_fingerprint ?? "",
108
- };
113
+ return withModelCacheDb(dbPath, db => {
114
+ const stmt = db.query<CacheRow, [string]>("SELECT * FROM model_cache WHERE provider_id = ?");
115
+ try {
116
+ const row = stmt.get(providerId);
117
+ if (!row || row.version !== CACHE_SCHEMA_VERSION) {
118
+ return null;
119
+ }
120
+ const models = JSON.parse(row.models) as ModelSpec<TApi>[];
121
+ const ageMs = now() - row.updated_at;
122
+ const fresh = Number.isFinite(ageMs) && ageMs >= 0 && ageMs <= ttlMs;
123
+ return {
124
+ models,
125
+ fresh,
126
+ authoritative: row.authoritative === 1,
127
+ updatedAt: row.updated_at,
128
+ staticFingerprint: row.static_fingerprint ?? "",
129
+ };
130
+ } finally {
131
+ stmt.finalize();
132
+ }
133
+ });
109
134
  } catch {
110
135
  return null;
111
136
  }
@@ -120,19 +145,20 @@ export function writeModelCache<TApi extends Api>(
120
145
  dbPath?: string,
121
146
  ): void {
122
147
  try {
123
- const db = getDb(dbPath);
124
- db.run(
125
- `INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
126
- VALUES (?, ?, ?, ?, ?, ?)`,
127
- [
128
- providerId,
129
- CACHE_SCHEMA_VERSION,
130
- updatedAt,
131
- authoritative ? 1 : 0,
132
- staticFingerprint,
133
- JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
134
- ],
135
- );
148
+ withModelCacheDb(dbPath, db => {
149
+ db.run(
150
+ `INSERT OR REPLACE INTO model_cache (provider_id, version, updated_at, authoritative, static_fingerprint, models)
151
+ VALUES (?, ?, ?, ?, ?, ?)`,
152
+ [
153
+ providerId,
154
+ CACHE_SCHEMA_VERSION,
155
+ updatedAt,
156
+ authoritative ? 1 : 0,
157
+ staticFingerprint,
158
+ JSON.stringify(models.map(model => ({ ...model, compat: model.compatConfig, compatConfig: undefined }))),
159
+ ],
160
+ );
161
+ });
136
162
  } catch {
137
163
  // Cache writes are best-effort; failures should not break model resolution.
138
164
  }
package/src/models.json CHANGED
@@ -53624,6 +53624,7 @@
53624
53624
  },
53625
53625
  "contextWindow": 163840,
53626
53626
  "maxTokens": 32000,
53627
+ "omitMaxOutputTokens": true,
53627
53628
  "thinking": {
53628
53629
  "mode": "effort",
53629
53630
  "efforts": [
@@ -53652,6 +53653,7 @@
53652
53653
  },
53653
53654
  "contextWindow": 163840,
53654
53655
  "maxTokens": 163840,
53656
+ "omitMaxOutputTokens": true,
53655
53657
  "thinking": {
53656
53658
  "mode": "effort",
53657
53659
  "efforts": [
@@ -53680,6 +53682,7 @@
53680
53682
  },
53681
53683
  "contextWindow": 163840,
53682
53684
  "maxTokens": 65536,
53685
+ "omitMaxOutputTokens": true,
53683
53686
  "thinking": {
53684
53687
  "mode": "effort",
53685
53688
  "efforts": [
@@ -53708,6 +53711,7 @@
53708
53711
  },
53709
53712
  "contextWindow": 1048576,
53710
53713
  "maxTokens": 1048576,
53714
+ "omitMaxOutputTokens": true,
53711
53715
  "thinking": {
53712
53716
  "mode": "effort",
53713
53717
  "efforts": [
@@ -53737,6 +53741,7 @@
53737
53741
  },
53738
53742
  "contextWindow": 1048576,
53739
53743
  "maxTokens": 1048576,
53744
+ "omitMaxOutputTokens": true,
53740
53745
  "thinking": {
53741
53746
  "mode": "effort",
53742
53747
  "efforts": [
@@ -53765,7 +53770,8 @@
53765
53770
  "cacheWrite": 0
53766
53771
  },
53767
53772
  "contextWindow": 262144,
53768
- "maxTokens": 262144
53773
+ "maxTokens": 262144,
53774
+ "omitMaxOutputTokens": true
53769
53775
  },
53770
53776
  "devstral-small-2:24b": {
53771
53777
  "id": "devstral-small-2:24b",
@@ -53785,7 +53791,8 @@
53785
53791
  "cacheWrite": 0
53786
53792
  },
53787
53793
  "contextWindow": 262144,
53788
- "maxTokens": 262144
53794
+ "maxTokens": 262144,
53795
+ "omitMaxOutputTokens": true
53789
53796
  },
53790
53797
  "gemini-3-flash-preview": {
53791
53798
  "id": "gemini-3-flash-preview",
@@ -53806,6 +53813,7 @@
53806
53813
  },
53807
53814
  "contextWindow": 1048576,
53808
53815
  "maxTokens": 65536,
53816
+ "omitMaxOutputTokens": true,
53809
53817
  "thinking": {
53810
53818
  "mode": "effort",
53811
53819
  "efforts": [
@@ -53836,6 +53844,7 @@
53836
53844
  },
53837
53845
  "contextWindow": 262144,
53838
53846
  "maxTokens": 262144,
53847
+ "omitMaxOutputTokens": true,
53839
53848
  "thinking": {
53840
53849
  "mode": "effort",
53841
53850
  "efforts": [
@@ -53864,6 +53873,7 @@
53864
53873
  },
53865
53874
  "contextWindow": 202752,
53866
53875
  "maxTokens": 131072,
53876
+ "omitMaxOutputTokens": true,
53867
53877
  "thinking": {
53868
53878
  "mode": "effort",
53869
53879
  "efforts": [
@@ -53892,6 +53902,7 @@
53892
53902
  },
53893
53903
  "contextWindow": 202752,
53894
53904
  "maxTokens": 131072,
53905
+ "omitMaxOutputTokens": true,
53895
53906
  "thinking": {
53896
53907
  "mode": "effort",
53897
53908
  "efforts": [
@@ -53920,6 +53931,7 @@
53920
53931
  },
53921
53932
  "contextWindow": 202752,
53922
53933
  "maxTokens": 131072,
53934
+ "omitMaxOutputTokens": true,
53923
53935
  "thinking": {
53924
53936
  "mode": "effort",
53925
53937
  "efforts": [
@@ -53948,6 +53960,7 @@
53948
53960
  },
53949
53961
  "contextWindow": 202752,
53950
53962
  "maxTokens": 131072,
53963
+ "omitMaxOutputTokens": true,
53951
53964
  "thinking": {
53952
53965
  "mode": "effort",
53953
53966
  "efforts": [
@@ -53976,6 +53989,7 @@
53976
53989
  },
53977
53990
  "contextWindow": 976000,
53978
53991
  "maxTokens": 131072,
53992
+ "omitMaxOutputTokens": true,
53979
53993
  "thinking": {
53980
53994
  "mode": "effort",
53981
53995
  "efforts": [
@@ -54005,6 +54019,7 @@
54005
54019
  },
54006
54020
  "contextWindow": 131072,
54007
54021
  "maxTokens": 32768,
54022
+ "omitMaxOutputTokens": true,
54008
54023
  "thinking": {
54009
54024
  "mode": "effort",
54010
54025
  "efforts": [
@@ -54033,6 +54048,7 @@
54033
54048
  },
54034
54049
  "contextWindow": 131072,
54035
54050
  "maxTokens": 32768,
54051
+ "omitMaxOutputTokens": true,
54036
54052
  "thinking": {
54037
54053
  "mode": "effort",
54038
54054
  "efforts": [
@@ -54061,6 +54077,7 @@
54061
54077
  },
54062
54078
  "contextWindow": 262144,
54063
54079
  "maxTokens": 262144,
54080
+ "omitMaxOutputTokens": true,
54064
54081
  "thinking": {
54065
54082
  "mode": "effort",
54066
54083
  "efforts": [
@@ -54089,7 +54106,8 @@
54089
54106
  "cacheWrite": 0
54090
54107
  },
54091
54108
  "contextWindow": 262144,
54092
- "maxTokens": 262144
54109
+ "maxTokens": 262144,
54110
+ "omitMaxOutputTokens": true
54093
54111
  },
54094
54112
  "kimi-k2.5": {
54095
54113
  "id": "kimi-k2.5",
@@ -54110,6 +54128,7 @@
54110
54128
  },
54111
54129
  "contextWindow": 262144,
54112
54130
  "maxTokens": 262144,
54131
+ "omitMaxOutputTokens": true,
54113
54132
  "thinking": {
54114
54133
  "mode": "effort",
54115
54134
  "efforts": [
@@ -54139,6 +54158,7 @@
54139
54158
  },
54140
54159
  "contextWindow": 262144,
54141
54160
  "maxTokens": 262144,
54161
+ "omitMaxOutputTokens": true,
54142
54162
  "thinking": {
54143
54163
  "mode": "effort",
54144
54164
  "efforts": [
@@ -54168,6 +54188,7 @@
54168
54188
  },
54169
54189
  "contextWindow": 262144,
54170
54190
  "maxTokens": 262144,
54191
+ "omitMaxOutputTokens": true,
54171
54192
  "thinking": {
54172
54193
  "mode": "effort",
54173
54194
  "efforts": [
@@ -54196,6 +54217,7 @@
54196
54217
  },
54197
54218
  "contextWindow": 204800,
54198
54219
  "maxTokens": 128000,
54220
+ "omitMaxOutputTokens": true,
54199
54221
  "thinking": {
54200
54222
  "mode": "effort",
54201
54223
  "efforts": [
@@ -54225,6 +54247,7 @@
54225
54247
  },
54226
54248
  "contextWindow": 204800,
54227
54249
  "maxTokens": 131072,
54250
+ "omitMaxOutputTokens": true,
54228
54251
  "thinking": {
54229
54252
  "mode": "effort",
54230
54253
  "efforts": [
@@ -54254,6 +54277,7 @@
54254
54277
  },
54255
54278
  "contextWindow": 204800,
54256
54279
  "maxTokens": 131072,
54280
+ "omitMaxOutputTokens": true,
54257
54281
  "thinking": {
54258
54282
  "mode": "effort",
54259
54283
  "efforts": [
@@ -54283,6 +54307,7 @@
54283
54307
  },
54284
54308
  "contextWindow": 196608,
54285
54309
  "maxTokens": 196608,
54310
+ "omitMaxOutputTokens": true,
54286
54311
  "thinking": {
54287
54312
  "mode": "effort",
54288
54313
  "efforts": [
@@ -54313,6 +54338,7 @@
54313
54338
  },
54314
54339
  "contextWindow": 512000,
54315
54340
  "maxTokens": 131072,
54341
+ "omitMaxOutputTokens": true,
54316
54342
  "thinking": {
54317
54343
  "mode": "effort",
54318
54344
  "efforts": [
@@ -54341,7 +54367,8 @@
54341
54367
  "cacheWrite": 0
54342
54368
  },
54343
54369
  "contextWindow": 262144,
54344
- "maxTokens": 128000
54370
+ "maxTokens": 128000,
54371
+ "omitMaxOutputTokens": true
54345
54372
  },
54346
54373
  "ministral-3:3b": {
54347
54374
  "id": "ministral-3:3b",
@@ -54361,7 +54388,8 @@
54361
54388
  "cacheWrite": 0
54362
54389
  },
54363
54390
  "contextWindow": 262144,
54364
- "maxTokens": 128000
54391
+ "maxTokens": 128000,
54392
+ "omitMaxOutputTokens": true
54365
54393
  },
54366
54394
  "ministral-3:8b": {
54367
54395
  "id": "ministral-3:8b",
@@ -54381,7 +54409,8 @@
54381
54409
  "cacheWrite": 0
54382
54410
  },
54383
54411
  "contextWindow": 262144,
54384
- "maxTokens": 128000
54412
+ "maxTokens": 128000,
54413
+ "omitMaxOutputTokens": true
54385
54414
  },
54386
54415
  "mistral-large-3:675b": {
54387
54416
  "id": "mistral-large-3:675b",
@@ -54401,7 +54430,8 @@
54401
54430
  "cacheWrite": 0
54402
54431
  },
54403
54432
  "contextWindow": 262144,
54404
- "maxTokens": 262144
54433
+ "maxTokens": 262144,
54434
+ "omitMaxOutputTokens": true
54405
54435
  },
54406
54436
  "nemotron-3-nano:30b": {
54407
54437
  "id": "nemotron-3-nano:30b",
@@ -54421,6 +54451,7 @@
54421
54451
  },
54422
54452
  "contextWindow": 1048576,
54423
54453
  "maxTokens": 131072,
54454
+ "omitMaxOutputTokens": true,
54424
54455
  "thinking": {
54425
54456
  "mode": "effort",
54426
54457
  "efforts": [
@@ -54449,6 +54480,7 @@
54449
54480
  },
54450
54481
  "contextWindow": 262144,
54451
54482
  "maxTokens": 65536,
54483
+ "omitMaxOutputTokens": true,
54452
54484
  "thinking": {
54453
54485
  "mode": "effort",
54454
54486
  "efforts": [
@@ -54477,6 +54509,7 @@
54477
54509
  },
54478
54510
  "contextWindow": 262144,
54479
54511
  "maxTokens": 128000,
54512
+ "omitMaxOutputTokens": true,
54480
54513
  "thinking": {
54481
54514
  "mode": "effort",
54482
54515
  "efforts": [
@@ -54504,7 +54537,8 @@
54504
54537
  "cacheWrite": 0
54505
54538
  },
54506
54539
  "contextWindow": 262144,
54507
- "maxTokens": 65536
54540
+ "maxTokens": 65536,
54541
+ "omitMaxOutputTokens": true
54508
54542
  },
54509
54543
  "qwen3-coder:480b": {
54510
54544
  "id": "qwen3-coder:480b",
@@ -54523,7 +54557,8 @@
54523
54557
  "cacheWrite": 0
54524
54558
  },
54525
54559
  "contextWindow": 262144,
54526
- "maxTokens": 65536
54560
+ "maxTokens": 65536,
54561
+ "omitMaxOutputTokens": true
54527
54562
  },
54528
54563
  "qwen3-next:80b": {
54529
54564
  "id": "qwen3-next:80b",
@@ -54543,6 +54578,7 @@
54543
54578
  },
54544
54579
  "contextWindow": 262144,
54545
54580
  "maxTokens": 32768,
54581
+ "omitMaxOutputTokens": true,
54546
54582
  "thinking": {
54547
54583
  "mode": "effort",
54548
54584
  "efforts": [
@@ -54572,6 +54608,7 @@
54572
54608
  },
54573
54609
  "contextWindow": 262144,
54574
54610
  "maxTokens": 32768,
54611
+ "omitMaxOutputTokens": true,
54575
54612
  "thinking": {
54576
54613
  "mode": "effort",
54577
54614
  "efforts": [
@@ -54600,7 +54637,8 @@
54600
54637
  "cacheWrite": 0
54601
54638
  },
54602
54639
  "contextWindow": 262144,
54603
- "maxTokens": 131072
54640
+ "maxTokens": 131072,
54641
+ "omitMaxOutputTokens": true
54604
54642
  },
54605
54643
  "qwen3.5:397b": {
54606
54644
  "id": "qwen3.5:397b",
@@ -54621,6 +54659,7 @@
54621
54659
  },
54622
54660
  "contextWindow": 262144,
54623
54661
  "maxTokens": 65536,
54662
+ "omitMaxOutputTokens": true,
54624
54663
  "thinking": {
54625
54664
  "mode": "effort",
54626
54665
  "efforts": [
@@ -54648,7 +54687,8 @@
54648
54687
  "cacheWrite": 0
54649
54688
  },
54650
54689
  "contextWindow": 32768,
54651
- "maxTokens": 4096
54690
+ "maxTokens": 4096,
54691
+ "omitMaxOutputTokens": true
54652
54692
  }
54653
54693
  },
54654
54694
  "openai": {
@@ -84815,4 +84855,4 @@
84815
84855
  }
84816
84856
  }
84817
84857
  }
84818
- }
84858
+ }
@@ -159,6 +159,7 @@ export function ollamaCloudModelManagerOptions(
159
159
  discoveredContextWindow !== null && discoveredContextWindow !== undefined
160
160
  ? (providerReference?.maxTokens ?? Math.min(contextWindow, 8192))
161
161
  : Math.min(contextWindow, 8192),
162
+ omitMaxOutputTokens: true,
162
163
  };
163
164
  }),
164
165
  );
package/src/types.ts CHANGED
@@ -594,6 +594,11 @@ export interface Model<TApi extends Api = Api> {
594
594
  baseUrl: string;
595
595
  reasoning: boolean;
596
596
  input: ("text" | "image")[];
597
+ /**
598
+ * Decoder family used for image inputs when it has narrower format support
599
+ * than OMP's general image pipeline. `stb` local backends reject WebP.
600
+ */
601
+ imageInputDecoder?: "stb";
597
602
  /**
598
603
  * Native provider tool-call support. `false` is the only unsupported signal:
599
604
  * `true` and `undefined` both mean callers may use native tools. Catalog and
@@ -220,8 +220,26 @@ const SHARED_CCA_FAMILIES: readonly EffortVariantFamily[] = [
220
220
  routing: {},
221
221
  thinking: { mode: "budget", efforts: [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High] },
222
222
  },
223
- thinkingPair("claude-sonnet-4-6", "Claude Sonnet 4.6"),
224
- thinkingPair("claude-opus-4-6", "Claude Opus 4.6"),
223
+ // Antigravity Cloud Code Assist exposes Claude 4.6 asymmetrically: only the
224
+ // bare `claude-sonnet-4-6` wire id (no `-thinking` twin) and only the
225
+ // `claude-opus-4-6-thinking` wire id (no bare twin). Per-effort thinking is
226
+ // carried in the request body via `thinkingBudget`, so both ids accept on/off
227
+ // requests. Listing both candidates in `members` (priority order) keeps the
228
+ // collapse correct if the backend mix ever rebalances.
229
+ {
230
+ id: "claude-sonnet-4-6",
231
+ name: "Claude Sonnet 4.6",
232
+ members: ["claude-sonnet-4-6", "claude-sonnet-4-6-thinking"],
233
+ routing: {},
234
+ thinking: { mode: "budget", efforts: [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High] },
235
+ },
236
+ {
237
+ id: "claude-opus-4-6",
238
+ name: "Claude Opus 4.6",
239
+ members: ["claude-opus-4-6-thinking", "claude-opus-4-6"],
240
+ routing: {},
241
+ thinking: { mode: "budget", efforts: [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High] },
242
+ },
225
243
  thinkingPair("claude-sonnet-4-5", "Claude Sonnet 4.5"),
226
244
  thinkingPair("claude-opus-4-5", "Claude Opus 4.5"),
227
245
  thinkingPair("gemini-2.5-flash", "Gemini 2.5 Flash"),
@@ -43,15 +43,18 @@ export let getAntigravityUserAgent = () => {
43
43
  /**
44
44
  * Per-wire-id Antigravity Cloud Code Assist request constants, captured from the
45
45
  * real `antigravity/hub` client against `daily-cloudcode-pa`. `modelEnum` is the
46
- * opaque `labels.model_enum` token the client tags each request with;
47
- * `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens` it sends
48
- * regardless of the thinking budget. Keyed by the routed upstream wire id
49
- * (post effort-routing), not the collapsed logical id. Checkpoint-only ids
46
+ * opaque `labels.model_enum` token the client tags each request with — optional
47
+ * because Anthropic-backed wire ids (e.g. `claude-sonnet-4-6`,
48
+ * `claude-opus-4-6-thinking`) are accepted without one; the label is purely
49
+ * telemetry. `maxOutputTokens` is the fixed `generationConfig.maxOutputTokens`
50
+ * the backend enforces regardless of the thinking budget (Claude caps at
51
+ * 64000, Gemini accepts the discovered cap). Keyed by the routed upstream wire
52
+ * id (post effort-routing), not the collapsed logical id. Checkpoint-only ids
50
53
  * (e.g. `gemini-3.1-flash-lite`) are intentionally absent — this provider only
51
54
  * emits agent requests.
52
55
  */
53
56
  export interface AntigravityModelWireProfile {
54
- modelEnum: string;
57
+ modelEnum?: string;
55
58
  maxOutputTokens: number;
56
59
  }
57
60
  export const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, AntigravityModelWireProfile>> = {
@@ -60,6 +63,11 @@ export const ANTIGRAVITY_MODEL_WIRE_PROFILES: Readonly<Record<string, Antigravit
60
63
  "gemini-3-flash-agent": { modelEnum: "MODEL_PLACEHOLDER_M132", maxOutputTokens: 65536 },
61
64
  "gemini-3.1-pro-low": { modelEnum: "MODEL_PLACEHOLDER_M36", maxOutputTokens: 65535 },
62
65
  "gemini-pro-agent": { modelEnum: "MODEL_PLACEHOLDER_M16", maxOutputTokens: 65535 },
66
+ // Claude on `daily-cloudcode-pa` rejects `maxOutputTokens > 64000` with a
67
+ // 400 (`Request contains an invalid argument`). The model_enum label is
68
+ // untracked for these ids; the backend does not require it.
69
+ "claude-sonnet-4-6": { maxOutputTokens: 64000 },
70
+ "claude-opus-4-6-thinking": { maxOutputTokens: 64000 },
63
71
  };
64
72
  export function getAntigravityModelWireProfile(wireModelId: string): AntigravityModelWireProfile | undefined {
65
73
  return ANTIGRAVITY_MODEL_WIRE_PROFILES[wireModelId];