@oh-my-pi/pi-ai 11.6.1 → 11.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "11.6.1",
3
+ "version": "11.7.1",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -63,7 +63,7 @@
63
63
  "@connectrpc/connect-node": "^2.1.1",
64
64
  "@google/genai": "^1.39.0",
65
65
  "@mistralai/mistralai": "^1.13.0",
66
- "@oh-my-pi/pi-utils": "11.6.1",
66
+ "@oh-my-pi/pi-utils": "11.7.1",
67
67
  "@sinclair/typebox": "^0.34.48",
68
68
  "@smithy/node-http-handler": "^4.4.9",
69
69
  "ajv": "^8.17.1",
@@ -107,8 +107,8 @@ export const MODELS = {
107
107
  contextWindow: 200000,
108
108
  maxTokens: 4096,
109
109
  } satisfies Model<"bedrock-converse-stream">,
110
- "anthropic.claude-opus-4-6-v1:0": {
111
- id: "anthropic.claude-opus-4-6-v1:0",
110
+ "anthropic.claude-opus-4-6-v1": {
111
+ id: "anthropic.claude-opus-4-6-v1",
112
112
  name: "Claude Opus 4.6",
113
113
  api: "bedrock-converse-stream",
114
114
  provider: "amazon-bedrock",
@@ -121,7 +121,7 @@ export const MODELS = {
121
121
  cacheRead: 0.5,
122
122
  cacheWrite: 6.25,
123
123
  },
124
- contextWindow: 200000,
124
+ contextWindow: 1000000,
125
125
  maxTokens: 128000,
126
126
  } satisfies Model<"bedrock-converse-stream">,
127
127
  "cohere.command-r-plus-v1:0": {
@@ -209,8 +209,8 @@ export const MODELS = {
209
209
  contextWindow: 200000,
210
210
  maxTokens: 64000,
211
211
  } satisfies Model<"bedrock-converse-stream">,
212
- "eu.anthropic.claude-opus-4-6-v1:0": {
213
- id: "eu.anthropic.claude-opus-4-6-v1:0",
212
+ "eu.anthropic.claude-opus-4-6-v1": {
213
+ id: "eu.anthropic.claude-opus-4-6-v1",
214
214
  name: "Claude Opus 4.6 (EU)",
215
215
  api: "bedrock-converse-stream",
216
216
  provider: "amazon-bedrock",
@@ -220,10 +220,10 @@ export const MODELS = {
220
220
  cost: {
221
221
  input: 5,
222
222
  output: 25,
223
- cacheRead: 1.5,
224
- cacheWrite: 18.75,
223
+ cacheRead: 0.5,
224
+ cacheWrite: 6.25,
225
225
  },
226
- contextWindow: 200000,
226
+ contextWindow: 1000000,
227
227
  maxTokens: 128000,
228
228
  } satisfies Model<"bedrock-converse-stream">,
229
229
  "eu.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -311,8 +311,8 @@ export const MODELS = {
311
311
  contextWindow: 200000,
312
312
  maxTokens: 64000,
313
313
  } satisfies Model<"bedrock-converse-stream">,
314
- "global.anthropic.claude-opus-4-6-v1:0": {
315
- id: "global.anthropic.claude-opus-4-6-v1:0",
314
+ "global.anthropic.claude-opus-4-6-v1": {
315
+ id: "global.anthropic.claude-opus-4-6-v1",
316
316
  name: "Claude Opus 4.6 (Global)",
317
317
  api: "bedrock-converse-stream",
318
318
  provider: "amazon-bedrock",
@@ -325,7 +325,7 @@ export const MODELS = {
325
325
  cacheRead: 0.5,
326
326
  cacheWrite: 6.25,
327
327
  },
328
- contextWindow: 200000,
328
+ contextWindow: 1000000,
329
329
  maxTokens: 128000,
330
330
  } satisfies Model<"bedrock-converse-stream">,
331
331
  "global.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -900,14 +900,14 @@ export const MODELS = {
900
900
  cost: {
901
901
  input: 5,
902
902
  output: 25,
903
- cacheRead: 1.5,
904
- cacheWrite: 18.75,
903
+ cacheRead: 0.5,
904
+ cacheWrite: 6.25,
905
905
  },
906
906
  contextWindow: 200000,
907
907
  maxTokens: 64000,
908
908
  } satisfies Model<"bedrock-converse-stream">,
909
- "us.anthropic.claude-opus-4-6-v1:0": {
910
- id: "us.anthropic.claude-opus-4-6-v1:0",
909
+ "us.anthropic.claude-opus-4-6-v1": {
910
+ id: "us.anthropic.claude-opus-4-6-v1",
911
911
  name: "Claude Opus 4.6 (US)",
912
912
  api: "bedrock-converse-stream",
913
913
  provider: "amazon-bedrock",
@@ -917,10 +917,10 @@ export const MODELS = {
917
917
  cost: {
918
918
  input: 5,
919
919
  output: 25,
920
- cacheRead: 1.5,
921
- cacheWrite: 18.75,
920
+ cacheRead: 0.5,
921
+ cacheWrite: 6.25,
922
922
  },
923
- contextWindow: 200000,
923
+ contextWindow: 1000000,
924
924
  maxTokens: 128000,
925
925
  } satisfies Model<"bedrock-converse-stream">,
926
926
  "us.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -1398,23 +1398,6 @@ export const MODELS = {
1398
1398
  cacheRead: 0.5,
1399
1399
  cacheWrite: 6.25,
1400
1400
  },
1401
- contextWindow: 1000000,
1402
- maxTokens: 128000,
1403
- } satisfies Model<"anthropic-messages">,
1404
- "claude-opus-4-6-20260205": {
1405
- id: "claude-opus-4-6-20260205",
1406
- name: "Claude Opus 4.6",
1407
- api: "anthropic-messages",
1408
- provider: "anthropic",
1409
- baseUrl: "https://api.anthropic.com",
1410
- reasoning: true,
1411
- input: ["text", "image"],
1412
- cost: {
1413
- input: 5,
1414
- output: 25,
1415
- cacheRead: 0.5,
1416
- cacheWrite: 6.25,
1417
- },
1418
1401
  contextWindow: 200000,
1419
1402
  maxTokens: 128000,
1420
1403
  } satisfies Model<"anthropic-messages">,
@@ -1819,7 +1802,7 @@ export const MODELS = {
1819
1802
  cacheWrite: 0,
1820
1803
  },
1821
1804
  contextWindow: 128000,
1822
- maxTokens: 16000,
1805
+ maxTokens: 64000,
1823
1806
  } satisfies Model<"openai-completions">,
1824
1807
  "claude-sonnet-4": {
1825
1808
  id: "claude-sonnet-4",
@@ -2480,17 +2463,34 @@ export const MODELS = {
2480
2463
  "google-antigravity": {
2481
2464
  "claude-opus-4-5-thinking": {
2482
2465
  id: "claude-opus-4-5-thinking",
2483
- name: "Claude Opus 4.5 Thinking (Antigravity)",
2466
+ name: "Claude Opus 4.5 (Thinking) (Antigravity)",
2484
2467
  api: "google-gemini-cli",
2485
2468
  provider: "google-antigravity",
2486
2469
  baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2487
2470
  reasoning: true,
2488
2471
  input: ["text", "image"],
2489
2472
  cost: {
2490
- input: 5,
2491
- output: 25,
2492
- cacheRead: 0.5,
2493
- cacheWrite: 6.25,
2473
+ input: 0,
2474
+ output: 0,
2475
+ cacheRead: 0,
2476
+ cacheWrite: 0,
2477
+ },
2478
+ contextWindow: 200000,
2479
+ maxTokens: 64000,
2480
+ } satisfies Model<"google-gemini-cli">,
2481
+ "claude-opus-4-6-thinking": {
2482
+ id: "claude-opus-4-6-thinking",
2483
+ name: "Claude Opus 4.6 (Thinking) (Antigravity)",
2484
+ api: "google-gemini-cli",
2485
+ provider: "google-antigravity",
2486
+ baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2487
+ reasoning: true,
2488
+ input: ["text", "image"],
2489
+ cost: {
2490
+ input: 0,
2491
+ output: 0,
2492
+ cacheRead: 0,
2493
+ cacheWrite: 0,
2494
2494
  },
2495
2495
  contextWindow: 200000,
2496
2496
  maxTokens: 64000,
@@ -2504,31 +2504,82 @@ export const MODELS = {
2504
2504
  reasoning: false,
2505
2505
  input: ["text", "image"],
2506
2506
  cost: {
2507
- input: 3,
2508
- output: 15,
2509
- cacheRead: 0.3,
2510
- cacheWrite: 3.75,
2507
+ input: 0,
2508
+ output: 0,
2509
+ cacheRead: 0,
2510
+ cacheWrite: 0,
2511
2511
  },
2512
2512
  contextWindow: 200000,
2513
2513
  maxTokens: 64000,
2514
2514
  } satisfies Model<"google-gemini-cli">,
2515
2515
  "claude-sonnet-4-5-thinking": {
2516
2516
  id: "claude-sonnet-4-5-thinking",
2517
- name: "Claude Sonnet 4.5 Thinking (Antigravity)",
2517
+ name: "Claude Sonnet 4.5 (Thinking) (Antigravity)",
2518
2518
  api: "google-gemini-cli",
2519
2519
  provider: "google-antigravity",
2520
2520
  baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2521
2521
  reasoning: true,
2522
2522
  input: ["text", "image"],
2523
2523
  cost: {
2524
- input: 3,
2525
- output: 15,
2526
- cacheRead: 0.3,
2527
- cacheWrite: 3.75,
2524
+ input: 0,
2525
+ output: 0,
2526
+ cacheRead: 0,
2527
+ cacheWrite: 0,
2528
2528
  },
2529
2529
  contextWindow: 200000,
2530
2530
  maxTokens: 64000,
2531
2531
  } satisfies Model<"google-gemini-cli">,
2532
+ "gemini-2.5-flash": {
2533
+ id: "gemini-2.5-flash",
2534
+ name: "Gemini 2.5 Flash (Antigravity)",
2535
+ api: "google-gemini-cli",
2536
+ provider: "google-antigravity",
2537
+ baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2538
+ reasoning: true,
2539
+ input: ["text", "image"],
2540
+ cost: {
2541
+ input: 0,
2542
+ output: 0,
2543
+ cacheRead: 0,
2544
+ cacheWrite: 0,
2545
+ },
2546
+ contextWindow: 1048576,
2547
+ maxTokens: 65535,
2548
+ } satisfies Model<"google-gemini-cli">,
2549
+ "gemini-2.5-flash-thinking": {
2550
+ id: "gemini-2.5-flash-thinking",
2551
+ name: "Gemini 2.5 Flash (Thinking) (Antigravity)",
2552
+ api: "google-gemini-cli",
2553
+ provider: "google-antigravity",
2554
+ baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2555
+ reasoning: true,
2556
+ input: ["text", "image"],
2557
+ cost: {
2558
+ input: 0,
2559
+ output: 0,
2560
+ cacheRead: 0,
2561
+ cacheWrite: 0,
2562
+ },
2563
+ contextWindow: 1048576,
2564
+ maxTokens: 65535,
2565
+ } satisfies Model<"google-gemini-cli">,
2566
+ "gemini-2.5-pro": {
2567
+ id: "gemini-2.5-pro",
2568
+ name: "Gemini 2.5 Pro (Antigravity)",
2569
+ api: "google-gemini-cli",
2570
+ provider: "google-antigravity",
2571
+ baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2572
+ reasoning: true,
2573
+ input: ["text", "image"],
2574
+ cost: {
2575
+ input: 0,
2576
+ output: 0,
2577
+ cacheRead: 0,
2578
+ cacheWrite: 0,
2579
+ },
2580
+ contextWindow: 1048576,
2581
+ maxTokens: 65535,
2582
+ } satisfies Model<"google-gemini-cli">,
2532
2583
  "gemini-3-flash": {
2533
2584
  id: "gemini-3-flash",
2534
2585
  name: "Gemini 3 Flash (Antigravity)",
@@ -2538,59 +2589,59 @@ export const MODELS = {
2538
2589
  reasoning: true,
2539
2590
  input: ["text", "image"],
2540
2591
  cost: {
2541
- input: 0.5,
2542
- output: 3,
2543
- cacheRead: 0.5,
2592
+ input: 0,
2593
+ output: 0,
2594
+ cacheRead: 0,
2544
2595
  cacheWrite: 0,
2545
2596
  },
2546
2597
  contextWindow: 1048576,
2547
- maxTokens: 65535,
2598
+ maxTokens: 65536,
2548
2599
  } satisfies Model<"google-gemini-cli">,
2549
2600
  "gemini-3-pro-high": {
2550
2601
  id: "gemini-3-pro-high",
2551
- name: "Gemini 3 Pro High (Antigravity)",
2602
+ name: "Gemini 3 Pro (High) (Antigravity)",
2552
2603
  api: "google-gemini-cli",
2553
2604
  provider: "google-antigravity",
2554
2605
  baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2555
2606
  reasoning: true,
2556
2607
  input: ["text", "image"],
2557
2608
  cost: {
2558
- input: 2,
2559
- output: 12,
2560
- cacheRead: 0.2,
2561
- cacheWrite: 2.375,
2609
+ input: 0,
2610
+ output: 0,
2611
+ cacheRead: 0,
2612
+ cacheWrite: 0,
2562
2613
  },
2563
2614
  contextWindow: 1048576,
2564
2615
  maxTokens: 65535,
2565
2616
  } satisfies Model<"google-gemini-cli">,
2566
2617
  "gemini-3-pro-low": {
2567
2618
  id: "gemini-3-pro-low",
2568
- name: "Gemini 3 Pro Low (Antigravity)",
2619
+ name: "Gemini 3 Pro (Low) (Antigravity)",
2569
2620
  api: "google-gemini-cli",
2570
2621
  provider: "google-antigravity",
2571
2622
  baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2572
2623
  reasoning: true,
2573
2624
  input: ["text", "image"],
2574
2625
  cost: {
2575
- input: 2,
2576
- output: 12,
2577
- cacheRead: 0.2,
2578
- cacheWrite: 2.375,
2626
+ input: 0,
2627
+ output: 0,
2628
+ cacheRead: 0,
2629
+ cacheWrite: 0,
2579
2630
  },
2580
2631
  contextWindow: 1048576,
2581
2632
  maxTokens: 65535,
2582
2633
  } satisfies Model<"google-gemini-cli">,
2583
2634
  "gpt-oss-120b-medium": {
2584
2635
  id: "gpt-oss-120b-medium",
2585
- name: "GPT-OSS 120B Medium (Antigravity)",
2636
+ name: "GPT-OSS 120B (Medium) (Antigravity)",
2586
2637
  api: "google-gemini-cli",
2587
2638
  provider: "google-antigravity",
2588
2639
  baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
2589
- reasoning: false,
2640
+ reasoning: true,
2590
2641
  input: ["text"],
2591
2642
  cost: {
2592
- input: 0.09,
2593
- output: 0.36,
2643
+ input: 0,
2644
+ output: 0,
2594
2645
  cacheRead: 0,
2595
2646
  cacheWrite: 0,
2596
2647
  },
@@ -4909,8 +4960,8 @@ export const MODELS = {
4909
4960
  input: ["text"],
4910
4961
  cost: {
4911
4962
  input: 0.09,
4912
- output: 0.39999999999999997,
4913
- cacheRead: 0,
4963
+ output: 0.44999999999999996,
4964
+ cacheRead: 0.09,
4914
4965
  cacheWrite: 0,
4915
4966
  },
4916
4967
  contextWindow: 131072,
@@ -5408,7 +5459,7 @@ export const MODELS = {
5408
5459
  cost: {
5409
5460
  input: 0.3,
5410
5461
  output: 1.2,
5411
- cacheRead: 0,
5462
+ cacheRead: 0.15,
5412
5463
  cacheWrite: 0,
5413
5464
  },
5414
5465
  contextWindow: 163840,
@@ -5425,7 +5476,7 @@ export const MODELS = {
5425
5476
  cost: {
5426
5477
  input: 0.19,
5427
5478
  output: 0.87,
5428
- cacheRead: 0,
5479
+ cacheRead: 0.095,
5429
5480
  cacheWrite: 0,
5430
5481
  },
5431
5482
  contextWindow: 163840,
@@ -5476,7 +5527,7 @@ export const MODELS = {
5476
5527
  cost: {
5477
5528
  input: 0.39999999999999997,
5478
5529
  output: 1.75,
5479
- cacheRead: 0,
5530
+ cacheRead: 0.19999999999999998,
5480
5531
  cacheWrite: 0,
5481
5532
  },
5482
5533
  contextWindow: 163840,
@@ -5527,7 +5578,7 @@ export const MODELS = {
5527
5578
  cost: {
5528
5579
  input: 0.25,
5529
5580
  output: 0.38,
5530
- cacheRead: 0,
5581
+ cacheRead: 0.125,
5531
5582
  cacheWrite: 0,
5532
5583
  },
5533
5584
  contextWindow: 163840,
@@ -5748,7 +5799,7 @@ export const MODELS = {
5748
5799
  cost: {
5749
5800
  input: 0.04,
5750
5801
  output: 0.15,
5751
- cacheRead: 0,
5802
+ cacheRead: 0.02,
5752
5803
  cacheWrite: 0,
5753
5804
  },
5754
5805
  contextWindow: 128000,
@@ -5848,12 +5899,12 @@ export const MODELS = {
5848
5899
  reasoning: false,
5849
5900
  input: ["text"],
5850
5901
  cost: {
5851
- input: 3.5,
5852
- output: 3.5,
5902
+ input: 4,
5903
+ output: 4,
5853
5904
  cacheRead: 0,
5854
5905
  cacheWrite: 0,
5855
5906
  },
5856
- contextWindow: 10000,
5907
+ contextWindow: 131000,
5857
5908
  maxTokens: 4096,
5858
5909
  } satisfies Model<"openai-completions">,
5859
5910
  "meta-llama/llama-3.1-70b-instruct": {
@@ -6037,7 +6088,7 @@ export const MODELS = {
6037
6088
  cost: {
6038
6089
  input: 0.049999999999999996,
6039
6090
  output: 0.22,
6040
- cacheRead: 0,
6091
+ cacheRead: 0.024999999999999998,
6041
6092
  cacheWrite: 0,
6042
6093
  },
6043
6094
  contextWindow: 262144,
@@ -6307,13 +6358,13 @@ export const MODELS = {
6307
6358
  reasoning: false,
6308
6359
  input: ["text"],
6309
6360
  cost: {
6310
- input: 0.03,
6311
- output: 0.11,
6361
+ input: 0.049999999999999996,
6362
+ output: 0.08,
6312
6363
  cacheRead: 0,
6313
6364
  cacheWrite: 0,
6314
6365
  },
6315
6366
  contextWindow: 32768,
6316
- maxTokens: 32768,
6367
+ maxTokens: 16384,
6317
6368
  } satisfies Model<"openai-completions">,
6318
6369
  "mistralai/mistral-small-3.1-24b-instruct": {
6319
6370
  id: "mistralai/mistral-small-3.1-24b-instruct",
@@ -6326,7 +6377,7 @@ export const MODELS = {
6326
6377
  cost: {
6327
6378
  input: 0.03,
6328
6379
  output: 0.11,
6329
- cacheRead: 0,
6380
+ cacheRead: 0.015,
6330
6381
  cacheWrite: 0,
6331
6382
  },
6332
6383
  contextWindow: 131072,
@@ -6360,7 +6411,7 @@ export const MODELS = {
6360
6411
  cost: {
6361
6412
  input: 0.06,
6362
6413
  output: 0.18,
6363
- cacheRead: 0,
6414
+ cacheRead: 0.03,
6364
6415
  cacheWrite: 0,
6365
6416
  },
6366
6417
  contextWindow: 131072,
@@ -6513,7 +6564,7 @@ export const MODELS = {
6513
6564
  cost: {
6514
6565
  input: 0.39,
6515
6566
  output: 1.9,
6516
- cacheRead: 0,
6567
+ cacheRead: 0.195,
6517
6568
  cacheWrite: 0,
6518
6569
  },
6519
6570
  contextWindow: 262144,
@@ -6547,7 +6598,7 @@ export const MODELS = {
6547
6598
  cost: {
6548
6599
  input: 0.39999999999999997,
6549
6600
  output: 1.75,
6550
- cacheRead: 0,
6601
+ cacheRead: 0.19999999999999998,
6551
6602
  cacheWrite: 0,
6552
6603
  },
6553
6604
  contextWindow: 262144,
@@ -6563,12 +6614,12 @@ export const MODELS = {
6563
6614
  input: ["text", "image"],
6564
6615
  cost: {
6565
6616
  input: 0.44999999999999996,
6566
- output: 2.5,
6567
- cacheRead: 0,
6617
+ output: 2.25,
6618
+ cacheRead: 0.075,
6568
6619
  cacheWrite: 0,
6569
6620
  },
6570
6621
  contextWindow: 262144,
6571
- maxTokens: 65535,
6622
+ maxTokens: 4096,
6572
6623
  } satisfies Model<"openai-completions">,
6573
6624
  "nex-agi/deepseek-v3.1-nex-n1": {
6574
6625
  id: "nex-agi/deepseek-v3.1-nex-n1",
@@ -6598,7 +6649,7 @@ export const MODELS = {
6598
6649
  cost: {
6599
6650
  input: 0.02,
6600
6651
  output: 0.09999999999999999,
6601
- cacheRead: 0,
6652
+ cacheRead: 0.01,
6602
6653
  cacheWrite: 0,
6603
6654
  },
6604
6655
  contextWindow: 32768,
@@ -6615,7 +6666,7 @@ export const MODELS = {
6615
6666
  cost: {
6616
6667
  input: 0.11,
6617
6668
  output: 0.38,
6618
- cacheRead: 0,
6669
+ cacheRead: 0.055,
6619
6670
  cacheWrite: 0,
6620
6671
  },
6621
6672
  contextWindow: 131072,
@@ -7395,13 +7446,13 @@ export const MODELS = {
7395
7446
  reasoning: true,
7396
7447
  input: ["text"],
7397
7448
  cost: {
7398
- input: 0.02,
7399
- output: 0.09999999999999999,
7449
+ input: 0.03,
7450
+ output: 0.14,
7400
7451
  cacheRead: 0,
7401
7452
  cacheWrite: 0,
7402
7453
  },
7403
7454
  contextWindow: 131072,
7404
- maxTokens: 131072,
7455
+ maxTokens: 4096,
7405
7456
  } satisfies Model<"openai-completions">,
7406
7457
  "openai/gpt-oss-20b:free": {
7407
7458
  id: "openai/gpt-oss-20b:free",
@@ -7624,6 +7675,24 @@ export const MODELS = {
7624
7675
  contextWindow: 200000,
7625
7676
  maxTokens: 4096,
7626
7677
  } satisfies Model<"openai-completions">,
7678
+ "openrouter/pony-alpha": {
7679
+ id: "openrouter/pony-alpha",
7680
+ name: "Pony Alpha",
7681
+ api: "openai-completions",
7682
+ provider: "openrouter",
7683
+ baseUrl: "https://openrouter.ai/api/v1",
7684
+ compat: {"supportsToolChoice":false},
7685
+ reasoning: true,
7686
+ input: ["text"],
7687
+ cost: {
7688
+ input: 0,
7689
+ output: 0,
7690
+ cacheRead: 0,
7691
+ cacheWrite: 0,
7692
+ },
7693
+ contextWindow: 200000,
7694
+ maxTokens: 131000,
7695
+ } satisfies Model<"openai-completions">,
7627
7696
  "prime-intellect/intellect-3": {
7628
7697
  id: "prime-intellect/intellect-3",
7629
7698
  name: "Prime Intellect: INTELLECT-3",
@@ -7788,7 +7857,7 @@ export const MODELS = {
7788
7857
  cost: {
7789
7858
  input: 0.049999999999999996,
7790
7859
  output: 0.22,
7791
- cacheRead: 0,
7860
+ cacheRead: 0.024999999999999998,
7792
7861
  cacheWrite: 0,
7793
7862
  },
7794
7863
  contextWindow: 40960,
@@ -7839,7 +7908,7 @@ export const MODELS = {
7839
7908
  cost: {
7840
7909
  input: 0.11,
7841
7910
  output: 0.6,
7842
- cacheRead: 0,
7911
+ cacheRead: 0.055,
7843
7912
  cacheWrite: 0,
7844
7913
  },
7845
7914
  contextWindow: 262144,
@@ -7856,7 +7925,7 @@ export const MODELS = {
7856
7925
  cost: {
7857
7926
  input: 0.06,
7858
7927
  output: 0.22,
7859
- cacheRead: 0,
7928
+ cacheRead: 0.03,
7860
7929
  cacheWrite: 0,
7861
7930
  },
7862
7931
  contextWindow: 40960,
@@ -7873,7 +7942,7 @@ export const MODELS = {
7873
7942
  cost: {
7874
7943
  input: 0.08,
7875
7944
  output: 0.33,
7876
- cacheRead: 0,
7945
+ cacheRead: 0.04,
7877
7946
  cacheWrite: 0,
7878
7947
  },
7879
7948
  contextWindow: 262144,
@@ -7907,7 +7976,7 @@ export const MODELS = {
7907
7976
  cost: {
7908
7977
  input: 0.08,
7909
7978
  output: 0.24,
7910
- cacheRead: 0,
7979
+ cacheRead: 0.04,
7911
7980
  cacheWrite: 0,
7912
7981
  },
7913
7982
  contextWindow: 40960,
@@ -7957,12 +8026,12 @@ export const MODELS = {
7957
8026
  input: ["text"],
7958
8027
  cost: {
7959
8028
  input: 0.22,
7960
- output: 0.95,
7961
- cacheRead: 0,
8029
+ output: 1,
8030
+ cacheRead: 0.022,
7962
8031
  cacheWrite: 0,
7963
8032
  },
7964
8033
  contextWindow: 262144,
7965
- maxTokens: 262144,
8034
+ maxTokens: 4096,
7966
8035
  } satisfies Model<"openai-completions">,
7967
8036
  "qwen/qwen3-coder-30b-a3b-instruct": {
7968
8037
  id: "qwen/qwen3-coder-30b-a3b-instruct",
@@ -8009,7 +8078,7 @@ export const MODELS = {
8009
8078
  cost: {
8010
8079
  input: 0.07,
8011
8080
  output: 0.3,
8012
- cacheRead: 0,
8081
+ cacheRead: 0.035,
8013
8082
  cacheWrite: 0,
8014
8083
  },
8015
8084
  contextWindow: 262144,
@@ -8038,7 +8107,7 @@ export const MODELS = {
8038
8107
  api: "openai-completions",
8039
8108
  provider: "openrouter",
8040
8109
  baseUrl: "https://openrouter.ai/api/v1",
8041
- reasoning: true,
8110
+ reasoning: false,
8042
8111
  input: ["text"],
8043
8112
  cost: {
8044
8113
  input: 0.22,
@@ -8384,7 +8453,7 @@ export const MODELS = {
8384
8453
  cost: {
8385
8454
  input: 0.25,
8386
8455
  output: 0.85,
8387
- cacheRead: 0,
8456
+ cacheRead: 0.125,
8388
8457
  cacheWrite: 0,
8389
8458
  },
8390
8459
  contextWindow: 163840,
@@ -8401,7 +8470,7 @@ export const MODELS = {
8401
8470
  cost: {
8402
8471
  input: 0.25,
8403
8472
  output: 0.85,
8404
- cacheRead: 0,
8473
+ cacheRead: 0.125,
8405
8474
  cacheWrite: 0,
8406
8475
  },
8407
8476
  contextWindow: 163840,
@@ -8588,7 +8657,7 @@ export const MODELS = {
8588
8657
  cost: {
8589
8658
  input: 0.09,
8590
8659
  output: 0.29,
8591
- cacheRead: 0,
8660
+ cacheRead: 0.045,
8592
8661
  cacheWrite: 0,
8593
8662
  },
8594
8663
  contextWindow: 262144,
@@ -8622,7 +8691,7 @@ export const MODELS = {
8622
8691
  cost: {
8623
8692
  input: 0.35,
8624
8693
  output: 1.55,
8625
- cacheRead: 0,
8694
+ cacheRead: 0.175,
8626
8695
  cacheWrite: 0,
8627
8696
  },
8628
8697
  contextWindow: 131072,
@@ -8637,13 +8706,13 @@ export const MODELS = {
8637
8706
  reasoning: true,
8638
8707
  input: ["text"],
8639
8708
  cost: {
8640
- input: 0.049999999999999996,
8641
- output: 0.22,
8642
- cacheRead: 0,
8709
+ input: 0.13,
8710
+ output: 0.85,
8711
+ cacheRead: 0.024999999999999998,
8643
8712
  cacheWrite: 0,
8644
8713
  },
8645
8714
  contextWindow: 131072,
8646
- maxTokens: 131072,
8715
+ maxTokens: 98304,
8647
8716
  } satisfies Model<"openai-completions">,
8648
8717
  "z-ai/glm-4.5-air:free": {
8649
8718
  id: "z-ai/glm-4.5-air:free",
@@ -8690,7 +8759,7 @@ export const MODELS = {
8690
8759
  cost: {
8691
8760
  input: 0.35,
8692
8761
  output: 1.5,
8693
- cacheRead: 0,
8762
+ cacheRead: 0.175,
8694
8763
  cacheWrite: 0,
8695
8764
  },
8696
8765
  contextWindow: 202752,
@@ -8741,7 +8810,7 @@ export const MODELS = {
8741
8810
  cost: {
8742
8811
  input: 0.39999999999999997,
8743
8812
  output: 1.5,
8744
- cacheRead: 0,
8813
+ cacheRead: 0.19999999999999998,
8745
8814
  cacheWrite: 0,
8746
8815
  },
8747
8816
  contextWindow: 202752,
@@ -8756,13 +8825,13 @@ export const MODELS = {
8756
8825
  reasoning: true,
8757
8826
  input: ["text"],
8758
8827
  cost: {
8759
- input: 0.07,
8828
+ input: 0.06,
8760
8829
  output: 0.39999999999999997,
8761
- cacheRead: 0.01,
8830
+ cacheRead: 0.0100000002,
8762
8831
  cacheWrite: 0,
8763
8832
  },
8764
- contextWindow: 200000,
8765
- maxTokens: 131072,
8833
+ contextWindow: 202752,
8834
+ maxTokens: 4096,
8766
8835
  } satisfies Model<"openai-completions">,
8767
8836
  },
8768
8837
  "vercel-ai-gateway": {
@@ -8860,8 +8929,8 @@ export const MODELS = {
8860
8929
  reasoning: false,
8861
8930
  input: ["text"],
8862
8931
  cost: {
8863
- input: 0.38,
8864
- output: 1.53,
8932
+ input: 0.39999999999999997,
8933
+ output: 1.5999999999999999,
8865
8934
  cacheRead: 0,
8866
8935
  cacheWrite: 0,
8867
8936
  },
@@ -9625,13 +9694,13 @@ export const MODELS = {
9625
9694
  reasoning: true,
9626
9695
  input: ["text"],
9627
9696
  cost: {
9628
- input: 0.28,
9697
+ input: 0.3,
9629
9698
  output: 1.2,
9630
- cacheRead: 0.14,
9699
+ cacheRead: 0.15,
9631
9700
  cacheWrite: 0,
9632
9701
  },
9633
- contextWindow: 196608,
9634
- maxTokens: 196608,
9702
+ contextWindow: 204800,
9703
+ maxTokens: 131072,
9635
9704
  } satisfies Model<"anthropic-messages">,
9636
9705
  "minimax/minimax-m2.1-lightning": {
9637
9706
  id: "minimax/minimax-m2.1-lightning",