@fleetagent/pi-ai 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ export const MODELS = {
8
8
  api: "bedrock-converse-stream",
9
9
  provider: "amazon-bedrock",
10
10
  baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
11
- reasoning: false,
11
+ reasoning: true,
12
12
  input: ["text", "image"],
13
13
  cost: {
14
14
  input: 0.33,
@@ -345,6 +345,23 @@ export const MODELS = {
345
345
  contextWindow: 163840,
346
346
  maxTokens: 81920,
347
347
  },
348
+ "eu.anthropic.claude-fable-5": {
349
+ id: "eu.anthropic.claude-fable-5",
350
+ name: "Claude Fable 5 (EU)",
351
+ api: "bedrock-converse-stream",
352
+ provider: "amazon-bedrock",
353
+ baseUrl: "https://bedrock-runtime.eu-central-1.amazonaws.com",
354
+ reasoning: true,
355
+ input: ["text", "image"],
356
+ cost: {
357
+ input: 11,
358
+ output: 55,
359
+ cacheRead: 1.1,
360
+ cacheWrite: 13.75,
361
+ },
362
+ contextWindow: 1000000,
363
+ maxTokens: 128000,
364
+ },
348
365
  "eu.anthropic.claude-haiku-4-5-20251001-v1:0": {
349
366
  id: "eu.anthropic.claude-haiku-4-5-20251001-v1:0",
350
367
  name: "Claude Haiku 4.5 (EU)",
@@ -466,6 +483,23 @@ export const MODELS = {
466
483
  contextWindow: 1000000,
467
484
  maxTokens: 64000,
468
485
  },
486
+ "global.anthropic.claude-fable-5": {
487
+ id: "global.anthropic.claude-fable-5",
488
+ name: "Claude Fable 5 (Global)",
489
+ api: "bedrock-converse-stream",
490
+ provider: "amazon-bedrock",
491
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
492
+ reasoning: true,
493
+ input: ["text", "image"],
494
+ cost: {
495
+ input: 10,
496
+ output: 50,
497
+ cacheRead: 1,
498
+ cacheWrite: 12.5,
499
+ },
500
+ contextWindow: 1000000,
501
+ maxTokens: 128000,
502
+ },
469
503
  "global.anthropic.claude-haiku-4-5-20251001-v1:0": {
470
504
  id: "global.anthropic.claude-haiku-4-5-20251001-v1:0",
471
505
  name: "Claude Haiku 4.5 (Global)",
@@ -1123,7 +1157,7 @@ export const MODELS = {
1123
1157
  api: "bedrock-converse-stream",
1124
1158
  provider: "amazon-bedrock",
1125
1159
  baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1126
- reasoning: false,
1160
+ reasoning: true,
1127
1161
  input: ["text"],
1128
1162
  cost: {
1129
1163
  input: 0.15,
@@ -1140,7 +1174,7 @@ export const MODELS = {
1140
1174
  api: "bedrock-converse-stream",
1141
1175
  provider: "amazon-bedrock",
1142
1176
  baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1143
- reasoning: false,
1177
+ reasoning: true,
1144
1178
  input: ["text"],
1145
1179
  cost: {
1146
1180
  input: 0.15,
@@ -1157,7 +1191,7 @@ export const MODELS = {
1157
1191
  api: "bedrock-converse-stream",
1158
1192
  provider: "amazon-bedrock",
1159
1193
  baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1160
- reasoning: false,
1194
+ reasoning: true,
1161
1195
  input: ["text"],
1162
1196
  cost: {
1163
1197
  input: 0.07,
@@ -1174,7 +1208,7 @@ export const MODELS = {
1174
1208
  api: "bedrock-converse-stream",
1175
1209
  provider: "amazon-bedrock",
1176
1210
  baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1177
- reasoning: false,
1211
+ reasoning: true,
1178
1212
  input: ["text"],
1179
1213
  cost: {
1180
1214
  input: 0.07,
@@ -1338,6 +1372,23 @@ export const MODELS = {
1338
1372
  contextWindow: 262000,
1339
1373
  maxTokens: 262000,
1340
1374
  },
1375
+ "us.anthropic.claude-fable-5": {
1376
+ id: "us.anthropic.claude-fable-5",
1377
+ name: "Claude Fable 5 (US)",
1378
+ api: "bedrock-converse-stream",
1379
+ provider: "amazon-bedrock",
1380
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1381
+ reasoning: true,
1382
+ input: ["text", "image"],
1383
+ cost: {
1384
+ input: 10,
1385
+ output: 50,
1386
+ cacheRead: 1,
1387
+ cacheWrite: 12.5,
1388
+ },
1389
+ contextWindow: 1000000,
1390
+ maxTokens: 128000,
1391
+ },
1341
1392
  "us.anthropic.claude-haiku-4-5-20251001-v1:0": {
1342
1393
  id: "us.anthropic.claude-haiku-4-5-20251001-v1:0",
1343
1394
  name: "Claude Haiku 4.5 (US)",
@@ -1750,6 +1801,23 @@ export const MODELS = {
1750
1801
  contextWindow: 200000,
1751
1802
  maxTokens: 4096,
1752
1803
  },
1804
+ "claude-fable-5": {
1805
+ id: "claude-fable-5",
1806
+ name: "Claude Fable 5",
1807
+ api: "anthropic-messages",
1808
+ provider: "anthropic",
1809
+ baseUrl: "https://api.anthropic.com",
1810
+ reasoning: true,
1811
+ input: ["text", "image"],
1812
+ cost: {
1813
+ input: 10,
1814
+ output: 50,
1815
+ cacheRead: 1,
1816
+ cacheWrite: 12.5,
1817
+ },
1818
+ contextWindow: 1000000,
1819
+ maxTokens: 128000,
1820
+ },
1753
1821
  "claude-haiku-4-5": {
1754
1822
  id: "claude-haiku-4-5",
1755
1823
  name: "Claude Haiku 4.5 (latest)",
@@ -2779,30 +2847,13 @@ export const MODELS = {
2779
2847
  reasoning: true,
2780
2848
  input: ["text"],
2781
2849
  cost: {
2782
- input: 0.25,
2783
- output: 0.69,
2850
+ input: 0.35,
2851
+ output: 0.75,
2784
2852
  cacheRead: 0,
2785
2853
  cacheWrite: 0,
2786
2854
  },
2787
2855
  contextWindow: 131072,
2788
- maxTokens: 32768,
2789
- },
2790
- "llama3.1-8b": {
2791
- id: "llama3.1-8b",
2792
- name: "Llama 3.1 8B",
2793
- api: "openai-completions",
2794
- provider: "cerebras",
2795
- baseUrl: "https://api.cerebras.ai/v1",
2796
- reasoning: false,
2797
- input: ["text"],
2798
- cost: {
2799
- input: 0.1,
2800
- output: 0.1,
2801
- cacheRead: 0,
2802
- cacheWrite: 0,
2803
- },
2804
- contextWindow: 32000,
2805
- maxTokens: 8000,
2856
+ maxTokens: 40960,
2806
2857
  },
2807
2858
  "zai-glm-4.7": {
2808
2859
  id: "zai-glm-4.7",
@@ -2810,7 +2861,7 @@ export const MODELS = {
2810
2861
  api: "openai-completions",
2811
2862
  provider: "cerebras",
2812
2863
  baseUrl: "https://api.cerebras.ai/v1",
2813
- reasoning: false,
2864
+ reasoning: true,
2814
2865
  input: ["text"],
2815
2866
  cost: {
2816
2867
  input: 2.25,
@@ -2819,7 +2870,7 @@ export const MODELS = {
2819
2870
  cacheWrite: 0,
2820
2871
  },
2821
2872
  contextWindow: 131072,
2822
- maxTokens: 40000,
2873
+ maxTokens: 40960,
2823
2874
  },
2824
2875
  },
2825
2876
  "cloudflare-ai-gateway": {
@@ -2925,6 +2976,23 @@ export const MODELS = {
2925
2976
  contextWindow: 200000,
2926
2977
  maxTokens: 8192,
2927
2978
  },
2979
+ "claude-fable-5": {
2980
+ id: "claude-fable-5",
2981
+ name: "Claude Fable 5",
2982
+ api: "anthropic-messages",
2983
+ provider: "cloudflare-ai-gateway",
2984
+ baseUrl: "https://gateway.ai.cloudflare.com/v1/{CLOUDFLARE_ACCOUNT_ID}/{CLOUDFLARE_GATEWAY_ID}/anthropic",
2985
+ reasoning: true,
2986
+ input: ["text", "image"],
2987
+ cost: {
2988
+ input: 10,
2989
+ output: 50,
2990
+ cacheRead: 1,
2991
+ cacheWrite: 12.5,
2992
+ },
2993
+ contextWindow: 1000000,
2994
+ maxTokens: 128000,
2995
+ },
2928
2996
  "claude-haiku-4-5": {
2929
2997
  id: "claude-haiku-4-5",
2930
2998
  name: "Claude Haiku 4.5 (latest)",
@@ -3930,6 +3998,25 @@ export const MODELS = {
3930
3998
  },
3931
3999
  },
3932
4000
  "github-copilot": {
4001
+ "claude-fable-5": {
4002
+ id: "claude-fable-5",
4003
+ name: "Claude Fable 5",
4004
+ api: "openai-completions",
4005
+ provider: "github-copilot",
4006
+ baseUrl: "https://api.individual.githubcopilot.com",
4007
+ headers: { "User-Agent": "GitHubCopilotChat/0.35.0", "Editor-Version": "vscode/1.107.0", "Editor-Plugin-Version": "copilot-chat/0.35.0", "Copilot-Integration-Id": "vscode-chat" },
4008
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false },
4009
+ reasoning: true,
4010
+ input: ["text", "image"],
4011
+ cost: {
4012
+ input: 10,
4013
+ output: 50,
4014
+ cacheRead: 1,
4015
+ cacheWrite: 12.5,
4016
+ },
4017
+ contextWindow: 1000000,
4018
+ maxTokens: 128000,
4019
+ },
3933
4020
  "claude-haiku-4.5": {
3934
4021
  id: "claude-haiku-4.5",
3935
4022
  name: "Claude Haiku 4.5 (latest)",
@@ -4861,354 +4948,167 @@ export const MODELS = {
4861
4948
  },
4862
4949
  },
4863
4950
  "groq": {
4864
- "deepseek-r1-distill-llama-70b": {
4865
- id: "deepseek-r1-distill-llama-70b",
4866
- name: "DeepSeek R1 Distill Llama 70B",
4951
+ "llama-3.1-8b-instant": {
4952
+ id: "llama-3.1-8b-instant",
4953
+ name: "Llama 3.1 8B",
4867
4954
  api: "openai-completions",
4868
4955
  provider: "groq",
4869
4956
  baseUrl: "https://api.groq.com/openai/v1",
4870
- reasoning: true,
4957
+ reasoning: false,
4871
4958
  input: ["text"],
4872
4959
  cost: {
4873
- input: 0.75,
4874
- output: 0.99,
4960
+ input: 0.05,
4961
+ output: 0.08,
4875
4962
  cacheRead: 0,
4876
4963
  cacheWrite: 0,
4877
4964
  },
4878
4965
  contextWindow: 131072,
4879
- maxTokens: 8192,
4966
+ maxTokens: 131072,
4880
4967
  },
4881
- "gemma2-9b-it": {
4882
- id: "gemma2-9b-it",
4883
- name: "Gemma 2 9B",
4968
+ "llama-3.3-70b-versatile": {
4969
+ id: "llama-3.3-70b-versatile",
4970
+ name: "Llama 3.3 70B",
4884
4971
  api: "openai-completions",
4885
4972
  provider: "groq",
4886
4973
  baseUrl: "https://api.groq.com/openai/v1",
4887
4974
  reasoning: false,
4888
4975
  input: ["text"],
4889
4976
  cost: {
4890
- input: 0.2,
4891
- output: 0.2,
4977
+ input: 0.59,
4978
+ output: 0.79,
4892
4979
  cacheRead: 0,
4893
4980
  cacheWrite: 0,
4894
4981
  },
4895
- contextWindow: 8192,
4896
- maxTokens: 8192,
4982
+ contextWindow: 131072,
4983
+ maxTokens: 32768,
4897
4984
  },
4898
- "groq/compound": {
4899
- id: "groq/compound",
4900
- name: "Compound",
4985
+ "meta-llama/llama-4-scout-17b-16e-instruct": {
4986
+ id: "meta-llama/llama-4-scout-17b-16e-instruct",
4987
+ name: "Llama 4 Scout 17B 16E",
4901
4988
  api: "openai-completions",
4902
4989
  provider: "groq",
4903
4990
  baseUrl: "https://api.groq.com/openai/v1",
4904
- reasoning: true,
4905
- input: ["text"],
4991
+ reasoning: false,
4992
+ input: ["text", "image"],
4906
4993
  cost: {
4907
- input: 0,
4908
- output: 0,
4994
+ input: 0.11,
4995
+ output: 0.34,
4909
4996
  cacheRead: 0,
4910
4997
  cacheWrite: 0,
4911
4998
  },
4912
4999
  contextWindow: 131072,
4913
5000
  maxTokens: 8192,
4914
5001
  },
4915
- "groq/compound-mini": {
4916
- id: "groq/compound-mini",
4917
- name: "Compound Mini",
5002
+ "openai/gpt-oss-120b": {
5003
+ id: "openai/gpt-oss-120b",
5004
+ name: "GPT OSS 120B",
4918
5005
  api: "openai-completions",
4919
5006
  provider: "groq",
4920
5007
  baseUrl: "https://api.groq.com/openai/v1",
4921
5008
  reasoning: true,
4922
5009
  input: ["text"],
4923
5010
  cost: {
4924
- input: 0,
4925
- output: 0,
4926
- cacheRead: 0,
5011
+ input: 0.15,
5012
+ output: 0.6,
5013
+ cacheRead: 0.075,
4927
5014
  cacheWrite: 0,
4928
5015
  },
4929
5016
  contextWindow: 131072,
4930
- maxTokens: 8192,
5017
+ maxTokens: 65536,
4931
5018
  },
4932
- "llama-3.1-8b-instant": {
4933
- id: "llama-3.1-8b-instant",
4934
- name: "Llama 3.1 8B Instant",
5019
+ "openai/gpt-oss-20b": {
5020
+ id: "openai/gpt-oss-20b",
5021
+ name: "GPT OSS 20B",
4935
5022
  api: "openai-completions",
4936
5023
  provider: "groq",
4937
5024
  baseUrl: "https://api.groq.com/openai/v1",
4938
- reasoning: false,
5025
+ reasoning: true,
4939
5026
  input: ["text"],
4940
5027
  cost: {
4941
- input: 0.05,
4942
- output: 0.08,
4943
- cacheRead: 0,
5028
+ input: 0.075,
5029
+ output: 0.3,
5030
+ cacheRead: 0.0375,
4944
5031
  cacheWrite: 0,
4945
5032
  },
4946
5033
  contextWindow: 131072,
4947
- maxTokens: 131072,
5034
+ maxTokens: 65536,
4948
5035
  },
4949
- "llama-3.3-70b-versatile": {
4950
- id: "llama-3.3-70b-versatile",
4951
- name: "Llama 3.3 70B Versatile",
5036
+ "openai/gpt-oss-safeguard-20b": {
5037
+ id: "openai/gpt-oss-safeguard-20b",
5038
+ name: "Safety GPT OSS 20B",
4952
5039
  api: "openai-completions",
4953
5040
  provider: "groq",
4954
5041
  baseUrl: "https://api.groq.com/openai/v1",
4955
- reasoning: false,
5042
+ reasoning: true,
4956
5043
  input: ["text"],
4957
5044
  cost: {
4958
- input: 0.59,
4959
- output: 0.79,
4960
- cacheRead: 0,
5045
+ input: 0.075,
5046
+ output: 0.3,
5047
+ cacheRead: 0.037,
4961
5048
  cacheWrite: 0,
4962
5049
  },
4963
5050
  contextWindow: 131072,
4964
- maxTokens: 32768,
5051
+ maxTokens: 65536,
4965
5052
  },
4966
- "llama3-70b-8192": {
4967
- id: "llama3-70b-8192",
4968
- name: "Llama 3 70B",
5053
+ "qwen/qwen3-32b": {
5054
+ id: "qwen/qwen3-32b",
5055
+ name: "Qwen3-32B",
4969
5056
  api: "openai-completions",
4970
5057
  provider: "groq",
4971
5058
  baseUrl: "https://api.groq.com/openai/v1",
4972
- reasoning: false,
5059
+ reasoning: true,
5060
+ thinkingLevelMap: { "minimal": null, "low": null, "medium": null, "high": "default" },
4973
5061
  input: ["text"],
4974
5062
  cost: {
4975
- input: 0.59,
4976
- output: 0.79,
5063
+ input: 0.29,
5064
+ output: 0.59,
4977
5065
  cacheRead: 0,
4978
5066
  cacheWrite: 0,
4979
5067
  },
4980
- contextWindow: 8192,
4981
- maxTokens: 8192,
5068
+ contextWindow: 131072,
5069
+ maxTokens: 40960,
4982
5070
  },
4983
- "llama3-8b-8192": {
4984
- id: "llama3-8b-8192",
4985
- name: "Llama 3 8B",
5071
+ },
5072
+ "huggingface": {
5073
+ "MiniMaxAI/MiniMax-M2.1": {
5074
+ id: "MiniMaxAI/MiniMax-M2.1",
5075
+ name: "MiniMax-M2.1",
4986
5076
  api: "openai-completions",
4987
- provider: "groq",
4988
- baseUrl: "https://api.groq.com/openai/v1",
4989
- reasoning: false,
5077
+ provider: "huggingface",
5078
+ baseUrl: "https://router.huggingface.co/v1",
5079
+ compat: { "supportsDeveloperRole": false },
5080
+ reasoning: true,
4990
5081
  input: ["text"],
4991
5082
  cost: {
4992
- input: 0.05,
4993
- output: 0.08,
5083
+ input: 0.3,
5084
+ output: 1.2,
4994
5085
  cacheRead: 0,
4995
5086
  cacheWrite: 0,
4996
5087
  },
4997
- contextWindow: 8192,
4998
- maxTokens: 8192,
5088
+ contextWindow: 204800,
5089
+ maxTokens: 131072,
4999
5090
  },
5000
- "meta-llama/llama-4-maverick-17b-128e-instruct": {
5001
- id: "meta-llama/llama-4-maverick-17b-128e-instruct",
5002
- name: "Llama 4 Maverick 17B",
5091
+ "MiniMaxAI/MiniMax-M2.5": {
5092
+ id: "MiniMaxAI/MiniMax-M2.5",
5093
+ name: "MiniMax-M2.5",
5003
5094
  api: "openai-completions",
5004
- provider: "groq",
5005
- baseUrl: "https://api.groq.com/openai/v1",
5006
- reasoning: false,
5007
- input: ["text", "image"],
5095
+ provider: "huggingface",
5096
+ baseUrl: "https://router.huggingface.co/v1",
5097
+ compat: { "supportsDeveloperRole": false },
5098
+ reasoning: true,
5099
+ input: ["text"],
5008
5100
  cost: {
5009
- input: 0.2,
5010
- output: 0.6,
5011
- cacheRead: 0,
5101
+ input: 0.3,
5102
+ output: 1.2,
5103
+ cacheRead: 0.03,
5012
5104
  cacheWrite: 0,
5013
5105
  },
5014
- contextWindow: 131072,
5015
- maxTokens: 8192,
5106
+ contextWindow: 204800,
5107
+ maxTokens: 131072,
5016
5108
  },
5017
- "meta-llama/llama-4-scout-17b-16e-instruct": {
5018
- id: "meta-llama/llama-4-scout-17b-16e-instruct",
5019
- name: "Llama 4 Scout 17B",
5020
- api: "openai-completions",
5021
- provider: "groq",
5022
- baseUrl: "https://api.groq.com/openai/v1",
5023
- reasoning: false,
5024
- input: ["text", "image"],
5025
- cost: {
5026
- input: 0.11,
5027
- output: 0.34,
5028
- cacheRead: 0,
5029
- cacheWrite: 0,
5030
- },
5031
- contextWindow: 131072,
5032
- maxTokens: 8192,
5033
- },
5034
- "mistral-saba-24b": {
5035
- id: "mistral-saba-24b",
5036
- name: "Mistral Saba 24B",
5037
- api: "openai-completions",
5038
- provider: "groq",
5039
- baseUrl: "https://api.groq.com/openai/v1",
5040
- reasoning: false,
5041
- input: ["text"],
5042
- cost: {
5043
- input: 0.79,
5044
- output: 0.79,
5045
- cacheRead: 0,
5046
- cacheWrite: 0,
5047
- },
5048
- contextWindow: 32768,
5049
- maxTokens: 32768,
5050
- },
5051
- "moonshotai/kimi-k2-instruct": {
5052
- id: "moonshotai/kimi-k2-instruct",
5053
- name: "Kimi K2 Instruct",
5054
- api: "openai-completions",
5055
- provider: "groq",
5056
- baseUrl: "https://api.groq.com/openai/v1",
5057
- reasoning: false,
5058
- input: ["text"],
5059
- cost: {
5060
- input: 1,
5061
- output: 3,
5062
- cacheRead: 0,
5063
- cacheWrite: 0,
5064
- },
5065
- contextWindow: 131072,
5066
- maxTokens: 16384,
5067
- },
5068
- "moonshotai/kimi-k2-instruct-0905": {
5069
- id: "moonshotai/kimi-k2-instruct-0905",
5070
- name: "Kimi K2 Instruct 0905",
5071
- api: "openai-completions",
5072
- provider: "groq",
5073
- baseUrl: "https://api.groq.com/openai/v1",
5074
- reasoning: false,
5075
- input: ["text"],
5076
- cost: {
5077
- input: 1,
5078
- output: 3,
5079
- cacheRead: 0.5,
5080
- cacheWrite: 0,
5081
- },
5082
- contextWindow: 262144,
5083
- maxTokens: 16384,
5084
- },
5085
- "openai/gpt-oss-120b": {
5086
- id: "openai/gpt-oss-120b",
5087
- name: "GPT OSS 120B",
5088
- api: "openai-completions",
5089
- provider: "groq",
5090
- baseUrl: "https://api.groq.com/openai/v1",
5091
- reasoning: true,
5092
- input: ["text"],
5093
- cost: {
5094
- input: 0.15,
5095
- output: 0.6,
5096
- cacheRead: 0.075,
5097
- cacheWrite: 0,
5098
- },
5099
- contextWindow: 131072,
5100
- maxTokens: 65536,
5101
- },
5102
- "openai/gpt-oss-20b": {
5103
- id: "openai/gpt-oss-20b",
5104
- name: "GPT OSS 20B",
5105
- api: "openai-completions",
5106
- provider: "groq",
5107
- baseUrl: "https://api.groq.com/openai/v1",
5108
- reasoning: true,
5109
- input: ["text"],
5110
- cost: {
5111
- input: 0.075,
5112
- output: 0.3,
5113
- cacheRead: 0.0375,
5114
- cacheWrite: 0,
5115
- },
5116
- contextWindow: 131072,
5117
- maxTokens: 65536,
5118
- },
5119
- "openai/gpt-oss-safeguard-20b": {
5120
- id: "openai/gpt-oss-safeguard-20b",
5121
- name: "Safety GPT OSS 20B",
5122
- api: "openai-completions",
5123
- provider: "groq",
5124
- baseUrl: "https://api.groq.com/openai/v1",
5125
- reasoning: true,
5126
- input: ["text"],
5127
- cost: {
5128
- input: 0.075,
5129
- output: 0.3,
5130
- cacheRead: 0.037,
5131
- cacheWrite: 0,
5132
- },
5133
- contextWindow: 131072,
5134
- maxTokens: 65536,
5135
- },
5136
- "qwen-qwq-32b": {
5137
- id: "qwen-qwq-32b",
5138
- name: "Qwen QwQ 32B",
5139
- api: "openai-completions",
5140
- provider: "groq",
5141
- baseUrl: "https://api.groq.com/openai/v1",
5142
- reasoning: true,
5143
- input: ["text"],
5144
- cost: {
5145
- input: 0.29,
5146
- output: 0.39,
5147
- cacheRead: 0,
5148
- cacheWrite: 0,
5149
- },
5150
- contextWindow: 131072,
5151
- maxTokens: 16384,
5152
- },
5153
- "qwen/qwen3-32b": {
5154
- id: "qwen/qwen3-32b",
5155
- name: "Qwen3 32B",
5156
- api: "openai-completions",
5157
- provider: "groq",
5158
- baseUrl: "https://api.groq.com/openai/v1",
5159
- reasoning: true,
5160
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null, "high": "default" },
5161
- input: ["text"],
5162
- cost: {
5163
- input: 0.29,
5164
- output: 0.59,
5165
- cacheRead: 0,
5166
- cacheWrite: 0,
5167
- },
5168
- contextWindow: 131072,
5169
- maxTokens: 40960,
5170
- },
5171
- },
5172
- "huggingface": {
5173
- "MiniMaxAI/MiniMax-M2.1": {
5174
- id: "MiniMaxAI/MiniMax-M2.1",
5175
- name: "MiniMax-M2.1",
5176
- api: "openai-completions",
5177
- provider: "huggingface",
5178
- baseUrl: "https://router.huggingface.co/v1",
5179
- compat: { "supportsDeveloperRole": false },
5180
- reasoning: true,
5181
- input: ["text"],
5182
- cost: {
5183
- input: 0.3,
5184
- output: 1.2,
5185
- cacheRead: 0,
5186
- cacheWrite: 0,
5187
- },
5188
- contextWindow: 204800,
5189
- maxTokens: 131072,
5190
- },
5191
- "MiniMaxAI/MiniMax-M2.5": {
5192
- id: "MiniMaxAI/MiniMax-M2.5",
5193
- name: "MiniMax-M2.5",
5194
- api: "openai-completions",
5195
- provider: "huggingface",
5196
- baseUrl: "https://router.huggingface.co/v1",
5197
- compat: { "supportsDeveloperRole": false },
5198
- reasoning: true,
5199
- input: ["text"],
5200
- cost: {
5201
- input: 0.3,
5202
- output: 1.2,
5203
- cacheRead: 0.03,
5204
- cacheWrite: 0,
5205
- },
5206
- contextWindow: 204800,
5207
- maxTokens: 131072,
5208
- },
5209
- "MiniMaxAI/MiniMax-M2.7": {
5210
- id: "MiniMaxAI/MiniMax-M2.7",
5211
- name: "MiniMax-M2.7",
5109
+ "MiniMaxAI/MiniMax-M2.7": {
5110
+ id: "MiniMaxAI/MiniMax-M2.7",
5111
+ name: "MiniMax-M2.7",
5212
5112
  api: "openai-completions",
5213
5113
  provider: "huggingface",
5214
5114
  baseUrl: "https://router.huggingface.co/v1",
@@ -7313,6 +7213,23 @@ export const MODELS = {
7313
7213
  contextWindow: 200000,
7314
7214
  maxTokens: 32000,
7315
7215
  },
7216
+ "claude-fable-5": {
7217
+ id: "claude-fable-5",
7218
+ name: "Claude Fable 5",
7219
+ api: "anthropic-messages",
7220
+ provider: "opencode",
7221
+ baseUrl: "https://opencode.ai/zen",
7222
+ reasoning: true,
7223
+ input: ["text", "image"],
7224
+ cost: {
7225
+ input: 10,
7226
+ output: 50,
7227
+ cacheRead: 1,
7228
+ cacheWrite: 12.5,
7229
+ },
7230
+ contextWindow: 1000000,
7231
+ maxTokens: 128000,
7232
+ },
7316
7233
  "claude-haiku-4-5": {
7317
7234
  id: "claude-haiku-4-5",
7318
7235
  name: "Claude Haiku 4.5",
@@ -7485,7 +7402,7 @@ export const MODELS = {
7485
7402
  cost: {
7486
7403
  input: 0.14,
7487
7404
  output: 0.28,
7488
- cacheRead: 0.03,
7405
+ cacheRead: 0.028,
7489
7406
  cacheWrite: 0,
7490
7407
  },
7491
7408
  contextWindow: 1000000,
@@ -7510,6 +7427,25 @@ export const MODELS = {
7510
7427
  contextWindow: 200000,
7511
7428
  maxTokens: 128000,
7512
7429
  },
7430
+ "deepseek-v4-pro": {
7431
+ id: "deepseek-v4-pro",
7432
+ name: "DeepSeek V4 Pro",
7433
+ api: "openai-completions",
7434
+ provider: "opencode",
7435
+ baseUrl: "https://opencode.ai/zen/v1",
7436
+ compat: { "requiresReasoningContentOnAssistantMessages": true, "thinkingFormat": "deepseek" },
7437
+ reasoning: true,
7438
+ thinkingLevelMap: { "minimal": null, "low": null, "medium": null, "high": "high", "xhigh": "max" },
7439
+ input: ["text"],
7440
+ cost: {
7441
+ input: 1.74,
7442
+ output: 3.84,
7443
+ cacheRead: 0.145,
7444
+ cacheWrite: 0,
7445
+ },
7446
+ contextWindow: 1000000,
7447
+ maxTokens: 384000,
7448
+ },
7513
7449
  "gemini-3-flash": {
7514
7450
  id: "gemini-3-flash",
7515
7451
  name: "Gemini 3 Flash",
@@ -8008,6 +7944,23 @@ export const MODELS = {
8008
7944
  contextWindow: 1000000,
8009
7945
  maxTokens: 128000,
8010
7946
  },
7947
+ "north-mini-code-free": {
7948
+ id: "north-mini-code-free",
7949
+ name: "North Mini Code Free",
7950
+ api: "openai-completions",
7951
+ provider: "opencode",
7952
+ baseUrl: "https://opencode.ai/zen/v1",
7953
+ reasoning: true,
7954
+ input: ["text"],
7955
+ cost: {
7956
+ input: 0,
7957
+ output: 0,
7958
+ cacheRead: 0,
7959
+ cacheWrite: 0,
7960
+ },
7961
+ contextWindow: 256000,
7962
+ maxTokens: 64000,
7963
+ },
8011
7964
  "qwen3.5-plus": {
8012
7965
  id: "qwen3.5-plus",
8013
7966
  name: "Qwen3.5 Plus",
@@ -8229,9 +8182,9 @@ export const MODELS = {
8229
8182
  reasoning: true,
8230
8183
  input: ["text", "image"],
8231
8184
  cost: {
8232
- input: 0.6,
8233
- output: 2.4,
8234
- cacheRead: 0.12,
8185
+ input: 0.3,
8186
+ output: 1.2,
8187
+ cacheRead: 0.06,
8235
8188
  cacheWrite: 0,
8236
8189
  },
8237
8190
  contextWindow: 512000,
@@ -8427,6 +8380,23 @@ export const MODELS = {
8427
8380
  contextWindow: 200000,
8428
8381
  maxTokens: 8192,
8429
8382
  },
8383
+ "anthropic/claude-fable-5": {
8384
+ id: "anthropic/claude-fable-5",
8385
+ name: "Anthropic: Claude Fable 5",
8386
+ api: "openai-completions",
8387
+ provider: "openrouter",
8388
+ baseUrl: "https://openrouter.ai/api/v1",
8389
+ reasoning: true,
8390
+ input: ["text", "image"],
8391
+ cost: {
8392
+ input: 10,
8393
+ output: 50,
8394
+ cacheRead: 1,
8395
+ cacheWrite: 12.5,
8396
+ },
8397
+ contextWindow: 1000000,
8398
+ maxTokens: 128000,
8399
+ },
8430
8400
  "anthropic/claude-haiku-4.5": {
8431
8401
  id: "anthropic/claude-haiku-4.5",
8432
8402
  name: "Anthropic: Claude Haiku 4.5",
@@ -8853,7 +8823,7 @@ export const MODELS = {
8853
8823
  cacheRead: 0.135,
8854
8824
  cacheWrite: 0,
8855
8825
  },
8856
- contextWindow: 163840,
8826
+ contextWindow: 131072,
8857
8827
  maxTokens: 16384,
8858
8828
  },
8859
8829
  "deepseek/deepseek-chat-v3.1": {
@@ -9295,12 +9265,12 @@ export const MODELS = {
9295
9265
  input: ["text", "image"],
9296
9266
  cost: {
9297
9267
  input: 0.12,
9298
- output: 0.36,
9268
+ output: 0.35,
9299
9269
  cacheRead: 0.09,
9300
9270
  cacheWrite: 0,
9301
9271
  },
9302
9272
  contextWindow: 262144,
9303
- maxTokens: 8192,
9273
+ maxTokens: 262144,
9304
9274
  },
9305
9275
  "google/gemma-4-31b-it:free": {
9306
9276
  id: "google/gemma-4-31b-it:free",
@@ -9585,8 +9555,8 @@ export const MODELS = {
9585
9555
  input: ["text"],
9586
9556
  cost: {
9587
9557
  input: 0.15,
9588
- output: 1.15,
9589
- cacheRead: 0,
9558
+ output: 0.8999999999999999,
9559
+ cacheRead: 0.049999999999999996,
9590
9560
  cacheWrite: 0,
9591
9561
  },
9592
9562
  contextWindow: 204800,
@@ -9601,13 +9571,13 @@ export const MODELS = {
9601
9571
  reasoning: true,
9602
9572
  input: ["text"],
9603
9573
  cost: {
9604
- input: 0.27899999999999997,
9605
- output: 1.2,
9606
- cacheRead: 0,
9574
+ input: 0.25,
9575
+ output: 1,
9576
+ cacheRead: 0.049999999999999996,
9607
9577
  cacheWrite: 0,
9608
9578
  },
9609
9579
  contextWindow: 204800,
9610
- maxTokens: 196608,
9580
+ maxTokens: 131072,
9611
9581
  },
9612
9582
  "minimax/minimax-m3": {
9613
9583
  id: "minimax/minimax-m3",
@@ -9993,21 +9963,20 @@ export const MODELS = {
9993
9963
  reasoning: true,
9994
9964
  input: ["text", "image"],
9995
9965
  cost: {
9996
- input: 0.684,
9997
- output: 3.42,
9998
- cacheRead: 0.144,
9966
+ input: 0.67,
9967
+ output: 3.39,
9968
+ cacheRead: 0.14,
9999
9969
  cacheWrite: 0,
10000
9970
  },
10001
9971
  contextWindow: 262144,
10002
9972
  maxTokens: 262144,
10003
9973
  },
10004
- "moonshotai/kimi-k2.6:free": {
10005
- id: "moonshotai/kimi-k2.6:free",
10006
- name: "MoonshotAI: Kimi K2.6 (free)",
9974
+ "nex-agi/nex-n2-pro:free": {
9975
+ id: "nex-agi/nex-n2-pro:free",
9976
+ name: "Nex AGI: Nex-N2-Pro (free)",
10007
9977
  api: "openai-completions",
10008
9978
  provider: "openrouter",
10009
9979
  baseUrl: "https://openrouter.ai/api/v1",
10010
- compat: { "supportsDeveloperRole": false },
10011
9980
  reasoning: true,
10012
9981
  input: ["text", "image"],
10013
9982
  cost: {
@@ -10017,24 +9986,7 @@ export const MODELS = {
10017
9986
  cacheWrite: 0,
10018
9987
  },
10019
9988
  contextWindow: 262144,
10020
- maxTokens: 4096,
10021
- },
10022
- "nex-agi/deepseek-v3.1-nex-n1": {
10023
- id: "nex-agi/deepseek-v3.1-nex-n1",
10024
- name: "Nex AGI: DeepSeek V3.1 Nex N1",
10025
- api: "openai-completions",
10026
- provider: "openrouter",
10027
- baseUrl: "https://openrouter.ai/api/v1",
10028
- reasoning: false,
10029
- input: ["text"],
10030
- cost: {
10031
- input: 0.135,
10032
- output: 0.5,
10033
- cacheRead: 0,
10034
- cacheWrite: 0,
10035
- },
10036
- contextWindow: 131072,
10037
- maxTokens: 163840,
9989
+ maxTokens: 262144,
10038
9990
  },
10039
9991
  "nvidia/llama-3.3-nemotron-super-49b-v1.5": {
10040
9992
  id: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
@@ -10189,23 +10141,6 @@ export const MODELS = {
10189
10141
  contextWindow: 128000,
10190
10142
  maxTokens: 128000,
10191
10143
  },
10192
- "nvidia/nemotron-nano-9b-v2": {
10193
- id: "nvidia/nemotron-nano-9b-v2",
10194
- name: "NVIDIA: Nemotron Nano 9B V2",
10195
- api: "openai-completions",
10196
- provider: "openrouter",
10197
- baseUrl: "https://openrouter.ai/api/v1",
10198
- reasoning: true,
10199
- input: ["text"],
10200
- cost: {
10201
- input: 0.04,
10202
- output: 0.16,
10203
- cacheRead: 0,
10204
- cacheWrite: 0,
10205
- },
10206
- contextWindow: 131072,
10207
- maxTokens: 16384,
10208
- },
10209
10144
  "nvidia/nemotron-nano-9b-v2:free": {
10210
10145
  id: "nvidia/nemotron-nano-9b-v2:free",
10211
10146
  name: "NVIDIA: Nemotron Nano 9B V2 (free)",
@@ -11944,13 +11879,13 @@ export const MODELS = {
11944
11879
  reasoning: true,
11945
11880
  input: ["text", "image"],
11946
11881
  cost: {
11947
- input: 0.14,
11882
+ input: 0.15,
11948
11883
  output: 1,
11949
- cacheRead: 0,
11884
+ cacheRead: 0.049999999999999996,
11950
11885
  cacheWrite: 0,
11951
11886
  },
11952
11887
  contextWindow: 262144,
11953
- maxTokens: 262140,
11888
+ maxTokens: 262144,
11954
11889
  },
11955
11890
  "qwen/qwen3.6-flash": {
11956
11891
  id: "qwen/qwen3.6-flash",
@@ -12029,10 +11964,10 @@ export const MODELS = {
12029
11964
  reasoning: true,
12030
11965
  input: ["text", "image"],
12031
11966
  cost: {
12032
- input: 0.39999999999999997,
12033
- output: 1.5999999999999999,
12034
- cacheRead: 0.08,
12035
- cacheWrite: 0.5,
11967
+ input: 0.32,
11968
+ output: 1.28,
11969
+ cacheRead: 0.064,
11970
+ cacheWrite: 0.39999999999999997,
12036
11971
  },
12037
11972
  contextWindow: 1000000,
12038
11973
  maxTokens: 65536,
@@ -12292,23 +12227,6 @@ export const MODELS = {
12292
12227
  contextWindow: 1048576,
12293
12228
  maxTokens: 131072,
12294
12229
  },
12295
- "z-ai/glm-4-32b": {
12296
- id: "z-ai/glm-4-32b",
12297
- name: "Z.ai: GLM 4 32B ",
12298
- api: "openai-completions",
12299
- provider: "openrouter",
12300
- baseUrl: "https://openrouter.ai/api/v1",
12301
- reasoning: false,
12302
- input: ["text"],
12303
- cost: {
12304
- input: 0.09999999999999999,
12305
- output: 0.09999999999999999,
12306
- cacheRead: 0,
12307
- cacheWrite: 0,
12308
- },
12309
- contextWindow: 128000,
12310
- maxTokens: 4096,
12311
- },
12312
12230
  "z-ai/glm-4.5": {
12313
12231
  id: "z-ai/glm-4.5",
12314
12232
  name: "Z.ai: GLM 4.5",
@@ -12343,23 +12261,6 @@ export const MODELS = {
12343
12261
  contextWindow: 131072,
12344
12262
  maxTokens: 131070,
12345
12263
  },
12346
- "z-ai/glm-4.5-air:free": {
12347
- id: "z-ai/glm-4.5-air:free",
12348
- name: "Z.ai: GLM 4.5 Air (free)",
12349
- api: "openai-completions",
12350
- provider: "openrouter",
12351
- baseUrl: "https://openrouter.ai/api/v1",
12352
- reasoning: true,
12353
- input: ["text"],
12354
- cost: {
12355
- input: 0,
12356
- output: 0,
12357
- cacheRead: 0,
12358
- cacheWrite: 0,
12359
- },
12360
- contextWindow: 131072,
12361
- maxTokens: 96000,
12362
- },
12363
12264
  "z-ai/glm-4.5v": {
12364
12265
  id: "z-ai/glm-4.5v",
12365
12266
  name: "Z.ai: GLM 4.5V",
@@ -12405,11 +12306,11 @@ export const MODELS = {
12405
12306
  cost: {
12406
12307
  input: 0.3,
12407
12308
  output: 0.8999999999999999,
12408
- cacheRead: 0.049999999999999996,
12309
+ cacheRead: 0.055,
12409
12310
  cacheWrite: 0,
12410
12311
  },
12411
12312
  contextWindow: 131072,
12412
- maxTokens: 24000,
12313
+ maxTokens: 32768,
12413
12314
  },
12414
12315
  "z-ai/glm-4.7": {
12415
12316
  id: "z-ai/glm-4.7",
@@ -12476,7 +12377,7 @@ export const MODELS = {
12476
12377
  cacheRead: 0.24,
12477
12378
  cacheWrite: 0,
12478
12379
  },
12479
- contextWindow: 202752,
12380
+ contextWindow: 262144,
12480
12381
  maxTokens: 131072,
12481
12382
  },
12482
12383
  "z-ai/glm-5.1": {
@@ -12496,22 +12397,22 @@ export const MODELS = {
12496
12397
  contextWindow: 202752,
12497
12398
  maxTokens: 4096,
12498
12399
  },
12499
- "z-ai/glm-5v-turbo": {
12500
- id: "z-ai/glm-5v-turbo",
12501
- name: "Z.ai: GLM 5V Turbo",
12400
+ "~anthropic/claude-fable-latest": {
12401
+ id: "~anthropic/claude-fable-latest",
12402
+ name: "Anthropic: Claude Fable Latest",
12502
12403
  api: "openai-completions",
12503
12404
  provider: "openrouter",
12504
12405
  baseUrl: "https://openrouter.ai/api/v1",
12505
12406
  reasoning: true,
12506
12407
  input: ["text", "image"],
12507
12408
  cost: {
12508
- input: 1.2,
12509
- output: 4,
12510
- cacheRead: 0.24,
12511
- cacheWrite: 0,
12409
+ input: 10,
12410
+ output: 50,
12411
+ cacheRead: 1,
12412
+ cacheWrite: 12.5,
12512
12413
  },
12513
- contextWindow: 202752,
12514
- maxTokens: 131072,
12414
+ contextWindow: 1000000,
12415
+ maxTokens: 128000,
12515
12416
  },
12516
12417
  "~anthropic/claude-haiku-latest": {
12517
12418
  id: "~anthropic/claude-haiku-latest",
@@ -12607,9 +12508,9 @@ export const MODELS = {
12607
12508
  reasoning: true,
12608
12509
  input: ["text", "image"],
12609
12510
  cost: {
12610
- input: 0.684,
12611
- output: 3.42,
12612
- cacheRead: 0.144,
12511
+ input: 0.67,
12512
+ output: 3.39,
12513
+ cacheRead: 0.14,
12613
12514
  cacheWrite: 0,
12614
12515
  },
12615
12516
  contextWindow: 262144,
@@ -12651,25 +12552,6 @@ export const MODELS = {
12651
12552
  },
12652
12553
  },
12653
12554
  "together": {
12654
- "MiniMaxAI/MiniMax-M2.5": {
12655
- id: "MiniMaxAI/MiniMax-M2.5",
12656
- name: "MiniMax-M2.5",
12657
- api: "openai-completions",
12658
- provider: "together",
12659
- baseUrl: "https://api.together.ai/v1",
12660
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
12661
- reasoning: true,
12662
- thinkingLevelMap: { "off": null, "minimal": null, "low": null, "medium": null },
12663
- input: ["text"],
12664
- cost: {
12665
- input: 0.3,
12666
- output: 1.2,
12667
- cacheRead: 0.06,
12668
- cacheWrite: 0,
12669
- },
12670
- contextWindow: 204800,
12671
- maxTokens: 131072,
12672
- },
12673
12555
  "MiniMaxAI/MiniMax-M2.7": {
12674
12556
  id: "MiniMaxAI/MiniMax-M2.7",
12675
12557
  name: "MiniMax-M2.7",
@@ -12689,28 +12571,27 @@ export const MODELS = {
12689
12571
  contextWindow: 202752,
12690
12572
  maxTokens: 131072,
12691
12573
  },
12692
- "Qwen/Qwen3-235B-A22B-Instruct-2507-tput": {
12693
- id: "Qwen/Qwen3-235B-A22B-Instruct-2507-tput",
12694
- name: "Qwen3 235B A22B Instruct 2507 FP8",
12574
+ "Qwen/Qwen2.5-7B-Instruct-Turbo": {
12575
+ id: "Qwen/Qwen2.5-7B-Instruct-Turbo",
12576
+ name: "Qwen 2.5 7B Instruct Turbo",
12695
12577
  api: "openai-completions",
12696
12578
  provider: "together",
12697
12579
  baseUrl: "https://api.together.ai/v1",
12698
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12699
- reasoning: true,
12700
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12580
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
12581
+ reasoning: false,
12701
12582
  input: ["text"],
12702
12583
  cost: {
12703
- input: 0.2,
12704
- output: 0.6,
12584
+ input: 0.3,
12585
+ output: 0.3,
12705
12586
  cacheRead: 0,
12706
12587
  cacheWrite: 0,
12707
12588
  },
12708
- contextWindow: 262144,
12709
- maxTokens: 262144,
12589
+ contextWindow: 32768,
12590
+ maxTokens: 32768,
12710
12591
  },
12711
- "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
12712
- id: "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
12713
- name: "Qwen3 Coder 480B A35B Instruct",
12592
+ "Qwen/Qwen3-235B-A22B-Instruct-2507-tput": {
12593
+ id: "Qwen/Qwen3-235B-A22B-Instruct-2507-tput",
12594
+ name: "Qwen3 235B A22B Instruct 2507 FP8",
12714
12595
  api: "openai-completions",
12715
12596
  provider: "together",
12716
12597
  baseUrl: "https://api.together.ai/v1",
@@ -12718,36 +12599,36 @@ export const MODELS = {
12718
12599
  reasoning: false,
12719
12600
  input: ["text"],
12720
12601
  cost: {
12721
- input: 2,
12722
- output: 2,
12602
+ input: 0.2,
12603
+ output: 0.6,
12723
12604
  cacheRead: 0,
12724
12605
  cacheWrite: 0,
12725
12606
  },
12726
12607
  contextWindow: 262144,
12727
12608
  maxTokens: 262144,
12728
12609
  },
12729
- "Qwen/Qwen3-Coder-Next-FP8": {
12730
- id: "Qwen/Qwen3-Coder-Next-FP8",
12731
- name: "Qwen3 Coder Next FP8",
12610
+ "Qwen/Qwen3.5-397B-A17B": {
12611
+ id: "Qwen/Qwen3.5-397B-A17B",
12612
+ name: "Qwen3.5 397B A17B",
12732
12613
  api: "openai-completions",
12733
12614
  provider: "together",
12734
12615
  baseUrl: "https://api.together.ai/v1",
12735
12616
  compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12736
12617
  reasoning: true,
12737
12618
  thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12738
- input: ["text"],
12619
+ input: ["text", "image"],
12739
12620
  cost: {
12740
- input: 0.5,
12741
- output: 1.2,
12621
+ input: 0.6,
12622
+ output: 3.6,
12742
12623
  cacheRead: 0,
12743
12624
  cacheWrite: 0,
12744
12625
  },
12745
12626
  contextWindow: 262144,
12746
- maxTokens: 262144,
12627
+ maxTokens: 130000,
12747
12628
  },
12748
- "Qwen/Qwen3.5-397B-A17B": {
12749
- id: "Qwen/Qwen3.5-397B-A17B",
12750
- name: "Qwen3.5 397B A17B",
12629
+ "Qwen/Qwen3.5-9B": {
12630
+ id: "Qwen/Qwen3.5-9B",
12631
+ name: "Qwen3.5 9B",
12751
12632
  api: "openai-completions",
12752
12633
  provider: "together",
12753
12634
  baseUrl: "https://api.together.ai/v1",
@@ -12756,13 +12637,13 @@ export const MODELS = {
12756
12637
  thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12757
12638
  input: ["text", "image"],
12758
12639
  cost: {
12759
- input: 0.6,
12760
- output: 3.6,
12640
+ input: 0.17,
12641
+ output: 0.25,
12761
12642
  cacheRead: 0,
12762
12643
  cacheWrite: 0,
12763
12644
  },
12764
12645
  contextWindow: 262144,
12765
- maxTokens: 130000,
12646
+ maxTokens: 65536,
12766
12647
  },
12767
12648
  "Qwen/Qwen3.6-Plus": {
12768
12649
  id: "Qwen/Qwen3.6-Plus",
@@ -12789,9 +12670,8 @@ export const MODELS = {
12789
12670
  api: "openai-completions",
12790
12671
  provider: "together",
12791
12672
  baseUrl: "https://api.together.ai/v1",
12792
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12793
- reasoning: true,
12794
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12673
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
12674
+ reasoning: false,
12795
12675
  input: ["text"],
12796
12676
  cost: {
12797
12677
  input: 2.5,
@@ -12802,44 +12682,6 @@ export const MODELS = {
12802
12682
  contextWindow: 1000000,
12803
12683
  maxTokens: 500000,
12804
12684
  },
12805
- "deepseek-ai/DeepSeek-V3": {
12806
- id: "deepseek-ai/DeepSeek-V3",
12807
- name: "DeepSeek-V3",
12808
- api: "openai-completions",
12809
- provider: "together",
12810
- baseUrl: "https://api.together.ai/v1",
12811
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12812
- reasoning: true,
12813
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12814
- input: ["text"],
12815
- cost: {
12816
- input: 1.25,
12817
- output: 1.25,
12818
- cacheRead: 0,
12819
- cacheWrite: 0,
12820
- },
12821
- contextWindow: 131072,
12822
- maxTokens: 131072,
12823
- },
12824
- "deepseek-ai/DeepSeek-V3-1": {
12825
- id: "deepseek-ai/DeepSeek-V3-1",
12826
- name: "DeepSeek V3.1",
12827
- api: "openai-completions",
12828
- provider: "together",
12829
- baseUrl: "https://api.together.ai/v1",
12830
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12831
- reasoning: true,
12832
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12833
- input: ["text"],
12834
- cost: {
12835
- input: 0.6,
12836
- output: 1.7,
12837
- cacheRead: 0,
12838
- cacheWrite: 0,
12839
- },
12840
- contextWindow: 131072,
12841
- maxTokens: 131072,
12842
- },
12843
12685
  "deepseek-ai/DeepSeek-V4-Pro": {
12844
12686
  id: "deepseek-ai/DeepSeek-V4-Pro",
12845
12687
  name: "DeepSeek V4 Pro",
@@ -12851,8 +12693,8 @@ export const MODELS = {
12851
12693
  thinkingLevelMap: { "minimal": null, "low": null, "medium": null, "high": "high", "xhigh": null },
12852
12694
  input: ["text"],
12853
12695
  cost: {
12854
- input: 2.1,
12855
- output: 4.4,
12696
+ input: 1.74,
12697
+ output: 3.48,
12856
12698
  cacheRead: 0.2,
12857
12699
  cacheWrite: 0,
12858
12700
  },
@@ -12888,8 +12730,8 @@ export const MODELS = {
12888
12730
  thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12889
12731
  input: ["text", "image"],
12890
12732
  cost: {
12891
- input: 0.2,
12892
- output: 0.5,
12733
+ input: 0.39,
12734
+ output: 0.97,
12893
12735
  cacheRead: 0,
12894
12736
  cacheWrite: 0,
12895
12737
  },
@@ -12914,25 +12756,6 @@ export const MODELS = {
12914
12756
  contextWindow: 131072,
12915
12757
  maxTokens: 131072,
12916
12758
  },
12917
- "moonshotai/Kimi-K2.5": {
12918
- id: "moonshotai/Kimi-K2.5",
12919
- name: "Kimi K2.5",
12920
- api: "openai-completions",
12921
- provider: "together",
12922
- baseUrl: "https://api.together.ai/v1",
12923
- compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12924
- reasoning: true,
12925
- thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12926
- input: ["text", "image"],
12927
- cost: {
12928
- input: 0.5,
12929
- output: 2.8,
12930
- cacheRead: 0,
12931
- cacheWrite: 0,
12932
- },
12933
- contextWindow: 262144,
12934
- maxTokens: 262144,
12935
- },
12936
12759
  "moonshotai/Kimi-K2.6": {
12937
12760
  id: "moonshotai/Kimi-K2.6",
12938
12761
  name: "Kimi K2.6",
@@ -12990,6 +12813,44 @@ export const MODELS = {
12990
12813
  contextWindow: 131072,
12991
12814
  maxTokens: 131072,
12992
12815
  },
12816
+ "openai/gpt-oss-20b": {
12817
+ id: "openai/gpt-oss-20b",
12818
+ name: "GPT OSS 20B",
12819
+ api: "openai-completions",
12820
+ provider: "together",
12821
+ baseUrl: "https://api.together.ai/v1",
12822
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": true, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "openai" },
12823
+ reasoning: true,
12824
+ thinkingLevelMap: { "off": null, "minimal": null },
12825
+ input: ["text"],
12826
+ cost: {
12827
+ input: 0.05,
12828
+ output: 0.2,
12829
+ cacheRead: 0,
12830
+ cacheWrite: 0,
12831
+ },
12832
+ contextWindow: 131072,
12833
+ maxTokens: 131072,
12834
+ },
12835
+ "zai-org/GLM-5": {
12836
+ id: "zai-org/GLM-5",
12837
+ name: "GLM-5",
12838
+ api: "openai-completions",
12839
+ provider: "together",
12840
+ baseUrl: "https://api.together.ai/v1",
12841
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
12842
+ reasoning: true,
12843
+ thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
12844
+ input: ["text"],
12845
+ cost: {
12846
+ input: 1,
12847
+ output: 3.2,
12848
+ cacheRead: 0,
12849
+ cacheWrite: 0,
12850
+ },
12851
+ contextWindow: 202752,
12852
+ maxTokens: 131072,
12853
+ },
12993
12854
  "zai-org/GLM-5.1": {
12994
12855
  id: "zai-org/GLM-5.1",
12995
12856
  name: "GLM-5.1",
@@ -13054,8 +12915,8 @@ export const MODELS = {
13054
12915
  reasoning: true,
13055
12916
  input: ["text"],
13056
12917
  cost: {
13057
- input: 0.08,
13058
- output: 0.29,
12918
+ input: 0.12,
12919
+ output: 0.5,
13059
12920
  cacheRead: 0,
13060
12921
  cacheWrite: 0,
13061
12922
  },
@@ -13419,6 +13280,23 @@ export const MODELS = {
13419
13280
  contextWindow: 200000,
13420
13281
  maxTokens: 8192,
13421
13282
  },
13283
+ "anthropic/claude-fable-5": {
13284
+ id: "anthropic/claude-fable-5",
13285
+ name: "Claude Fable 5",
13286
+ api: "anthropic-messages",
13287
+ provider: "vercel-ai-gateway",
13288
+ baseUrl: "https://ai-gateway.vercel.sh",
13289
+ reasoning: true,
13290
+ input: ["text", "image"],
13291
+ cost: {
13292
+ input: 10,
13293
+ output: 50,
13294
+ cacheRead: 1,
13295
+ cacheWrite: 12.5,
13296
+ },
13297
+ contextWindow: 1000000,
13298
+ maxTokens: 128000,
13299
+ },
13422
13300
  "anthropic/claude-haiku-4.5": {
13423
13301
  id: "anthropic/claude-haiku-4.5",
13424
13302
  name: "Claude Haiku 4.5",
@@ -13799,40 +13677,6 @@ export const MODELS = {
13799
13677
  contextWindow: 1000000,
13800
13678
  maxTokens: 384000,
13801
13679
  },
13802
- "google/gemini-2.0-flash": {
13803
- id: "google/gemini-2.0-flash",
13804
- name: "Gemini 2.0 Flash",
13805
- api: "anthropic-messages",
13806
- provider: "vercel-ai-gateway",
13807
- baseUrl: "https://ai-gateway.vercel.sh",
13808
- reasoning: false,
13809
- input: ["text", "image"],
13810
- cost: {
13811
- input: 0.15,
13812
- output: 0.6,
13813
- cacheRead: 0.024999999999999998,
13814
- cacheWrite: 0,
13815
- },
13816
- contextWindow: 1048576,
13817
- maxTokens: 8192,
13818
- },
13819
- "google/gemini-2.0-flash-lite": {
13820
- id: "google/gemini-2.0-flash-lite",
13821
- name: "Gemini 2.0 Flash Lite",
13822
- api: "anthropic-messages",
13823
- provider: "vercel-ai-gateway",
13824
- baseUrl: "https://ai-gateway.vercel.sh",
13825
- reasoning: false,
13826
- input: ["text", "image"],
13827
- cost: {
13828
- input: 0.075,
13829
- output: 0.3,
13830
- cacheRead: 0.02,
13831
- cacheWrite: 0,
13832
- },
13833
- contextWindow: 1048576,
13834
- maxTokens: 8192,
13835
- },
13836
13680
  "google/gemini-2.5-flash": {
13837
13681
  id: "google/gemini-2.5-flash",
13838
13682
  name: "Gemini 2.5 Flash",
@@ -14581,40 +14425,6 @@ export const MODELS = {
14581
14425
  contextWindow: 262114,
14582
14426
  maxTokens: 262114,
14583
14427
  },
14584
- "moonshotai/kimi-k2-thinking-turbo": {
14585
- id: "moonshotai/kimi-k2-thinking-turbo",
14586
- name: "Kimi K2 Thinking Turbo",
14587
- api: "anthropic-messages",
14588
- provider: "vercel-ai-gateway",
14589
- baseUrl: "https://ai-gateway.vercel.sh",
14590
- reasoning: true,
14591
- input: ["text"],
14592
- cost: {
14593
- input: 1.15,
14594
- output: 8,
14595
- cacheRead: 0.15,
14596
- cacheWrite: 0,
14597
- },
14598
- contextWindow: 262114,
14599
- maxTokens: 262114,
14600
- },
14601
- "moonshotai/kimi-k2-turbo": {
14602
- id: "moonshotai/kimi-k2-turbo",
14603
- name: "Kimi K2 Turbo",
14604
- api: "anthropic-messages",
14605
- provider: "vercel-ai-gateway",
14606
- baseUrl: "https://ai-gateway.vercel.sh",
14607
- reasoning: false,
14608
- input: ["text"],
14609
- cost: {
14610
- input: 1.15,
14611
- output: 8,
14612
- cacheRead: 0.15,
14613
- cacheWrite: 0,
14614
- },
14615
- contextWindow: 256000,
14616
- maxTokens: 16384,
14617
- },
14618
14428
  "moonshotai/kimi-k2.5": {
14619
14429
  id: "moonshotai/kimi-k2.5",
14620
14430
  name: "Kimi K2.5",
@@ -15952,7 +15762,7 @@ export const MODELS = {
15952
15762
  cacheRead: 0.2,
15953
15763
  cacheWrite: 0,
15954
15764
  },
15955
- contextWindow: 2000000,
15765
+ contextWindow: 1000000,
15956
15766
  maxTokens: 30000,
15957
15767
  },
15958
15768
  "grok-4.20-0309-reasoning": {
@@ -15969,7 +15779,7 @@ export const MODELS = {
15969
15779
  cacheRead: 0.2,
15970
15780
  cacheWrite: 0,
15971
15781
  },
15972
- contextWindow: 2000000,
15782
+ contextWindow: 1000000,
15973
15783
  maxTokens: 30000,
15974
15784
  },
15975
15785
  "grok-4.3": {
@@ -16115,6 +15925,24 @@ export const MODELS = {
16115
15925
  contextWindow: 1048576,
16116
15926
  maxTokens: 131072,
16117
15927
  },
15928
+ "mimo-v2.5-pro-ultraspeed": {
15929
+ id: "mimo-v2.5-pro-ultraspeed",
15930
+ name: "MiMo-V2.5-Pro-UltraSpeed",
15931
+ api: "openai-completions",
15932
+ provider: "xiaomi",
15933
+ baseUrl: "https://api.xiaomimimo.com/v1",
15934
+ compat: { "requiresReasoningContentOnAssistantMessages": true, "thinkingFormat": "deepseek" },
15935
+ reasoning: true,
15936
+ input: ["text"],
15937
+ cost: {
15938
+ input: 1.305,
15939
+ output: 2.61,
15940
+ cacheRead: 0.0108,
15941
+ cacheWrite: 0,
15942
+ },
15943
+ contextWindow: 1048576,
15944
+ maxTokens: 131072,
15945
+ },
16118
15946
  },
16119
15947
  "xiaomi-token-plan-ams": {
16120
15948
  "mimo-v2-omni": {
@@ -16189,6 +16017,24 @@ export const MODELS = {
16189
16017
  contextWindow: 1048576,
16190
16018
  maxTokens: 131072,
16191
16019
  },
16020
+ "mimo-v2.5-pro-ultraspeed": {
16021
+ id: "mimo-v2.5-pro-ultraspeed",
16022
+ name: "MiMo-V2.5-Pro-UltraSpeed",
16023
+ api: "openai-completions",
16024
+ provider: "xiaomi-token-plan-ams",
16025
+ baseUrl: "https://token-plan-ams.xiaomimimo.com/v1",
16026
+ compat: { "requiresReasoningContentOnAssistantMessages": true, "thinkingFormat": "deepseek" },
16027
+ reasoning: true,
16028
+ input: ["text"],
16029
+ cost: {
16030
+ input: 1.305,
16031
+ output: 2.61,
16032
+ cacheRead: 0.0108,
16033
+ cacheWrite: 0,
16034
+ },
16035
+ contextWindow: 1048576,
16036
+ maxTokens: 131072,
16037
+ },
16192
16038
  },
16193
16039
  "xiaomi-token-plan-cn": {
16194
16040
  "mimo-v2-omni": {
@@ -16263,6 +16109,24 @@ export const MODELS = {
16263
16109
  contextWindow: 1048576,
16264
16110
  maxTokens: 131072,
16265
16111
  },
16112
+ "mimo-v2.5-pro-ultraspeed": {
16113
+ id: "mimo-v2.5-pro-ultraspeed",
16114
+ name: "MiMo-V2.5-Pro-UltraSpeed",
16115
+ api: "openai-completions",
16116
+ provider: "xiaomi-token-plan-cn",
16117
+ baseUrl: "https://token-plan-cn.xiaomimimo.com/v1",
16118
+ compat: { "requiresReasoningContentOnAssistantMessages": true, "thinkingFormat": "deepseek" },
16119
+ reasoning: true,
16120
+ input: ["text"],
16121
+ cost: {
16122
+ input: 1.305,
16123
+ output: 2.61,
16124
+ cacheRead: 0.0108,
16125
+ cacheWrite: 0,
16126
+ },
16127
+ contextWindow: 1048576,
16128
+ maxTokens: 131072,
16129
+ },
16266
16130
  },
16267
16131
  "xiaomi-token-plan-sgp": {
16268
16132
  "mimo-v2-omni": {
@@ -16337,6 +16201,24 @@ export const MODELS = {
16337
16201
  contextWindow: 1048576,
16338
16202
  maxTokens: 131072,
16339
16203
  },
16204
+ "mimo-v2.5-pro-ultraspeed": {
16205
+ id: "mimo-v2.5-pro-ultraspeed",
16206
+ name: "MiMo-V2.5-Pro-UltraSpeed",
16207
+ api: "openai-completions",
16208
+ provider: "xiaomi-token-plan-sgp",
16209
+ baseUrl: "https://token-plan-sgp.xiaomimimo.com/v1",
16210
+ compat: { "requiresReasoningContentOnAssistantMessages": true, "thinkingFormat": "deepseek" },
16211
+ reasoning: true,
16212
+ input: ["text"],
16213
+ cost: {
16214
+ input: 1.305,
16215
+ output: 2.61,
16216
+ cacheRead: 0.0108,
16217
+ cacheWrite: 0,
16218
+ },
16219
+ contextWindow: 1048576,
16220
+ maxTokens: 131072,
16221
+ },
16340
16222
  },
16341
16223
  "zai": {
16342
16224
  "glm-4.5-air": {