@openachieve/ai 0.78.0 → 0.79.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1086,6 +1086,59 @@ export const MODELS = {
1086
1086
  contextWindow: 262144,
1087
1087
  maxTokens: 131072,
1088
1088
  },
1089
+ "openai.gpt-5.4": {
1090
+ id: "openai.gpt-5.4",
1091
+ name: "GPT-5.4",
1092
+ api: "bedrock-converse-stream",
1093
+ provider: "amazon-bedrock",
1094
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1095
+ reasoning: true,
1096
+ thinkingLevelMap: { "xhigh": "xhigh" },
1097
+ input: ["text", "image"],
1098
+ cost: {
1099
+ input: 2.75,
1100
+ output: 16.5,
1101
+ cacheRead: 0.275,
1102
+ cacheWrite: 0,
1103
+ },
1104
+ contextWindow: 272000,
1105
+ maxTokens: 128000,
1106
+ },
1107
+ "openai.gpt-5.5": {
1108
+ id: "openai.gpt-5.5",
1109
+ name: "GPT-5.5",
1110
+ api: "bedrock-converse-stream",
1111
+ provider: "amazon-bedrock",
1112
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1113
+ reasoning: true,
1114
+ thinkingLevelMap: { "xhigh": "xhigh" },
1115
+ input: ["text", "image"],
1116
+ cost: {
1117
+ input: 5.5,
1118
+ output: 33,
1119
+ cacheRead: 0.55,
1120
+ cacheWrite: 0,
1121
+ },
1122
+ contextWindow: 272000,
1123
+ maxTokens: 128000,
1124
+ },
1125
+ "openai.gpt-oss-120b": {
1126
+ id: "openai.gpt-oss-120b",
1127
+ name: "gpt-oss-120b",
1128
+ api: "bedrock-converse-stream",
1129
+ provider: "amazon-bedrock",
1130
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1131
+ reasoning: false,
1132
+ input: ["text"],
1133
+ cost: {
1134
+ input: 0.15,
1135
+ output: 0.6,
1136
+ cacheRead: 0,
1137
+ cacheWrite: 0,
1138
+ },
1139
+ contextWindow: 128000,
1140
+ maxTokens: 16384,
1141
+ },
1089
1142
  "openai.gpt-oss-120b-1:0": {
1090
1143
  id: "openai.gpt-oss-120b-1:0",
1091
1144
  name: "gpt-oss-120b",
@@ -1103,6 +1156,23 @@ export const MODELS = {
1103
1156
  contextWindow: 128000,
1104
1157
  maxTokens: 16384,
1105
1158
  },
1159
+ "openai.gpt-oss-20b": {
1160
+ id: "openai.gpt-oss-20b",
1161
+ name: "gpt-oss-20b",
1162
+ api: "bedrock-converse-stream",
1163
+ provider: "amazon-bedrock",
1164
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
1165
+ reasoning: false,
1166
+ input: ["text"],
1167
+ cost: {
1168
+ input: 0.07,
1169
+ output: 0.3,
1170
+ cacheRead: 0,
1171
+ cacheWrite: 0,
1172
+ },
1173
+ contextWindow: 128000,
1174
+ maxTokens: 16384,
1175
+ },
1106
1176
  "openai.gpt-oss-20b-1:0": {
1107
1177
  id: "openai.gpt-oss-20b-1:0",
1108
1178
  name: "gpt-oss-20b",
@@ -3887,6 +3957,24 @@ export const MODELS = {
3887
3957
  contextWindow: 202800,
3888
3958
  maxTokens: 131072,
3889
3959
  },
3960
+ "accounts/fireworks/routers/kimi-k2p6-fast": {
3961
+ id: "accounts/fireworks/routers/kimi-k2p6-fast",
3962
+ name: "Kimi K2.6 Fast",
3963
+ api: "anthropic-messages",
3964
+ provider: "fireworks",
3965
+ baseUrl: "https://api.fireworks.ai/inference",
3966
+ compat: { "sendSessionAffinityHeaders": true, "supportsEagerToolInputStreaming": false, "supportsCacheControlOnTools": false, "supportsLongCacheRetention": false },
3967
+ reasoning: true,
3968
+ input: ["text", "image"],
3969
+ cost: {
3970
+ input: 2,
3971
+ output: 8,
3972
+ cacheRead: 0.3,
3973
+ cacheWrite: 0,
3974
+ },
3975
+ contextWindow: 262000,
3976
+ maxTokens: 262000,
3977
+ },
3890
3978
  "accounts/fireworks/routers/kimi-k2p6-turbo": {
3891
3979
  id: "accounts/fireworks/routers/kimi-k2p6-turbo",
3892
3980
  name: "Kimi K2.6 Turbo",
@@ -6019,11 +6107,11 @@ export const MODELS = {
6019
6107
  api: "mistral-conversations",
6020
6108
  provider: "mistral",
6021
6109
  baseUrl: "https://api.mistral.ai",
6022
- reasoning: true,
6110
+ reasoning: false,
6023
6111
  input: ["text", "image"],
6024
6112
  cost: {
6025
- input: 1.5,
6026
- output: 7.5,
6113
+ input: 0.4,
6114
+ output: 2,
6027
6115
  cacheRead: 0,
6028
6116
  cacheWrite: 0,
6029
6117
  },
@@ -6458,6 +6546,82 @@ export const MODELS = {
6458
6546
  },
6459
6547
  },
6460
6548
  "nvidia": {
6549
+ "abacusai/dracarys-llama-3_1-70b-instruct": {
6550
+ id: "abacusai/dracarys-llama-3_1-70b-instruct",
6551
+ name: "dracarys-llama-3.1-70b-instruct",
6552
+ api: "openai-completions",
6553
+ provider: "nvidia",
6554
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6555
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6556
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6557
+ reasoning: false,
6558
+ input: ["text"],
6559
+ cost: {
6560
+ input: 0,
6561
+ output: 0,
6562
+ cacheRead: 0,
6563
+ cacheWrite: 0,
6564
+ },
6565
+ contextWindow: 128000,
6566
+ maxTokens: 8192,
6567
+ },
6568
+ "deepseek-ai/deepseek-v3.1-terminus": {
6569
+ id: "deepseek-ai/deepseek-v3.1-terminus",
6570
+ name: "DeepSeek V3.1 Terminus",
6571
+ api: "openai-completions",
6572
+ provider: "nvidia",
6573
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6574
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6575
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6576
+ reasoning: true,
6577
+ input: ["text"],
6578
+ cost: {
6579
+ input: 0,
6580
+ output: 0,
6581
+ cacheRead: 0,
6582
+ cacheWrite: 0,
6583
+ },
6584
+ contextWindow: 128000,
6585
+ maxTokens: 8192,
6586
+ },
6587
+ "deepseek-ai/deepseek-v3.2": {
6588
+ id: "deepseek-ai/deepseek-v3.2",
6589
+ name: "DeepSeek V3.2",
6590
+ api: "openai-completions",
6591
+ provider: "nvidia",
6592
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6593
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6594
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6595
+ reasoning: true,
6596
+ input: ["text"],
6597
+ cost: {
6598
+ input: 0,
6599
+ output: 0,
6600
+ cacheRead: 0,
6601
+ cacheWrite: 0,
6602
+ },
6603
+ contextWindow: 163840,
6604
+ maxTokens: 65536,
6605
+ },
6606
+ "google/gemma-3-27b-it": {
6607
+ id: "google/gemma-3-27b-it",
6608
+ name: "Gemma-3-27B-IT",
6609
+ api: "openai-completions",
6610
+ provider: "nvidia",
6611
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6612
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6613
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6614
+ reasoning: true,
6615
+ input: ["text", "image"],
6616
+ cost: {
6617
+ input: 0,
6618
+ output: 0,
6619
+ cacheRead: 0,
6620
+ cacheWrite: 0,
6621
+ },
6622
+ contextWindow: 131072,
6623
+ maxTokens: 8192,
6624
+ },
6461
6625
  "meta/llama-3.1-70b-instruct": {
6462
6626
  id: "meta/llama-3.1-70b-instruct",
6463
6627
  name: "Llama 3.1 70b Instruct",
@@ -6553,6 +6717,63 @@ export const MODELS = {
6553
6717
  contextWindow: 128000,
6554
6718
  maxTokens: 4096,
6555
6719
  },
6720
+ "minimaxai/minimax-m2.5": {
6721
+ id: "minimaxai/minimax-m2.5",
6722
+ name: "MiniMax-M2.5",
6723
+ api: "openai-completions",
6724
+ provider: "nvidia",
6725
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6726
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6727
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6728
+ reasoning: true,
6729
+ input: ["text"],
6730
+ cost: {
6731
+ input: 0,
6732
+ output: 0,
6733
+ cacheRead: 0,
6734
+ cacheWrite: 0,
6735
+ },
6736
+ contextWindow: 204800,
6737
+ maxTokens: 131072,
6738
+ },
6739
+ "mistralai/devstral-2-123b-instruct-2512": {
6740
+ id: "mistralai/devstral-2-123b-instruct-2512",
6741
+ name: "Devstral-2-123B-Instruct-2512",
6742
+ api: "openai-completions",
6743
+ provider: "nvidia",
6744
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6745
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6746
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6747
+ reasoning: true,
6748
+ input: ["text"],
6749
+ cost: {
6750
+ input: 0,
6751
+ output: 0,
6752
+ cacheRead: 0,
6753
+ cacheWrite: 0,
6754
+ },
6755
+ contextWindow: 262144,
6756
+ maxTokens: 262144,
6757
+ },
6758
+ "mistralai/mistral-7b-instruct-v03": {
6759
+ id: "mistralai/mistral-7b-instruct-v03",
6760
+ name: "Mistral-7B-Instruct-v0.3",
6761
+ api: "openai-completions",
6762
+ provider: "nvidia",
6763
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6764
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6765
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6766
+ reasoning: false,
6767
+ input: ["text"],
6768
+ cost: {
6769
+ input: 0,
6770
+ output: 0,
6771
+ cacheRead: 0,
6772
+ cacheWrite: 0,
6773
+ },
6774
+ contextWindow: 65536,
6775
+ maxTokens: 65536,
6776
+ },
6556
6777
  "mistralai/mistral-large-3-675b-instruct-2512": {
6557
6778
  id: "mistralai/mistral-large-3-675b-instruct-2512",
6558
6779
  name: "Mistral Large 3 675B Instruct 2512",
@@ -6591,6 +6812,101 @@ export const MODELS = {
6591
6812
  contextWindow: 128000,
6592
6813
  maxTokens: 8192,
6593
6814
  },
6815
+ "mistralai/mixtral-8x22b-instruct": {
6816
+ id: "mistralai/mixtral-8x22b-instruct",
6817
+ name: "Mistral: Mixtral 8x22B Instruct",
6818
+ api: "openai-completions",
6819
+ provider: "nvidia",
6820
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6821
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6822
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6823
+ reasoning: false,
6824
+ input: ["text"],
6825
+ cost: {
6826
+ input: 0,
6827
+ output: 0,
6828
+ cacheRead: 0,
6829
+ cacheWrite: 0,
6830
+ },
6831
+ contextWindow: 65536,
6832
+ maxTokens: 13108,
6833
+ },
6834
+ "mistralai/mixtral-8x7b-instruct": {
6835
+ id: "mistralai/mixtral-8x7b-instruct",
6836
+ name: "Mistral: Mixtral 8x7B Instruct",
6837
+ api: "openai-completions",
6838
+ provider: "nvidia",
6839
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6840
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6841
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6842
+ reasoning: false,
6843
+ input: ["text"],
6844
+ cost: {
6845
+ input: 0,
6846
+ output: 0,
6847
+ cacheRead: 0,
6848
+ cacheWrite: 0,
6849
+ },
6850
+ contextWindow: 32768,
6851
+ maxTokens: 16384,
6852
+ },
6853
+ "moonshotai/kimi-k2-instruct": {
6854
+ id: "moonshotai/kimi-k2-instruct",
6855
+ name: "Kimi K2 Instruct",
6856
+ api: "openai-completions",
6857
+ provider: "nvidia",
6858
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6859
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6860
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6861
+ reasoning: true,
6862
+ input: ["text"],
6863
+ cost: {
6864
+ input: 0,
6865
+ output: 0,
6866
+ cacheRead: 0,
6867
+ cacheWrite: 0,
6868
+ },
6869
+ contextWindow: 128000,
6870
+ maxTokens: 8192,
6871
+ },
6872
+ "moonshotai/kimi-k2-instruct-0905": {
6873
+ id: "moonshotai/kimi-k2-instruct-0905",
6874
+ name: "Kimi K2 0905",
6875
+ api: "openai-completions",
6876
+ provider: "nvidia",
6877
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6878
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6879
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6880
+ reasoning: false,
6881
+ input: ["text"],
6882
+ cost: {
6883
+ input: 0,
6884
+ output: 0,
6885
+ cacheRead: 0,
6886
+ cacheWrite: 0,
6887
+ },
6888
+ contextWindow: 262144,
6889
+ maxTokens: 262144,
6890
+ },
6891
+ "moonshotai/kimi-k2-thinking": {
6892
+ id: "moonshotai/kimi-k2-thinking",
6893
+ name: "Kimi K2 Thinking",
6894
+ api: "openai-completions",
6895
+ provider: "nvidia",
6896
+ baseUrl: "https://integrate.api.nvidia.com/v1",
6897
+ headers: { "NVCF-POLL-SECONDS": "3600" },
6898
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
6899
+ reasoning: true,
6900
+ input: ["text"],
6901
+ cost: {
6902
+ input: 0,
6903
+ output: 0,
6904
+ cacheRead: 0,
6905
+ cacheWrite: 0,
6906
+ },
6907
+ contextWindow: 262144,
6908
+ maxTokens: 262144,
6909
+ },
6594
6910
  "moonshotai/kimi-k2.6": {
6595
6911
  id: "moonshotai/kimi-k2.6",
6596
6912
  name: "Kimi K2.6",
@@ -6610,8 +6926,8 @@ export const MODELS = {
6610
6926
  contextWindow: 262144,
6611
6927
  maxTokens: 262144,
6612
6928
  },
6613
- "nvidia/llama-3.3-nemotron-super-49b-v1": {
6614
- id: "nvidia/llama-3.3-nemotron-super-49b-v1",
6929
+ "nvidia/llama-3_3-nemotron-super-49b-v1": {
6930
+ id: "nvidia/llama-3_3-nemotron-super-49b-v1",
6615
6931
  name: "Llama 3.3 Nemotron Super 49B v1",
6616
6932
  api: "openai-completions",
6617
6933
  provider: "nvidia",
@@ -6629,8 +6945,8 @@ export const MODELS = {
6629
6945
  contextWindow: 131072,
6630
6946
  maxTokens: 131072,
6631
6947
  },
6632
- "nvidia/llama-3.3-nemotron-super-49b-v1.5": {
6633
- id: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
6948
+ "nvidia/llama-3_3-nemotron-super-49b-v1_5": {
6949
+ id: "nvidia/llama-3_3-nemotron-super-49b-v1_5",
6634
6950
  name: "Llama 3.3 Nemotron Super 49B v1.5",
6635
6951
  api: "openai-completions",
6636
6952
  provider: "nvidia",
@@ -6705,6 +7021,25 @@ export const MODELS = {
6705
7021
  contextWindow: 262144,
6706
7022
  maxTokens: 262144,
6707
7023
  },
7024
+ "nvidia/nemotron-voicechat": {
7025
+ id: "nvidia/nemotron-voicechat",
7026
+ name: "nemotron-voicechat",
7027
+ api: "openai-completions",
7028
+ provider: "nvidia",
7029
+ baseUrl: "https://integrate.api.nvidia.com/v1",
7030
+ headers: { "NVCF-POLL-SECONDS": "3600" },
7031
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
7032
+ reasoning: false,
7033
+ input: ["text"],
7034
+ cost: {
7035
+ input: 0,
7036
+ output: 0,
7037
+ cacheRead: 0,
7038
+ cacheWrite: 0,
7039
+ },
7040
+ contextWindow: 128000,
7041
+ maxTokens: 8192,
7042
+ },
6708
7043
  "nvidia/nvidia-nemotron-nano-9b-v2": {
6709
7044
  id: "nvidia/nvidia-nemotron-nano-9b-v2",
6710
7045
  name: "nvidia-nemotron-nano-9b-v2",
@@ -6743,6 +7078,25 @@ export const MODELS = {
6743
7078
  contextWindow: 131072,
6744
7079
  maxTokens: 32768,
6745
7080
  },
7081
+ "qwen/qwen2.5-coder-32b-instruct": {
7082
+ id: "qwen/qwen2.5-coder-32b-instruct",
7083
+ name: "Qwen2.5 Coder 32b Instruct",
7084
+ api: "openai-completions",
7085
+ provider: "nvidia",
7086
+ baseUrl: "https://integrate.api.nvidia.com/v1",
7087
+ headers: { "NVCF-POLL-SECONDS": "3600" },
7088
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
7089
+ reasoning: false,
7090
+ input: ["text"],
7091
+ cost: {
7092
+ input: 0,
7093
+ output: 0,
7094
+ cacheRead: 0,
7095
+ cacheWrite: 0,
7096
+ },
7097
+ contextWindow: 128000,
7098
+ maxTokens: 4096,
7099
+ },
6746
7100
  "qwen/qwen3-coder-480b-a35b-instruct": {
6747
7101
  id: "qwen/qwen3-coder-480b-a35b-instruct",
6748
7102
  name: "Qwen3 Coder 480B A35B Instruct",
@@ -6762,6 +7116,25 @@ export const MODELS = {
6762
7116
  contextWindow: 262144,
6763
7117
  maxTokens: 66536,
6764
7118
  },
7119
+ "qwen/qwen3-next-80b-a3b-thinking": {
7120
+ id: "qwen/qwen3-next-80b-a3b-thinking",
7121
+ name: "Qwen3-Next-80B-A3B-Thinking",
7122
+ api: "openai-completions",
7123
+ provider: "nvidia",
7124
+ baseUrl: "https://integrate.api.nvidia.com/v1",
7125
+ headers: { "NVCF-POLL-SECONDS": "3600" },
7126
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
7127
+ reasoning: true,
7128
+ input: ["text"],
7129
+ cost: {
7130
+ input: 0,
7131
+ output: 0,
7132
+ cacheRead: 0,
7133
+ cacheWrite: 0,
7134
+ },
7135
+ contextWindow: 262144,
7136
+ maxTokens: 16384,
7137
+ },
6765
7138
  "qwen/qwen3.5-122b-a10b": {
6766
7139
  id: "qwen/qwen3.5-122b-a10b",
6767
7140
  name: "Qwen3.5 122B-A10B",
@@ -6819,6 +7192,25 @@ export const MODELS = {
6819
7192
  contextWindow: 256000,
6820
7193
  maxTokens: 16384,
6821
7194
  },
7195
+ "upstage/solar-10_7b-instruct": {
7196
+ id: "upstage/solar-10_7b-instruct",
7197
+ name: "solar-10.7b-instruct",
7198
+ api: "openai-completions",
7199
+ provider: "nvidia",
7200
+ baseUrl: "https://integrate.api.nvidia.com/v1",
7201
+ headers: { "NVCF-POLL-SECONDS": "3600" },
7202
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
7203
+ reasoning: false,
7204
+ input: ["text"],
7205
+ cost: {
7206
+ input: 0,
7207
+ output: 0,
7208
+ cacheRead: 0,
7209
+ cacheWrite: 0,
7210
+ },
7211
+ contextWindow: 128000,
7212
+ maxTokens: 8192,
7213
+ },
6822
7214
  "z-ai/glm-5.1": {
6823
7215
  id: "z-ai/glm-5.1",
6824
7216
  name: "GLM-5.1",
@@ -6838,6 +7230,25 @@ export const MODELS = {
6838
7230
  contextWindow: 131072,
6839
7231
  maxTokens: 131072,
6840
7232
  },
7233
+ "z-ai/glm4.7": {
7234
+ id: "z-ai/glm4.7",
7235
+ name: "GLM-4.7",
7236
+ api: "openai-completions",
7237
+ provider: "nvidia",
7238
+ baseUrl: "https://integrate.api.nvidia.com/v1",
7239
+ headers: { "NVCF-POLL-SECONDS": "3600" },
7240
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false },
7241
+ reasoning: true,
7242
+ input: ["text"],
7243
+ cost: {
7244
+ input: 0,
7245
+ output: 0,
7246
+ cacheRead: 0,
7247
+ cacheWrite: 0,
7248
+ },
7249
+ contextWindow: 204800,
7250
+ maxTokens: 131072,
7251
+ },
6841
7252
  },
6842
7253
  "openai": {
6843
7254
  "gpt-4": {
@@ -8367,9 +8778,9 @@ export const MODELS = {
8367
8778
  contextWindow: 200000,
8368
8779
  maxTokens: 32000,
8369
8780
  },
8370
- "nemotron-3-super-free": {
8371
- id: "nemotron-3-super-free",
8372
- name: "Nemotron 3 Super Free",
8781
+ "nemotron-3-ultra-free": {
8782
+ id: "nemotron-3-ultra-free",
8783
+ name: "Nemotron 3 Ultra Free",
8373
8784
  api: "openai-completions",
8374
8785
  provider: "opencode",
8375
8786
  baseUrl: "https://opencode.ai/zen/v1",
@@ -8381,7 +8792,7 @@ export const MODELS = {
8381
8792
  cacheRead: 0,
8382
8793
  cacheWrite: 0,
8383
8794
  },
8384
- contextWindow: 204800,
8795
+ contextWindow: 1000000,
8385
8796
  maxTokens: 128000,
8386
8797
  },
8387
8798
  "qwen3.5-plus": {
@@ -8628,7 +9039,7 @@ export const MODELS = {
8628
9039
  cacheRead: 0.05,
8629
9040
  cacheWrite: 0.625,
8630
9041
  },
8631
- contextWindow: 262144,
9042
+ contextWindow: 1000000,
8632
9043
  maxTokens: 65536,
8633
9044
  },
8634
9045
  "qwen3.7-max": {
@@ -8662,7 +9073,7 @@ export const MODELS = {
8662
9073
  cacheRead: 0.04,
8663
9074
  cacheWrite: 0.5,
8664
9075
  },
8665
- contextWindow: 262144,
9076
+ contextWindow: 1000000,
8666
9077
  maxTokens: 65536,
8667
9078
  },
8668
9079
  },
@@ -9690,12 +10101,12 @@ export const MODELS = {
9690
10101
  input: ["text", "image"],
9691
10102
  cost: {
9692
10103
  input: 0.12,
9693
- output: 0.37,
9694
- cacheRead: 0,
10104
+ output: 0.36,
10105
+ cacheRead: 0.09,
9695
10106
  cacheWrite: 0,
9696
10107
  },
9697
10108
  contextWindow: 262144,
9698
- maxTokens: 16384,
10109
+ maxTokens: 8192,
9699
10110
  },
9700
10111
  "google/gemma-4-31b-it:free": {
9701
10112
  id: "google/gemma-4-31b-it:free",
@@ -9844,7 +10255,7 @@ export const MODELS = {
9844
10255
  input: ["text"],
9845
10256
  cost: {
9846
10257
  input: 0.02,
9847
- output: 0.049999999999999996,
10258
+ output: 0.03,
9848
10259
  cacheRead: 0,
9849
10260
  cacheWrite: 0,
9850
10261
  },
@@ -10533,6 +10944,23 @@ export const MODELS = {
10533
10944
  contextWindow: 1000000,
10534
10945
  maxTokens: 262144,
10535
10946
  },
10947
+ "nvidia/nemotron-3-ultra-550b-a55b": {
10948
+ id: "nvidia/nemotron-3-ultra-550b-a55b",
10949
+ name: "NVIDIA: Nemotron 3 Ultra",
10950
+ api: "openai-completions",
10951
+ provider: "openrouter",
10952
+ baseUrl: "https://openrouter.ai/api/v1",
10953
+ reasoning: true,
10954
+ input: ["text"],
10955
+ cost: {
10956
+ input: 0.5,
10957
+ output: 2.5,
10958
+ cacheRead: 0.15,
10959
+ cacheWrite: 0,
10960
+ },
10961
+ contextWindow: 1000000,
10962
+ maxTokens: 16384,
10963
+ },
10536
10964
  "nvidia/nemotron-3-ultra-550b-a55b:free": {
10537
10965
  id: "nvidia/nemotron-3-ultra-550b-a55b:free",
10538
10966
  name: "NVIDIA: Nemotron 3 Ultra (free)",
@@ -11801,7 +12229,7 @@ export const MODELS = {
11801
12229
  cacheWrite: 0,
11802
12230
  },
11803
12231
  contextWindow: 131072,
11804
- maxTokens: 20000,
12232
+ maxTokens: 16384,
11805
12233
  },
11806
12234
  "qwen/qwen3-30b-a3b-instruct-2507": {
11807
12235
  id: "qwen/qwen3-30b-a3b-instruct-2507",
@@ -12466,23 +12894,6 @@ export const MODELS = {
12466
12894
  contextWindow: 256000,
12467
12895
  maxTokens: 128000,
12468
12896
  },
12469
- "sao10k/l3-euryale-70b": {
12470
- id: "sao10k/l3-euryale-70b",
12471
- name: "Sao10k: Llama 3 Euryale 70B v2.1",
12472
- api: "openai-completions",
12473
- provider: "openrouter",
12474
- baseUrl: "https://openrouter.ai/api/v1",
12475
- reasoning: false,
12476
- input: ["text"],
12477
- cost: {
12478
- input: 1.48,
12479
- output: 1.48,
12480
- cacheRead: 0,
12481
- cacheWrite: 0,
12482
- },
12483
- contextWindow: 8192,
12484
- maxTokens: 8192,
12485
- },
12486
12897
  "sao10k/l3.1-euryale-70b": {
12487
12898
  id: "sao10k/l3.1-euryale-70b",
12488
12899
  name: "Sao10K: Llama 3.1 Euryale 70B v2.2",
@@ -13216,7 +13627,7 @@ export const MODELS = {
13216
13627
  },
13217
13628
  "deepseek-ai/DeepSeek-V3": {
13218
13629
  id: "deepseek-ai/DeepSeek-V3",
13219
- name: "DeepSeek V3",
13630
+ name: "DeepSeek-V3",
13220
13631
  api: "openai-completions",
13221
13632
  provider: "together",
13222
13633
  baseUrl: "https://api.together.ai/v1",
@@ -13364,6 +13775,25 @@ export const MODELS = {
13364
13775
  contextWindow: 262144,
13365
13776
  maxTokens: 131000,
13366
13777
  },
13778
+ "nvidia/nemotron-3-ultra-550b-a55b": {
13779
+ id: "nvidia/nemotron-3-ultra-550b-a55b",
13780
+ name: "Nemotron 3 Ultra 550B A55B",
13781
+ api: "openai-completions",
13782
+ provider: "together",
13783
+ baseUrl: "https://api.together.ai/v1",
13784
+ compat: { "supportsStore": false, "supportsDeveloperRole": false, "supportsReasoningEffort": false, "maxTokensField": "max_tokens", "supportsStrictMode": false, "supportsLongCacheRetention": false, "thinkingFormat": "together" },
13785
+ reasoning: true,
13786
+ thinkingLevelMap: { "minimal": null, "low": null, "medium": null },
13787
+ input: ["text"],
13788
+ cost: {
13789
+ input: 0.6,
13790
+ output: 3.6,
13791
+ cacheRead: 0.2,
13792
+ cacheWrite: 0,
13793
+ },
13794
+ contextWindow: 512300,
13795
+ maxTokens: 512300,
13796
+ },
13367
13797
  "openai/gpt-oss-120b": {
13368
13798
  id: "openai/gpt-oss-120b",
13369
13799
  name: "GPT OSS 120B",
@@ -15060,6 +15490,23 @@ export const MODELS = {
15060
15490
  contextWindow: 256000,
15061
15491
  maxTokens: 32000,
15062
15492
  },
15493
+ "nvidia/nemotron-3-ultra-550b-a55b": {
15494
+ id: "nvidia/nemotron-3-ultra-550b-a55b",
15495
+ name: "Nemotron 3 Ultra",
15496
+ api: "anthropic-messages",
15497
+ provider: "vercel-ai-gateway",
15498
+ baseUrl: "https://ai-gateway.vercel.sh",
15499
+ reasoning: true,
15500
+ input: ["text"],
15501
+ cost: {
15502
+ input: 0.6,
15503
+ output: 2.4,
15504
+ cacheRead: 0.12,
15505
+ cacheWrite: 0,
15506
+ },
15507
+ contextWindow: 1000000,
15508
+ maxTokens: 65000,
15509
+ },
15063
15510
  "nvidia/nemotron-nano-12b-v2-vl": {
15064
15511
  id: "nvidia/nemotron-nano-12b-v2-vl",
15065
15512
  name: "Nvidia Nemotron Nano 12B V2 VL",