@lobehub/chat 1.128.9 → 1.129.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.env.example +5 -0
  2. package/.github/workflows/sync-database-schema.yml +0 -3
  3. package/CHANGELOG.md +50 -0
  4. package/Dockerfile +3 -1
  5. package/Dockerfile.database +3 -1
  6. package/Dockerfile.pglite +3 -1
  7. package/changelog/v1.json +18 -0
  8. package/docs/development/database-schema.dbml +2 -2
  9. package/docs/self-hosting/environment-variables/model-provider.mdx +24 -0
  10. package/docs/self-hosting/environment-variables/model-provider.zh-CN.mdx +27 -1
  11. package/docs/usage/providers/vercel-ai-gateway.mdx +62 -0
  12. package/docs/usage/providers/vercel-ai-gateway.zh-CN.mdx +61 -0
  13. package/next.config.ts +1 -46
  14. package/package.json +1 -2
  15. package/packages/agent-runtime/examples/tools-calling.ts +1 -1
  16. package/packages/const/src/layoutTokens.ts +1 -1
  17. package/packages/context-engine/src/base/BaseProcessor.ts +2 -1
  18. package/packages/database/migrations/0031_add_agent_index.sql +6 -2
  19. package/packages/database/migrations/0032_improve_agents_field.sql +6 -0
  20. package/packages/database/migrations/meta/0032_snapshot.json +6447 -0
  21. package/packages/database/migrations/meta/_journal.json +7 -0
  22. package/packages/database/src/core/migrations.json +14 -3
  23. package/packages/database/src/schemas/agent.ts +2 -2
  24. package/packages/database/src/server/models/__tests__/adapter.test.ts +1 -1
  25. package/packages/model-bank/package.json +2 -1
  26. package/packages/model-bank/src/aiModels/index.ts +3 -0
  27. package/packages/model-bank/src/aiModels/vercelaigateway.ts +1803 -0
  28. package/packages/model-runtime/src/const/modelProvider.ts +1 -0
  29. package/packages/model-runtime/src/providers/vercelaigateway/index.ts +62 -0
  30. package/packages/model-runtime/src/runtimeMap.ts +2 -0
  31. package/packages/types/src/user/settings/keyVaults.ts +1 -0
  32. package/src/app/(backend)/webapi/chat/azureai/route.test.ts +25 -0
  33. package/src/app/(backend)/webapi/chat/azureai/route.ts +6 -0
  34. package/src/app/[variants]/(main)/chat/(workspace)/_layout/Desktop/ChatHeader/index.tsx +8 -1
  35. package/src/components/Error/index.tsx +3 -7
  36. package/src/config/modelProviders/index.ts +4 -0
  37. package/src/config/modelProviders/vercelaigateway.ts +21 -0
  38. package/src/envs/llm.ts +6 -0
  39. package/sentry.client.config.ts +0 -30
  40. package/sentry.edge.config.ts +0 -17
  41. package/sentry.server.config.ts +0 -19
  42. package/src/app/[variants]/global-error.tsx +0 -20
  43. package/src/components/Error/sentryCaptureException.ts +0 -9
@@ -0,0 +1,1803 @@
1
+ import { AIChatModelCard, AIEmbeddingModelCard } from '../types/aiModel';
2
+
3
+ // Based on the model list provided by Vercel AI Gateway, ordered by SOTA models, then large models, then small models
4
+ const vercelAIGatewayChatModels: AIChatModelCard[] = [
5
+ {
6
+ abilities: {
7
+ functionCall: true,
8
+ reasoning: true,
9
+ vision: true,
10
+ },
11
+ contextWindowTokens: 1_048_576,
12
+ description:
13
+ 'Gemini 2.5 Pro 是我们最先进的推理 Gemini 模型,能够解决复杂问题。它具有 200 万 token 的上下文窗口,支持包括文本、图像、音频、视频和 PDF 文档在内的多模态输入。',
14
+ displayName: 'Gemini 2.5 Pro',
15
+ enabled: true,
16
+ id: 'google/gemini-2.5-pro',
17
+ pricing: {
18
+ currency: 'USD',
19
+ units: [
20
+ { name: 'textInput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
21
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
22
+ ],
23
+ },
24
+ type: 'chat',
25
+ },
26
+ {
27
+ abilities: {
28
+ functionCall: true,
29
+ reasoning: true,
30
+ vision: true,
31
+ },
32
+ contextWindowTokens: 200_000,
33
+ description:
34
+ 'Claude Opus 4.1 是 Opus 4 的即插即用替代品,为实际编码和代理任务提供卓越的性能和精度。Opus 4.1 将最先进的编码性能提升到 SWE-bench Verified 的 74.5%,并以更高的严谨性和对细节的关注处理复杂的多步问题。',
35
+ displayName: 'Claude Opus 4.1',
36
+ id: 'anthropic/claude-opus-4.1',
37
+ pricing: {
38
+ currency: 'USD',
39
+ units: [
40
+ { name: 'textInput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
41
+ { name: 'textOutput', rate: 75, strategy: 'fixed', unit: 'millionTokens' },
42
+ { name: 'textInput_cacheRead', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
43
+ { name: 'textInput_cacheWrite', rate: 18.75, strategy: 'fixed', unit: 'millionTokens' },
44
+ ],
45
+ },
46
+ type: 'chat',
47
+ },
48
+ {
49
+ abilities: {
50
+ functionCall: true,
51
+ reasoning: true,
52
+ vision: true,
53
+ },
54
+ contextWindowTokens: 200_000,
55
+ description:
56
+ 'Claude Opus 4 是 Anthropic 迄今为止最强大的模型,也是世界上最好的编码模型,在 SWE-bench (72.5%) 和 Terminal-bench (43.2%) 上领先。它为需要专注努力和数千个步骤的长期任务提供持续性能,能够连续工作数小时——显著扩展了 AI 代理的能力。',
57
+ displayName: 'Claude Opus 4',
58
+ id: 'anthropic/claude-opus-4',
59
+ pricing: {
60
+ currency: 'USD',
61
+ units: [
62
+ { name: 'textInput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
63
+ { name: 'textOutput', rate: 75, strategy: 'fixed', unit: 'millionTokens' },
64
+ { name: 'textInput_cacheRead', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
65
+ { name: 'textInput_cacheWrite', rate: 18.75, strategy: 'fixed', unit: 'millionTokens' },
66
+ ],
67
+ },
68
+ type: 'chat',
69
+ },
70
+ {
71
+ abilities: {
72
+ functionCall: true,
73
+ reasoning: true,
74
+ },
75
+ contextWindowTokens: 200_000,
76
+ description:
77
+ 'Claude Sonnet 4 在 Sonnet 3.7 的行业领先能力基础上进行了显著改进,在编码方面表现出色,在 SWE-bench 上达到了最先进的 72.7%。该模型在性能和效率之间取得了平衡,适用于内部和外部用例,并通过增强的可控性实现对实现的更大控制。',
78
+ displayName: 'Claude Sonnet 4',
79
+ enabled: true,
80
+ id: 'anthropic/claude-sonnet-4',
81
+ pricing: {
82
+ currency: 'USD',
83
+ units: [
84
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
85
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
86
+ { name: 'textInput_cacheRead', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
87
+ { name: 'textInput_cacheWrite', rate: 3.75, strategy: 'fixed', unit: 'millionTokens' },
88
+ ],
89
+ },
90
+ type: 'chat',
91
+ },
92
+ {
93
+ abilities: {
94
+ functionCall: true,
95
+ reasoning: true,
96
+ },
97
+ contextWindowTokens: 400_000,
98
+ description:
99
+ 'GPT-5 是 OpenAI 的旗舰语言模型,在复杂推理、广泛的现实世界知识、代码密集型和多步代理任务方面表现出色。',
100
+ displayName: 'GPT-5',
101
+ enabled: true,
102
+ id: 'openai/gpt-5',
103
+ pricing: {
104
+ currency: 'USD',
105
+ units: [
106
+ { name: 'textInput', rate: 1.25, strategy: 'fixed', unit: 'millionTokens' },
107
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
108
+ { name: 'textInput_cacheRead', rate: 0.125, strategy: 'fixed', unit: 'millionTokens' },
109
+ ],
110
+ },
111
+ type: 'chat',
112
+ },
113
+ {
114
+ abilities: {
115
+ functionCall: true,
116
+ reasoning: true,
117
+ vision: true,
118
+ },
119
+ contextWindowTokens: 200_000,
120
+ description:
121
+ 'OpenAI 的 o3 是最强大的推理模型,在编码、数学、科学和视觉感知方面设立了新的最先进水平。它擅长需要多方面分析的复杂查询,在分析图像、图表和图形方面具有特殊优势。',
122
+ displayName: 'o3',
123
+ enabled: true,
124
+ id: 'openai/o3',
125
+ pricing: {
126
+ currency: 'USD',
127
+ units: [
128
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
129
+ { name: 'textOutput', rate: 8, strategy: 'fixed', unit: 'millionTokens' },
130
+ { name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
131
+ ],
132
+ },
133
+ type: 'chat',
134
+ },
135
+ {
136
+ abilities: {
137
+ functionCall: true,
138
+ reasoning: true,
139
+ vision: true,
140
+ },
141
+ contextWindowTokens: 200_000,
142
+ description:
143
+ 'OpenAI 的 o1 是旗舰推理模型,专为需要深度思考的复杂问题而设计。它为复杂多步任务提供了强大的推理能力和更高的准确性。',
144
+ displayName: 'o1',
145
+ id: 'openai/o1',
146
+ pricing: {
147
+ currency: 'USD',
148
+ units: [
149
+ { name: 'textInput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
150
+ { name: 'textOutput', rate: 60, strategy: 'fixed', unit: 'millionTokens' },
151
+ { name: 'textInput_cacheRead', rate: 7.5, strategy: 'fixed', unit: 'millionTokens' },
152
+ ],
153
+ },
154
+ type: 'chat',
155
+ },
156
+ {
157
+ abilities: {
158
+ functionCall: true,
159
+ reasoning: true,
160
+ vision: true,
161
+ },
162
+ contextWindowTokens: 200_000,
163
+ description:
164
+ 'Claude 3.7 Sonnet 是第一个混合推理模型,也是 Anthropic 迄今为止最智能的模型。它在编码、内容生成、数据分析和规划任务方面提供了最先进的性能,在其前身 Claude 3.5 Sonnet 的软件工程和计算机使用能力基础上进行了构建。',
165
+ displayName: 'Claude 3.7 Sonnet',
166
+ id: 'anthropic/claude-3.7-sonnet',
167
+ pricing: {
168
+ currency: 'USD',
169
+ units: [
170
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
171
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
172
+ { name: 'textInput_cacheRead', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
173
+ { name: 'textInput_cacheWrite', rate: 3.75, strategy: 'fixed', unit: 'millionTokens' },
174
+ ],
175
+ },
176
+ type: 'chat',
177
+ },
178
+ {
179
+ abilities: {
180
+ functionCall: true,
181
+ reasoning: true,
182
+ vision: true,
183
+ },
184
+ contextWindowTokens: 200_000,
185
+ description:
186
+ 'Claude 3.5 Sonnet 在智能和速度之间达到了理想的平衡——特别是对于企业工作负载。与同类产品相比,它以更低的成本提供了强大的性能,并专为大规模 AI 部署中的高耐久性而设计。',
187
+ displayName: 'Claude 3.5 Sonnet',
188
+ id: 'anthropic/claude-3.5-sonnet',
189
+ pricing: {
190
+ currency: 'USD',
191
+ units: [
192
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
193
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
194
+ { name: 'textInput_cacheRead', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
195
+ { name: 'textInput_cacheWrite', rate: 3.75, strategy: 'fixed', unit: 'millionTokens' },
196
+ ],
197
+ },
198
+ type: 'chat',
199
+ },
200
+ {
201
+ abilities: {
202
+ functionCall: true,
203
+ reasoning: true,
204
+ vision: true,
205
+ },
206
+ contextWindowTokens: 200_000,
207
+ description:
208
+ 'Claude 3 Opus 是 Anthropic 最智能的模型,在高度复杂的任务上具有市场领先的性能。它能够以卓越的流畅度和类人理解力驾驭开放式提示和前所未见的场景。',
209
+ displayName: 'Claude 3 Opus',
210
+ id: 'anthropic/claude-3-opus',
211
+ pricing: {
212
+ currency: 'USD',
213
+ units: [
214
+ { name: 'textInput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
215
+ { name: 'textOutput', rate: 75, strategy: 'fixed', unit: 'millionTokens' },
216
+ { name: 'textInput_cacheRead', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
217
+ { name: 'textInput_cacheWrite', rate: 18.75, strategy: 'fixed', unit: 'millionTokens' },
218
+ ],
219
+ },
220
+ type: 'chat',
221
+ },
222
+ {
223
+ abilities: {
224
+ functionCall: true,
225
+ reasoning: true,
226
+ vision: true,
227
+ },
228
+ contextWindowTokens: 128_000,
229
+ description:
230
+ 'GPT-4o 来自 OpenAI,具有广泛的通用知识和领域专长,能够遵循自然语言的复杂指令并准确解决难题。它以更快、更便宜的 API 匹配 GPT-4 Turbo 的性能。',
231
+ displayName: 'GPT-4o',
232
+ id: 'openai/gpt-4o',
233
+ pricing: {
234
+ currency: 'USD',
235
+ units: [
236
+ { name: 'textInput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
237
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
238
+ { name: 'textInput_cacheRead', rate: 1.25, strategy: 'fixed', unit: 'millionTokens' },
239
+ ],
240
+ },
241
+ type: 'chat',
242
+ },
243
+ {
244
+ abilities: {
245
+ functionCall: true,
246
+ vision: true,
247
+ },
248
+ contextWindowTokens: 400_000,
249
+ description:
250
+ 'GPT-5 mini 是一个成本优化的模型,在推理/聊天任务方面表现出色。它在速度、成本和能力之间提供了最佳平衡。',
251
+ displayName: 'GPT-5 mini',
252
+ id: 'openai/gpt-5-mini',
253
+ pricing: {
254
+ currency: 'USD',
255
+ units: [
256
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
257
+ { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
258
+ { name: 'textInput_cacheRead', rate: 0.025, strategy: 'fixed', unit: 'millionTokens' },
259
+ ],
260
+ },
261
+ type: 'chat',
262
+ },
263
+ {
264
+ abilities: {
265
+ functionCall: true,
266
+ vision: true,
267
+ },
268
+ contextWindowTokens: 400_000,
269
+ description: 'GPT-5 nano 是一个高吞吐量模型,在简单指令或分类任务方面表现出色。',
270
+ displayName: 'GPT-5 nano',
271
+ id: 'openai/gpt-5-nano',
272
+ pricing: {
273
+ currency: 'USD',
274
+ units: [
275
+ { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
276
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
277
+ { name: 'textInput_cacheRead', rate: 0.005, strategy: 'fixed', unit: 'millionTokens' },
278
+ ],
279
+ },
280
+ type: 'chat',
281
+ },
282
+ {
283
+ abilities: {
284
+ functionCall: true,
285
+ vision: true,
286
+ },
287
+ contextWindowTokens: 131_072,
288
+ description: '极其能干的通用大型语言模型,具有强大、可控的推理能力',
289
+ displayName: 'gpt-oss-120b',
290
+ id: 'openai/gpt-oss-120b',
291
+ pricing: {
292
+ currency: 'USD',
293
+ units: [
294
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
295
+ { name: 'textOutput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
296
+ ],
297
+ },
298
+ type: 'chat',
299
+ },
300
+ {
301
+ abilities: {
302
+ functionCall: true,
303
+ },
304
+ contextWindowTokens: 128_000,
305
+ description:
306
+ '一个紧凑、开源权重的语言模型,针对低延迟和资源受限环境进行了优化,包括本地和边缘部署',
307
+ displayName: 'gpt-oss-20b',
308
+ id: 'openai/gpt-oss-20b',
309
+ pricing: {
310
+ currency: 'USD',
311
+ units: [
312
+ { name: 'textInput', rate: 0.07, strategy: 'fixed', unit: 'millionTokens' },
313
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
314
+ ],
315
+ },
316
+ type: 'chat',
317
+ },
318
+ {
319
+ abilities: {
320
+ functionCall: true,
321
+ vision: true,
322
+ },
323
+ contextWindowTokens: 200_000,
324
+ description:
325
+ 'o3-mini 是 OpenAI 最新的小型推理模型,在 o1-mini 的相同成本和延迟目标下提供高智能。',
326
+ displayName: 'o3-mini',
327
+ id: 'openai/o3-mini',
328
+ pricing: {
329
+ currency: 'USD',
330
+ units: [
331
+ { name: 'textInput', rate: 1.1, strategy: 'fixed', unit: 'millionTokens' },
332
+ { name: 'textOutput', rate: 4.4, strategy: 'fixed', unit: 'millionTokens' },
333
+ { name: 'textInput_cacheRead', rate: 0.55, strategy: 'fixed', unit: 'millionTokens' },
334
+ ],
335
+ },
336
+ type: 'chat',
337
+ },
338
+ {
339
+ abilities: {
340
+ functionCall: true,
341
+ vision: true,
342
+ },
343
+ contextWindowTokens: 200_000,
344
+ description:
345
+ 'OpenAI 的 o4-mini 提供快速、成本效益高的推理,在其尺寸上具有卓越性能,特别是在数学(AIME 基准测试中表现最佳)、编码和视觉任务方面。',
346
+ displayName: 'o4-mini',
347
+ id: 'openai/o4-mini',
348
+ pricing: {
349
+ currency: 'USD',
350
+ units: [
351
+ { name: 'textInput', rate: 1.1, strategy: 'fixed', unit: 'millionTokens' },
352
+ { name: 'textOutput', rate: 4.4, strategy: 'fixed', unit: 'millionTokens' },
353
+ { name: 'textInput_cacheRead', rate: 0.275, strategy: 'fixed', unit: 'millionTokens' },
354
+ ],
355
+ },
356
+ type: 'chat',
357
+ },
358
+ {
359
+ abilities: {
360
+ functionCall: true,
361
+ reasoning: true,
362
+ vision: true,
363
+ },
364
+ contextWindowTokens: 1_047_576,
365
+ description: 'GPT 4.1 是 OpenAI 的旗舰模型,适用于复杂任务。它非常适合跨领域解决问题。',
366
+ displayName: 'GPT-4.1',
367
+ id: 'openai/gpt-4.1',
368
+ pricing: {
369
+ currency: 'USD',
370
+ units: [
371
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
372
+ { name: 'textOutput', rate: 8, strategy: 'fixed', unit: 'millionTokens' },
373
+ { name: 'textInput_cacheRead', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
374
+ ],
375
+ },
376
+ type: 'chat',
377
+ },
378
+ {
379
+ abilities: {
380
+ functionCall: true,
381
+ reasoning: true,
382
+ },
383
+ contextWindowTokens: 131_072,
384
+ description:
385
+ 'Kimi K2 是由月之暗面 AI 开发的大规模混合专家 (MoE) 语言模型,具有 1 万亿总参数和每次前向传递 320 亿激活参数。它针对代理能力进行了优化,包括高级工具使用、推理和代码合成。',
386
+ displayName: 'Kimi K2',
387
+ enabled: true,
388
+ id: 'moonshotai/kimi-k2',
389
+ pricing: {
390
+ currency: 'USD',
391
+ units: [
392
+ { name: 'textInput', rate: 0.55, strategy: 'fixed', unit: 'millionTokens' },
393
+ { name: 'textOutput', rate: 2.2, strategy: 'fixed', unit: 'millionTokens' },
394
+ ],
395
+ },
396
+ type: 'chat',
397
+ },
398
+ {
399
+ abilities: {
400
+ functionCall: true,
401
+ },
402
+ contextWindowTokens: 262_144,
403
+ description:
404
+ 'Qwen3-Coder-480B-A35B-Instruct 是 Qwen 最具代理性的代码模型,在代理编码、代理浏览器使用和其他基础编码任务方面具有显著性能,达到了与 Claude Sonnet 相当的结果。',
405
+ displayName: 'Qwen3 Coder 480B A35B Instruct',
406
+ enabled: true,
407
+ id: 'alibaba/qwen3-coder',
408
+ pricing: {
409
+ currency: 'USD',
410
+ units: [
411
+ { name: 'textInput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
412
+ { name: 'textOutput', rate: 1.6, strategy: 'fixed', unit: 'millionTokens' },
413
+ ],
414
+ },
415
+ type: 'chat',
416
+ },
417
+ {
418
+ abilities: {
419
+ functionCall: true,
420
+ reasoning: true,
421
+ },
422
+ contextWindowTokens: 40_960,
423
+ description:
424
+ 'Qwen3 是 Qwen 系列中最新一代的大型语言模型,提供了一套全面的密集和混合专家 (MoE) 模型。基于广泛的训练构建,Qwen3 在推理、指令遵循、代理能力和多语言支持方面提供了突破性的进展。',
425
+ displayName: 'Qwen3 235B A22B Instruct 2507',
426
+ id: 'alibaba/qwen-3-235b',
427
+ pricing: {
428
+ currency: 'USD',
429
+ units: [
430
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
431
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
432
+ ],
433
+ },
434
+ type: 'chat',
435
+ },
436
+ {
437
+ abilities: {
438
+ functionCall: true,
439
+ reasoning: true,
440
+ },
441
+ contextWindowTokens: 131_072,
442
+ description:
443
+ 'GLM-4.5 系列模型是专门为智能体设计的基础模型。旗舰 GLM-4.5 集成了 3550 亿总参数(320 亿活跃),统一了推理、编码和代理能力以解决复杂的应用需求。作为混合推理系统,它提供双重操作模式。',
444
+ displayName: 'GLM-4.5',
445
+ enabled: true,
446
+ id: 'zai/glm-4.5',
447
+ pricing: {
448
+ currency: 'USD',
449
+ units: [
450
+ { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
451
+ { name: 'textOutput', rate: 2.2, strategy: 'fixed', unit: 'millionTokens' },
452
+ ],
453
+ },
454
+ type: 'chat',
455
+ },
456
+ {
457
+ abilities: {
458
+ functionCall: true,
459
+ reasoning: true,
460
+ },
461
+ contextWindowTokens: 128_000,
462
+ description:
463
+ 'GLM-4.5 和 GLM-4.5-Air 是我们最新的旗舰模型,专门设计为面向代理应用的基础模型。两者都利用混合专家 (MoE) 架构。GLM-4.5 的总参数数为 3550 亿,每次前向传递有 320 亿活跃参数,而 GLM-4.5-Air 采用更简化的设计,总参数数为 1060 亿,活跃参数为 120 亿。',
464
+ displayName: 'GLM 4.5 Air',
465
+ id: 'zai/glm-4.5-air',
466
+ pricing: {
467
+ currency: 'USD',
468
+ units: [
469
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
470
+ { name: 'textOutput', rate: 1.1, strategy: 'fixed', unit: 'millionTokens' },
471
+ ],
472
+ },
473
+ type: 'chat',
474
+ },
475
+ {
476
+ abilities: {
477
+ functionCall: true,
478
+ reasoning: true,
479
+ vision: true,
480
+ },
481
+ contextWindowTokens: 65_536,
482
+ description:
483
+ 'GLM-4.5V 基于 GLM-4.5-Air 基础模型构建,继承了 GLM-4.1V-Thinking 的经过验证的技术,同时通过强大的 1060 亿参数 MoE 架构实现了有效的扩展。',
484
+ displayName: 'GLM 4.5V',
485
+ id: 'zai/glm-4.5v',
486
+ pricing: {
487
+ currency: 'USD',
488
+ units: [
489
+ { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
490
+ { name: 'textOutput', rate: 1.8, strategy: 'fixed', unit: 'millionTokens' },
491
+ ],
492
+ },
493
+ type: 'chat',
494
+ },
495
+ {
496
+ abilities: {
497
+ functionCall: true,
498
+ reasoning: true,
499
+ },
500
+ contextWindowTokens: 40_960,
501
+ description:
502
+ 'Qwen3 是 Qwen 系列中最新一代的大型语言模型,提供了一套全面的密集和混合专家 (MoE) 模型。基于广泛的训练构建,Qwen3 在推理、指令遵循、代理能力和多语言支持方面提供了突破性的进展。',
503
+ displayName: 'Qwen3 32B',
504
+ id: 'alibaba/qwen-3-32b',
505
+ pricing: {
506
+ currency: 'USD',
507
+ units: [
508
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
509
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
510
+ ],
511
+ },
512
+ type: 'chat',
513
+ },
514
+ {
515
+ abilities: {
516
+ functionCall: true,
517
+ reasoning: true,
518
+ },
519
+ contextWindowTokens: 40_960,
520
+ description:
521
+ 'Qwen3 是 Qwen 系列中最新一代的大型语言模型,提供了一套全面的密集和混合专家 (MoE) 模型。基于广泛的训练构建,Qwen3 在推理、指令遵循、代理能力和多语言支持方面提供了突破性的进展。',
522
+ displayName: 'Qwen3 30B A3B',
523
+ id: 'alibaba/qwen-3-30b',
524
+ pricing: {
525
+ currency: 'USD',
526
+ units: [
527
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
528
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
529
+ ],
530
+ },
531
+ type: 'chat',
532
+ },
533
+ {
534
+ abilities: {
535
+ functionCall: true,
536
+ reasoning: true,
537
+ },
538
+ contextWindowTokens: 40_960,
539
+ description:
540
+ 'Qwen3 是 Qwen 系列中最新一代的大型语言模型,提供了一套全面的密集和混合专家 (MoE) 模型。基于广泛的训练构建,Qwen3 在推理、指令遵循、代理能力和多语言支持方面提供了突破性的进展。',
541
+ displayName: 'Qwen3 14B',
542
+ id: 'alibaba/qwen-3-14b',
543
+ pricing: {
544
+ currency: 'USD',
545
+ units: [
546
+ { name: 'textInput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
547
+ { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' },
548
+ ],
549
+ },
550
+ type: 'chat',
551
+ },
552
+ {
553
+ abilities: {
554
+ functionCall: true,
555
+ },
556
+ contextWindowTokens: 256_000,
557
+ description:
558
+ 'Command A 是 Cohere 迄今为止性能最强的模型,在工具使用、代理、检索增强生成 (RAG) 和多语言用例方面表现出色。Command A 的上下文长度为 256K,仅需两个 GPU 即可运行,与 Command R+ 08-2024 相比吞吐量提高了 150%。',
559
+ displayName: 'Command A',
560
+ id: 'cohere/command-a',
561
+ pricing: {
562
+ currency: 'USD',
563
+ units: [
564
+ { name: 'textInput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
565
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
566
+ ],
567
+ },
568
+ type: 'chat',
569
+ },
570
+ {
571
+ abilities: {
572
+ functionCall: true,
573
+ },
574
+ contextWindowTokens: 128_000,
575
+ description:
576
+ 'Command R 是一个针对对话交互和长上下文任务优化的大型语言模型。它定位于"可扩展"类别的模型,在高性能和强准确性之间取得平衡,使公司能够超越概念验证并进入生产。',
577
+ displayName: 'Command R',
578
+ id: 'cohere/command-r',
579
+ pricing: {
580
+ currency: 'USD',
581
+ units: [
582
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
583
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
584
+ ],
585
+ },
586
+ type: 'chat',
587
+ },
588
+ {
589
+ abilities: {
590
+ functionCall: true,
591
+ },
592
+ contextWindowTokens: 128_000,
593
+ description:
594
+ 'Command R+ 是 Cohere 最新的大型语言模型,针对对话交互和长上下文任务进行了优化。它的目标是在性能上极其出色,使公司能够超越概念验证并进入生产。',
595
+ displayName: 'Command R+',
596
+ id: 'cohere/command-r-plus',
597
+ pricing: {
598
+ currency: 'USD',
599
+ units: [
600
+ { name: 'textInput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
601
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
602
+ ],
603
+ },
604
+ type: 'chat',
605
+ },
606
+ {
607
+ abilities: {
608
+ functionCall: true,
609
+ reasoning: true,
610
+ },
611
+ contextWindowTokens: 128_000,
612
+ description:
613
+ 'DeepSeek R1 模型已经进行了小版本升级,当前版本为 DeepSeek-R1-0528。在最新更新中,DeepSeek R1 通过利用增加的计算资源和在训练后引入算法优化机制,显著提高了推理深度和推理能力。该模型在数学、编程和一般逻辑等多个基准评估中表现出色,其整体性能现在正接近领先模型,如 O3 和 Gemini 2.5 Pro。',
614
+ displayName: 'DeepSeek R1 0528',
615
+ id: 'deepseek/deepseek-r1',
616
+ pricing: {
617
+ currency: 'USD',
618
+ units: [
619
+ { name: 'textInput', rate: 0.55, strategy: 'fixed', unit: 'millionTokens' },
620
+ { name: 'textOutput', rate: 2.19, strategy: 'fixed', unit: 'millionTokens' },
621
+ ],
622
+ },
623
+ type: 'chat',
624
+ },
625
+ {
626
+ abilities: {
627
+ functionCall: true,
628
+ },
629
+ contextWindowTokens: 163_840,
630
+ description: '具有增强推理能力的快速通用大型语言模型',
631
+ displayName: 'DeepSeek V3 0324',
632
+ id: 'deepseek/deepseek-v3',
633
+ pricing: {
634
+ currency: 'USD',
635
+ units: [
636
+ { name: 'textInput', rate: 0.77, strategy: 'fixed', unit: 'millionTokens' },
637
+ { name: 'textOutput', rate: 0.77, strategy: 'fixed', unit: 'millionTokens' },
638
+ ],
639
+ },
640
+ type: 'chat',
641
+ },
642
+ {
643
+ contextWindowTokens: 128_000,
644
+ description: 'DeepSeek V3.1 Base 是 DeepSeek V3 模型的改进版本。',
645
+ displayName: 'DeepSeek V3.1 Base',
646
+ id: 'deepseek/deepseek-v3.1-base',
647
+ pricing: {
648
+ currency: 'USD',
649
+ units: [
650
+ { name: 'textInput', rate: 0.1999, strategy: 'fixed', unit: 'millionTokens' },
651
+ { name: 'textOutput', rate: 0.8001, strategy: 'fixed', unit: 'millionTokens' },
652
+ ],
653
+ },
654
+ type: 'chat',
655
+ },
656
+ {
657
+ abilities: {
658
+ functionCall: true,
659
+ reasoning: true,
660
+ vision: true,
661
+ },
662
+ contextWindowTokens: 1_048_576,
663
+ description:
664
+ 'Gemini 2.0 Flash 提供下一代功能和改进的功能,包括卓越的速度、内置工具使用、多模态生成和 100 万 token 的上下文窗口。',
665
+ displayName: 'Gemini 2.0 Flash',
666
+ id: 'google/gemini-2.0-flash',
667
+ pricing: {
668
+ currency: 'USD',
669
+ units: [
670
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
671
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
672
+ ],
673
+ },
674
+ type: 'chat',
675
+ },
676
+ {
677
+ abilities: {
678
+ functionCall: true,
679
+ reasoning: true,
680
+ vision: true,
681
+ },
682
+ contextWindowTokens: 1_048_576,
683
+ description:
684
+ 'Gemini 2.0 Flash Lite 提供下一代功能和改进的功能,包括卓越的速度、内置工具使用、多模态生成和 100 万 token 的上下文窗口。',
685
+ displayName: 'Gemini 2.0 Flash Lite',
686
+ id: 'google/gemini-2.0-flash-lite',
687
+ pricing: {
688
+ currency: 'USD',
689
+ units: [
690
+ { name: 'textInput', rate: 0.075, strategy: 'fixed', unit: 'millionTokens' },
691
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
692
+ ],
693
+ },
694
+ type: 'chat',
695
+ },
696
+ {
697
+ abilities: {
698
+ functionCall: true,
699
+ reasoning: true,
700
+ vision: true,
701
+ },
702
+ contextWindowTokens: 1_000_000,
703
+ description:
704
+ 'Gemini 2.5 Flash 是一个思考模型,提供出色的全面能力。它旨在价格和性能之间取得平衡,支持多模态和 100 万 token 的上下文窗口。',
705
+ displayName: 'Gemini 2.5 Flash',
706
+ id: 'google/gemini-2.5-flash',
707
+ pricing: {
708
+ currency: 'USD',
709
+ units: [
710
+ { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
711
+ { name: 'textOutput', rate: 2.5, strategy: 'fixed', unit: 'millionTokens' },
712
+ ],
713
+ },
714
+ type: 'chat',
715
+ },
716
+ {
717
+ abilities: {
718
+ functionCall: true,
719
+ reasoning: true,
720
+ vision: true,
721
+ },
722
+ contextWindowTokens: 1_048_576,
723
+ description:
724
+ 'Gemini 2.5 Flash-Lite 是一个平衡、低延迟的模型,具有可配置的思考预算和工具连接性(例如,Google Search 接地和代码执行)。它支持多模态输入,并提供 100 万 token 的上下文窗口。',
725
+ displayName: 'Gemini 2.5 Flash Lite',
726
+ id: 'google/gemini-2.5-flash-lite',
727
+ pricing: {
728
+ currency: 'USD',
729
+ units: [
730
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
731
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
732
+ ],
733
+ },
734
+ type: 'chat',
735
+ },
736
+ {
737
+ abilities: {
738
+ functionCall: true,
739
+ },
740
+ contextWindowTokens: 8192,
741
+ description:
742
+ '由 Google 精心调整用于聊天目的的 90 亿参数开源模型。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
743
+ displayName: 'Gemma 2 9B IT',
744
+ id: 'google/gemma-2-9b',
745
+ pricing: {
746
+ currency: 'USD',
747
+ units: [
748
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
749
+ { name: 'textOutput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
750
+ ],
751
+ },
752
+ type: 'chat',
753
+ },
754
+ {
755
+ abilities: {
756
+ functionCall: true,
757
+ vision: true,
758
+ },
759
+ contextWindowTokens: 256_000,
760
+ description:
761
+ 'xAI 最新和最伟大的旗舰模型,在自然语言、数学和推理方面提供无与伦比的性能——完美的全能选手。',
762
+ displayName: 'Grok 4',
763
+ id: 'xai/grok-4',
764
+ pricing: {
765
+ currency: 'USD',
766
+ units: [
767
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
768
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
769
+ ],
770
+ },
771
+ type: 'chat',
772
+ },
773
+ {
774
+ abilities: {
775
+ functionCall: true,
776
+ vision: true,
777
+ },
778
+ contextWindowTokens: 131_072,
779
+ description:
780
+ 'xAI 的轻量级模型,在响应之前进行思考。非常适合不需要深厚领域知识的简单或基于逻辑的任务。原始思维轨迹可访问。快速模型变体在更快的基础设施上提供服务,提供比标准快得多的响应时间。增加的速度以每个输出 token 更高的成本为代价。',
781
+ displayName: 'Grok 3 Mini Fast Beta',
782
+ id: 'xai/grok-3-mini-fast',
783
+ pricing: {
784
+ currency: 'USD',
785
+ units: [
786
+ { name: 'textInput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
787
+ { name: 'textOutput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
788
+ ],
789
+ },
790
+ type: 'chat',
791
+ },
792
+ {
793
+ abilities: {
794
+ functionCall: true,
795
+ vision: true,
796
+ },
797
+ contextWindowTokens: 131_072,
798
+ description:
799
+ 'xAI 的轻量级模型,在响应之前进行思考。非常适合不需要深厚领域知识的简单或基于逻辑的任务。原始思维轨迹可访问。',
800
+ displayName: 'Grok 3 Mini Beta',
801
+ id: 'xai/grok-3-mini',
802
+ pricing: {
803
+ currency: 'USD',
804
+ units: [
805
+ { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
806
+ { name: 'textOutput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
807
+ ],
808
+ },
809
+ type: 'chat',
810
+ },
811
+ {
812
+ abilities: {
813
+ functionCall: true,
814
+ vision: true,
815
+ },
816
+ contextWindowTokens: 131_072,
817
+ description:
818
+ 'xAI 的旗舰模型,在企业用例方面表现出色,如数据提取、编码和文本摘要。在金融、医疗保健、法律和科学领域拥有深厚的领域知识。快速模型变体在更快的基础设施上提供服务,提供比标准快得多的响应时间。增加的速度以每个输出 token 更高的成本为代价。',
819
+ displayName: 'Grok 3 Fast Beta',
820
+ id: 'xai/grok-3-fast',
821
+ pricing: {
822
+ currency: 'USD',
823
+ units: [
824
+ { name: 'textInput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
825
+ { name: 'textOutput', rate: 25, strategy: 'fixed', unit: 'millionTokens' },
826
+ ],
827
+ },
828
+ type: 'chat',
829
+ },
830
+ {
831
+ abilities: {
832
+ functionCall: true,
833
+ vision: true,
834
+ },
835
+ contextWindowTokens: 131_072,
836
+ description:
837
+ 'xAI 的旗舰模型,在企业用例方面表现出色,如数据提取、编码和文本摘要。在金融、医疗保健、法律和科学领域拥有深厚的领域知识。',
838
+ displayName: 'Grok 3 Beta',
839
+ id: 'xai/grok-3',
840
+ pricing: {
841
+ currency: 'USD',
842
+ units: [
843
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
844
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
845
+ ],
846
+ },
847
+ type: 'chat',
848
+ },
849
+ {
850
+ abilities: {
851
+ functionCall: true,
852
+ vision: true,
853
+ },
854
+ contextWindowTokens: 32_768,
855
+ description:
856
+ 'Grok 2 视觉模型在基于视觉的任务方面表现出色,在视觉数学推理 (MathVista) 和基于文档的问答 (DocVQA) 方面提供最先进的性能。它能够处理各种视觉信息,包括文档、图表、图表、屏幕截图和照片。',
857
+ displayName: 'Grok 2 Vision',
858
+ id: 'xai/grok-2-vision',
859
+ pricing: {
860
+ currency: 'USD',
861
+ units: [
862
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
863
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
864
+ ],
865
+ },
866
+ type: 'chat',
867
+ },
868
+ {
869
+ abilities: {
870
+ functionCall: true,
871
+ vision: true,
872
+ },
873
+ contextWindowTokens: 131_072,
874
+ description:
875
+ 'Grok 2 是一个具有最先进推理能力的前沿语言模型。它在聊天、编码和推理方面具有先进能力,在 LMSYS 排行榜上优于 Claude 3.5 Sonnet 和 GPT-4-Turbo。',
876
+ displayName: 'Grok 2',
877
+ id: 'xai/grok-2',
878
+ pricing: {
879
+ currency: 'USD',
880
+ units: [
881
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
882
+ { name: 'textOutput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
883
+ ],
884
+ },
885
+ type: 'chat',
886
+ },
887
+ {
888
+ abilities: {
889
+ functionCall: true,
890
+ },
891
+ contextWindowTokens: 8192,
892
+ description:
893
+ '由 Meta 精心调整用于指令遵循目的的 700 亿参数开源模型。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
894
+ displayName: 'Llama 3 70B Instruct',
895
+ id: 'meta/llama-3-70b',
896
+ pricing: {
897
+ currency: 'USD',
898
+ units: [
899
+ { name: 'textInput', rate: 0.59, strategy: 'fixed', unit: 'millionTokens' },
900
+ { name: 'textOutput', rate: 0.79, strategy: 'fixed', unit: 'millionTokens' },
901
+ ],
902
+ },
903
+ type: 'chat',
904
+ },
905
+ {
906
+ abilities: {
907
+ functionCall: true,
908
+ },
909
+ contextWindowTokens: 8192,
910
+ description:
911
+ '由 Meta 精心调整用于指令遵循目的的 80 亿参数开源模型。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
912
+ displayName: 'Llama 3 8B Instruct',
913
+ id: 'meta/llama-3-8b',
914
+ pricing: {
915
+ currency: 'USD',
916
+ units: [
917
+ { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
918
+ { name: 'textOutput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
919
+ ],
920
+ },
921
+ type: 'chat',
922
+ },
923
+ {
924
+ abilities: {
925
+ functionCall: true,
926
+ },
927
+ contextWindowTokens: 128_000,
928
+ description:
929
+ 'Meta Llama 3 70B Instruct 的更新版本,包括扩展的 128K 上下文长度、多语言和改进的推理能力。',
930
+ displayName: 'Llama 3.1 70B Instruct',
931
+ id: 'meta/llama-3.1-70b',
932
+ pricing: {
933
+ currency: 'USD',
934
+ units: [
935
+ { name: 'textInput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
936
+ { name: 'textOutput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
937
+ ],
938
+ },
939
+ type: 'chat',
940
+ },
941
+ {
942
+ abilities: {
943
+ functionCall: true,
944
+ },
945
+ contextWindowTokens: 131_000,
946
+ description:
947
+ 'Llama 3.1 8B 支持 128K 上下文窗口,使其成为实时对话界面和数据分析的理想选择,同时与更大的模型相比提供显著的成本节约。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
948
+ displayName: 'Llama 3.1 8B Instruct',
949
+ id: 'meta/llama-3.1-8b',
950
+ pricing: {
951
+ currency: 'USD',
952
+ units: [
953
+ { name: 'textInput', rate: 0.05, strategy: 'fixed', unit: 'millionTokens' },
954
+ { name: 'textOutput', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
955
+ ],
956
+ },
957
+ type: 'chat',
958
+ },
959
+ {
960
+ abilities: {
961
+ functionCall: true,
962
+ vision: true,
963
+ },
964
+ contextWindowTokens: 128_000,
965
+ description:
966
+ '指令调整的图像推理生成模型(文本 + 图像输入 / 文本输出),针对视觉识别、图像推理、标题生成和回答关于图像的一般问题进行了优化。',
967
+ displayName: 'Llama 3.2 11B Vision Instruct',
968
+ id: 'meta/llama-3.2-11b',
969
+ pricing: {
970
+ currency: 'USD',
971
+ units: [
972
+ { name: 'textInput', rate: 0.16, strategy: 'fixed', unit: 'millionTokens' },
973
+ { name: 'textOutput', rate: 0.16, strategy: 'fixed', unit: 'millionTokens' },
974
+ ],
975
+ },
976
+ type: 'chat',
977
+ },
978
+ {
979
+ abilities: {
980
+ functionCall: true,
981
+ },
982
+ contextWindowTokens: 128_000,
983
+ description: '仅文本模型,支持设备上用例,如多语言本地知识检索、摘要和重写。',
984
+ displayName: 'Llama 3.2 1B Instruct',
985
+ id: 'meta/llama-3.2-1b',
986
+ pricing: {
987
+ currency: 'USD',
988
+ units: [
989
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
990
+ { name: 'textOutput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
991
+ ],
992
+ },
993
+ type: 'chat',
994
+ },
995
+ {
996
+ abilities: {
997
+ functionCall: true,
998
+ },
999
+ contextWindowTokens: 128_000,
1000
+ description: '仅文本模型,精心调整用于支持设备上用例,如多语言本地知识检索、摘要和重写。',
1001
+ displayName: 'Llama 3.2 3B Instruct',
1002
+ id: 'meta/llama-3.2-3b',
1003
+ pricing: {
1004
+ currency: 'USD',
1005
+ units: [
1006
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
1007
+ { name: 'textOutput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
1008
+ ],
1009
+ },
1010
+ type: 'chat',
1011
+ },
1012
+ {
1013
+ abilities: {
1014
+ functionCall: true,
1015
+ vision: true,
1016
+ },
1017
+ contextWindowTokens: 128_000,
1018
+ description:
1019
+ '指令调整的图像推理生成模型(文本 + 图像输入 / 文本输出),针对视觉识别、图像推理、标题生成和回答关于图像的一般问题进行了优化。',
1020
+ displayName: 'Llama 3.2 90B Vision Instruct',
1021
+ id: 'meta/llama-3.2-90b',
1022
+ pricing: {
1023
+ currency: 'USD',
1024
+ units: [
1025
+ { name: 'textInput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
1026
+ { name: 'textOutput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
1027
+ ],
1028
+ },
1029
+ type: 'chat',
1030
+ },
1031
+ {
1032
+ abilities: {
1033
+ functionCall: true,
1034
+ },
1035
+ contextWindowTokens: 128_000,
1036
+ description:
1037
+ '性能与效率的完美结合。该模型支持高性能对话 AI,专为内容创建、企业应用和研究而设计,提供先进的语言理解能力,包括文本摘要、分类、情感分析和代码生成。',
1038
+ displayName: 'Llama 3.3 70B Instruct',
1039
+ id: 'meta/llama-3.3-70b',
1040
+ pricing: {
1041
+ currency: 'USD',
1042
+ units: [
1043
+ { name: 'textInput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
1044
+ { name: 'textOutput', rate: 0.72, strategy: 'fixed', unit: 'millionTokens' },
1045
+ ],
1046
+ },
1047
+ type: 'chat',
1048
+ },
1049
+ {
1050
+ abilities: {
1051
+ functionCall: true,
1052
+ vision: true,
1053
+ },
1054
+ contextWindowTokens: 131_072,
1055
+ description:
1056
+ 'Llama 4 模型集合是原生多模态 AI 模型,支持文本和多模态体验。这些模型利用混合专家架构在文本和图像理解方面提供行业领先的性能。Llama 4 Maverick,一个 170 亿参数模型,具有 128 个专家。由 DeepInfra 提供服务。',
1057
+ displayName: 'Llama 4 Maverick 17B 128E Instruct',
1058
+ id: 'meta/llama-4-maverick',
1059
+ pricing: {
1060
+ currency: 'USD',
1061
+ units: [
1062
+ { name: 'textInput', rate: 0.2, strategy: 'fixed', unit: 'millionTokens' },
1063
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
1064
+ ],
1065
+ },
1066
+ type: 'chat',
1067
+ },
1068
+ {
1069
+ abilities: {
1070
+ functionCall: true,
1071
+ vision: true,
1072
+ },
1073
+ contextWindowTokens: 131_072,
1074
+ description:
1075
+ 'Llama 4 模型集合是原生多模态 AI 模型,支持文本和多模态体验。这些模型利用混合专家架构在文本和图像理解方面提供行业领先的性能。Llama 4 Scout,一个 170 亿参数模型,具有 16 个专家。由 DeepInfra 提供服务。',
1076
+ displayName: 'Llama 4 Scout 17B 16E Instruct',
1077
+ id: 'meta/llama-4-scout',
1078
+ pricing: {
1079
+ currency: 'USD',
1080
+ units: [
1081
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1082
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
1083
+ ],
1084
+ },
1085
+ type: 'chat',
1086
+ },
1087
+ {
1088
+ abilities: {
1089
+ functionCall: true,
1090
+ },
1091
+ contextWindowTokens: 256_000,
1092
+ description:
1093
+ 'Mistral Codestral 25.01 是最先进的编码模型,针对低延迟、高频率用例进行了优化。精通 80 多种编程语言,它在中间填充 (FIM)、代码纠正和测试生成等任务上表现出色。',
1094
+ displayName: 'Mistral Codestral 25.01',
1095
+ id: 'mistral/codestral',
1096
+ pricing: {
1097
+ currency: 'USD',
1098
+ units: [
1099
+ { name: 'textInput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
1100
+ { name: 'textOutput', rate: 0.9, strategy: 'fixed', unit: 'millionTokens' },
1101
+ ],
1102
+ },
1103
+ type: 'chat',
1104
+ },
1105
+ {
1106
+ abilities: {
1107
+ functionCall: true,
1108
+ },
1109
+ contextWindowTokens: 128_000,
1110
+ description:
1111
+ 'Devstral 是一个用于软件工程任务的代理大型语言模型,使其成为软件工程代理的绝佳选择。',
1112
+ displayName: 'Devstral Small',
1113
+ id: 'mistral/devstral-small',
1114
+ pricing: {
1115
+ currency: 'USD',
1116
+ units: [
1117
+ { name: 'textInput', rate: 0.07, strategy: 'fixed', unit: 'millionTokens' },
1118
+ { name: 'textOutput', rate: 0.28, strategy: 'fixed', unit: 'millionTokens' },
1119
+ ],
1120
+ },
1121
+ type: 'chat',
1122
+ },
1123
+ {
1124
+ abilities: {
1125
+ functionCall: true,
1126
+ },
1127
+ contextWindowTokens: 128_000,
1128
+ description:
1129
+ '复杂思维,由深刻理解支持,具有您可以遵循和验证的透明推理。该模型即使在任务中途切换语言时,也能在众多语言中保持高保真推理。',
1130
+ displayName: 'Magistral Medium 2506',
1131
+ id: 'mistral/magistral-medium',
1132
+ pricing: {
1133
+ currency: 'USD',
1134
+ units: [
1135
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
1136
+ { name: 'textOutput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
1137
+ ],
1138
+ },
1139
+ type: 'chat',
1140
+ },
1141
+ {
1142
+ abilities: {
1143
+ functionCall: true,
1144
+ },
1145
+ contextWindowTokens: 128_000,
1146
+ description:
1147
+ '复杂思维,由深刻理解支持,具有您可以遵循和验证的透明推理。该模型即使在任务中途切换语言时,也能在众多语言中保持高保真推理。',
1148
+ displayName: 'Magistral Small 2506',
1149
+ id: 'mistral/magistral-small',
1150
+ pricing: {
1151
+ currency: 'USD',
1152
+ units: [
1153
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
1154
+ { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
1155
+ ],
1156
+ },
1157
+ type: 'chat',
1158
+ },
1159
+ {
1160
+ abilities: {
1161
+ functionCall: true,
1162
+ },
1163
+ contextWindowTokens: 128_000,
1164
+ description: '一个紧凑、高效的模型,用于智能助手和本地分析等设备上任务,提供低延迟性能。',
1165
+ displayName: 'Ministral 3B',
1166
+ id: 'mistral/ministral-3b',
1167
+ pricing: {
1168
+ currency: 'USD',
1169
+ units: [
1170
+ { name: 'textInput', rate: 0.04, strategy: 'fixed', unit: 'millionTokens' },
1171
+ { name: 'textOutput', rate: 0.04, strategy: 'fixed', unit: 'millionTokens' },
1172
+ ],
1173
+ },
1174
+ type: 'chat',
1175
+ },
1176
+ {
1177
+ abilities: {
1178
+ functionCall: true,
1179
+ },
1180
+ contextWindowTokens: 128_000,
1181
+ description:
1182
+ '一个更强大的模型,具有更快、内存高效的推理,是复杂工作流程和要求苛刻的边缘应用的理想选择。',
1183
+ displayName: 'Ministral 8B',
1184
+ id: 'mistral/ministral-8b',
1185
+ pricing: {
1186
+ currency: 'USD',
1187
+ units: [
1188
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1189
+ { name: 'textOutput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1190
+ ],
1191
+ },
1192
+ type: 'chat',
1193
+ },
1194
+ {
1195
+ abilities: {
1196
+ functionCall: true,
1197
+ },
1198
+ contextWindowTokens: 32_000,
1199
+ description:
1200
+ 'Mistral Large 是复杂任务的理想选择,这些任务需要大型推理能力或高度专业化——如合成文本生成、代码生成、RAG 或代理。',
1201
+ displayName: 'Mistral Large',
1202
+ id: 'mistral/mistral-large',
1203
+ pricing: {
1204
+ currency: 'USD',
1205
+ units: [
1206
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
1207
+ { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' },
1208
+ ],
1209
+ },
1210
+ type: 'chat',
1211
+ },
1212
+ {
1213
+ abilities: {
1214
+ functionCall: true,
1215
+ },
1216
+ contextWindowTokens: 32_768,
1217
+ description:
1218
+ 'Mistral Saba 24B 是一个由 Mistral.ai 开发的 240 亿参数开源模型。Saba 是一个专门训练以在阿拉伯语、波斯语、乌尔都语、希伯来语和印度语言方面表现出色的专门模型。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
1219
+ displayName: 'Mistral Saba 24B',
1220
+ id: 'mistral/mistral-saba-24b',
1221
+ pricing: {
1222
+ currency: 'USD',
1223
+ units: [
1224
+ { name: 'textInput', rate: 0.79, strategy: 'fixed', unit: 'millionTokens' },
1225
+ { name: 'textOutput', rate: 0.79, strategy: 'fixed', unit: 'millionTokens' },
1226
+ ],
1227
+ },
1228
+ type: 'chat',
1229
+ },
1230
+ {
1231
+ abilities: {
1232
+ functionCall: true,
1233
+ },
1234
+ contextWindowTokens: 32_000,
1235
+ description:
1236
+ 'Mistral Small 是简单任务的理想选择,这些任务可以批量完成——如分类、客户支持或文本生成。它以可承受的价格点提供出色的性能。',
1237
+ displayName: 'Mistral Small',
1238
+ id: 'mistral/mistral-small',
1239
+ pricing: {
1240
+ currency: 'USD',
1241
+ units: [
1242
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1243
+ { name: 'textOutput', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
1244
+ ],
1245
+ },
1246
+ type: 'chat',
1247
+ },
1248
+ {
1249
+ abilities: {
1250
+ functionCall: true,
1251
+ },
1252
+ contextWindowTokens: 65_536,
1253
+ description: '8x22b Instruct 模型。8x22b 是由 Mistral 提供服务的混合专家开源模型。',
1254
+ displayName: 'Mixtral MoE 8x22B Instruct',
1255
+ id: 'mistral/mixtral-8x22b-instruct',
1256
+ pricing: {
1257
+ currency: 'USD',
1258
+ units: [
1259
+ { name: 'textInput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' },
1260
+ { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' },
1261
+ ],
1262
+ },
1263
+ type: 'chat',
1264
+ },
1265
+ {
1266
+ abilities: {
1267
+ functionCall: true,
1268
+ vision: true,
1269
+ },
1270
+ contextWindowTokens: 128_000,
1271
+ description: '一个具有图像理解能力的 12B 模型,以及文本。',
1272
+ displayName: 'Pixtral 12B 2409',
1273
+ id: 'mistral/pixtral-12b',
1274
+ pricing: {
1275
+ currency: 'USD',
1276
+ units: [
1277
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
1278
+ { name: 'textOutput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
1279
+ ],
1280
+ },
1281
+ type: 'chat',
1282
+ },
1283
+ {
1284
+ abilities: {
1285
+ functionCall: true,
1286
+ vision: true,
1287
+ },
1288
+ contextWindowTokens: 128_000,
1289
+ description:
1290
+ 'Pixtral Large 是我们多模态家族中的第二个模型,展示了前沿水平的图像理解。特别是,该模型能够理解文档、图表和自然图像,同时保持了 Mistral Large 2 的领先文本理解能力。',
1291
+ displayName: 'Pixtral Large',
1292
+ id: 'mistral/pixtral-large',
1293
+ pricing: {
1294
+ currency: 'USD',
1295
+ units: [
1296
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
1297
+ { name: 'textOutput', rate: 6, strategy: 'fixed', unit: 'millionTokens' },
1298
+ ],
1299
+ },
1300
+ type: 'chat',
1301
+ },
1302
+ {
1303
+ abilities: {
1304
+ functionCall: true,
1305
+ },
1306
+ contextWindowTokens: 32_768,
1307
+ description: 'Mercury Coder Small 是代码生成、调试和重构任务的理想选择,具有最小延迟。',
1308
+ displayName: 'Mercury Coder Small Beta',
1309
+ id: 'inception/mercury-coder-small',
1310
+ pricing: {
1311
+ currency: 'USD',
1312
+ units: [
1313
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
1314
+ { name: 'textOutput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
1315
+ ],
1316
+ },
1317
+ type: 'chat',
1318
+ },
1319
+ {
1320
+ abilities: {
1321
+ functionCall: true,
1322
+ },
1323
+ contextWindowTokens: 32_768,
1324
+ description:
1325
+ 'Morph 提供了一个专门的 AI 模型,将前沿模型(如 Claude 或 GPT-4o)建议的代码更改应用到您的现有代码文件中 FAST - 4500+ tokens/秒。它充当 AI 编码工作流程中的最后一步。支持 16k 输入 tokens 和 16k 输出 tokens。',
1326
+ displayName: 'Morph V3 Fast',
1327
+ id: 'morph/morph-v3-fast',
1328
+ pricing: {
1329
+ currency: 'USD',
1330
+ units: [
1331
+ { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },
1332
+ { name: 'textOutput', rate: 1.2, strategy: 'fixed', unit: 'millionTokens' },
1333
+ ],
1334
+ },
1335
+ type: 'chat',
1336
+ },
1337
+ {
1338
+ abilities: {
1339
+ functionCall: true,
1340
+ },
1341
+ contextWindowTokens: 32_768,
1342
+ description:
1343
+ 'Morph 提供了一个专门的 AI 模型,将前沿模型(如 Claude 或 GPT-4o)建议的代码更改应用到您的现有代码文件中 FAST - 2500+ tokens/秒。它充当 AI 编码工作流程中的最后一步。支持 16k 输入 tokens 和 16k 输出 tokens。',
1344
+ displayName: 'Morph V3 Large',
1345
+ id: 'morph/morph-v3-large',
1346
+ pricing: {
1347
+ currency: 'USD',
1348
+ units: [
1349
+ { name: 'textInput', rate: 0.9, strategy: 'fixed', unit: 'millionTokens' },
1350
+ { name: 'textOutput', rate: 1.9, strategy: 'fixed', unit: 'millionTokens' },
1351
+ ],
1352
+ },
1353
+ type: 'chat',
1354
+ },
1355
+ {
1356
+ abilities: {
1357
+ functionCall: true,
1358
+ },
1359
+ contextWindowTokens: 16_385,
1360
+ description:
1361
+ 'OpenAI 在 GPT-3.5 系列中最能干且最具成本效益的模型,针对聊天目的进行了优化,但在传统完成任务中也表现良好。',
1362
+ displayName: 'GPT-3.5 Turbo',
1363
+ id: 'openai/gpt-3.5-turbo',
1364
+ pricing: {
1365
+ currency: 'USD',
1366
+ units: [
1367
+ { name: 'textInput', rate: 0.5, strategy: 'fixed', unit: 'millionTokens' },
1368
+ { name: 'textOutput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
1369
+ ],
1370
+ },
1371
+ type: 'chat',
1372
+ },
1373
+ {
1374
+ abilities: {
1375
+ functionCall: true,
1376
+ },
1377
+ contextWindowTokens: 8192,
1378
+ description: '与 GPT-3 时代模型类似的能力。与传统的完成端点兼容,而不是聊天完成端点。',
1379
+ displayName: 'GPT-3.5 Turbo Instruct',
1380
+ id: 'openai/gpt-3.5-turbo-instruct',
1381
+ pricing: {
1382
+ currency: 'USD',
1383
+ units: [
1384
+ { name: 'textInput', rate: 1.5, strategy: 'fixed', unit: 'millionTokens' },
1385
+ { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
1386
+ ],
1387
+ },
1388
+ type: 'chat',
1389
+ },
1390
+ {
1391
+ abilities: {
1392
+ functionCall: true,
1393
+ },
1394
+ contextWindowTokens: 128_000,
1395
+ description:
1396
+ '来自 OpenAI 的 gpt-4-turbo 具有广泛的通用知识和领域专长,使其能够遵循自然语言的复杂指令并准确解决困难问题。它的知识截止日期为 2023 年 4 月,上下文窗口为 128,000 个 token。',
1397
+ displayName: 'GPT-4 Turbo',
1398
+ id: 'openai/gpt-4-turbo',
1399
+ pricing: {
1400
+ currency: 'USD',
1401
+ units: [
1402
+ { name: 'textInput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
1403
+ { name: 'textOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
1404
+ ],
1405
+ },
1406
+ type: 'chat',
1407
+ },
1408
+ {
1409
+ abilities: {
1410
+ functionCall: true,
1411
+ vision: true,
1412
+ },
1413
+ contextWindowTokens: 1_047_576,
1414
+ description:
1415
+ 'GPT 4.1 mini 在智能、速度和成本之间取得了平衡,使其成为许多用例的有吸引力的模型。',
1416
+ displayName: 'GPT-4.1 mini',
1417
+ id: 'openai/gpt-4.1-mini',
1418
+ pricing: {
1419
+ currency: 'USD',
1420
+ units: [
1421
+ { name: 'textInput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
1422
+ { name: 'textOutput', rate: 1.6, strategy: 'fixed', unit: 'millionTokens' },
1423
+ { name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1424
+ ],
1425
+ },
1426
+ type: 'chat',
1427
+ },
1428
+ {
1429
+ abilities: {
1430
+ functionCall: true,
1431
+ vision: true,
1432
+ },
1433
+ contextWindowTokens: 1_047_576,
1434
+ description: 'GPT-4.1 nano 是最快、最具成本效益的 GPT 4.1 模型。',
1435
+ displayName: 'GPT-4.1 nano',
1436
+ id: 'openai/gpt-4.1-nano',
1437
+ pricing: {
1438
+ currency: 'USD',
1439
+ units: [
1440
+ { name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
1441
+ { name: 'textOutput', rate: 0.4, strategy: 'fixed', unit: 'millionTokens' },
1442
+ { name: 'textInput_cacheRead', rate: 0.025, strategy: 'fixed', unit: 'millionTokens' },
1443
+ ],
1444
+ },
1445
+ type: 'chat',
1446
+ },
1447
+ {
1448
+ abilities: {
1449
+ functionCall: true,
1450
+ vision: true,
1451
+ },
1452
+ contextWindowTokens: 128_000,
1453
+ description:
1454
+ 'GPT-4o mini 来自 OpenAI 是他们最先进且最具成本效益的小模型。它是多模态的(接受文本或图像输入并输出文本),并且比 gpt-3.5-turbo 具有更高的智能性,但速度同样快。',
1455
+ displayName: 'GPT-4o mini',
1456
+ id: 'openai/gpt-4o-mini',
1457
+ pricing: {
1458
+ currency: 'USD',
1459
+ units: [
1460
+ { name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' },
1461
+ { name: 'textOutput', rate: 0.6, strategy: 'fixed', unit: 'millionTokens' },
1462
+ { name: 'textInput_cacheRead', rate: 0.075, strategy: 'fixed', unit: 'millionTokens' },
1463
+ ],
1464
+ },
1465
+ type: 'chat',
1466
+ },
1467
+ {
1468
+ abilities: {
1469
+ functionCall: true,
1470
+ },
1471
+ contextWindowTokens: 127_000,
1472
+ description: 'Perplexity 的轻量级产品,具有搜索接地能力,比 Sonar Pro 更快、更便宜。',
1473
+ displayName: 'Sonar',
1474
+ id: 'perplexity/sonar',
1475
+ pricing: {
1476
+ currency: 'USD',
1477
+ units: [
1478
+ { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
1479
+ { name: 'textOutput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
1480
+ ],
1481
+ },
1482
+ type: 'chat',
1483
+ },
1484
+ {
1485
+ abilities: {
1486
+ functionCall: true,
1487
+ },
1488
+ contextWindowTokens: 200_000,
1489
+ description: 'Perplexity 的旗舰产品,具有搜索接地能力,支持高级查询和后续操作。',
1490
+ displayName: 'Sonar Pro',
1491
+ id: 'perplexity/sonar-pro',
1492
+ pricing: {
1493
+ currency: 'USD',
1494
+ units: [
1495
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
1496
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
1497
+ ],
1498
+ },
1499
+ type: 'chat',
1500
+ },
1501
+ {
1502
+ abilities: {
1503
+ functionCall: true,
1504
+ reasoning: true,
1505
+ },
1506
+ contextWindowTokens: 127_000,
1507
+ description: '一个专注于推理的模型,在响应中输出思维链 (CoT),提供具有搜索接地的详细解释。',
1508
+ displayName: 'Sonar Reasoning',
1509
+ id: 'perplexity/sonar-reasoning',
1510
+ pricing: {
1511
+ currency: 'USD',
1512
+ units: [
1513
+ { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
1514
+ { name: 'textOutput', rate: 5, strategy: 'fixed', unit: 'millionTokens' },
1515
+ ],
1516
+ },
1517
+ type: 'chat',
1518
+ },
1519
+ {
1520
+ abilities: {
1521
+ functionCall: true,
1522
+ reasoning: true,
1523
+ },
1524
+ contextWindowTokens: 127_000,
1525
+ description:
1526
+ '一个高级推理聚焦模型,在响应中输出思维链 (CoT),提供具有增强搜索能力和每个请求多个搜索查询的综合解释。',
1527
+ displayName: 'Sonar Reasoning Pro',
1528
+ id: 'perplexity/sonar-reasoning-pro',
1529
+ pricing: {
1530
+ currency: 'USD',
1531
+ units: [
1532
+ { name: 'textInput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
1533
+ { name: 'textOutput', rate: 8, strategy: 'fixed', unit: 'millionTokens' },
1534
+ ],
1535
+ },
1536
+ type: 'chat',
1537
+ },
1538
+ {
1539
+ abilities: {
1540
+ functionCall: true,
1541
+ },
1542
+ contextWindowTokens: 128_000,
1543
+ description:
1544
+ '访问 v0 背后的模型以生成、修复和优化现代 Web 应用,具有特定框架的推理和最新知识。',
1545
+ displayName: 'v0-1.0-md',
1546
+ id: 'vercel/v0-1.0-md',
1547
+ pricing: {
1548
+ currency: 'USD',
1549
+ units: [
1550
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
1551
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
1552
+ ],
1553
+ },
1554
+ type: 'chat',
1555
+ },
1556
+ {
1557
+ abilities: {
1558
+ functionCall: true,
1559
+ },
1560
+ contextWindowTokens: 128_000,
1561
+ description:
1562
+ '访问 v0 背后的模型以生成、修复和优化现代 Web 应用,具有特定框架的推理和最新知识。',
1563
+ displayName: 'v0-1.5-md',
1564
+ id: 'vercel/v0-1.5-md',
1565
+ pricing: {
1566
+ currency: 'USD',
1567
+ units: [
1568
+ { name: 'textInput', rate: 3, strategy: 'fixed', unit: 'millionTokens' },
1569
+ { name: 'textOutput', rate: 15, strategy: 'fixed', unit: 'millionTokens' },
1570
+ ],
1571
+ },
1572
+ type: 'chat',
1573
+ },
1574
+ {
1575
+ abilities: {
1576
+ functionCall: true,
1577
+ },
1578
+ contextWindowTokens: 300_000,
1579
+ description: '一个非常低成本的多模态模型,处理图像、视频和文本输入的速度极快。',
1580
+ displayName: 'Nova Lite',
1581
+ id: 'amazon/nova-lite',
1582
+ pricing: {
1583
+ currency: 'USD',
1584
+ units: [
1585
+ { name: 'textInput', rate: 0.06, strategy: 'fixed', unit: 'millionTokens' },
1586
+ { name: 'textOutput', rate: 0.24, strategy: 'fixed', unit: 'millionTokens' },
1587
+ ],
1588
+ },
1589
+ type: 'chat',
1590
+ },
1591
+ {
1592
+ abilities: {
1593
+ functionCall: true,
1594
+ },
1595
+ contextWindowTokens: 128_000,
1596
+ description: '一个仅文本模型,以非常低的成本提供最低延迟的响应。',
1597
+ displayName: 'Nova Micro',
1598
+ id: 'amazon/nova-micro',
1599
+ pricing: {
1600
+ currency: 'USD',
1601
+ units: [
1602
+ { name: 'textInput', rate: 0.035, strategy: 'fixed', unit: 'millionTokens' },
1603
+ { name: 'textOutput', rate: 0.14, strategy: 'fixed', unit: 'millionTokens' },
1604
+ ],
1605
+ },
1606
+ type: 'chat',
1607
+ },
1608
+ {
1609
+ abilities: {
1610
+ functionCall: true,
1611
+ vision: true,
1612
+ },
1613
+ contextWindowTokens: 200_000,
1614
+ description:
1615
+ 'Claude 3.5 Haiku 是我们最快模型的下一代。与 Claude 3 Haiku 的速度相似,Claude 3.5 Haiku 在每个技能集上都得到了改进,并在许多智能基准测试中超越了我们上一代最大的模型 Claude 3 Opus。',
1616
+ displayName: 'Claude 3.5 Haiku',
1617
+ id: 'anthropic/claude-3.5-haiku',
1618
+ pricing: {
1619
+ currency: 'USD',
1620
+ units: [
1621
+ { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },
1622
+ { name: 'textOutput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
1623
+ { name: 'textInput_cacheRead', rate: 0.08, strategy: 'fixed', unit: 'millionTokens' },
1624
+ { name: 'textInput_cacheWrite', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
1625
+ ],
1626
+ },
1627
+ type: 'chat',
1628
+ },
1629
+ {
1630
+ abilities: {
1631
+ functionCall: true,
1632
+ vision: true,
1633
+ },
1634
+ contextWindowTokens: 200_000,
1635
+ description:
1636
+ 'Claude 3 Haiku 是 Anthropic 迄今为止最快的模型,专为通常涉及较长提示的企业工作负载而设计。Haiku 可以快速分析大量文档,如季度文件、合同或法律案件,成本是其性能等级中其他模型的一半。',
1637
+ displayName: 'Claude 3 Haiku',
1638
+ id: 'anthropic/claude-3-haiku',
1639
+ pricing: {
1640
+ currency: 'USD',
1641
+ units: [
1642
+ { name: 'textInput', rate: 0.25, strategy: 'fixed', unit: 'millionTokens' },
1643
+ { name: 'textOutput', rate: 1.25, strategy: 'fixed', unit: 'millionTokens' },
1644
+ { name: 'textInput_cacheRead', rate: 0.03, strategy: 'fixed', unit: 'millionTokens' },
1645
+ { name: 'textInput_cacheWrite', rate: 0.3, strategy: 'fixed', unit: 'millionTokens' },
1646
+ ],
1647
+ },
1648
+ type: 'chat',
1649
+ },
1650
+ {
1651
+ abilities: {
1652
+ functionCall: true,
1653
+ },
1654
+ contextWindowTokens: 131_072,
1655
+ description:
1656
+ 'DeepSeek-R1-Distill-Llama-70B 是 70B Llama 模型的蒸馏、更高效变体。它在文本生成任务中保持强大性能,减少计算开销以便于部署和研究。由 Groq 使用其自定义语言处理单元 (LPU) 硬件提供服务,以提供快速高效的推理。',
1657
+ displayName: 'DeepSeek R1 Distill Llama 70B',
1658
+ id: 'deepseek/deepseek-r1-distill-llama-70b',
1659
+ pricing: {
1660
+ currency: 'USD',
1661
+ units: [
1662
+ { name: 'textInput', rate: 0.75, strategy: 'fixed', unit: 'millionTokens' },
1663
+ { name: 'textOutput', rate: 0.99, strategy: 'fixed', unit: 'millionTokens' },
1664
+ ],
1665
+ },
1666
+ type: 'chat',
1667
+ },
1668
+ {
1669
+ abilities: {
1670
+ functionCall: true,
1671
+ },
1672
+ contextWindowTokens: 300_000,
1673
+ description: '一个高度能干的多模态模型,具有准确性、速度和成本的最佳组合,适用于广泛的任务。',
1674
+ displayName: 'Nova Pro',
1675
+ id: 'amazon/nova-pro',
1676
+ pricing: {
1677
+ currency: 'USD',
1678
+ units: [
1679
+ { name: 'textInput', rate: 0.8, strategy: 'fixed', unit: 'millionTokens' },
1680
+ { name: 'textOutput', rate: 3.2, strategy: 'fixed', unit: 'millionTokens' },
1681
+ ],
1682
+ },
1683
+ type: 'chat',
1684
+ },
1685
+ ];
1686
+
1687
+ const vercelAIGatewayEmbeddingModels: AIEmbeddingModelCard[] = [
1688
+ {
1689
+ description:
1690
+ 'Amazon Titan Text Embeddings V2 是一个轻量级、高效的多语言嵌入模型,支持 1024、512 和 256 维度。',
1691
+ displayName: 'Titan Text Embeddings V2',
1692
+ id: 'amazon/titan-embed-text-v2',
1693
+ maxDimension: 1024,
1694
+ pricing: {
1695
+ currency: 'USD',
1696
+ units: [{ name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }],
1697
+ },
1698
+ type: 'embedding',
1699
+ },
1700
+ {
1701
+ description: '最先进的嵌入模型,在英语、多语言和代码任务中具有出色的性能。',
1702
+ displayName: 'Gemini Embedding 001',
1703
+ id: 'google/gemini-embedding-001',
1704
+ maxDimension: 768,
1705
+ pricing: {
1706
+ currency: 'USD',
1707
+ units: [{ name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }],
1708
+ },
1709
+ type: 'embedding',
1710
+ },
1711
+ {
1712
+ description: '针对代码和英语语言任务优化的英语聚焦文本嵌入模型。',
1713
+ displayName: 'Text Embedding 005',
1714
+ id: 'google/text-embedding-005',
1715
+ maxDimension: 768,
1716
+ pricing: {
1717
+ currency: 'USD',
1718
+ units: [{ name: 'textInput', rate: 0.025, strategy: 'fixed', unit: 'millionTokens' }],
1719
+ },
1720
+ type: 'embedding',
1721
+ },
1722
+ {
1723
+ description: '针对跨语言任务优化的多语言文本嵌入模型,支持多种语言。',
1724
+ displayName: 'Text Multilingual Embedding 002',
1725
+ id: 'google/text-multilingual-embedding-002',
1726
+ maxDimension: 768,
1727
+ pricing: {
1728
+ currency: 'USD',
1729
+ units: [{ name: 'textInput', rate: 0.025, strategy: 'fixed', unit: 'millionTokens' }],
1730
+ },
1731
+ type: 'embedding',
1732
+ },
1733
+ {
1734
+ description: '一个允许对文本、图像或混合内容进行分类或转换为嵌入的模型。',
1735
+ displayName: 'Embed v4.0',
1736
+ id: 'cohere/embed-v4.0',
1737
+ maxDimension: 1024,
1738
+ pricing: {
1739
+ currency: 'USD',
1740
+ units: [{ name: 'textInput', rate: 0.12, strategy: 'fixed', unit: 'millionTokens' }],
1741
+ },
1742
+ type: 'embedding',
1743
+ },
1744
+ {
1745
+ description: '可以嵌入代码数据库和存储库以支持编码助手的代码嵌入模型。',
1746
+ displayName: 'Codestral Embed',
1747
+ id: 'mistral/codestral-embed',
1748
+ maxDimension: 1024,
1749
+ pricing: {
1750
+ currency: 'USD',
1751
+ units: [{ name: 'textInput', rate: 0.15, strategy: 'fixed', unit: 'millionTokens' }],
1752
+ },
1753
+ type: 'embedding',
1754
+ },
1755
+ {
1756
+ description: '用于语义搜索、相似性、聚类和 RAG 工作流的通用文本嵌入模型。',
1757
+ displayName: 'Mistral Embed',
1758
+ id: 'mistral/mistral-embed',
1759
+ maxDimension: 1024,
1760
+ pricing: {
1761
+ currency: 'USD',
1762
+ units: [{ name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }],
1763
+ },
1764
+ type: 'embedding',
1765
+ },
1766
+ {
1767
+ description: 'OpenAI 最能干的嵌入模型,适用于英语和非英语任务。',
1768
+ displayName: 'text-embedding-3-large',
1769
+ id: 'openai/text-embedding-3-large',
1770
+ maxDimension: 3072,
1771
+ pricing: {
1772
+ currency: 'USD',
1773
+ units: [{ name: 'textInput', rate: 0.13, strategy: 'fixed', unit: 'millionTokens' }],
1774
+ },
1775
+ type: 'embedding',
1776
+ },
1777
+ {
1778
+ description: 'OpenAI 改进的、性能更高的 ada 嵌入模型版本。',
1779
+ displayName: 'text-embedding-3-small',
1780
+ id: 'openai/text-embedding-3-small',
1781
+ maxDimension: 1536,
1782
+ pricing: {
1783
+ currency: 'USD',
1784
+ units: [{ name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }],
1785
+ },
1786
+ type: 'embedding',
1787
+ },
1788
+ {
1789
+ description: 'OpenAI 的传统文本嵌入模型。',
1790
+ displayName: 'text-embedding-ada-002',
1791
+ id: 'openai/text-embedding-ada-002',
1792
+ maxDimension: 1536,
1793
+ pricing: {
1794
+ currency: 'USD',
1795
+ units: [{ name: 'textInput', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' }],
1796
+ },
1797
+ type: 'embedding',
1798
+ },
1799
+ ];
1800
+
1801
+ export const allModels = [...vercelAIGatewayChatModels, ...vercelAIGatewayEmbeddingModels];
1802
+
1803
+ export default allModels;