@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,2237 @@
1
+ /**
2
+ * Curated Provider Core model catalog.
3
+ *
4
+ * This is the single source used by qlogicagent direct mode and llmrouter's
5
+ * model catalog API. External broad catalogs are intentionally not merged in.
6
+ */
7
+ export const BUILTIN_PROVIDERS = [
8
+ // ── Tier 1: Major Chinese providers ──────────────────────────────────
9
+ // DeepSeek via Anthropic Messages API — CC-parity transport for tool_use / thinking blocks.
10
+ // See https://api-docs.deepseek.com/guides/anthropic_api
11
+ {
12
+ id: "deepseek",
13
+ name: "DeepSeek",
14
+ transport: "anthropic-messages",
15
+ baseUrl: "https://api.deepseek.com/anthropic",
16
+ apiKeyEnvVars: ["DEEPSEEK_API_KEY"],
17
+ authType: "x-api-key",
18
+ isAggregator: false,
19
+ defaultModel: "deepseek-v4-flash",
20
+ quirks: {
21
+ useEffortInsteadOfBudget: true,
22
+ filterImageBlocks: true,
23
+ maxReasoningEffort: "max",
24
+ supportsPrefixCompletion: true,
25
+ supportsThinkingParam: true,
26
+ disableThinkingByDefault: true,
27
+ },
28
+ models: [
29
+ {
30
+ id: "deepseek-v4-flash",
31
+ name: "DeepSeek V4 Flash",
32
+ contextWindow: 1000000,
33
+ maxOutput: 384000,
34
+ toolCall: true,
35
+ reasoning: true,
36
+ vision: false,
37
+ costInput: 0.14,
38
+ costOutput: 0.28,
39
+ costCacheRead: 0.0028,
40
+ },
41
+ {
42
+ id: "deepseek-v4-pro",
43
+ name: "DeepSeek V4 Pro",
44
+ contextWindow: 1000000,
45
+ maxOutput: 384000,
46
+ toolCall: true,
47
+ reasoning: true,
48
+ vision: false,
49
+ costInput: 1.74,
50
+ costOutput: 3.48,
51
+ costCacheRead: 0.0145,
52
+ },
53
+ ],
54
+ },
55
+ {
56
+ id: "qwen",
57
+ name: "Alibaba Qwen (DashScope)",
58
+ transport: "anthropic-messages",
59
+ baseUrl: "https://dashscope.aliyuncs.com/apps/anthropic",
60
+ apiKeyEnvVars: ["DASHSCOPE_API_KEY", "QWEN_API_KEY"],
61
+ authType: "x-api-key",
62
+ isAggregator: false,
63
+ defaultModel: "qwen3.6-plus",
64
+ quirks: {
65
+ supportsThinkingParam: true,
66
+ disableThinkingByDefault: true,
67
+ },
68
+ models: [
69
+ // ── Qwen3.6 series: flagship text+vision, 1M context, default thinking on (§1.3, §5.2) ──
70
+ {
71
+ id: "qwen3.6-max-preview",
72
+ name: "Qwen3.6 Max (Preview)",
73
+ contextWindow: 1000000,
74
+ maxOutput: 65536,
75
+ toolCall: true,
76
+ reasoning: true,
77
+ vision: true,
78
+ costInput: 2,
79
+ costOutput: 8,
80
+ costCacheRead: 0.2,
81
+ costCacheWrite: 2.5,
82
+ },
83
+ {
84
+ id: "qwen3.6-plus",
85
+ name: "Qwen3.6 Plus",
86
+ contextWindow: 1000000,
87
+ maxOutput: 65536,
88
+ toolCall: true,
89
+ reasoning: true,
90
+ vision: true,
91
+ costInput: 0.5,
92
+ costOutput: 3.0,
93
+ costCacheRead: 0.05,
94
+ costCacheWrite: 0.625,
95
+ },
96
+ {
97
+ id: "qwen3.6-flash",
98
+ name: "Qwen3.6 Flash",
99
+ contextWindow: 1000000,
100
+ maxOutput: 65536,
101
+ toolCall: true,
102
+ reasoning: true,
103
+ vision: true,
104
+ costInput: 0.1,
105
+ costOutput: 0.5,
106
+ costCacheRead: 0.01,
107
+ costCacheWrite: 0.125,
108
+ },
109
+ // ── Reasoning specialist (§5.2: thinking required, cannot be toggled off) ──
110
+ {
111
+ id: "qwq-plus",
112
+ name: "QwQ Plus",
113
+ contextWindow: 131072,
114
+ maxOutput: 65536,
115
+ toolCall: true,
116
+ reasoning: true,
117
+ reasoningRequired: true,
118
+ streamRequired: true,
119
+ vision: false,
120
+ costInput: 0.5,
121
+ costOutput: 3,
122
+ costCacheRead: 0.05,
123
+ costCacheWrite: 0.625,
124
+ },
125
+ // ── Classic generation: mixed thinking default off (§5.2) ──
126
+ {
127
+ id: "qwen-max",
128
+ name: "Qwen Max",
129
+ contextWindow: 131072,
130
+ maxOutput: 16384,
131
+ toolCall: true,
132
+ reasoning: true,
133
+ vision: false,
134
+ costInput: 2,
135
+ costOutput: 6,
136
+ costCacheRead: 0.2,
137
+ costCacheWrite: 2.5,
138
+ },
139
+ {
140
+ id: "qwen-plus",
141
+ name: "Qwen Plus",
142
+ contextWindow: 131072,
143
+ maxOutput: 16384,
144
+ toolCall: true,
145
+ reasoning: true,
146
+ vision: false,
147
+ costInput: 0.3,
148
+ costOutput: 0.85,
149
+ costCacheRead: 0.03,
150
+ costCacheWrite: 0.375,
151
+ },
152
+ {
153
+ id: "qwen-flash",
154
+ name: "Qwen Flash",
155
+ contextWindow: 131072,
156
+ maxOutput: 16384,
157
+ toolCall: true,
158
+ reasoning: true,
159
+ vision: false,
160
+ },
161
+ {
162
+ id: "qwen-turbo",
163
+ name: "Qwen Turbo",
164
+ contextWindow: 131072,
165
+ maxOutput: 16384,
166
+ toolCall: true,
167
+ reasoning: false,
168
+ vision: false,
169
+ },
170
+ // ── Multimodal VL series (older generation, §10) ──
171
+ {
172
+ id: "qwen-vl-max",
173
+ name: "Qwen VL Max",
174
+ contextWindow: 131072,
175
+ maxOutput: 16384,
176
+ toolCall: true,
177
+ reasoning: false,
178
+ vision: true,
179
+ costInput: 3,
180
+ costOutput: 8.5,
181
+ },
182
+ // ── Omni series (§33–35: full-modality real-time interaction) ──
183
+ {
184
+ id: "qwen3.5-omni-plus",
185
+ name: "Qwen3.5 Omni Plus",
186
+ contextWindow: 131072,
187
+ maxOutput: 16384,
188
+ toolCall: false,
189
+ reasoning: false,
190
+ streamRequired: true,
191
+ vision: true,
192
+ },
193
+ {
194
+ id: "qwen3.5-omni-plus-realtime",
195
+ name: "Qwen3.5 Omni Plus Realtime",
196
+ contextWindow: 131072,
197
+ maxOutput: 16384,
198
+ toolCall: false,
199
+ reasoning: false,
200
+ streamRequired: true,
201
+ vision: true,
202
+ mediaType: "realtime_audio",
203
+ mediaCapabilities: {
204
+ type: "realtime_audio",
205
+ modalities: ["text", "audio"],
206
+ vad: true,
207
+ toolCalling: false,
208
+ },
209
+ },
210
+ // ── OCR specialist (§11: document-centric, single-turn only) ──
211
+ {
212
+ id: "qwen-vl-ocr",
213
+ name: "Qwen VL OCR",
214
+ contextWindow: 131072,
215
+ maxOutput: 4096,
216
+ toolCall: false,
217
+ reasoning: false,
218
+ vision: true,
219
+ mediaType: "document_parsing",
220
+ mediaCapabilities: {
221
+ type: "document_parsing",
222
+ supportedFormats: ["jpg", "jpeg", "png", "bmp", "tiff", "pdf"],
223
+ maxFileSizeMB: 10,
224
+ },
225
+ },
226
+ // ── TTS: Qwen3 non-realtime TTS ──
227
+ {
228
+ id: "qwen3-tts-flash",
229
+ name: "Qwen3 TTS Flash",
230
+ contextWindow: 32768,
231
+ maxOutput: 16384,
232
+ toolCall: false,
233
+ reasoning: false,
234
+ vision: false,
235
+ mediaType: "tts",
236
+ mediaCapabilities: {
237
+ type: "tts",
238
+ operations: ["text2speech", "voice_clone"],
239
+ maxCharacters: 32768,
240
+ formats: ["mp3", "wav"],
241
+ },
242
+ },
243
+ // ── TTS: CosyVoice (§24–25: WebSocket real-time + HTTP non-real-time) ──
244
+ {
245
+ id: "cosyvoice-v2",
246
+ name: "CosyVoice V2",
247
+ contextWindow: 10000,
248
+ maxOutput: 1,
249
+ toolCall: false,
250
+ reasoning: false,
251
+ vision: false,
252
+ mediaType: "tts",
253
+ mediaCapabilities: {
254
+ type: "tts",
255
+ operations: ["text2speech", "voice_clone"],
256
+ maxCharacters: 10000,
257
+ formats: ["mp3", "wav", "pcm"],
258
+ },
259
+ },
260
+ // ── Image generation (§13: DashScope async task API) ──
261
+ {
262
+ id: "wan2.7-image-pro",
263
+ name: "Wan 2.7 Image Pro",
264
+ contextWindow: 4096,
265
+ maxOutput: 1,
266
+ toolCall: false,
267
+ reasoning: false,
268
+ vision: false,
269
+ mediaType: "image",
270
+ mediaCapabilities: {
271
+ type: "image",
272
+ operations: ["text2image"],
273
+ sizes: ["1024x1024", "1280x720", "720x1280", "2048x2048", "3840x2160"],
274
+ },
275
+ },
276
+ {
277
+ id: "wanx2.1-t2i-turbo",
278
+ name: "Wan 2.1 Text-to-Image Turbo",
279
+ contextWindow: 4096,
280
+ maxOutput: 1,
281
+ toolCall: false,
282
+ reasoning: false,
283
+ vision: false,
284
+ mediaType: "image",
285
+ mediaCapabilities: {
286
+ type: "image",
287
+ operations: ["text2image"],
288
+ sizes: ["1024x1024", "720x1280", "1280x720"],
289
+ },
290
+ },
291
+ // ── Video generation (§19–20: DashScope async task API) ──
292
+ {
293
+ id: "wan2.7-t2v-plus",
294
+ name: "Wan 2.7 Text/Image-to-Video Plus",
295
+ contextWindow: 2000,
296
+ maxOutput: 1,
297
+ toolCall: false,
298
+ reasoning: false,
299
+ vision: false,
300
+ mediaType: "video",
301
+ mediaCapabilities: {
302
+ type: "video",
303
+ operations: ["text2video", "img2video"],
304
+ maxDurationSeconds: 15,
305
+ resolutions: ["720P", "1080P"],
306
+ },
307
+ },
308
+ {
309
+ id: "wanx2.1-t2v-turbo",
310
+ name: "Wan 2.1 Text/Image-to-Video Turbo",
311
+ contextWindow: 2000,
312
+ maxOutput: 1,
313
+ toolCall: false,
314
+ reasoning: false,
315
+ vision: false,
316
+ mediaType: "video",
317
+ mediaCapabilities: {
318
+ type: "video",
319
+ operations: ["text2video", "img2video"],
320
+ maxDurationSeconds: 5,
321
+ resolutions: ["480P", "720P"],
322
+ },
323
+ },
324
+ // ── Embedding (§39: OpenAI-compat + DashScope) ──
325
+ {
326
+ id: "text-embedding-v4",
327
+ name: "Text Embedding V4",
328
+ contextWindow: 8192,
329
+ maxOutput: 1,
330
+ toolCall: false,
331
+ reasoning: false,
332
+ vision: false,
333
+ mediaType: "embedding",
334
+ mediaCapabilities: {
335
+ type: "embedding",
336
+ dimensions: 1024,
337
+ maxTokens: 8192,
338
+ },
339
+ },
340
+ {
341
+ id: "sensevoice-v1",
342
+ name: "SenseVoice V1",
343
+ contextWindow: 0,
344
+ maxOutput: 1,
345
+ toolCall: false,
346
+ reasoning: false,
347
+ vision: false,
348
+ costInput: 0,
349
+ costOutput: 0,
350
+ mediaType: "stt",
351
+ mediaCapabilities: {
352
+ type: "stt",
353
+ languages: ["zh", "en", "ja", "ko", "yue"],
354
+ formats: ["wav", "mp3", "m4a", "flac", "aac"],
355
+ },
356
+ },
357
+ // ── Image background generation (§15: DashScope async task) ──
358
+ {
359
+ id: "wanx-background-generation-v2",
360
+ name: "Wan Background Generation V2",
361
+ contextWindow: 2000,
362
+ maxOutput: 1,
363
+ toolCall: false,
364
+ reasoning: false,
365
+ vision: false,
366
+ mediaType: "image",
367
+ mediaCapabilities: {
368
+ type: "image",
369
+ operations: ["inpainting"],
370
+ sizes: ["1024x1024", "1280x720", "720x1280"],
371
+ },
372
+ },
373
+ // ── Rerank (§40: second-stage relevance scoring) ──
374
+ {
375
+ id: "qwen3-rerank",
376
+ name: "Qwen3 Rerank",
377
+ contextWindow: 32768,
378
+ maxOutput: 1,
379
+ toolCall: false,
380
+ reasoning: false,
381
+ vision: false,
382
+ mediaType: "rerank",
383
+ mediaCapabilities: {
384
+ type: "rerank",
385
+ maxDocuments: 100,
386
+ },
387
+ },
388
+ ],
389
+ },
390
+ {
391
+ id: "zhipu",
392
+ name: "Zhipu GLM",
393
+ group: "zhipu",
394
+ transport: "anthropic-messages",
395
+ baseUrl: "https://open.bigmodel.cn/api/anthropic",
396
+ apiKeyEnvVars: ["ZHIPU_API_KEY", "GLM_API_KEY"],
397
+ authType: "x-api-key",
398
+ isAggregator: false,
399
+ defaultModel: "glm-5.1",
400
+ // GLM Anthropic-compat endpoint: thinking blocks unconfirmed;
401
+ // filter thinking blocks since GLM uses reasoning_content (OpenAI-style) not Anthropic thinking blocks.
402
+ quirks: {
403
+ filterThinkingBlocks: true,
404
+ },
405
+ models: [
406
+ // ── Text models (chat/completions via Anthropic-compat endpoint) ──
407
+ {
408
+ id: "glm-5.1",
409
+ name: "GLM-5.1",
410
+ contextWindow: 200000,
411
+ maxOutput: 131072,
412
+ toolCall: true,
413
+ reasoning: true,
414
+ vision: false,
415
+ costInput: 6.0,
416
+ costOutput: 24.0,
417
+ costCacheRead: 1.3,
418
+ },
419
+ {
420
+ id: "glm-5",
421
+ name: "GLM-5",
422
+ contextWindow: 204800,
423
+ maxOutput: 131072,
424
+ toolCall: true,
425
+ reasoning: true,
426
+ vision: false,
427
+ costInput: 1.0,
428
+ costOutput: 3.2,
429
+ costCacheRead: 0.2,
430
+ },
431
+ {
432
+ id: "glm-5-turbo",
433
+ name: "GLM-5 Turbo",
434
+ contextWindow: 131072,
435
+ maxOutput: 16384,
436
+ toolCall: true,
437
+ reasoning: true,
438
+ vision: false,
439
+ costInput: 0.5,
440
+ costOutput: 1.0,
441
+ costCacheRead: 0.1,
442
+ },
443
+ {
444
+ id: "glm-4.7",
445
+ name: "GLM-4.7",
446
+ contextWindow: 131072,
447
+ maxOutput: 16384,
448
+ toolCall: true,
449
+ reasoning: true,
450
+ vision: false,
451
+ costInput: 1.0,
452
+ costOutput: 3.2,
453
+ costCacheRead: 0.2,
454
+ },
455
+ {
456
+ id: "glm-4.6",
457
+ name: "GLM-4.6",
458
+ contextWindow: 131072,
459
+ maxOutput: 16384,
460
+ toolCall: true,
461
+ reasoning: true,
462
+ vision: false,
463
+ costInput: 1.0,
464
+ costOutput: 3.2,
465
+ costCacheRead: 0.2,
466
+ },
467
+ {
468
+ id: "glm-4.5-air",
469
+ name: "GLM-4.5 Air",
470
+ contextWindow: 131072,
471
+ maxOutput: 98304,
472
+ toolCall: true,
473
+ reasoning: true,
474
+ vision: false,
475
+ costInput: 0.5,
476
+ costOutput: 1.5,
477
+ costCacheRead: 0.1,
478
+ },
479
+ {
480
+ id: "glm-4-long",
481
+ name: "GLM-4 Long",
482
+ contextWindow: 1000000,
483
+ maxOutput: 4096,
484
+ toolCall: true,
485
+ reasoning: false,
486
+ vision: false,
487
+ costInput: 0.1,
488
+ costOutput: 0.1,
489
+ },
490
+ // ── Free flash models ──
491
+ {
492
+ id: "glm-4.7-flash",
493
+ name: "GLM-4.7 Flash (Free)",
494
+ contextWindow: 131072,
495
+ maxOutput: 16384,
496
+ toolCall: true,
497
+ reasoning: true,
498
+ vision: false,
499
+ costInput: 0,
500
+ costOutput: 0,
501
+ },
502
+ {
503
+ id: "glm-4.5-flash",
504
+ name: "GLM-4.5 Flash (Free)",
505
+ contextWindow: 131072,
506
+ maxOutput: 16384,
507
+ toolCall: true,
508
+ reasoning: true,
509
+ vision: false,
510
+ costInput: 0,
511
+ costOutput: 0,
512
+ },
513
+ // ── Vision models ──
514
+ {
515
+ id: "glm-5v-turbo",
516
+ name: "GLM-5V Turbo",
517
+ contextWindow: 131072,
518
+ maxOutput: 16384,
519
+ toolCall: true,
520
+ reasoning: true,
521
+ vision: true,
522
+ costInput: 0.5,
523
+ costOutput: 1.0,
524
+ },
525
+ {
526
+ id: "glm-4.6v",
527
+ name: "GLM-4.6V",
528
+ contextWindow: 131072,
529
+ maxOutput: 16384,
530
+ toolCall: true,
531
+ reasoning: true,
532
+ vision: true,
533
+ costInput: 1.0,
534
+ costOutput: 3.2,
535
+ },
536
+ {
537
+ id: "glm-4.6v-flash",
538
+ name: "GLM-4.6V Flash (Free)",
539
+ contextWindow: 131072,
540
+ maxOutput: 16384,
541
+ toolCall: true,
542
+ reasoning: false,
543
+ vision: true,
544
+ costInput: 0,
545
+ costOutput: 0,
546
+ },
547
+ // ── Image generation ──
548
+ // POST /paas/v4/images/generations (sync)
549
+ // POST /paas/v4/async/images/generations (async for glm-image)
550
+ {
551
+ id: "cogview-4-250304",
552
+ name: "CogView-4",
553
+ contextWindow: 0,
554
+ maxOutput: 0,
555
+ toolCall: false,
556
+ reasoning: false,
557
+ vision: false,
558
+ mediaType: "image",
559
+ mediaCapabilities: {
560
+ type: "image",
561
+ operations: ["text2image"],
562
+ sizes: ["1024x1024", "768x1344", "864x1152", "1344x768", "1152x864", "1440x720", "720x1440"],
563
+ },
564
+ },
565
+ {
566
+ id: "cogview-3-flash",
567
+ name: "CogView-3 Flash (Free)",
568
+ contextWindow: 0,
569
+ maxOutput: 0,
570
+ toolCall: false,
571
+ reasoning: false,
572
+ vision: false,
573
+ costInput: 0,
574
+ costOutput: 0,
575
+ mediaType: "image",
576
+ mediaCapabilities: {
577
+ type: "image",
578
+ operations: ["text2image"],
579
+ sizes: ["1024x1024", "768x1344", "864x1152", "1344x768", "1152x864", "1440x720", "720x1440"],
580
+ },
581
+ },
582
+ {
583
+ id: "glm-image",
584
+ name: "GLM-Image",
585
+ contextWindow: 0,
586
+ maxOutput: 0,
587
+ toolCall: false,
588
+ reasoning: false,
589
+ vision: false,
590
+ mediaType: "image",
591
+ mediaCapabilities: {
592
+ type: "image",
593
+ operations: ["text2image"],
594
+ sizes: ["1280x1280", "1568x1056", "1056x1568", "1472x1088", "1088x1472", "1728x960", "960x1728"],
595
+ },
596
+ },
597
+ // ── Video generation ──
598
+ // POST /paas/v4/videos/generations (async task)
599
+ {
600
+ id: "cogvideox-3",
601
+ name: "CogVideoX-3",
602
+ contextWindow: 0,
603
+ maxOutput: 0,
604
+ toolCall: false,
605
+ reasoning: false,
606
+ vision: false,
607
+ mediaType: "video",
608
+ mediaCapabilities: {
609
+ type: "video",
610
+ operations: ["text2video", "img2video"],
611
+ maxDurationSeconds: 10,
612
+ resolutions: ["1080p"],
613
+ },
614
+ },
615
+ {
616
+ id: "cogvideox-flash",
617
+ name: "CogVideoX Flash (Free)",
618
+ contextWindow: 0,
619
+ maxOutput: 0,
620
+ toolCall: false,
621
+ reasoning: false,
622
+ vision: false,
623
+ costInput: 0,
624
+ costOutput: 0,
625
+ mediaType: "video",
626
+ mediaCapabilities: {
627
+ type: "video",
628
+ operations: ["text2video", "img2video"],
629
+ maxDurationSeconds: 6,
630
+ },
631
+ },
632
+ // ── TTS ──
633
+ // POST /paas/v4/audio/speech
634
+ {
635
+ id: "glm-tts",
636
+ name: "GLM-TTS",
637
+ contextWindow: 0,
638
+ maxOutput: 0,
639
+ toolCall: false,
640
+ reasoning: false,
641
+ vision: false,
642
+ mediaType: "tts",
643
+ mediaCapabilities: {
644
+ type: "tts",
645
+ operations: ["text2speech"],
646
+ voices: ["tongtong", "chuichui", "xiaochen", "jam", "kazi", "douji", "luodo"],
647
+ maxCharacters: 1024,
648
+ formats: ["wav", "pcm"],
649
+ },
650
+ },
651
+ // ── Voice Clone ──
652
+ // POST /paas/v4/voice/clone
653
+ {
654
+ id: "glm-tts-clone",
655
+ name: "GLM-TTS Clone",
656
+ contextWindow: 0,
657
+ maxOutput: 0,
658
+ toolCall: false,
659
+ reasoning: false,
660
+ vision: false,
661
+ mediaType: "voice_clone",
662
+ mediaCapabilities: {
663
+ type: "voice_clone",
664
+ maxSampleDurationSeconds: 30,
665
+ maxSampleSizeMB: 10,
666
+ formats: ["wav", "mp3"],
667
+ },
668
+ },
669
+ // ── STT (Audio Transcription) ──
670
+ // POST /paas/v4/audio/transcriptions
671
+ {
672
+ id: "glm-asr-2512",
673
+ name: "GLM-ASR-2512",
674
+ contextWindow: 0,
675
+ maxOutput: 0,
676
+ toolCall: false,
677
+ reasoning: false,
678
+ vision: false,
679
+ mediaType: "stt",
680
+ mediaCapabilities: {
681
+ type: "stt",
682
+ languages: ["zh", "en"],
683
+ maxDurationSeconds: 30,
684
+ formats: ["wav", "mp3"],
685
+ },
686
+ },
687
+ {
688
+ id: "glm-4-voice",
689
+ name: "GLM-4-Voice",
690
+ contextWindow: 128000,
691
+ maxOutput: 16000,
692
+ toolCall: false,
693
+ reasoning: false,
694
+ vision: false,
695
+ mediaType: "realtime_audio",
696
+ mediaCapabilities: {
697
+ type: "realtime_audio",
698
+ modalities: ["text", "audio"],
699
+ vad: true,
700
+ toolCalling: false,
701
+ },
702
+ },
703
+ {
704
+ id: "glm-realtime-flash",
705
+ name: "GLM Realtime Flash",
706
+ contextWindow: 128000,
707
+ maxOutput: 16000,
708
+ toolCall: false,
709
+ reasoning: false,
710
+ vision: true,
711
+ mediaType: "realtime_video",
712
+ mediaCapabilities: {
713
+ type: "realtime_video",
714
+ modalities: ["text", "audio", "video"],
715
+ vad: true,
716
+ toolCalling: false,
717
+ },
718
+ },
719
+ {
720
+ id: "glm-realtime-air",
721
+ name: "GLM Realtime Air",
722
+ contextWindow: 128000,
723
+ maxOutput: 16000,
724
+ toolCall: false,
725
+ reasoning: false,
726
+ vision: true,
727
+ mediaType: "realtime_video",
728
+ mediaCapabilities: {
729
+ type: "realtime_video",
730
+ modalities: ["text", "audio", "video"],
731
+ vad: true,
732
+ toolCalling: false,
733
+ },
734
+ },
735
+ // ── Embeddings ──
736
+ // POST /paas/v4/embeddings
737
+ {
738
+ id: "embedding-3",
739
+ name: "Embedding-3",
740
+ contextWindow: 0,
741
+ maxOutput: 0,
742
+ toolCall: false,
743
+ reasoning: false,
744
+ vision: false,
745
+ mediaType: "embedding",
746
+ mediaCapabilities: {
747
+ type: "embedding",
748
+ dimensions: 2048,
749
+ maxTokens: 3072,
750
+ },
751
+ },
752
+ {
753
+ id: "embedding-2",
754
+ name: "Embedding-2",
755
+ contextWindow: 0,
756
+ maxOutput: 0,
757
+ toolCall: false,
758
+ reasoning: false,
759
+ vision: false,
760
+ mediaType: "embedding",
761
+ mediaCapabilities: {
762
+ type: "embedding",
763
+ dimensions: 1024,
764
+ maxTokens: 512,
765
+ },
766
+ },
767
+ // ── Rerank ──
768
+ // POST /paas/v4/rerank
769
+ {
770
+ id: "rerank",
771
+ name: "GLM Rerank",
772
+ contextWindow: 0,
773
+ maxOutput: 0,
774
+ toolCall: false,
775
+ reasoning: false,
776
+ vision: false,
777
+ mediaType: "rerank",
778
+ mediaCapabilities: {
779
+ type: "rerank",
780
+ maxDocuments: 128,
781
+ maxQueryLength: 4096,
782
+ maxDocumentLength: 4096,
783
+ },
784
+ },
785
+ // ── Document Parsing / OCR ──
786
+ // POST /paas/v4/layout_parsing
787
+ {
788
+ id: "glm-ocr",
789
+ name: "GLM-OCR",
790
+ contextWindow: 0,
791
+ maxOutput: 0,
792
+ toolCall: false,
793
+ reasoning: false,
794
+ vision: false,
795
+ mediaType: "document_parsing",
796
+ mediaCapabilities: {
797
+ type: "document_parsing",
798
+ supportedFormats: ["pdf", "jpg", "png"],
799
+ maxPageCount: 100,
800
+ maxFileSizeMB: 50,
801
+ },
802
+ },
803
+ ],
804
+ },
805
+ // GLM via OpenAI-compat endpoint — native reasoning_content streaming,
806
+ // cached_tokens usage tracking, and standard OpenAI tool_calls format.
807
+ // Covers gaps that Anthropic-compat endpoint cannot: thinking mode passback,
808
+ // prompt_tokens_details.cached_tokens, builtin web_search tool.
809
+ // See: https://docs.bigmodel.cn/cn/guide/develop/api
810
+ {
811
+ id: "zhipu-openai",
812
+ name: "Zhipu GLM OpenAI",
813
+ group: "zhipu",
814
+ transport: "openai-chat",
815
+ baseUrl: "https://open.bigmodel.cn/api/paas/v4",
816
+ apiKeyEnvVars: ["ZHIPU_API_KEY", "GLM_API_KEY"],
817
+ authType: "bearer",
818
+ isAggregator: false,
819
+ defaultModel: "glm-5.1",
820
+ quirks: {
821
+ supportsReasoningEffort: true,
822
+ supportsThinkingParam: true,
823
+ supportsToolStream: true,
824
+ builtinWebSearch: true,
825
+ builtinCodeInterpreter: true,
826
+ },
827
+ models: [
828
+ // ── Text models (chat/completions via OpenAI-compat endpoint) ──
829
+ {
830
+ id: "glm-5.1",
831
+ name: "GLM-5.1",
832
+ contextWindow: 200000,
833
+ maxOutput: 131072,
834
+ toolCall: true,
835
+ reasoning: true,
836
+ vision: false,
837
+ costInput: 6.0,
838
+ costOutput: 24.0,
839
+ costCacheRead: 1.3,
840
+ },
841
+ {
842
+ id: "glm-5",
843
+ name: "GLM-5",
844
+ contextWindow: 204800,
845
+ maxOutput: 131072,
846
+ toolCall: true,
847
+ reasoning: true,
848
+ vision: false,
849
+ costInput: 1.0,
850
+ costOutput: 3.2,
851
+ costCacheRead: 0.2,
852
+ },
853
+ {
854
+ id: "glm-5-turbo",
855
+ name: "GLM-5 Turbo",
856
+ contextWindow: 131072,
857
+ maxOutput: 16384,
858
+ toolCall: true,
859
+ reasoning: true,
860
+ vision: false,
861
+ costInput: 0.5,
862
+ costOutput: 1.0,
863
+ costCacheRead: 0.1,
864
+ },
865
+ {
866
+ id: "glm-4.7",
867
+ name: "GLM-4.7",
868
+ contextWindow: 131072,
869
+ maxOutput: 16384,
870
+ toolCall: true,
871
+ reasoning: true,
872
+ vision: false,
873
+ costInput: 1.0,
874
+ costOutput: 3.2,
875
+ costCacheRead: 0.2,
876
+ },
877
+ {
878
+ id: "glm-4.6",
879
+ name: "GLM-4.6",
880
+ contextWindow: 131072,
881
+ maxOutput: 16384,
882
+ toolCall: true,
883
+ reasoning: true,
884
+ vision: false,
885
+ costInput: 1.0,
886
+ costOutput: 3.2,
887
+ costCacheRead: 0.2,
888
+ },
889
+ {
890
+ id: "glm-4.5-air",
891
+ name: "GLM-4.5 Air",
892
+ contextWindow: 131072,
893
+ maxOutput: 98304,
894
+ toolCall: true,
895
+ reasoning: true,
896
+ vision: false,
897
+ costInput: 0.5,
898
+ costOutput: 1.5,
899
+ costCacheRead: 0.1,
900
+ },
901
+ {
902
+ id: "glm-4-long",
903
+ name: "GLM-4 Long",
904
+ contextWindow: 1000000,
905
+ maxOutput: 4096,
906
+ toolCall: true,
907
+ reasoning: false,
908
+ vision: false,
909
+ costInput: 0.1,
910
+ costOutput: 0.1,
911
+ },
912
+ // ── Free flash models ──
913
+ {
914
+ id: "glm-4.7-flash",
915
+ name: "GLM-4.7 Flash (Free)",
916
+ contextWindow: 131072,
917
+ maxOutput: 16384,
918
+ toolCall: true,
919
+ reasoning: true,
920
+ vision: false,
921
+ costInput: 0,
922
+ costOutput: 0,
923
+ },
924
+ {
925
+ id: "glm-4.5-flash",
926
+ name: "GLM-4.5 Flash (Free)",
927
+ contextWindow: 131072,
928
+ maxOutput: 16384,
929
+ toolCall: true,
930
+ reasoning: true,
931
+ vision: false,
932
+ costInput: 0,
933
+ costOutput: 0,
934
+ },
935
+ // ── Vision models ──
936
+ {
937
+ id: "glm-5v-turbo",
938
+ name: "GLM-5V Turbo",
939
+ contextWindow: 131072,
940
+ maxOutput: 16384,
941
+ toolCall: true,
942
+ reasoning: true,
943
+ vision: true,
944
+ costInput: 0.5,
945
+ costOutput: 1.0,
946
+ },
947
+ {
948
+ id: "glm-4.6v",
949
+ name: "GLM-4.6V",
950
+ contextWindow: 131072,
951
+ maxOutput: 16384,
952
+ toolCall: true,
953
+ reasoning: true,
954
+ vision: true,
955
+ costInput: 1.0,
956
+ costOutput: 3.2,
957
+ },
958
+ {
959
+ id: "glm-4.6v-flash",
960
+ name: "GLM-4.6V Flash (Free)",
961
+ contextWindow: 131072,
962
+ maxOutput: 16384,
963
+ toolCall: true,
964
+ reasoning: false,
965
+ vision: true,
966
+ costInput: 0,
967
+ costOutput: 0,
968
+ },
969
+ ],
970
+ },
971
+ // GLM Coding endpoint — dedicated code-domain variant with enhanced code capabilities.
972
+ // Uses /api/coding/paas/v4 base URL.
973
+ // See: zhipu-ProviderMax.md §5 Coding model
974
+ {
975
+ id: "zhipu-coding",
976
+ name: "Zhipu GLM Coding",
977
+ group: "zhipu",
978
+ transport: "openai-chat",
979
+ baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4",
980
+ apiKeyEnvVars: ["ZHIPU_API_KEY", "GLM_API_KEY"],
981
+ authType: "bearer",
982
+ isAggregator: false,
983
+ defaultModel: "codegeex-4",
984
+ quirks: {
985
+ supportsReasoningEffort: true,
986
+ supportsThinkingParam: true,
987
+ supportsToolStream: true,
988
+ },
989
+ models: [
990
+ {
991
+ id: "codegeex-4",
992
+ name: "CodeGeeX-4",
993
+ contextWindow: 131072,
994
+ maxOutput: 16384,
995
+ toolCall: true,
996
+ reasoning: true,
997
+ vision: false,
998
+ costInput: 0.1,
999
+ costOutput: 0.1,
1000
+ },
1001
+ ],
1002
+ },
1003
+ {
1004
+ id: "minimax",
1005
+ name: "MiniMax",
1006
+ group: "minimax",
1007
+ transport: "anthropic-messages",
1008
+ baseUrl: "https://api.minimaxi.com/anthropic",
1009
+ apiKeyEnvVars: ["MINIMAX_API_KEY"],
1010
+ authType: "x-api-key",
1011
+ isAggregator: false,
1012
+ defaultModel: "MiniMax-M2.7",
1013
+ // MiniMax rejects temperature=0; range is (0.0, 1.0]
1014
+ omitZeroTemperature: true,
1015
+ quirks: {
1016
+ filterImageBlocks: true,
1017
+ },
1018
+ models: [
1019
+ // ── Text LLM (Anthropic Messages transport) ──
1020
+ {
1021
+ id: "MiniMax-M2.7",
1022
+ name: "MiniMax M2.7",
1023
+ contextWindow: 204800,
1024
+ maxOutput: 131072,
1025
+ toolCall: true,
1026
+ reasoning: true,
1027
+ vision: false,
1028
+ costInput: 0.3,
1029
+ costOutput: 1.2,
1030
+ costCacheRead: 0.06,
1031
+ costCacheWrite: 0.375,
1032
+ },
1033
+ {
1034
+ id: "MiniMax-M2.7-highspeed",
1035
+ name: "MiniMax M2.7 Highspeed",
1036
+ contextWindow: 204800,
1037
+ maxOutput: 131072,
1038
+ toolCall: true,
1039
+ reasoning: true,
1040
+ vision: false,
1041
+ costInput: 0.3,
1042
+ costOutput: 1.2,
1043
+ costCacheRead: 0.06,
1044
+ costCacheWrite: 0.375,
1045
+ },
1046
+ {
1047
+ id: "MiniMax-M2.5",
1048
+ name: "MiniMax M2.5",
1049
+ contextWindow: 204800,
1050
+ maxOutput: 131072,
1051
+ toolCall: true,
1052
+ reasoning: true,
1053
+ vision: false,
1054
+ costInput: 0.15,
1055
+ costOutput: 0.6,
1056
+ costCacheRead: 0.03,
1057
+ costCacheWrite: 0.19,
1058
+ },
1059
+ {
1060
+ id: "MiniMax-M2.5-highspeed",
1061
+ name: "MiniMax M2.5 Highspeed",
1062
+ contextWindow: 204800,
1063
+ maxOutput: 131072,
1064
+ toolCall: true,
1065
+ reasoning: true,
1066
+ vision: false,
1067
+ costInput: 0.15,
1068
+ costOutput: 0.6,
1069
+ costCacheRead: 0.03,
1070
+ costCacheWrite: 0.19,
1071
+ },
1072
+ // ── Legacy text models (§1.5: available but not default-routed) ──
1073
+ {
1074
+ id: "MiniMax-M2.1",
1075
+ name: "MiniMax M2.1 (legacy)",
1076
+ contextWindow: 204800,
1077
+ maxOutput: 131072,
1078
+ toolCall: true,
1079
+ reasoning: false,
1080
+ vision: false,
1081
+ costInput: 0.15,
1082
+ costOutput: 0.6,
1083
+ },
1084
+ {
1085
+ id: "MiniMax-M2.1-highspeed",
1086
+ name: "MiniMax M2.1 Highspeed (legacy)",
1087
+ contextWindow: 204800,
1088
+ maxOutput: 131072,
1089
+ toolCall: true,
1090
+ reasoning: false,
1091
+ vision: false,
1092
+ costInput: 0.15,
1093
+ costOutput: 0.6,
1094
+ },
1095
+ {
1096
+ id: "MiniMax-M2",
1097
+ name: "MiniMax M2 (legacy)",
1098
+ contextWindow: 204800,
1099
+ maxOutput: 131072,
1100
+ toolCall: true,
1101
+ reasoning: false,
1102
+ vision: false,
1103
+ costInput: 0.1,
1104
+ costOutput: 0.4,
1105
+ },
1106
+ // ── Music generation (native API, not Anthropic Messages) ──
1107
+ // POST https://api.minimaxi.com/v1/music_generation
1108
+ // Async job: submit → poll task_id → fetch audio URL
1109
+ // Auth: Authorization: Bearer $MINIMAX_API_KEY
1110
+ // Params: { model, lyrics, refer_voice (cover), instrumental (accompaniment) }
1111
+ // Docs: https://platform.minimaxi.com/document/Music
1112
+ {
1113
+ id: "music-2.6",
1114
+ name: "MiniMax Music 2.6",
1115
+ contextWindow: 4096,
1116
+ maxOutput: 1,
1117
+ toolCall: false,
1118
+ reasoning: false,
1119
+ vision: false,
1120
+ costInput: 0,
1121
+ costOutput: 0,
1122
+ mediaType: "music",
1123
+ mediaCapabilities: {
1124
+ type: "music",
1125
+ operations: ["text2music"],
1126
+ maxDurationSeconds: 300,
1127
+ formats: ["mp3", "wav"],
1128
+ },
1129
+ },
1130
+ {
1131
+ id: "music-cover",
1132
+ name: "MiniMax Music Cover",
1133
+ contextWindow: 4096,
1134
+ maxOutput: 1,
1135
+ toolCall: false,
1136
+ reasoning: false,
1137
+ vision: false,
1138
+ costInput: 0,
1139
+ costOutput: 0,
1140
+ mediaType: "music",
1141
+ mediaCapabilities: {
1142
+ type: "music",
1143
+ operations: ["cover"],
1144
+ maxDurationSeconds: 300,
1145
+ formats: ["mp3", "wav"],
1146
+ },
1147
+ },
1148
+ // ── Video generation (native API: POST /v1/video_generation) ──
1149
+ // Async job: submit → poll task_id → download video URL
1150
+ // Models: text2video, img2video, first+last frame, subject reference
1151
+ {
1152
+ id: "MiniMax-Hailuo-2.3",
1153
+ name: "Hailuo 2.3",
1154
+ contextWindow: 2000,
1155
+ maxOutput: 1,
1156
+ toolCall: false,
1157
+ reasoning: false,
1158
+ vision: false,
1159
+ costInput: 0,
1160
+ costOutput: 0,
1161
+ mediaType: "video",
1162
+ mediaCapabilities: {
1163
+ type: "video",
1164
+ operations: ["text2video", "img2video"],
1165
+ maxDurationSeconds: 10,
1166
+ resolutions: ["768P", "1080P"],
1167
+ },
1168
+ },
1169
+ {
1170
+ id: "MiniMax-Hailuo-2.3-Fast",
1171
+ name: "Hailuo 2.3 Fast",
1172
+ contextWindow: 2000,
1173
+ maxOutput: 1,
1174
+ toolCall: false,
1175
+ reasoning: false,
1176
+ vision: false,
1177
+ costInput: 0,
1178
+ costOutput: 0,
1179
+ mediaType: "video",
1180
+ mediaCapabilities: {
1181
+ type: "video",
1182
+ operations: ["text2video", "img2video"],
1183
+ maxDurationSeconds: 10,
1184
+ resolutions: ["768P", "1080P"],
1185
+ },
1186
+ },
1187
+ {
1188
+ id: "MiniMax-Hailuo-02",
1189
+ name: "Hailuo 02",
1190
+ contextWindow: 2000,
1191
+ maxOutput: 1,
1192
+ toolCall: false,
1193
+ reasoning: false,
1194
+ vision: false,
1195
+ costInput: 0,
1196
+ costOutput: 0,
1197
+ mediaType: "video",
1198
+ mediaCapabilities: {
1199
+ type: "video",
1200
+ operations: ["text2video", "img2video"],
1201
+ maxDurationSeconds: 10,
1202
+ resolutions: ["512P", "768P", "1080P"],
1203
+ },
1204
+ },
1205
+ // ── Image generation (native API: POST /v1/image_generation) ──
1206
+ {
1207
+ id: "image-01",
1208
+ name: "MiniMax Image 01",
1209
+ contextWindow: 4096,
1210
+ maxOutput: 1,
1211
+ toolCall: false,
1212
+ reasoning: false,
1213
+ vision: false,
1214
+ costInput: 0,
1215
+ costOutput: 0,
1216
+ mediaType: "image",
1217
+ mediaCapabilities: {
1218
+ type: "image",
1219
+ operations: ["text2image", "img2img"],
1220
+ sizes: ["512x512", "1024x1024", "2048x2048"],
1221
+ },
1222
+ },
1223
+ {
1224
+ id: "image-01-live",
1225
+ name: "MiniMax Image 01 Live",
1226
+ contextWindow: 4096,
1227
+ maxOutput: 1,
1228
+ toolCall: false,
1229
+ reasoning: false,
1230
+ vision: false,
1231
+ costInput: 0,
1232
+ costOutput: 0,
1233
+ mediaType: "image",
1234
+ mediaCapabilities: {
1235
+ type: "image",
1236
+ operations: ["text2image"],
1237
+ sizes: ["1024x1024"],
1238
+ },
1239
+ },
1240
+ // ── TTS (native API: POST /v1/t2a_v2) ──
1241
+ {
1242
+ id: "speech-2.8-hd",
1243
+ name: "MiniMax Speech 2.8 HD",
1244
+ contextWindow: 10000,
1245
+ maxOutput: 1,
1246
+ toolCall: false,
1247
+ reasoning: false,
1248
+ vision: false,
1249
+ costInput: 0,
1250
+ costOutput: 0,
1251
+ mediaType: "tts",
1252
+ mediaCapabilities: {
1253
+ type: "tts",
1254
+ operations: ["text2speech", "voice_clone"],
1255
+ formats: ["mp3", "pcm", "flac", "wav", "opus"],
1256
+ },
1257
+ },
1258
+ {
1259
+ id: "speech-2.8-turbo",
1260
+ name: "MiniMax Speech 2.8 Turbo",
1261
+ contextWindow: 10000,
1262
+ maxOutput: 1,
1263
+ toolCall: false,
1264
+ reasoning: false,
1265
+ vision: false,
1266
+ costInput: 0,
1267
+ costOutput: 0,
1268
+ mediaType: "tts",
1269
+ mediaCapabilities: {
1270
+ type: "tts",
1271
+ operations: ["text2speech", "voice_clone"],
1272
+ formats: ["mp3", "pcm", "flac", "wav", "opus"],
1273
+ },
1274
+ },
1275
+ {
1276
+ id: "voice-clone",
1277
+ aliases: ["minimax-voice-clone"],
1278
+ name: "MiniMax Voice Clone",
1279
+ contextWindow: 0,
1280
+ maxOutput: 1,
1281
+ toolCall: false,
1282
+ reasoning: false,
1283
+ vision: false,
1284
+ costInput: 0,
1285
+ costOutput: 0,
1286
+ mediaType: "voice_clone",
1287
+ mediaCapabilities: {
1288
+ type: "voice_clone",
1289
+ maxSampleDurationSeconds: 30,
1290
+ maxSampleSizeMB: 20,
1291
+ formats: ["mp3", "wav", "m4a"],
1292
+ },
1293
+ },
1294
+ ],
1295
+ },
1296
+ // MiniMax OpenAI-compatible route — enables reasoning_split for thinking/content
1297
+ // separation and cumulative streaming (§3.5, §3.7 of minimax-ProviderMax).
1298
+ // Same API key as Anthropic route; base URL at /v1.
1299
+ {
1300
+ id: "minimax-openai",
1301
+ name: "MiniMax (OpenAI)",
1302
+ group: "minimax",
1303
+ transport: "openai-chat",
1304
+ baseUrl: "https://api.minimaxi.com/v1",
1305
+ apiKeyEnvVars: ["MINIMAX_API_KEY"],
1306
+ authType: "bearer",
1307
+ isAggregator: false,
1308
+ omitZeroTemperature: true,
1309
+ defaultModel: "MiniMax-M2.7",
1310
+ quirks: {
1311
+ supportsReasoningSplit: true,
1312
+ },
1313
+ models: [
1314
+ {
1315
+ id: "MiniMax-M2.7",
1316
+ aliases: ["minimax-m2.7"],
1317
+ name: "MiniMax M2.7",
1318
+ contextWindow: 204800,
1319
+ maxOutput: 131072,
1320
+ toolCall: true,
1321
+ reasoning: true,
1322
+ vision: false,
1323
+ costInput: 0.3,
1324
+ costOutput: 1.2,
1325
+ },
1326
+ {
1327
+ id: "MiniMax-M2.7-highspeed",
1328
+ name: "MiniMax M2.7 Highspeed",
1329
+ contextWindow: 204800,
1330
+ maxOutput: 131072,
1331
+ toolCall: true,
1332
+ reasoning: true,
1333
+ vision: false,
1334
+ costInput: 0.3,
1335
+ costOutput: 1.2,
1336
+ },
1337
+ {
1338
+ id: "MiniMax-M2.5",
1339
+ name: "MiniMax M2.5",
1340
+ contextWindow: 204800,
1341
+ maxOutput: 131072,
1342
+ toolCall: true,
1343
+ reasoning: true,
1344
+ vision: false,
1345
+ costInput: 0.15,
1346
+ costOutput: 0.6,
1347
+ },
1348
+ {
1349
+ id: "MiniMax-M2.5-highspeed",
1350
+ name: "MiniMax M2.5 Highspeed",
1351
+ contextWindow: 204800,
1352
+ maxOutput: 131072,
1353
+ toolCall: true,
1354
+ reasoning: true,
1355
+ vision: false,
1356
+ costInput: 0.15,
1357
+ costOutput: 0.6,
1358
+ },
1359
+ {
1360
+ id: "MiniMax-M2.1",
1361
+ name: "MiniMax M2.1 (legacy)",
1362
+ contextWindow: 204800,
1363
+ maxOutput: 131072,
1364
+ toolCall: true,
1365
+ reasoning: false,
1366
+ vision: false,
1367
+ costInput: 0.15,
1368
+ costOutput: 0.6,
1369
+ },
1370
+ {
1371
+ id: "MiniMax-M2.1-highspeed",
1372
+ name: "MiniMax M2.1 Highspeed (legacy)",
1373
+ contextWindow: 204800,
1374
+ maxOutput: 131072,
1375
+ toolCall: true,
1376
+ reasoning: false,
1377
+ vision: false,
1378
+ costInput: 0.15,
1379
+ costOutput: 0.6,
1380
+ },
1381
+ ],
1382
+ },
1383
+ {
1384
+ id: "moonshot",
1385
+ name: "Moonshot (Kimi)",
1386
+ transport: "openai-chat",
1387
+ baseUrl: "https://api.moonshot.cn",
1388
+ apiKeyEnvVars: ["MOONSHOT_API_KEY"],
1389
+ authType: "bearer",
1390
+ isAggregator: false,
1391
+ // Moonshot API rejects temperature=0; omit when zero
1392
+ omitZeroTemperature: true,
1393
+ defaultModel: "kimi-k2.6",
1394
+ quirks: {
1395
+ supportsReasoningEffort: true,
1396
+ supportsThinkingParam: true,
1397
+ disableThinkingByDefault: true,
1398
+ builtinWebSearch: true,
1399
+ },
1400
+ models: [
1401
+ // ── Kimi K2.6 — flagship reasoning model, immutable sampling params ──
1402
+ {
1403
+ id: "kimi-k2.6",
1404
+ name: "Kimi K2.6",
1405
+ contextWindow: 262144,
1406
+ maxOutput: 262144,
1407
+ toolCall: true,
1408
+ reasoning: true,
1409
+ vision: true,
1410
+ costInput: 0.95,
1411
+ costOutput: 4.0,
1412
+ costCacheRead: 0.16,
1413
+ },
1414
+ // ── Kimi K2.5 — multimodal reasoning, image+video input ──
1415
+ {
1416
+ id: "kimi-k2.5",
1417
+ name: "Kimi K2.5",
1418
+ contextWindow: 262144,
1419
+ maxOutput: 262144,
1420
+ toolCall: true,
1421
+ reasoning: true,
1422
+ vision: true,
1423
+ costInput: 0.8,
1424
+ costOutput: 4.0,
1425
+ costCacheRead: 0.2,
1426
+ },
1427
+ // ── Moonshot V1 — long-context completion tier (128K) ──
1428
+ {
1429
+ id: "moonshot-v1-128k",
1430
+ name: "Moonshot V1 128K",
1431
+ contextWindow: 131072,
1432
+ maxOutput: 8192,
1433
+ toolCall: true,
1434
+ reasoning: false,
1435
+ vision: false,
1436
+ costInput: 0.84,
1437
+ costOutput: 0.84,
1438
+ },
1439
+ ],
1440
+ },
1441
+ // Doubao multimodal generation models — native Volcengine API (not Anthropic Messages compatible).
1442
+ // Text models are available via Coding Plan Anthropic endpoint, not listed here.
1443
+ // See: https://www.volcengine.com/docs/82379/1330310
1444
+ {
1445
+ id: "volcengine",
1446
+ name: "Doubao / Volcengine",
1447
+ transport: "volcengine-responses",
1448
+ baseUrl: "https://ark.cn-beijing.volces.com/api",
1449
+ apiKeyEnvVars: ["ARK_API_KEY", "DOUBAO_API_KEY"],
1450
+ authType: "bearer",
1451
+ isAggregator: false,
1452
+ defaultModel: "doubao-seed-2-0-lite-260428",
1453
+ models: [
1454
+ // ── Text generation (Seed 2.0 series) — Responses API ──
1455
+ // POST https://ark.cn-beijing.volces.com/api/v3/responses
1456
+ // Supports: deep thinking, tool calling, vision, structured output, context caching
1457
+ // Docs: https://www.volcengine.com/docs/82379/1399008
1458
+ {
1459
+ id: "doubao-seed-2-0-pro-260215",
1460
+ aliases: ["doubao-seed-2-0-pro"],
1461
+ name: "Doubao Seed 2.0 Pro",
1462
+ contextWindow: 262_144,
1463
+ maxOutput: 131_072,
1464
+ toolCall: true,
1465
+ reasoning: true,
1466
+ vision: true,
1467
+ costInput: 0,
1468
+ costOutput: 0,
1469
+ },
1470
+ {
1471
+ id: "doubao-seed-2-0-lite-260428",
1472
+ aliases: ["doubao-seed-2-0-lite"],
1473
+ name: "Doubao Seed 2.0 Lite",
1474
+ contextWindow: 262_144,
1475
+ maxOutput: 131_072,
1476
+ toolCall: true,
1477
+ reasoning: true,
1478
+ vision: true,
1479
+ costInput: 0,
1480
+ costOutput: 0,
1481
+ },
1482
+ {
1483
+ id: "doubao-seed-2-0-mini-260428",
1484
+ aliases: ["doubao-seed-2-0-mini"],
1485
+ name: "Doubao Seed 2.0 Mini",
1486
+ contextWindow: 262_144,
1487
+ maxOutput: 131_072,
1488
+ toolCall: true,
1489
+ reasoning: true,
1490
+ vision: true,
1491
+ costInput: 0,
1492
+ costOutput: 0,
1493
+ },
1494
+ {
1495
+ id: "doubao-seed-2-0-code-preview-260215",
1496
+ aliases: ["doubao-seed-2-0-code-preview"],
1497
+ name: "Doubao Seed 2.0 Code Preview",
1498
+ contextWindow: 262_144,
1499
+ maxOutput: 131_072,
1500
+ toolCall: true,
1501
+ reasoning: true,
1502
+ vision: true,
1503
+ costInput: 0,
1504
+ costOutput: 0,
1505
+ },
1506
+ {
1507
+ id: "doubao-seed-1-8-251228",
1508
+ name: "Doubao Seed 1.8",
1509
+ contextWindow: 262_144,
1510
+ maxOutput: 65_536,
1511
+ toolCall: true,
1512
+ reasoning: true,
1513
+ vision: true,
1514
+ costInput: 0,
1515
+ costOutput: 0,
1516
+ },
1517
+ {
1518
+ id: "doubao-embedding-vision-251215",
1519
+ name: "Doubao Embedding Vision",
1520
+ contextWindow: 8192,
1521
+ maxOutput: 1,
1522
+ toolCall: false,
1523
+ reasoning: false,
1524
+ vision: true,
1525
+ costInput: 0,
1526
+ costOutput: 0,
1527
+ mediaType: "embedding",
1528
+ mediaCapabilities: {
1529
+ type: "embedding",
1530
+ dimensions: 2048,
1531
+ maxTokens: 8192,
1532
+ },
1533
+ },
1534
+ // ── Video generation (Seedance) ──
1535
+ // POST https://ark.cn-beijing.volces.com/api/v3/contents/generations/tasks
1536
+ // Async job: submit → poll task_id → fetch result video URL
1537
+ // Supports: text-to-video, image-to-video (first/last frame), video extend, video edit
1538
+ // Output: mp4, 480p/720p/1080p, 24fps, 4~15s
1539
+ // Docs: https://www.volcengine.com/docs/82379/2291680
1540
+ {
1541
+ id: "doubao-seedance-2-0-260128",
1542
+ aliases: ["doubao-seedance-2-0"],
1543
+ name: "Doubao Seedance 2.0",
1544
+ contextWindow: 4096,
1545
+ maxOutput: 1,
1546
+ toolCall: false,
1547
+ reasoning: false,
1548
+ vision: true,
1549
+ costInput: 0,
1550
+ costOutput: 0,
1551
+ mediaType: "video",
1552
+ mediaCapabilities: {
1553
+ type: "video",
1554
+ operations: ["text2video", "img2video", "video2video", "edit", "merge", "upscale"],
1555
+ maxDurationSeconds: 15,
1556
+ resolutions: ["480p", "720p", "1080p"],
1557
+ aspectRatios: ["16:9", "9:16", "1:1"],
1558
+ fps: [24],
1559
+ },
1560
+ },
1561
+ {
1562
+ id: "doubao-seedance-2-0-fast-260128",
1563
+ aliases: ["doubao-seedance-2-0-fast"],
1564
+ name: "Doubao Seedance 2.0 Fast",
1565
+ contextWindow: 4096,
1566
+ maxOutput: 1,
1567
+ toolCall: false,
1568
+ reasoning: false,
1569
+ vision: true,
1570
+ costInput: 0,
1571
+ costOutput: 0,
1572
+ mediaType: "video",
1573
+ mediaCapabilities: {
1574
+ type: "video",
1575
+ operations: ["text2video", "img2video", "video2video", "edit", "merge", "upscale"],
1576
+ maxDurationSeconds: 15,
1577
+ resolutions: ["480p", "720p"],
1578
+ aspectRatios: ["16:9", "9:16", "1:1"],
1579
+ fps: [24],
1580
+ },
1581
+ },
1582
+ // ── Image generation (Seedream) ──
1583
+ // POST https://ark.cn-beijing.volces.com/api/v3/images/generations
1584
+ // Sync response: returns base64/URL image directly
1585
+ // Supports: text-to-image, image-to-image (single/multi-ref), group generation
1586
+ // Docs: https://www.volcengine.com/docs/82379/1824121
1587
+ {
1588
+ id: "doubao-seedream-5-0-260128",
1589
+ aliases: ["doubao-seedream-5-0"],
1590
+ name: "Doubao Seedream 5.0",
1591
+ contextWindow: 4096,
1592
+ maxOutput: 1,
1593
+ toolCall: false,
1594
+ reasoning: false,
1595
+ vision: true,
1596
+ costInput: 0,
1597
+ costOutput: 0,
1598
+ mediaType: "image",
1599
+ mediaCapabilities: {
1600
+ type: "image",
1601
+ operations: ["text2image", "img2img"],
1602
+ sizes: ["2K"],
1603
+ },
1604
+ },
1605
+ {
1606
+ id: "doubao-seedream-4-5-251128",
1607
+ aliases: ["doubao-seedream-4-5"],
1608
+ name: "Doubao Seedream 4.5",
1609
+ contextWindow: 4096,
1610
+ maxOutput: 1,
1611
+ toolCall: false,
1612
+ reasoning: false,
1613
+ vision: true,
1614
+ costInput: 0,
1615
+ costOutput: 0,
1616
+ mediaType: "image",
1617
+ mediaCapabilities: {
1618
+ type: "image",
1619
+ operations: ["text2image", "img2img"],
1620
+ sizes: ["2K"],
1621
+ },
1622
+ },
1623
+ // ── 3D generation ──
1624
+ // POST https://ark.cn-beijing.volces.com/api/v3/3d-contents/generations/tasks
1625
+ // Async job: submit → poll task_id → download glb/obj/usd/usdz
1626
+ // Supports: image-to-3D with PBR materials (Seed3D/HiTem3D), text-to-3D (Hyper3D)
1627
+ // Docs: https://www.volcengine.com/docs/82379/1874993
1628
+ {
1629
+ id: "doubao-seed3d-2-0-260328",
1630
+ aliases: ["doubao-seed3d-2-0"],
1631
+ name: "Doubao Seed3D 2.0",
1632
+ contextWindow: 4096,
1633
+ maxOutput: 1,
1634
+ toolCall: false,
1635
+ reasoning: false,
1636
+ vision: true,
1637
+ costInput: 0,
1638
+ costOutput: 0,
1639
+ mediaType: "3d",
1640
+ mediaCapabilities: {
1641
+ type: "3d",
1642
+ operations: ["img2_3d"],
1643
+ outputFormats: ["glb", "obj", "usd", "usdz"],
1644
+ },
1645
+ },
1646
+ {
1647
+ id: "hyper3d-gen2-260112",
1648
+ aliases: ["hyper3d-gen2"],
1649
+ name: "Hyper3D Gen2",
1650
+ contextWindow: 4096,
1651
+ maxOutput: 1,
1652
+ toolCall: false,
1653
+ reasoning: false,
1654
+ vision: true,
1655
+ costInput: 0,
1656
+ costOutput: 0,
1657
+ mediaType: "3d",
1658
+ mediaCapabilities: {
1659
+ type: "3d",
1660
+ operations: ["text2_3d", "img2_3d"],
1661
+ outputFormats: ["glb", "obj"],
1662
+ },
1663
+ },
1664
+ {
1665
+ id: "hitem3d-2-0-251223",
1666
+ aliases: ["hitem3d-2-0"],
1667
+ name: "HiTem3D 2.0",
1668
+ contextWindow: 4096,
1669
+ maxOutput: 1,
1670
+ toolCall: false,
1671
+ reasoning: false,
1672
+ vision: true,
1673
+ costInput: 0,
1674
+ costOutput: 0,
1675
+ mediaType: "3d",
1676
+ mediaCapabilities: {
1677
+ type: "3d",
1678
+ operations: ["img2_3d"],
1679
+ outputFormats: ["glb", "obj", "usd"],
1680
+ },
1681
+ },
1682
+ ],
1683
+ },
1684
+ // ── Tier 2: Major International providers ────────────────────────────
1685
+ {
1686
+ id: "openai",
1687
+ name: "OpenAI",
1688
+ transport: "openai-responses",
1689
+ baseUrl: "https://api.openai.com",
1690
+ apiKeyEnvVars: ["OPENAI_API_KEY"],
1691
+ authType: "bearer",
1692
+ isAggregator: false,
1693
+ defaultModel: "gpt-5.5",
1694
+ quirks: {
1695
+ builtinWebSearch: true,
1696
+ builtinCodeInterpreter: true,
1697
+ builtinFileSearch: true,
1698
+ },
1699
+ models: [
1700
+ {
1701
+ id: "gpt-5.5",
1702
+ name: "GPT-5.5",
1703
+ contextWindow: 1050000,
1704
+ maxOutput: 128000,
1705
+ toolCall: true,
1706
+ reasoning: true,
1707
+ vision: true,
1708
+ costInput: 5.0,
1709
+ costOutput: 30.0,
1710
+ costCacheRead: 0.5,
1711
+ },
1712
+ {
1713
+ id: "gpt-5.4",
1714
+ name: "GPT-5.4",
1715
+ contextWindow: 1050000,
1716
+ maxOutput: 128000,
1717
+ toolCall: true,
1718
+ reasoning: true,
1719
+ vision: true,
1720
+ costInput: 2.5,
1721
+ costOutput: 15.0,
1722
+ costCacheRead: 0.25,
1723
+ },
1724
+ {
1725
+ id: "gpt-5.4-mini",
1726
+ name: "GPT-5.4 Mini",
1727
+ contextWindow: 400000,
1728
+ maxOutput: 128000,
1729
+ toolCall: true,
1730
+ reasoning: true,
1731
+ vision: true,
1732
+ costInput: 0.75,
1733
+ costOutput: 4.5,
1734
+ costCacheRead: 0.075,
1735
+ },
1736
+ {
1737
+ id: "gpt-5.4-nano",
1738
+ name: "GPT-5.4 Nano",
1739
+ contextWindow: 400000,
1740
+ maxOutput: 128000,
1741
+ toolCall: true,
1742
+ reasoning: true,
1743
+ vision: true,
1744
+ costInput: 0.20,
1745
+ costOutput: 1.25,
1746
+ costCacheRead: 0.02,
1747
+ },
1748
+ // ── Image generation (Images API) ──
1749
+ // POST https://api.openai.com/v1/images/generations
1750
+ // Sync response: returns base64/URL image
1751
+ // Params: { model: "gpt-image-2", prompt, size, quality, background, moderation }
1752
+ // Docs: https://developers.openai.com/api/docs/guides/image-generation
1753
+ {
1754
+ id: "gpt-image-2",
1755
+ name: "GPT Image 2",
1756
+ contextWindow: 4096,
1757
+ maxOutput: 1,
1758
+ toolCall: false,
1759
+ reasoning: false,
1760
+ vision: true,
1761
+ costInput: 0,
1762
+ costOutput: 0,
1763
+ mediaType: "image",
1764
+ mediaCapabilities: {
1765
+ type: "image",
1766
+ operations: ["text2image", "img2img", "inpainting"],
1767
+ sizes: ["1024x1024", "1536x1024", "1024x1536", "auto"],
1768
+ transparentBackground: true,
1769
+ },
1770
+ },
1771
+ // ── Realtime Audio (WebSocket Realtime API) ──
1772
+ // WSS wss://api.openai.com/v1/realtime?model=<model>
1773
+ // Auth via sub-protocol: openai-insecure-api-key.<key>
1774
+ // Bidirectional audio/voice streaming with VAD + tool calling
1775
+ // Docs: https://developers.openai.com/docs/api-reference/realtime
1776
+ {
1777
+ id: "gpt-realtime-2",
1778
+ name: "GPT Realtime 2",
1779
+ contextWindow: 128000,
1780
+ maxOutput: 16000,
1781
+ toolCall: true,
1782
+ reasoning: true,
1783
+ vision: false,
1784
+ mediaType: "realtime_audio",
1785
+ mediaCapabilities: {
1786
+ type: "realtime_audio",
1787
+ voices: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
1788
+ modalities: ["text", "audio"],
1789
+ vad: true,
1790
+ toolCalling: true,
1791
+ },
1792
+ },
1793
+ {
1794
+ id: "gpt-realtime-translate",
1795
+ name: "GPT Realtime Translate",
1796
+ contextWindow: 128000,
1797
+ maxOutput: 16000,
1798
+ toolCall: false,
1799
+ reasoning: false,
1800
+ vision: false,
1801
+ mediaType: "realtime_audio",
1802
+ mediaCapabilities: {
1803
+ type: "realtime_audio",
1804
+ voices: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
1805
+ modalities: ["text", "audio"],
1806
+ vad: true,
1807
+ toolCalling: false,
1808
+ },
1809
+ },
1810
+ {
1811
+ id: "gpt-realtime-1.5",
1812
+ name: "GPT Realtime 1.5",
1813
+ contextWindow: 128000,
1814
+ maxOutput: 16000,
1815
+ toolCall: true,
1816
+ reasoning: false,
1817
+ vision: false,
1818
+ mediaType: "realtime_audio",
1819
+ mediaCapabilities: {
1820
+ type: "realtime_audio",
1821
+ voices: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
1822
+ modalities: ["text", "audio"],
1823
+ vad: true,
1824
+ toolCalling: true,
1825
+ },
1826
+ },
1827
+ {
1828
+ id: "gpt-realtime-mini",
1829
+ name: "GPT Realtime Mini",
1830
+ contextWindow: 128000,
1831
+ maxOutput: 16000,
1832
+ toolCall: true,
1833
+ reasoning: false,
1834
+ vision: false,
1835
+ mediaType: "realtime_audio",
1836
+ mediaCapabilities: {
1837
+ type: "realtime_audio",
1838
+ voices: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
1839
+ modalities: ["text", "audio"],
1840
+ vad: true,
1841
+ toolCalling: true,
1842
+ },
1843
+ },
1844
+ ],
1845
+ },
1846
+ {
1847
+ id: "anthropic",
1848
+ name: "Anthropic",
1849
+ transport: "anthropic-messages",
1850
+ baseUrl: "https://api.anthropic.com",
1851
+ apiKeyEnvVars: ["ANTHROPIC_API_KEY"],
1852
+ authType: "x-api-key",
1853
+ isAggregator: false,
1854
+ defaultModel: "claude-opus-4-7",
1855
+ quirks: {
1856
+ supportsDocumentVision: true,
1857
+ },
1858
+ models: [
1859
+ {
1860
+ id: "claude-opus-4-7",
1861
+ name: "Claude Opus 4.7",
1862
+ contextWindow: 1000000,
1863
+ maxOutput: 128000,
1864
+ toolCall: true,
1865
+ reasoning: true,
1866
+ vision: true,
1867
+ costInput: 5.0,
1868
+ costOutput: 25.0,
1869
+ costCacheRead: 0.5,
1870
+ costCacheWrite: 6.25,
1871
+ },
1872
+ {
1873
+ id: "claude-opus-4-6",
1874
+ name: "Claude Opus 4.6",
1875
+ contextWindow: 1000000,
1876
+ maxOutput: 128000,
1877
+ toolCall: true,
1878
+ reasoning: true,
1879
+ vision: true,
1880
+ costInput: 5.0,
1881
+ costOutput: 25.0,
1882
+ costCacheRead: 0.5,
1883
+ costCacheWrite: 6.25,
1884
+ },
1885
+ {
1886
+ id: "claude-sonnet-4-6",
1887
+ name: "Claude Sonnet 4.6",
1888
+ contextWindow: 1000000,
1889
+ maxOutput: 64000,
1890
+ toolCall: true,
1891
+ reasoning: true,
1892
+ vision: true,
1893
+ costInput: 3.0,
1894
+ costOutput: 15.0,
1895
+ costCacheRead: 0.3,
1896
+ costCacheWrite: 3.75,
1897
+ },
1898
+ {
1899
+ id: "claude-sonnet-4-5",
1900
+ name: "Claude Sonnet 4.5",
1901
+ contextWindow: 200000,
1902
+ maxOutput: 64000,
1903
+ toolCall: true,
1904
+ reasoning: true,
1905
+ vision: true,
1906
+ costInput: 3.0,
1907
+ costOutput: 15.0,
1908
+ costCacheRead: 0.3,
1909
+ costCacheWrite: 3.75,
1910
+ },
1911
+ {
1912
+ id: "claude-haiku-4-5",
1913
+ name: "Claude Haiku 4.5",
1914
+ contextWindow: 200000,
1915
+ maxOutput: 64000,
1916
+ toolCall: true,
1917
+ reasoning: true,
1918
+ vision: true,
1919
+ costInput: 1.0,
1920
+ costOutput: 5.0,
1921
+ costCacheRead: 0.1,
1922
+ costCacheWrite: 1.25,
1923
+ },
1924
+ ],
1925
+ },
1926
+ {
1927
+ id: "google",
1928
+ name: "Google Gemini",
1929
+ transport: "gemini-generatecontent",
1930
+ baseUrl: "https://generativelanguage.googleapis.com/v1beta",
1931
+ apiKeyEnvVars: ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
1932
+ authType: "x-api-key",
1933
+ isAggregator: false,
1934
+ defaultModel: "gemini-3-flash-preview",
1935
+ quirks: {
1936
+ builtinWebSearch: true,
1937
+ builtinCodeInterpreter: true,
1938
+ builtinUrlContext: true,
1939
+ builtinMapsGrounding: true,
1940
+ builtinFileSearch: true,
1941
+ supportsDocumentVision: true,
1942
+ },
1943
+ models: [
1944
+ // ── Gemini 3 series (latest generation) ──
1945
+ {
1946
+ id: "gemini-3.1-pro-preview",
1947
+ name: "Gemini 3.1 Pro",
1948
+ contextWindow: 1048576,
1949
+ maxOutput: 65536,
1950
+ toolCall: true,
1951
+ reasoning: true,
1952
+ reasoningRequired: true,
1953
+ vision: true,
1954
+ costInput: 2.0,
1955
+ costOutput: 12.0,
1956
+ },
1957
+ {
1958
+ id: "gemini-3-flash-preview",
1959
+ name: "Gemini 3 Flash",
1960
+ contextWindow: 1048576,
1961
+ maxOutput: 65536,
1962
+ toolCall: true,
1963
+ reasoning: true,
1964
+ vision: true,
1965
+ costInput: 0.50,
1966
+ costOutput: 3.0,
1967
+ },
1968
+ {
1969
+ id: "gemini-3.1-flash-lite",
1970
+ name: "Gemini 3.1 Flash-Lite",
1971
+ contextWindow: 1048576,
1972
+ maxOutput: 65536,
1973
+ toolCall: true,
1974
+ reasoning: true,
1975
+ vision: true,
1976
+ costInput: 0.25,
1977
+ costOutput: 1.5,
1978
+ },
1979
+ // ── Image generation (native Gemini generateContent with responseModalities) ──
1980
+ {
1981
+ id: "gemini-3.1-flash-image-preview",
1982
+ name: "Gemini 3.1 Flash Image (Nano Banana 2)",
1983
+ contextWindow: 4096,
1984
+ maxOutput: 1,
1985
+ toolCall: false,
1986
+ reasoning: false,
1987
+ vision: true,
1988
+ costInput: 0,
1989
+ costOutput: 0,
1990
+ mediaType: "image",
1991
+ mediaCapabilities: {
1992
+ type: "image",
1993
+ operations: ["text2image", "img2img"],
1994
+ sizes: ["1024x1024"],
1995
+ },
1996
+ },
1997
+ {
1998
+ id: "gemini-3-pro-image-preview",
1999
+ name: "Gemini 3 Pro Image (Nano Banana Pro)",
2000
+ contextWindow: 4096,
2001
+ maxOutput: 1,
2002
+ toolCall: false,
2003
+ reasoning: false,
2004
+ vision: true,
2005
+ costInput: 0,
2006
+ costOutput: 0,
2007
+ mediaType: "image",
2008
+ mediaCapabilities: {
2009
+ type: "image",
2010
+ operations: ["text2image", "img2img"],
2011
+ sizes: ["1024x1024"],
2012
+ },
2013
+ },
2014
+ // ── Video generation (Veo 3.1 series — generateVideos endpoint) ──
2015
+ {
2016
+ id: "veo-3.1-generate-preview",
2017
+ name: "Veo 3.1 Standard",
2018
+ contextWindow: 2000,
2019
+ maxOutput: 1,
2020
+ toolCall: false,
2021
+ reasoning: false,
2022
+ vision: false,
2023
+ mediaType: "video",
2024
+ mediaCapabilities: {
2025
+ type: "video",
2026
+ operations: ["text2video", "img2video"],
2027
+ maxDurationSeconds: 8,
2028
+ resolutions: ["720P", "1080P", "4K"],
2029
+ },
2030
+ },
2031
+ {
2032
+ id: "veo-3.1-fast-generate-preview",
2033
+ name: "Veo 3.1 Fast",
2034
+ contextWindow: 2000,
2035
+ maxOutput: 1,
2036
+ toolCall: false,
2037
+ reasoning: false,
2038
+ vision: false,
2039
+ mediaType: "video",
2040
+ mediaCapabilities: {
2041
+ type: "video",
2042
+ operations: ["text2video", "img2video"],
2043
+ maxDurationSeconds: 8,
2044
+ resolutions: ["720P", "1080P", "4K"],
2045
+ },
2046
+ },
2047
+ {
2048
+ id: "veo-3.1-lite-generate-preview",
2049
+ name: "Veo 3.1 Lite",
2050
+ contextWindow: 2000,
2051
+ maxOutput: 1,
2052
+ toolCall: false,
2053
+ reasoning: false,
2054
+ vision: false,
2055
+ mediaType: "video",
2056
+ mediaCapabilities: {
2057
+ type: "video",
2058
+ operations: ["text2video", "img2video"],
2059
+ maxDurationSeconds: 8,
2060
+ resolutions: ["720P", "1080P"],
2061
+ },
2062
+ },
2063
+ // ── Music generation (Lyria 3 series — generateMusic endpoint) ──
2064
+ {
2065
+ id: "lyria-3-pro-preview",
2066
+ name: "Lyria 3 Pro",
2067
+ contextWindow: 2000,
2068
+ maxOutput: 1,
2069
+ toolCall: false,
2070
+ reasoning: false,
2071
+ vision: false,
2072
+ mediaType: "music",
2073
+ mediaCapabilities: {
2074
+ type: "music",
2075
+ operations: ["text2music"],
2076
+ maxDurationSeconds: 300,
2077
+ formats: ["mp3"],
2078
+ },
2079
+ },
2080
+ {
2081
+ id: "lyria-3-clip-preview",
2082
+ name: "Lyria 3 Clip",
2083
+ contextWindow: 2000,
2084
+ maxOutput: 1,
2085
+ toolCall: false,
2086
+ reasoning: false,
2087
+ vision: false,
2088
+ mediaType: "music",
2089
+ mediaCapabilities: {
2090
+ type: "music",
2091
+ operations: ["text2music"],
2092
+ maxDurationSeconds: 30,
2093
+ formats: ["mp3"],
2094
+ },
2095
+ },
2096
+ // ── Music RealTime (Lyria RealTime — WebSocket streaming session) ──
2097
+ {
2098
+ id: "lyria-realtime-exp",
2099
+ name: "Lyria RealTime (Experimental)",
2100
+ contextWindow: 2000,
2101
+ maxOutput: 1,
2102
+ toolCall: false,
2103
+ reasoning: false,
2104
+ vision: false,
2105
+ mediaType: "music_realtime",
2106
+ mediaCapabilities: {
2107
+ type: "music",
2108
+ operations: ["realtime"],
2109
+ formats: ["wav"],
2110
+ },
2111
+ },
2112
+ // ── TTS (generateContent with speech config) ──
2113
+ {
2114
+ id: "gemini-3.1-flash-tts-preview",
2115
+ name: "Gemini 3.1 Flash TTS",
2116
+ contextWindow: 8192,
2117
+ maxOutput: 1,
2118
+ toolCall: false,
2119
+ reasoning: false,
2120
+ vision: false,
2121
+ costInput: 1.0,
2122
+ costOutput: 20.0,
2123
+ mediaType: "tts",
2124
+ mediaCapabilities: {
2125
+ type: "tts",
2126
+ operations: ["text2speech"],
2127
+ maxCharacters: 8000,
2128
+ formats: ["mp3", "wav"],
2129
+ },
2130
+ },
2131
+ // ── Embedding (embedContent endpoint) ──
2132
+ {
2133
+ id: "gemini-embedding-2",
2134
+ name: "Gemini Embedding 2 (Multimodal)",
2135
+ contextWindow: 8192,
2136
+ maxOutput: 1,
2137
+ toolCall: false,
2138
+ reasoning: false,
2139
+ vision: false,
2140
+ costInput: 0.20,
2141
+ costOutput: 0,
2142
+ mediaType: "embedding",
2143
+ mediaCapabilities: {
2144
+ type: "embedding",
2145
+ dimensions: 3072,
2146
+ maxTokens: 8192,
2147
+ },
2148
+ },
2149
+ ],
2150
+ },
2151
+ // ── Tier 3: Aggregators ──────────────────────────────────────────────
2152
+ {
2153
+ id: "openrouter",
2154
+ name: "OpenRouter",
2155
+ transport: "openai-chat",
2156
+ baseUrl: "https://openrouter.ai/api",
2157
+ apiKeyEnvVars: ["OPENROUTER_API_KEY"],
2158
+ authType: "bearer",
2159
+ isAggregator: true,
2160
+ defaultModel: "anthropic/claude-opus-4.7",
2161
+ extraHeaders: {
2162
+ "HTTP-Referer": "https://claw.bot",
2163
+ "X-Title": "XiaozhiClaw",
2164
+ },
2165
+ models: [
2166
+ {
2167
+ id: "anthropic/claude-opus-4.7",
2168
+ name: "Claude Opus 4.7 (via OpenRouter)",
2169
+ contextWindow: 1000000,
2170
+ maxOutput: 128000,
2171
+ toolCall: true,
2172
+ reasoning: true,
2173
+ vision: true,
2174
+ costInput: 5.0,
2175
+ costOutput: 25.0,
2176
+ costCacheRead: 0.5,
2177
+ },
2178
+ {
2179
+ id: "anthropic/claude-opus-4.6",
2180
+ name: "Claude Opus 4.6 (via OpenRouter)",
2181
+ contextWindow: 1000000,
2182
+ maxOutput: 128000,
2183
+ toolCall: true,
2184
+ reasoning: true,
2185
+ vision: true,
2186
+ costInput: 5.0,
2187
+ costOutput: 25.0,
2188
+ costCacheRead: 0.5,
2189
+ },
2190
+ {
2191
+ id: "deepseek/deepseek-v4-flash",
2192
+ name: "DeepSeek V4 Flash (via OpenRouter)",
2193
+ contextWindow: 1048576,
2194
+ maxOutput: 393216,
2195
+ toolCall: true,
2196
+ reasoning: true,
2197
+ vision: false,
2198
+ costInput: 0.14,
2199
+ costOutput: 0.28,
2200
+ costCacheRead: 0.028,
2201
+ },
2202
+ ],
2203
+ },
2204
+ // ── Tier 4: Coding Plan variants ─────────────────────────────────────
2205
+ // These use dedicated Anthropic-compatible endpoints for coding scenarios.
2206
+ // Users select these when they have a Coding Plan subscription instead of pay-per-token.
2207
+ {
2208
+ id: "qwen-coding",
2209
+ name: "Alibaba Qwen (Coding Plan)",
2210
+ transport: "anthropic-messages",
2211
+ baseUrl: "https://coding.dashscope.aliyuncs.com/apps/anthropic",
2212
+ apiKeyEnvVars: ["DASHSCOPE_API_KEY", "QWEN_API_KEY"],
2213
+ authType: "x-api-key",
2214
+ isAggregator: false,
2215
+ defaultModel: "qwen3-coder-plus",
2216
+ models: [
2217
+ {
2218
+ id: "qwen3-coder-plus",
2219
+ name: "Qwen3 Coder Plus (Coding Plan)",
2220
+ contextWindow: 262144,
2221
+ maxOutput: 65536,
2222
+ toolCall: true,
2223
+ reasoning: true,
2224
+ vision: false,
2225
+ },
2226
+ {
2227
+ id: "qwen3-coder-flash",
2228
+ name: "Qwen3 Coder Flash (Coding Plan)",
2229
+ contextWindow: 262144,
2230
+ maxOutput: 65536,
2231
+ toolCall: true,
2232
+ reasoning: true,
2233
+ vision: false,
2234
+ },
2235
+ ],
2236
+ },
2237
+ ];