abstractcore 2.9.1__py3-none-any.whl → 2.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/deepsearch.py +9 -4
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +882 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +52 -20
- abstractcore/config/manager.py +390 -12
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +30 -916
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +478 -28
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/structured_logging.py +29 -8
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.4.dist-info/METADATA +562 -0
- abstractcore-2.11.4.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
"source": "OpenAI official docs",
|
|
12
12
|
"canonical_name": "gpt-4",
|
|
13
13
|
"aliases": [],
|
|
14
|
-
"max_tokens": 128000
|
|
14
|
+
"max_tokens": 128000,
|
|
15
|
+
"video_support": false,
|
|
16
|
+
"video_input_mode": "none"
|
|
15
17
|
},
|
|
16
18
|
"gpt-4-turbo": {
|
|
17
19
|
"max_output_tokens": 4096,
|
|
@@ -30,7 +32,9 @@
|
|
|
30
32
|
"aliases": [
|
|
31
33
|
"gpt-4-turbo-preview"
|
|
32
34
|
],
|
|
33
|
-
"max_tokens": 128000
|
|
35
|
+
"max_tokens": 128000,
|
|
36
|
+
"video_support": false,
|
|
37
|
+
"video_input_mode": "frames"
|
|
34
38
|
},
|
|
35
39
|
"gpt-4-turbo-with-vision": {
|
|
36
40
|
"max_output_tokens": 4096,
|
|
@@ -50,7 +54,9 @@
|
|
|
50
54
|
"gpt-4-turbo-vision",
|
|
51
55
|
"gpt-4-vision-preview"
|
|
52
56
|
],
|
|
53
|
-
"max_tokens": 128000
|
|
57
|
+
"max_tokens": 128000,
|
|
58
|
+
"video_support": false,
|
|
59
|
+
"video_input_mode": "frames"
|
|
54
60
|
},
|
|
55
61
|
"gpt-4o": {
|
|
56
62
|
"max_output_tokens": 16384,
|
|
@@ -59,8 +65,8 @@
|
|
|
59
65
|
"parallel_tools": true,
|
|
60
66
|
"max_tools": -1,
|
|
61
67
|
"vision_support": true,
|
|
62
|
-
"audio_support":
|
|
63
|
-
"video_support":
|
|
68
|
+
"audio_support": false,
|
|
69
|
+
"video_support": false,
|
|
64
70
|
"image_resolutions": [
|
|
65
71
|
"variable"
|
|
66
72
|
],
|
|
@@ -80,7 +86,8 @@
|
|
|
80
86
|
"source": "OpenAI official docs 2025",
|
|
81
87
|
"canonical_name": "gpt-4o",
|
|
82
88
|
"aliases": [],
|
|
83
|
-
"max_tokens": 128000
|
|
89
|
+
"max_tokens": 128000,
|
|
90
|
+
"video_input_mode": "frames"
|
|
84
91
|
},
|
|
85
92
|
"gpt-4o-long-output": {
|
|
86
93
|
"max_output_tokens": 64000,
|
|
@@ -89,12 +96,14 @@
|
|
|
89
96
|
"parallel_tools": true,
|
|
90
97
|
"max_tools": -1,
|
|
91
98
|
"vision_support": true,
|
|
92
|
-
"audio_support":
|
|
99
|
+
"audio_support": false,
|
|
93
100
|
"notes": "16x output capacity variant",
|
|
94
101
|
"source": "OpenAI official docs",
|
|
95
102
|
"canonical_name": "gpt-4o-long-output",
|
|
96
103
|
"aliases": [],
|
|
97
|
-
"max_tokens": 128000
|
|
104
|
+
"max_tokens": 128000,
|
|
105
|
+
"video_support": false,
|
|
106
|
+
"video_input_mode": "frames"
|
|
98
107
|
},
|
|
99
108
|
"gpt-4o-mini": {
|
|
100
109
|
"max_output_tokens": 16000,
|
|
@@ -103,11 +112,13 @@
|
|
|
103
112
|
"parallel_tools": true,
|
|
104
113
|
"max_tools": -1,
|
|
105
114
|
"vision_support": true,
|
|
106
|
-
"audio_support":
|
|
115
|
+
"audio_support": false,
|
|
107
116
|
"source": "OpenAI official docs",
|
|
108
117
|
"canonical_name": "gpt-4o-mini",
|
|
109
118
|
"aliases": [],
|
|
110
|
-
"max_tokens": 128000
|
|
119
|
+
"max_tokens": 128000,
|
|
120
|
+
"video_support": false,
|
|
121
|
+
"video_input_mode": "frames"
|
|
111
122
|
},
|
|
112
123
|
"gpt-3.5-turbo": {
|
|
113
124
|
"max_output_tokens": 4096,
|
|
@@ -120,7 +131,9 @@
|
|
|
120
131
|
"source": "OpenAI official docs",
|
|
121
132
|
"canonical_name": "gpt-3.5-turbo",
|
|
122
133
|
"aliases": [],
|
|
123
|
-
"max_tokens": 16385
|
|
134
|
+
"max_tokens": 16385,
|
|
135
|
+
"video_support": false,
|
|
136
|
+
"video_input_mode": "none"
|
|
124
137
|
},
|
|
125
138
|
"o1": {
|
|
126
139
|
"max_output_tokens": 32768,
|
|
@@ -133,7 +146,10 @@
|
|
|
133
146
|
"source": "OpenAI official docs",
|
|
134
147
|
"canonical_name": "o1",
|
|
135
148
|
"aliases": [],
|
|
136
|
-
"max_tokens": 128000
|
|
149
|
+
"max_tokens": 128000,
|
|
150
|
+
"video_support": false,
|
|
151
|
+
"max_tools": -1,
|
|
152
|
+
"video_input_mode": "none"
|
|
137
153
|
},
|
|
138
154
|
"o1-mini": {
|
|
139
155
|
"max_output_tokens": 65536,
|
|
@@ -145,7 +161,10 @@
|
|
|
145
161
|
"source": "OpenAI official docs",
|
|
146
162
|
"canonical_name": "o1-mini",
|
|
147
163
|
"aliases": [],
|
|
148
|
-
"max_tokens": 128000
|
|
164
|
+
"max_tokens": 128000,
|
|
165
|
+
"video_support": false,
|
|
166
|
+
"max_tools": -1,
|
|
167
|
+
"video_input_mode": "none"
|
|
149
168
|
},
|
|
150
169
|
"o3": {
|
|
151
170
|
"max_output_tokens": 32768,
|
|
@@ -159,7 +178,9 @@
|
|
|
159
178
|
"source": "OpenAI official docs",
|
|
160
179
|
"canonical_name": "o3",
|
|
161
180
|
"aliases": [],
|
|
162
|
-
"max_tokens": 128000
|
|
181
|
+
"max_tokens": 128000,
|
|
182
|
+
"video_support": false,
|
|
183
|
+
"video_input_mode": "none"
|
|
163
184
|
},
|
|
164
185
|
"o3-mini": {
|
|
165
186
|
"max_output_tokens": 32768,
|
|
@@ -173,7 +194,9 @@
|
|
|
173
194
|
"source": "OpenAI official docs",
|
|
174
195
|
"canonical_name": "o3-mini",
|
|
175
196
|
"aliases": [],
|
|
176
|
-
"max_tokens": 128000
|
|
197
|
+
"max_tokens": 128000,
|
|
198
|
+
"video_support": false,
|
|
199
|
+
"video_input_mode": "none"
|
|
177
200
|
},
|
|
178
201
|
"claude-3.5-sonnet": {
|
|
179
202
|
"max_output_tokens": 8192,
|
|
@@ -196,7 +219,9 @@
|
|
|
196
219
|
"source": "Anthropic official docs",
|
|
197
220
|
"canonical_name": "claude-3.5-sonnet",
|
|
198
221
|
"aliases": [],
|
|
199
|
-
"max_tokens": 200000
|
|
222
|
+
"max_tokens": 200000,
|
|
223
|
+
"video_support": false,
|
|
224
|
+
"video_input_mode": "frames"
|
|
200
225
|
},
|
|
201
226
|
"claude-3.7-sonnet": {
|
|
202
227
|
"max_output_tokens": 128000,
|
|
@@ -213,7 +238,9 @@
|
|
|
213
238
|
"source": "Anthropic official docs",
|
|
214
239
|
"canonical_name": "claude-3.7-sonnet",
|
|
215
240
|
"aliases": [],
|
|
216
|
-
"max_tokens": 200000
|
|
241
|
+
"max_tokens": 200000,
|
|
242
|
+
"video_support": false,
|
|
243
|
+
"video_input_mode": "frames"
|
|
217
244
|
},
|
|
218
245
|
"claude-3.5-haiku": {
|
|
219
246
|
"max_output_tokens": 8192,
|
|
@@ -232,7 +259,9 @@
|
|
|
232
259
|
"aliases": [
|
|
233
260
|
"claude-3-5-haiku-20241022"
|
|
234
261
|
],
|
|
235
|
-
"max_tokens": 200000
|
|
262
|
+
"max_tokens": 200000,
|
|
263
|
+
"video_support": false,
|
|
264
|
+
"video_input_mode": "frames"
|
|
236
265
|
},
|
|
237
266
|
"claude-3-opus": {
|
|
238
267
|
"max_output_tokens": 4096,
|
|
@@ -248,7 +277,9 @@
|
|
|
248
277
|
"source": "Anthropic official docs",
|
|
249
278
|
"canonical_name": "claude-3-opus",
|
|
250
279
|
"aliases": [],
|
|
251
|
-
"max_tokens": 200000
|
|
280
|
+
"max_tokens": 200000,
|
|
281
|
+
"video_support": false,
|
|
282
|
+
"video_input_mode": "frames"
|
|
252
283
|
},
|
|
253
284
|
"claude-3-sonnet": {
|
|
254
285
|
"max_output_tokens": 4096,
|
|
@@ -264,7 +295,9 @@
|
|
|
264
295
|
"source": "Anthropic official docs",
|
|
265
296
|
"canonical_name": "claude-3-sonnet",
|
|
266
297
|
"aliases": [],
|
|
267
|
-
"max_tokens": 200000
|
|
298
|
+
"max_tokens": 200000,
|
|
299
|
+
"video_support": false,
|
|
300
|
+
"video_input_mode": "frames"
|
|
268
301
|
},
|
|
269
302
|
"claude-3-haiku": {
|
|
270
303
|
"max_output_tokens": 4096,
|
|
@@ -280,7 +313,9 @@
|
|
|
280
313
|
"source": "Anthropic official docs",
|
|
281
314
|
"canonical_name": "claude-3-haiku",
|
|
282
315
|
"aliases": [],
|
|
283
|
-
"max_tokens": 200000
|
|
316
|
+
"max_tokens": 200000,
|
|
317
|
+
"video_support": false,
|
|
318
|
+
"video_input_mode": "frames"
|
|
284
319
|
},
|
|
285
320
|
"claude-haiku-4-5": {
|
|
286
321
|
"max_output_tokens": 64000,
|
|
@@ -300,7 +335,9 @@
|
|
|
300
335
|
"claude-haiku-4-5-20251001",
|
|
301
336
|
"anthropic/claude-haiku-4-5"
|
|
302
337
|
],
|
|
303
|
-
"max_tokens": 200000
|
|
338
|
+
"max_tokens": 200000,
|
|
339
|
+
"video_support": false,
|
|
340
|
+
"video_input_mode": "frames"
|
|
304
341
|
},
|
|
305
342
|
"claude-4-opus": {
|
|
306
343
|
"max_output_tokens": 4096,
|
|
@@ -317,7 +354,9 @@
|
|
|
317
354
|
"source": "Anthropic official docs",
|
|
318
355
|
"canonical_name": "claude-4-opus",
|
|
319
356
|
"aliases": [],
|
|
320
|
-
"max_tokens": 200000
|
|
357
|
+
"max_tokens": 200000,
|
|
358
|
+
"video_support": false,
|
|
359
|
+
"video_input_mode": "frames"
|
|
321
360
|
},
|
|
322
361
|
"claude-4.1-opus": {
|
|
323
362
|
"max_output_tokens": 4096,
|
|
@@ -334,7 +373,9 @@
|
|
|
334
373
|
"source": "Anthropic official docs",
|
|
335
374
|
"canonical_name": "claude-4.1-opus",
|
|
336
375
|
"aliases": [],
|
|
337
|
-
"max_tokens": 200000
|
|
376
|
+
"max_tokens": 200000,
|
|
377
|
+
"video_support": false,
|
|
378
|
+
"video_input_mode": "frames"
|
|
338
379
|
},
|
|
339
380
|
"claude-4-sonnet": {
|
|
340
381
|
"max_output_tokens": 8192,
|
|
@@ -351,7 +392,9 @@
|
|
|
351
392
|
"source": "Anthropic official docs",
|
|
352
393
|
"canonical_name": "claude-4-sonnet",
|
|
353
394
|
"aliases": [],
|
|
354
|
-
"max_tokens": 200000
|
|
395
|
+
"max_tokens": 200000,
|
|
396
|
+
"video_support": false,
|
|
397
|
+
"video_input_mode": "frames"
|
|
355
398
|
},
|
|
356
399
|
"claude-4.5-sonnet": {
|
|
357
400
|
"max_output_tokens": 64000,
|
|
@@ -372,7 +415,9 @@
|
|
|
372
415
|
"claude-sonnet-4-5-20250929",
|
|
373
416
|
"anthropic/claude-sonnet-4-5"
|
|
374
417
|
],
|
|
375
|
-
"max_tokens": 200000
|
|
418
|
+
"max_tokens": 200000,
|
|
419
|
+
"video_support": false,
|
|
420
|
+
"video_input_mode": "frames"
|
|
376
421
|
},
|
|
377
422
|
"claude-opus-4-5": {
|
|
378
423
|
"max_output_tokens": 64000,
|
|
@@ -392,7 +437,110 @@
|
|
|
392
437
|
"claude-opus-4-5-20251101",
|
|
393
438
|
"anthropic/claude-opus-4-5"
|
|
394
439
|
],
|
|
395
|
-
"max_tokens": 200000
|
|
440
|
+
"max_tokens": 200000,
|
|
441
|
+
"video_support": false,
|
|
442
|
+
"video_input_mode": "frames"
|
|
443
|
+
},
|
|
444
|
+
"cogito:3b": {
|
|
445
|
+
"max_output_tokens": 4096,
|
|
446
|
+
"tool_support": "native",
|
|
447
|
+
"structured_output": "native",
|
|
448
|
+
"parallel_tools": false,
|
|
449
|
+
"vision_support": false,
|
|
450
|
+
"audio_support": false,
|
|
451
|
+
"notes": "Cogito v1 preview (Llama-based) 3B; 128k context; tool calling capable. Max output tokens not specified in model card (conservative 2048 default).",
|
|
452
|
+
"source": "DeepCogito HF model card + Ollama library",
|
|
453
|
+
"canonical_name": "cogito:3b",
|
|
454
|
+
"aliases": [
|
|
455
|
+
"cogito",
|
|
456
|
+
"cogito:3b-v1-preview-llama-q8_0",
|
|
457
|
+
"cogito-v1-preview-llama-3B",
|
|
458
|
+
"deepcogito/cogito-v1-preview-llama-3B"
|
|
459
|
+
],
|
|
460
|
+
"max_tokens": 128000,
|
|
461
|
+
"video_support": false,
|
|
462
|
+
"max_tools": -1,
|
|
463
|
+
"video_input_mode": "none"
|
|
464
|
+
},
|
|
465
|
+
"cogito:8b": {
|
|
466
|
+
"max_output_tokens": 4096,
|
|
467
|
+
"tool_support": "native",
|
|
468
|
+
"structured_output": "native",
|
|
469
|
+
"parallel_tools": false,
|
|
470
|
+
"vision_support": false,
|
|
471
|
+
"audio_support": false,
|
|
472
|
+
"notes": "Cogito v1 preview (Llama-based) 8B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
|
|
473
|
+
"source": "DeepCogito HF model card + Ollama library",
|
|
474
|
+
"canonical_name": "cogito:8b",
|
|
475
|
+
"aliases": [
|
|
476
|
+
"cogito:8b-v1-preview-llama",
|
|
477
|
+
"cogito-v1-preview-llama-8B",
|
|
478
|
+
"deepcogito/cogito-v1-preview-llama-8B"
|
|
479
|
+
],
|
|
480
|
+
"max_tokens": 128000,
|
|
481
|
+
"video_support": false,
|
|
482
|
+
"max_tools": -1,
|
|
483
|
+
"video_input_mode": "none"
|
|
484
|
+
},
|
|
485
|
+
"cogito:14b": {
|
|
486
|
+
"max_output_tokens": 4096,
|
|
487
|
+
"tool_support": "native",
|
|
488
|
+
"structured_output": "native",
|
|
489
|
+
"parallel_tools": false,
|
|
490
|
+
"vision_support": false,
|
|
491
|
+
"audio_support": false,
|
|
492
|
+
"notes": "Cogito v1 preview (Qwen2-based) 14B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
|
|
493
|
+
"source": "DeepCogito HF model card + Ollama library",
|
|
494
|
+
"canonical_name": "cogito:14b",
|
|
495
|
+
"aliases": [
|
|
496
|
+
"cogito:14b-v1-preview-qwen",
|
|
497
|
+
"cogito-v1-preview-qwen-14B",
|
|
498
|
+
"deepcogito/cogito-v1-preview-qwen-14B"
|
|
499
|
+
],
|
|
500
|
+
"max_tokens": 128000,
|
|
501
|
+
"video_support": false,
|
|
502
|
+
"max_tools": -1,
|
|
503
|
+
"video_input_mode": "none"
|
|
504
|
+
},
|
|
505
|
+
"cogito:32b": {
|
|
506
|
+
"max_output_tokens": 4096,
|
|
507
|
+
"tool_support": "native",
|
|
508
|
+
"structured_output": "native",
|
|
509
|
+
"parallel_tools": false,
|
|
510
|
+
"vision_support": false,
|
|
511
|
+
"audio_support": false,
|
|
512
|
+
"notes": "Cogito v1 preview (Qwen-based) 32B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
|
|
513
|
+
"source": "DeepCogito HF model card + Ollama library",
|
|
514
|
+
"canonical_name": "cogito:32b",
|
|
515
|
+
"aliases": [
|
|
516
|
+
"cogito:32b-v1-preview-qwen",
|
|
517
|
+
"cogito-v1-preview-qwen-32B",
|
|
518
|
+
"deepcogito/cogito-v1-preview-qwen-32B"
|
|
519
|
+
],
|
|
520
|
+
"max_tokens": 128000,
|
|
521
|
+
"video_support": false,
|
|
522
|
+
"max_tools": -1,
|
|
523
|
+
"video_input_mode": "none"
|
|
524
|
+
},
|
|
525
|
+
"cogito:70b": {
|
|
526
|
+
"max_output_tokens": 4096,
|
|
527
|
+
"tool_support": "native",
|
|
528
|
+
"structured_output": "native",
|
|
529
|
+
"parallel_tools": false,
|
|
530
|
+
"vision_support": false,
|
|
531
|
+
"audio_support": false,
|
|
532
|
+
"notes": "Cogito v1 preview (Llama 3.1-based) 70B; 128k context; hybrid reasoning model with standard and extended thinking modes; optimized for coding, STEM, instruction following, and tool calling.",
|
|
533
|
+
"source": "DeepCogito HF model card + Ollama library",
|
|
534
|
+
"canonical_name": "cogito:70b",
|
|
535
|
+
"aliases": [
|
|
536
|
+
"cogito:70b-v1-preview-llama",
|
|
537
|
+
"cogito-v1-preview-llama-70B",
|
|
538
|
+
"deepcogito/cogito-v1-preview-llama-70B"
|
|
539
|
+
],
|
|
540
|
+
"max_tokens": 128000,
|
|
541
|
+
"video_support": false,
|
|
542
|
+
"max_tools": -1,
|
|
543
|
+
"video_input_mode": "none"
|
|
396
544
|
},
|
|
397
545
|
"llama-3.2-1b": {
|
|
398
546
|
"max_output_tokens": 2048,
|
|
@@ -405,7 +553,10 @@
|
|
|
405
553
|
"source": "Meta official docs",
|
|
406
554
|
"canonical_name": "llama-3.2-1b",
|
|
407
555
|
"aliases": [],
|
|
408
|
-
"max_tokens": 8192
|
|
556
|
+
"max_tokens": 8192,
|
|
557
|
+
"video_support": false,
|
|
558
|
+
"max_tools": -1,
|
|
559
|
+
"video_input_mode": "none"
|
|
409
560
|
},
|
|
410
561
|
"llama-3.2-3b": {
|
|
411
562
|
"max_output_tokens": 2048,
|
|
@@ -418,7 +569,10 @@
|
|
|
418
569
|
"source": "Meta official docs",
|
|
419
570
|
"canonical_name": "llama-3.2-3b",
|
|
420
571
|
"aliases": [],
|
|
421
|
-
"max_tokens": 8192
|
|
572
|
+
"max_tokens": 8192,
|
|
573
|
+
"video_support": false,
|
|
574
|
+
"max_tools": -1,
|
|
575
|
+
"video_input_mode": "none"
|
|
422
576
|
},
|
|
423
577
|
"llama-3.2-11b-vision": {
|
|
424
578
|
"max_output_tokens": 2048,
|
|
@@ -434,7 +588,10 @@
|
|
|
434
588
|
"source": "Meta official docs",
|
|
435
589
|
"canonical_name": "llama-3.2-11b-vision",
|
|
436
590
|
"aliases": [],
|
|
437
|
-
"max_tokens": 128000
|
|
591
|
+
"max_tokens": 128000,
|
|
592
|
+
"video_support": false,
|
|
593
|
+
"max_tools": -1,
|
|
594
|
+
"video_input_mode": "frames"
|
|
438
595
|
},
|
|
439
596
|
"llama-3.3-70b": {
|
|
440
597
|
"max_output_tokens": 8192,
|
|
@@ -447,7 +604,10 @@
|
|
|
447
604
|
"source": "Meta official docs",
|
|
448
605
|
"canonical_name": "llama-3.3-70b",
|
|
449
606
|
"aliases": [],
|
|
450
|
-
"max_tokens": 128000
|
|
607
|
+
"max_tokens": 128000,
|
|
608
|
+
"video_support": false,
|
|
609
|
+
"max_tools": -1,
|
|
610
|
+
"video_input_mode": "none"
|
|
451
611
|
},
|
|
452
612
|
"llama-3.1-8b": {
|
|
453
613
|
"max_output_tokens": 8192,
|
|
@@ -460,7 +620,10 @@
|
|
|
460
620
|
"source": "Meta official docs",
|
|
461
621
|
"canonical_name": "llama-3.1-8b",
|
|
462
622
|
"aliases": [],
|
|
463
|
-
"max_tokens": 128000
|
|
623
|
+
"max_tokens": 128000,
|
|
624
|
+
"video_support": false,
|
|
625
|
+
"max_tools": -1,
|
|
626
|
+
"video_input_mode": "none"
|
|
464
627
|
},
|
|
465
628
|
"llama-3.1-70b": {
|
|
466
629
|
"max_output_tokens": 8192,
|
|
@@ -473,7 +636,10 @@
|
|
|
473
636
|
"source": "Meta official docs",
|
|
474
637
|
"canonical_name": "llama-3.1-70b",
|
|
475
638
|
"aliases": [],
|
|
476
|
-
"max_tokens": 128000
|
|
639
|
+
"max_tokens": 128000,
|
|
640
|
+
"video_support": false,
|
|
641
|
+
"max_tools": -1,
|
|
642
|
+
"video_input_mode": "none"
|
|
477
643
|
},
|
|
478
644
|
"llama-3.1-405b": {
|
|
479
645
|
"max_output_tokens": 8192,
|
|
@@ -486,7 +652,10 @@
|
|
|
486
652
|
"source": "Meta official docs",
|
|
487
653
|
"canonical_name": "llama-3.1-405b",
|
|
488
654
|
"aliases": [],
|
|
489
|
-
"max_tokens": 128000
|
|
655
|
+
"max_tokens": 128000,
|
|
656
|
+
"video_support": false,
|
|
657
|
+
"max_tools": -1,
|
|
658
|
+
"video_input_mode": "none"
|
|
490
659
|
},
|
|
491
660
|
"llama-4": {
|
|
492
661
|
"max_output_tokens": 8192,
|
|
@@ -499,7 +668,29 @@
|
|
|
499
668
|
"source": "Meta announcement",
|
|
500
669
|
"canonical_name": "llama-4",
|
|
501
670
|
"aliases": [],
|
|
502
|
-
"max_tokens": 10000000
|
|
671
|
+
"max_tokens": 10000000,
|
|
672
|
+
"video_support": false,
|
|
673
|
+
"max_tools": -1,
|
|
674
|
+
"video_input_mode": "frames"
|
|
675
|
+
},
|
|
676
|
+
"llava-next-video-7b-hf": {
|
|
677
|
+
"max_output_tokens": 1024,
|
|
678
|
+
"tool_support": "prompted",
|
|
679
|
+
"structured_output": "prompted",
|
|
680
|
+
"parallel_tools": false,
|
|
681
|
+
"vision_support": true,
|
|
682
|
+
"video_support": true,
|
|
683
|
+
"audio_support": false,
|
|
684
|
+
"notes": "LLaVA-NeXT-Video 7B (HuggingFace Transformers video-input VLM)",
|
|
685
|
+
"source": "HuggingFace model card / Transformers docs",
|
|
686
|
+
"canonical_name": "llava-next-video-7b-hf",
|
|
687
|
+
"aliases": [
|
|
688
|
+
"llava-hf/LLaVA-NeXT-Video-7B-hf",
|
|
689
|
+
"LLaVA-NeXT-Video-7B-hf"
|
|
690
|
+
],
|
|
691
|
+
"max_tokens": 10250,
|
|
692
|
+
"max_tools": -1,
|
|
693
|
+
"video_input_mode": "native"
|
|
503
694
|
},
|
|
504
695
|
"qwen2.5-0.5b": {
|
|
505
696
|
"max_output_tokens": 8192,
|
|
@@ -512,7 +703,10 @@
|
|
|
512
703
|
"source": "Alibaba official docs",
|
|
513
704
|
"canonical_name": "qwen2.5-0.5b",
|
|
514
705
|
"aliases": [],
|
|
515
|
-
"max_tokens": 32768
|
|
706
|
+
"max_tokens": 32768,
|
|
707
|
+
"video_support": false,
|
|
708
|
+
"max_tools": -1,
|
|
709
|
+
"video_input_mode": "none"
|
|
516
710
|
},
|
|
517
711
|
"qwen2.5-1.5b": {
|
|
518
712
|
"max_output_tokens": 8192,
|
|
@@ -525,7 +719,10 @@
|
|
|
525
719
|
"source": "Alibaba official docs",
|
|
526
720
|
"canonical_name": "qwen2.5-1.5b",
|
|
527
721
|
"aliases": [],
|
|
528
|
-
"max_tokens": 32768
|
|
722
|
+
"max_tokens": 32768,
|
|
723
|
+
"video_support": false,
|
|
724
|
+
"max_tools": -1,
|
|
725
|
+
"video_input_mode": "none"
|
|
529
726
|
},
|
|
530
727
|
"qwen2.5-3b": {
|
|
531
728
|
"max_output_tokens": 8192,
|
|
@@ -538,7 +735,10 @@
|
|
|
538
735
|
"source": "Alibaba official docs",
|
|
539
736
|
"canonical_name": "qwen2.5-3b",
|
|
540
737
|
"aliases": [],
|
|
541
|
-
"max_tokens": 32768
|
|
738
|
+
"max_tokens": 32768,
|
|
739
|
+
"video_support": false,
|
|
740
|
+
"max_tools": -1,
|
|
741
|
+
"video_input_mode": "none"
|
|
542
742
|
},
|
|
543
743
|
"qwen2.5-7b": {
|
|
544
744
|
"max_output_tokens": 8192,
|
|
@@ -551,7 +751,10 @@
|
|
|
551
751
|
"source": "Alibaba official docs",
|
|
552
752
|
"canonical_name": "qwen2.5-7b",
|
|
553
753
|
"aliases": [],
|
|
554
|
-
"max_tokens": 131072
|
|
754
|
+
"max_tokens": 131072,
|
|
755
|
+
"video_support": false,
|
|
756
|
+
"max_tools": -1,
|
|
757
|
+
"video_input_mode": "none"
|
|
555
758
|
},
|
|
556
759
|
"qwen2.5-14b": {
|
|
557
760
|
"max_output_tokens": 8192,
|
|
@@ -564,7 +767,10 @@
|
|
|
564
767
|
"source": "Alibaba official docs",
|
|
565
768
|
"canonical_name": "qwen2.5-14b",
|
|
566
769
|
"aliases": [],
|
|
567
|
-
"max_tokens": 131072
|
|
770
|
+
"max_tokens": 131072,
|
|
771
|
+
"video_support": false,
|
|
772
|
+
"max_tools": -1,
|
|
773
|
+
"video_input_mode": "none"
|
|
568
774
|
},
|
|
569
775
|
"qwen2.5-32b": {
|
|
570
776
|
"max_output_tokens": 8192,
|
|
@@ -577,7 +783,10 @@
|
|
|
577
783
|
"source": "Alibaba official docs",
|
|
578
784
|
"canonical_name": "qwen2.5-32b",
|
|
579
785
|
"aliases": [],
|
|
580
|
-
"max_tokens": 131072
|
|
786
|
+
"max_tokens": 131072,
|
|
787
|
+
"video_support": false,
|
|
788
|
+
"max_tools": -1,
|
|
789
|
+
"video_input_mode": "none"
|
|
581
790
|
},
|
|
582
791
|
"qwen2.5-72b": {
|
|
583
792
|
"max_output_tokens": 8192,
|
|
@@ -590,7 +799,10 @@
|
|
|
590
799
|
"source": "Alibaba official docs",
|
|
591
800
|
"canonical_name": "qwen2.5-72b",
|
|
592
801
|
"aliases": [],
|
|
593
|
-
"max_tokens": 131072
|
|
802
|
+
"max_tokens": 131072,
|
|
803
|
+
"video_support": false,
|
|
804
|
+
"max_tools": -1,
|
|
805
|
+
"video_input_mode": "none"
|
|
594
806
|
},
|
|
595
807
|
"qwen3-0.6b": {
|
|
596
808
|
"max_output_tokens": 8192,
|
|
@@ -600,11 +812,15 @@
|
|
|
600
812
|
"vision_support": false,
|
|
601
813
|
"audio_support": false,
|
|
602
814
|
"thinking_support": true,
|
|
815
|
+
"thinking_control": "/no_think",
|
|
603
816
|
"notes": "Qwen3 base model with thinking capabilities",
|
|
604
817
|
"source": "Alibaba Qwen3 technical report",
|
|
605
818
|
"canonical_name": "qwen3-0.6b",
|
|
606
819
|
"aliases": [],
|
|
607
|
-
"max_tokens": 32768
|
|
820
|
+
"max_tokens": 32768,
|
|
821
|
+
"video_support": false,
|
|
822
|
+
"max_tools": -1,
|
|
823
|
+
"video_input_mode": "none"
|
|
608
824
|
},
|
|
609
825
|
"qwen3-1.7b": {
|
|
610
826
|
"max_output_tokens": 8192,
|
|
@@ -614,11 +830,15 @@
|
|
|
614
830
|
"vision_support": false,
|
|
615
831
|
"audio_support": false,
|
|
616
832
|
"thinking_support": true,
|
|
833
|
+
"thinking_control": "/no_think",
|
|
617
834
|
"notes": "Qwen3 1.7B model with thinking capabilities",
|
|
618
835
|
"source": "Alibaba Qwen3 technical report",
|
|
619
836
|
"canonical_name": "qwen3-1.7b",
|
|
620
837
|
"aliases": [],
|
|
621
|
-
"max_tokens": 32768
|
|
838
|
+
"max_tokens": 32768,
|
|
839
|
+
"video_support": false,
|
|
840
|
+
"max_tools": -1,
|
|
841
|
+
"video_input_mode": "none"
|
|
622
842
|
},
|
|
623
843
|
"qwen3-4b": {
|
|
624
844
|
"max_output_tokens": 8192,
|
|
@@ -628,11 +848,54 @@
|
|
|
628
848
|
"vision_support": false,
|
|
629
849
|
"audio_support": false,
|
|
630
850
|
"thinking_support": true,
|
|
851
|
+
"thinking_control": "/no_think",
|
|
631
852
|
"notes": "Qwen3 4B model with extended context via YaRN scaling",
|
|
632
853
|
"source": "Alibaba Qwen3 technical report",
|
|
633
854
|
"canonical_name": "qwen3-4b",
|
|
634
855
|
"aliases": [],
|
|
635
|
-
"max_tokens": 131072
|
|
856
|
+
"max_tokens": 131072,
|
|
857
|
+
"video_support": false,
|
|
858
|
+
"max_tools": -1,
|
|
859
|
+
"video_input_mode": "none"
|
|
860
|
+
},
|
|
861
|
+
"qwen3-4b-2507": {
|
|
862
|
+
"max_output_tokens": 8192,
|
|
863
|
+
"tool_support": "native",
|
|
864
|
+
"structured_output": "native",
|
|
865
|
+
"parallel_tools": false,
|
|
866
|
+
"vision_support": false,
|
|
867
|
+
"audio_support": false,
|
|
868
|
+
"thinking_support": false,
|
|
869
|
+
"notes": "Qwen3-4B-2507 non-thinking instruct variant. Supports only non-thinking mode; does not generate <think></think> blocks.",
|
|
870
|
+
"source": "LM Studio model card (Qwen/Qwen3-4B-2507) and Qwen3 2507 release notes",
|
|
871
|
+
"canonical_name": "qwen3-4b-2507",
|
|
872
|
+
"aliases": [
|
|
873
|
+
"qwen/qwen3-4b-2507"
|
|
874
|
+
],
|
|
875
|
+
"max_tokens": 262144,
|
|
876
|
+
"video_support": false,
|
|
877
|
+
"max_tools": -1,
|
|
878
|
+
"video_input_mode": "none"
|
|
879
|
+
},
|
|
880
|
+
"qwen3-4b-thinking-2507": {
|
|
881
|
+
"max_output_tokens": 8192,
|
|
882
|
+
"tool_support": "native",
|
|
883
|
+
"structured_output": "native",
|
|
884
|
+
"parallel_tools": false,
|
|
885
|
+
"vision_support": false,
|
|
886
|
+
"audio_support": false,
|
|
887
|
+
"thinking_support": true,
|
|
888
|
+
"thinking_output_field": "reasoning_content",
|
|
889
|
+
"notes": "Qwen3-4B-Thinking-2507 thinking-only variant. The decoded output often contains only the closing </think> tag, with the opening <think> supplied by the chat template.",
|
|
890
|
+
"source": "LM Studio model card (Qwen/Qwen3-4B-Thinking-2507) and Qwen3 Thinking 2507 docs",
|
|
891
|
+
"canonical_name": "qwen3-4b-thinking-2507",
|
|
892
|
+
"aliases": [
|
|
893
|
+
"qwen/qwen3-4b-thinking-2507"
|
|
894
|
+
],
|
|
895
|
+
"max_tokens": 262144,
|
|
896
|
+
"video_support": false,
|
|
897
|
+
"max_tools": -1,
|
|
898
|
+
"video_input_mode": "none"
|
|
636
899
|
},
|
|
637
900
|
"qwen3-32b": {
|
|
638
901
|
"max_output_tokens": 8192,
|
|
@@ -642,11 +905,107 @@
|
|
|
642
905
|
"vision_support": false,
|
|
643
906
|
"audio_support": false,
|
|
644
907
|
"thinking_support": true,
|
|
908
|
+
"thinking_control": "/no_think",
|
|
645
909
|
"notes": "Qwen3 32B model with advanced thinking capabilities",
|
|
646
910
|
"source": "Alibaba Qwen3 technical report",
|
|
647
911
|
"canonical_name": "qwen3-32b",
|
|
648
912
|
"aliases": [],
|
|
649
|
-
"max_tokens": 131072
|
|
913
|
+
"max_tokens": 131072,
|
|
914
|
+
"video_support": false,
|
|
915
|
+
"max_tools": -1,
|
|
916
|
+
"video_input_mode": "none"
|
|
917
|
+
},
|
|
918
|
+
"sera-32b": {
|
|
919
|
+
"max_output_tokens": 8192,
|
|
920
|
+
"tool_support": "prompted",
|
|
921
|
+
"structured_output": "prompted",
|
|
922
|
+
"parallel_tools": false,
|
|
923
|
+
"vision_support": false,
|
|
924
|
+
"audio_support": false,
|
|
925
|
+
"thinking_support": true,
|
|
926
|
+
"thinking_control": "/no_think",
|
|
927
|
+
"notes": "AllenAI SERA-32B coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
|
|
928
|
+
"source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B)",
|
|
929
|
+
"canonical_name": "sera-32b",
|
|
930
|
+
"aliases": [
|
|
931
|
+
"allenai/SERA-32B",
|
|
932
|
+
"SERA-32B",
|
|
933
|
+
"SERA32B",
|
|
934
|
+
"SERA_32B"
|
|
935
|
+
],
|
|
936
|
+
"max_tokens": 32768,
|
|
937
|
+
"video_support": false,
|
|
938
|
+
"max_tools": -1,
|
|
939
|
+
"video_input_mode": "none"
|
|
940
|
+
},
|
|
941
|
+
"sera-32b-ga": {
|
|
942
|
+
"max_output_tokens": 8192,
|
|
943
|
+
"tool_support": "prompted",
|
|
944
|
+
"structured_output": "prompted",
|
|
945
|
+
"parallel_tools": false,
|
|
946
|
+
"vision_support": false,
|
|
947
|
+
"audio_support": false,
|
|
948
|
+
"thinking_support": true,
|
|
949
|
+
"thinking_control": "/no_think",
|
|
950
|
+
"notes": "AllenAI SERA-32B-GA coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
|
|
951
|
+
"source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B-GA)",
|
|
952
|
+
"canonical_name": "sera-32b-ga",
|
|
953
|
+
"aliases": [
|
|
954
|
+
"allenai/SERA-32B-GA",
|
|
955
|
+
"SERA-32B-GA",
|
|
956
|
+
"SERA32BGA",
|
|
957
|
+
"SERA_32B_GA"
|
|
958
|
+
],
|
|
959
|
+
"max_tokens": 32768,
|
|
960
|
+
"video_support": false,
|
|
961
|
+
"max_tools": -1,
|
|
962
|
+
"video_input_mode": "none"
|
|
963
|
+
},
|
|
964
|
+
"sera-8b": {
|
|
965
|
+
"max_output_tokens": 8192,
|
|
966
|
+
"tool_support": "prompted",
|
|
967
|
+
"structured_output": "prompted",
|
|
968
|
+
"parallel_tools": false,
|
|
969
|
+
"vision_support": false,
|
|
970
|
+
"audio_support": false,
|
|
971
|
+
"thinking_support": true,
|
|
972
|
+
"thinking_control": "/no_think",
|
|
973
|
+
"notes": "AllenAI SERA-8B coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
|
|
974
|
+
"source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B)",
|
|
975
|
+
"canonical_name": "sera-8b",
|
|
976
|
+
"aliases": [
|
|
977
|
+
"allenai/SERA-8B",
|
|
978
|
+
"SERA-8B",
|
|
979
|
+
"SERA8B",
|
|
980
|
+
"SERA_8B"
|
|
981
|
+
],
|
|
982
|
+
"max_tokens": 32768,
|
|
983
|
+
"video_support": false,
|
|
984
|
+
"max_tools": -1,
|
|
985
|
+
"video_input_mode": "none"
|
|
986
|
+
},
|
|
987
|
+
"sera-8b-ga": {
|
|
988
|
+
"max_output_tokens": 8192,
|
|
989
|
+
"tool_support": "prompted",
|
|
990
|
+
"structured_output": "prompted",
|
|
991
|
+
"parallel_tools": false,
|
|
992
|
+
"vision_support": false,
|
|
993
|
+
"audio_support": false,
|
|
994
|
+
"thinking_support": true,
|
|
995
|
+
"thinking_control": "/no_think",
|
|
996
|
+
"notes": "AllenAI SERA-8B-GA coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
|
|
997
|
+
"source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B-GA)",
|
|
998
|
+
"canonical_name": "sera-8b-ga",
|
|
999
|
+
"aliases": [
|
|
1000
|
+
"allenai/SERA-8B-GA",
|
|
1001
|
+
"SERA-8B-GA",
|
|
1002
|
+
"SERA8BGA",
|
|
1003
|
+
"SERA_8B_GA"
|
|
1004
|
+
],
|
|
1005
|
+
"max_tokens": 32768,
|
|
1006
|
+
"video_support": false,
|
|
1007
|
+
"max_tools": -1,
|
|
1008
|
+
"video_input_mode": "none"
|
|
650
1009
|
},
|
|
651
1010
|
"qwen3-30b-a3b": {
|
|
652
1011
|
"max_output_tokens": 8192,
|
|
@@ -656,11 +1015,15 @@
|
|
|
656
1015
|
"vision_support": false,
|
|
657
1016
|
"audio_support": false,
|
|
658
1017
|
"thinking_support": true,
|
|
1018
|
+
"thinking_control": "/no_think",
|
|
659
1019
|
"notes": "Qwen3 MoE model with 4-bit precision, 30B total/3B active parameters",
|
|
660
1020
|
"source": "Alibaba Qwen3 technical report",
|
|
661
1021
|
"canonical_name": "qwen3-30b-a3b",
|
|
662
1022
|
"aliases": [],
|
|
663
|
-
"max_tokens": 40960
|
|
1023
|
+
"max_tokens": 40960,
|
|
1024
|
+
"video_support": false,
|
|
1025
|
+
"max_tools": -1,
|
|
1026
|
+
"video_input_mode": "none"
|
|
664
1027
|
},
|
|
665
1028
|
"qwen3-30b-a3b-2507": {
|
|
666
1029
|
"max_output_tokens": 8192,
|
|
@@ -676,7 +1039,10 @@
|
|
|
676
1039
|
"aliases": [
|
|
677
1040
|
"qwen/qwen3-30b-a3b-2507"
|
|
678
1041
|
],
|
|
679
|
-
"max_tokens": 262144
|
|
1042
|
+
"max_tokens": 262144,
|
|
1043
|
+
"video_support": false,
|
|
1044
|
+
"max_tools": -1,
|
|
1045
|
+
"video_input_mode": "none"
|
|
680
1046
|
},
|
|
681
1047
|
"qwen3-coder-30b": {
|
|
682
1048
|
"max_output_tokens": 65536,
|
|
@@ -698,7 +1064,10 @@
|
|
|
698
1064
|
"qwen3-coder-30b-a3b",
|
|
699
1065
|
"qwen3-coder-30b-a3b-instruct"
|
|
700
1066
|
],
|
|
701
|
-
"max_tokens": 262144
|
|
1067
|
+
"max_tokens": 262144,
|
|
1068
|
+
"video_support": false,
|
|
1069
|
+
"max_tools": -1,
|
|
1070
|
+
"video_input_mode": "none"
|
|
702
1071
|
},
|
|
703
1072
|
"qwen2-vl": {
|
|
704
1073
|
"max_output_tokens": 8192,
|
|
@@ -713,7 +1082,10 @@
|
|
|
713
1082
|
"source": "Alibaba official docs",
|
|
714
1083
|
"canonical_name": "qwen2-vl",
|
|
715
1084
|
"aliases": [],
|
|
716
|
-
"max_tokens": 32768
|
|
1085
|
+
"max_tokens": 32768,
|
|
1086
|
+
"video_support": false,
|
|
1087
|
+
"max_tools": -1,
|
|
1088
|
+
"video_input_mode": "frames"
|
|
717
1089
|
},
|
|
718
1090
|
"qwen2.5-vl": {
|
|
719
1091
|
"max_output_tokens": 8192,
|
|
@@ -729,7 +1101,10 @@
|
|
|
729
1101
|
"source": "Alibaba official docs",
|
|
730
1102
|
"canonical_name": "qwen2.5-vl",
|
|
731
1103
|
"aliases": [],
|
|
732
|
-
"max_tokens": 128000
|
|
1104
|
+
"max_tokens": 128000,
|
|
1105
|
+
"video_support": false,
|
|
1106
|
+
"max_tools": -1,
|
|
1107
|
+
"video_input_mode": "frames"
|
|
733
1108
|
},
|
|
734
1109
|
"phi-2": {
|
|
735
1110
|
"max_output_tokens": 2048,
|
|
@@ -741,7 +1116,10 @@
|
|
|
741
1116
|
"source": "Microsoft official docs",
|
|
742
1117
|
"canonical_name": "phi-2",
|
|
743
1118
|
"aliases": [],
|
|
744
|
-
"max_tokens": 2048
|
|
1119
|
+
"max_tokens": 2048,
|
|
1120
|
+
"video_support": false,
|
|
1121
|
+
"max_tools": 0,
|
|
1122
|
+
"video_input_mode": "none"
|
|
745
1123
|
},
|
|
746
1124
|
"phi-3-mini": {
|
|
747
1125
|
"max_output_tokens": 4096,
|
|
@@ -754,7 +1132,10 @@
|
|
|
754
1132
|
"source": "Microsoft official docs",
|
|
755
1133
|
"canonical_name": "phi-3-mini",
|
|
756
1134
|
"aliases": [],
|
|
757
|
-
"max_tokens": 4096
|
|
1135
|
+
"max_tokens": 4096,
|
|
1136
|
+
"video_support": false,
|
|
1137
|
+
"max_tools": -1,
|
|
1138
|
+
"video_input_mode": "none"
|
|
758
1139
|
},
|
|
759
1140
|
"phi-3-small": {
|
|
760
1141
|
"max_output_tokens": 8192,
|
|
@@ -766,7 +1147,10 @@
|
|
|
766
1147
|
"source": "Microsoft official docs",
|
|
767
1148
|
"canonical_name": "phi-3-small",
|
|
768
1149
|
"aliases": [],
|
|
769
|
-
"max_tokens": 8192
|
|
1150
|
+
"max_tokens": 8192,
|
|
1151
|
+
"video_support": false,
|
|
1152
|
+
"max_tools": -1,
|
|
1153
|
+
"video_input_mode": "none"
|
|
770
1154
|
},
|
|
771
1155
|
"phi-3-medium": {
|
|
772
1156
|
"max_output_tokens": 4096,
|
|
@@ -778,7 +1162,10 @@
|
|
|
778
1162
|
"source": "Microsoft official docs",
|
|
779
1163
|
"canonical_name": "phi-3-medium",
|
|
780
1164
|
"aliases": [],
|
|
781
|
-
"max_tokens": 128000
|
|
1165
|
+
"max_tokens": 128000,
|
|
1166
|
+
"video_support": false,
|
|
1167
|
+
"max_tools": -1,
|
|
1168
|
+
"video_input_mode": "none"
|
|
782
1169
|
},
|
|
783
1170
|
"phi-3.5-mini": {
|
|
784
1171
|
"max_output_tokens": 4096,
|
|
@@ -790,7 +1177,10 @@
|
|
|
790
1177
|
"source": "Microsoft official docs",
|
|
791
1178
|
"canonical_name": "phi-3.5-mini",
|
|
792
1179
|
"aliases": [],
|
|
793
|
-
"max_tokens": 128000
|
|
1180
|
+
"max_tokens": 128000,
|
|
1181
|
+
"video_support": false,
|
|
1182
|
+
"max_tools": -1,
|
|
1183
|
+
"video_input_mode": "none"
|
|
794
1184
|
},
|
|
795
1185
|
"phi-3.5-moe": {
|
|
796
1186
|
"max_output_tokens": 4096,
|
|
@@ -803,7 +1193,10 @@
|
|
|
803
1193
|
"source": "Microsoft official docs",
|
|
804
1194
|
"canonical_name": "phi-3.5-moe",
|
|
805
1195
|
"aliases": [],
|
|
806
|
-
"max_tokens": 128000
|
|
1196
|
+
"max_tokens": 128000,
|
|
1197
|
+
"video_support": false,
|
|
1198
|
+
"max_tools": -1,
|
|
1199
|
+
"video_input_mode": "none"
|
|
807
1200
|
},
|
|
808
1201
|
"phi-3-vision": {
|
|
809
1202
|
"max_output_tokens": 4096,
|
|
@@ -818,7 +1211,10 @@
|
|
|
818
1211
|
"source": "Microsoft official docs",
|
|
819
1212
|
"canonical_name": "phi-3-vision",
|
|
820
1213
|
"aliases": [],
|
|
821
|
-
"max_tokens": 128000
|
|
1214
|
+
"max_tokens": 128000,
|
|
1215
|
+
"video_support": false,
|
|
1216
|
+
"max_tools": -1,
|
|
1217
|
+
"video_input_mode": "frames"
|
|
822
1218
|
},
|
|
823
1219
|
"phi-4": {
|
|
824
1220
|
"max_output_tokens": 16000,
|
|
@@ -831,7 +1227,10 @@
|
|
|
831
1227
|
"source": "Microsoft official docs",
|
|
832
1228
|
"canonical_name": "phi-4",
|
|
833
1229
|
"aliases": [],
|
|
834
|
-
"max_tokens": 16000
|
|
1230
|
+
"max_tokens": 16000,
|
|
1231
|
+
"video_support": false,
|
|
1232
|
+
"max_tools": -1,
|
|
1233
|
+
"video_input_mode": "none"
|
|
835
1234
|
},
|
|
836
1235
|
"mistral-7b": {
|
|
837
1236
|
"max_output_tokens": 8192,
|
|
@@ -843,7 +1242,10 @@
|
|
|
843
1242
|
"source": "Mistral AI docs",
|
|
844
1243
|
"canonical_name": "mistral-7b",
|
|
845
1244
|
"aliases": [],
|
|
846
|
-
"max_tokens": 8192
|
|
1245
|
+
"max_tokens": 8192,
|
|
1246
|
+
"video_support": false,
|
|
1247
|
+
"max_tools": -1,
|
|
1248
|
+
"video_input_mode": "none"
|
|
847
1249
|
},
|
|
848
1250
|
"mixtral-8x7b": {
|
|
849
1251
|
"max_output_tokens": 32768,
|
|
@@ -856,7 +1258,10 @@
|
|
|
856
1258
|
"source": "Mistral AI docs",
|
|
857
1259
|
"canonical_name": "mixtral-8x7b",
|
|
858
1260
|
"aliases": [],
|
|
859
|
-
"max_tokens": 32768
|
|
1261
|
+
"max_tokens": 32768,
|
|
1262
|
+
"video_support": false,
|
|
1263
|
+
"max_tools": -1,
|
|
1264
|
+
"video_input_mode": "none"
|
|
860
1265
|
},
|
|
861
1266
|
"mixtral-8x22b": {
|
|
862
1267
|
"max_output_tokens": 65536,
|
|
@@ -868,7 +1273,10 @@
|
|
|
868
1273
|
"source": "Mistral AI docs",
|
|
869
1274
|
"canonical_name": "mixtral-8x22b",
|
|
870
1275
|
"aliases": [],
|
|
871
|
-
"max_tokens": 65536
|
|
1276
|
+
"max_tokens": 65536,
|
|
1277
|
+
"video_support": false,
|
|
1278
|
+
"max_tools": -1,
|
|
1279
|
+
"video_input_mode": "none"
|
|
872
1280
|
},
|
|
873
1281
|
"mistral-small": {
|
|
874
1282
|
"max_output_tokens": 32768,
|
|
@@ -880,7 +1288,10 @@
|
|
|
880
1288
|
"source": "Mistral AI docs",
|
|
881
1289
|
"canonical_name": "mistral-small",
|
|
882
1290
|
"aliases": [],
|
|
883
|
-
"max_tokens": 32768
|
|
1291
|
+
"max_tokens": 32768,
|
|
1292
|
+
"video_support": false,
|
|
1293
|
+
"max_tools": -1,
|
|
1294
|
+
"video_input_mode": "none"
|
|
884
1295
|
},
|
|
885
1296
|
"mistral-medium": {
|
|
886
1297
|
"max_output_tokens": 32768,
|
|
@@ -892,7 +1303,10 @@
|
|
|
892
1303
|
"source": "Mistral AI docs",
|
|
893
1304
|
"canonical_name": "mistral-medium",
|
|
894
1305
|
"aliases": [],
|
|
895
|
-
"max_tokens": 32768
|
|
1306
|
+
"max_tokens": 32768,
|
|
1307
|
+
"video_support": false,
|
|
1308
|
+
"max_tools": -1,
|
|
1309
|
+
"video_input_mode": "none"
|
|
896
1310
|
},
|
|
897
1311
|
"mistral-large": {
|
|
898
1312
|
"max_output_tokens": 128000,
|
|
@@ -904,7 +1318,10 @@
|
|
|
904
1318
|
"source": "Mistral AI docs",
|
|
905
1319
|
"canonical_name": "mistral-large",
|
|
906
1320
|
"aliases": [],
|
|
907
|
-
"max_tokens": 128000
|
|
1321
|
+
"max_tokens": 128000,
|
|
1322
|
+
"video_support": false,
|
|
1323
|
+
"max_tools": -1,
|
|
1324
|
+
"video_input_mode": "none"
|
|
908
1325
|
},
|
|
909
1326
|
"codestral": {
|
|
910
1327
|
"max_output_tokens": 32768,
|
|
@@ -917,7 +1334,10 @@
|
|
|
917
1334
|
"source": "Mistral AI docs",
|
|
918
1335
|
"canonical_name": "codestral",
|
|
919
1336
|
"aliases": [],
|
|
920
|
-
"max_tokens": 32768
|
|
1337
|
+
"max_tokens": 32768,
|
|
1338
|
+
"video_support": false,
|
|
1339
|
+
"max_tools": -1,
|
|
1340
|
+
"video_input_mode": "none"
|
|
921
1341
|
},
|
|
922
1342
|
"magistral-small-2509": {
|
|
923
1343
|
"max_output_tokens": 8192,
|
|
@@ -937,7 +1357,9 @@
|
|
|
937
1357
|
"aliases": [
|
|
938
1358
|
"mistralai/magistral-small-2509"
|
|
939
1359
|
],
|
|
940
|
-
"max_tokens": 128000
|
|
1360
|
+
"max_tokens": 128000,
|
|
1361
|
+
"max_tools": -1,
|
|
1362
|
+
"video_input_mode": "frames"
|
|
941
1363
|
},
|
|
942
1364
|
"Qwen/Qwen3-VL-8B-Instruct-FP8": {
|
|
943
1365
|
"max_output_tokens": 8192,
|
|
@@ -961,7 +1383,9 @@
|
|
|
961
1383
|
"qwen3-vl-8b-fp8",
|
|
962
1384
|
"qwen3-vl-8b-instruct-fp8"
|
|
963
1385
|
],
|
|
964
|
-
"max_tokens": 262144
|
|
1386
|
+
"max_tokens": 262144,
|
|
1387
|
+
"max_tools": -1,
|
|
1388
|
+
"video_input_mode": "frames"
|
|
965
1389
|
},
|
|
966
1390
|
"llama3.2-vision:11b": {
|
|
967
1391
|
"max_output_tokens": 4096,
|
|
@@ -1012,7 +1436,9 @@
|
|
|
1012
1436
|
"llama3.2-vision-11b",
|
|
1013
1437
|
"llama-3.2-vision:11b"
|
|
1014
1438
|
],
|
|
1015
|
-
"max_tokens": 131072
|
|
1439
|
+
"max_tokens": 131072,
|
|
1440
|
+
"max_tools": -1,
|
|
1441
|
+
"video_input_mode": "frames"
|
|
1016
1442
|
},
|
|
1017
1443
|
"llama3.2-vision:70b": {
|
|
1018
1444
|
"max_output_tokens": 4096,
|
|
@@ -1038,7 +1464,9 @@
|
|
|
1038
1464
|
"llama3.2-vision-70b",
|
|
1039
1465
|
"llama-3.2-vision:70b"
|
|
1040
1466
|
],
|
|
1041
|
-
"max_tokens": 131072
|
|
1467
|
+
"max_tokens": 131072,
|
|
1468
|
+
"max_tools": -1,
|
|
1469
|
+
"video_input_mode": "frames"
|
|
1042
1470
|
},
|
|
1043
1471
|
"llama3.2-vision:90b": {
|
|
1044
1472
|
"max_output_tokens": 4096,
|
|
@@ -1064,7 +1492,9 @@
|
|
|
1064
1492
|
"llama3.2-vision-90b",
|
|
1065
1493
|
"llama-3.2-vision:90b"
|
|
1066
1494
|
],
|
|
1067
|
-
"max_tokens": 131072
|
|
1495
|
+
"max_tokens": 131072,
|
|
1496
|
+
"max_tools": -1,
|
|
1497
|
+
"video_input_mode": "frames"
|
|
1068
1498
|
},
|
|
1069
1499
|
"gemma-2b": {
|
|
1070
1500
|
"max_output_tokens": 8192,
|
|
@@ -1076,7 +1506,10 @@
|
|
|
1076
1506
|
"source": "Google docs",
|
|
1077
1507
|
"canonical_name": "gemma-2b",
|
|
1078
1508
|
"aliases": [],
|
|
1079
|
-
"max_tokens": 8192
|
|
1509
|
+
"max_tokens": 8192,
|
|
1510
|
+
"video_support": false,
|
|
1511
|
+
"max_tools": 0,
|
|
1512
|
+
"video_input_mode": "none"
|
|
1080
1513
|
},
|
|
1081
1514
|
"gemma-7b": {
|
|
1082
1515
|
"max_output_tokens": 8192,
|
|
@@ -1088,7 +1521,10 @@
|
|
|
1088
1521
|
"source": "Google docs",
|
|
1089
1522
|
"canonical_name": "gemma-7b",
|
|
1090
1523
|
"aliases": [],
|
|
1091
|
-
"max_tokens": 8192
|
|
1524
|
+
"max_tokens": 8192,
|
|
1525
|
+
"video_support": false,
|
|
1526
|
+
"max_tools": 0,
|
|
1527
|
+
"video_input_mode": "none"
|
|
1092
1528
|
},
|
|
1093
1529
|
"gemma2-9b": {
|
|
1094
1530
|
"max_output_tokens": 8192,
|
|
@@ -1100,7 +1536,10 @@
|
|
|
1100
1536
|
"source": "Google docs",
|
|
1101
1537
|
"canonical_name": "gemma2-9b",
|
|
1102
1538
|
"aliases": [],
|
|
1103
|
-
"max_tokens": 8192
|
|
1539
|
+
"max_tokens": 8192,
|
|
1540
|
+
"video_support": false,
|
|
1541
|
+
"max_tools": -1,
|
|
1542
|
+
"video_input_mode": "none"
|
|
1104
1543
|
},
|
|
1105
1544
|
"gemma2-27b": {
|
|
1106
1545
|
"max_output_tokens": 8192,
|
|
@@ -1112,7 +1551,10 @@
|
|
|
1112
1551
|
"source": "Google docs",
|
|
1113
1552
|
"canonical_name": "gemma2-27b",
|
|
1114
1553
|
"aliases": [],
|
|
1115
|
-
"max_tokens": 8192
|
|
1554
|
+
"max_tokens": 8192,
|
|
1555
|
+
"video_support": false,
|
|
1556
|
+
"max_tools": -1,
|
|
1557
|
+
"video_input_mode": "none"
|
|
1116
1558
|
},
|
|
1117
1559
|
"gemma3": {
|
|
1118
1560
|
"max_output_tokens": 8192,
|
|
@@ -1125,7 +1567,10 @@
|
|
|
1125
1567
|
"source": "Google docs",
|
|
1126
1568
|
"canonical_name": "gemma3",
|
|
1127
1569
|
"aliases": [],
|
|
1128
|
-
"max_tokens": 128000
|
|
1570
|
+
"max_tokens": 128000,
|
|
1571
|
+
"video_support": false,
|
|
1572
|
+
"max_tools": -1,
|
|
1573
|
+
"video_input_mode": "none"
|
|
1129
1574
|
},
|
|
1130
1575
|
"codegemma": {
|
|
1131
1576
|
"max_output_tokens": 8192,
|
|
@@ -1138,7 +1583,10 @@
|
|
|
1138
1583
|
"source": "Google docs",
|
|
1139
1584
|
"canonical_name": "codegemma",
|
|
1140
1585
|
"aliases": [],
|
|
1141
|
-
"max_tokens": 8192
|
|
1586
|
+
"max_tokens": 8192,
|
|
1587
|
+
"video_support": false,
|
|
1588
|
+
"max_tools": 0,
|
|
1589
|
+
"video_input_mode": "none"
|
|
1142
1590
|
},
|
|
1143
1591
|
"paligemma": {
|
|
1144
1592
|
"max_output_tokens": 1024,
|
|
@@ -1156,7 +1604,10 @@
|
|
|
1156
1604
|
"source": "Google docs",
|
|
1157
1605
|
"canonical_name": "paligemma",
|
|
1158
1606
|
"aliases": [],
|
|
1159
|
-
"max_tokens": 8192
|
|
1607
|
+
"max_tokens": 8192,
|
|
1608
|
+
"video_support": false,
|
|
1609
|
+
"max_tools": 0,
|
|
1610
|
+
"video_input_mode": "frames"
|
|
1160
1611
|
},
|
|
1161
1612
|
"glm-4": {
|
|
1162
1613
|
"max_output_tokens": 4096,
|
|
@@ -1169,7 +1620,10 @@
|
|
|
1169
1620
|
"source": "Model documentation",
|
|
1170
1621
|
"canonical_name": "glm-4",
|
|
1171
1622
|
"aliases": [],
|
|
1172
|
-
"max_tokens": 128000
|
|
1623
|
+
"max_tokens": 128000,
|
|
1624
|
+
"video_support": false,
|
|
1625
|
+
"max_tools": -1,
|
|
1626
|
+
"video_input_mode": "none"
|
|
1173
1627
|
},
|
|
1174
1628
|
"glm-4-9b": {
|
|
1175
1629
|
"max_output_tokens": 4096,
|
|
@@ -1182,7 +1636,10 @@
|
|
|
1182
1636
|
"source": "Model documentation",
|
|
1183
1637
|
"canonical_name": "glm-4-9b",
|
|
1184
1638
|
"aliases": [],
|
|
1185
|
-
"max_tokens": 128000
|
|
1639
|
+
"max_tokens": 128000,
|
|
1640
|
+
"video_support": false,
|
|
1641
|
+
"max_tools": -1,
|
|
1642
|
+
"video_input_mode": "none"
|
|
1186
1643
|
},
|
|
1187
1644
|
"glm-4-9b-0414-4bit": {
|
|
1188
1645
|
"max_output_tokens": 4096,
|
|
@@ -1195,7 +1652,10 @@
|
|
|
1195
1652
|
"source": "Model documentation",
|
|
1196
1653
|
"canonical_name": "glm-4-9b-0414-4bit",
|
|
1197
1654
|
"aliases": [],
|
|
1198
|
-
"max_tokens": 128000
|
|
1655
|
+
"max_tokens": 128000,
|
|
1656
|
+
"video_support": false,
|
|
1657
|
+
"max_tools": -1,
|
|
1658
|
+
"video_input_mode": "none"
|
|
1199
1659
|
},
|
|
1200
1660
|
"deepseek-r1": {
|
|
1201
1661
|
"max_output_tokens": 8192,
|
|
@@ -1208,7 +1668,10 @@
|
|
|
1208
1668
|
"source": "MLX community",
|
|
1209
1669
|
"canonical_name": "deepseek-r1",
|
|
1210
1670
|
"aliases": [],
|
|
1211
|
-
"max_tokens": 32768
|
|
1671
|
+
"max_tokens": 32768,
|
|
1672
|
+
"video_support": false,
|
|
1673
|
+
"max_tools": -1,
|
|
1674
|
+
"video_input_mode": "none"
|
|
1212
1675
|
},
|
|
1213
1676
|
"qwen3": {
|
|
1214
1677
|
"max_output_tokens": 8192,
|
|
@@ -1221,7 +1684,10 @@
|
|
|
1221
1684
|
"source": "MLX community",
|
|
1222
1685
|
"canonical_name": "qwen3",
|
|
1223
1686
|
"aliases": [],
|
|
1224
|
-
"max_tokens": 32768
|
|
1687
|
+
"max_tokens": 32768,
|
|
1688
|
+
"video_support": false,
|
|
1689
|
+
"max_tools": -1,
|
|
1690
|
+
"video_input_mode": "none"
|
|
1225
1691
|
},
|
|
1226
1692
|
"qwen3-14b": {
|
|
1227
1693
|
"max_output_tokens": 8192,
|
|
@@ -1231,11 +1697,15 @@
|
|
|
1231
1697
|
"vision_support": false,
|
|
1232
1698
|
"audio_support": false,
|
|
1233
1699
|
"thinking_support": true,
|
|
1700
|
+
"thinking_control": "/no_think",
|
|
1234
1701
|
"notes": "Qwen3 14B model with thinking capabilities",
|
|
1235
1702
|
"source": "Alibaba Qwen3 technical report",
|
|
1236
1703
|
"canonical_name": "qwen3-14b",
|
|
1237
1704
|
"aliases": [],
|
|
1238
|
-
"max_tokens": 131072
|
|
1705
|
+
"max_tokens": 131072,
|
|
1706
|
+
"video_support": false,
|
|
1707
|
+
"max_tools": -1,
|
|
1708
|
+
"video_input_mode": "none"
|
|
1239
1709
|
},
|
|
1240
1710
|
"qwen3-next-80b-a3b": {
|
|
1241
1711
|
"max_output_tokens": 16384,
|
|
@@ -1249,9 +1719,13 @@
|
|
|
1249
1719
|
"source": "Alibaba Qwen3-Next technical report",
|
|
1250
1720
|
"canonical_name": "qwen3-next-80b-a3b",
|
|
1251
1721
|
"aliases": [
|
|
1252
|
-
"qwen/qwen3-next-80b"
|
|
1722
|
+
"qwen/qwen3-next-80b",
|
|
1723
|
+
"qwen3-next-80b"
|
|
1253
1724
|
],
|
|
1254
|
-
"max_tokens": 262144
|
|
1725
|
+
"max_tokens": 262144,
|
|
1726
|
+
"video_support": false,
|
|
1727
|
+
"max_tools": -1,
|
|
1728
|
+
"video_input_mode": "none"
|
|
1255
1729
|
},
|
|
1256
1730
|
"gpt-5": {
|
|
1257
1731
|
"max_output_tokens": 8192,
|
|
@@ -1265,7 +1739,9 @@
|
|
|
1265
1739
|
"source": "OpenAI official docs",
|
|
1266
1740
|
"canonical_name": "gpt-5",
|
|
1267
1741
|
"aliases": [],
|
|
1268
|
-
"max_tokens": 200000
|
|
1742
|
+
"max_tokens": 200000,
|
|
1743
|
+
"video_support": false,
|
|
1744
|
+
"video_input_mode": "frames"
|
|
1269
1745
|
},
|
|
1270
1746
|
"gpt-5-turbo": {
|
|
1271
1747
|
"max_output_tokens": 4096,
|
|
@@ -1279,7 +1755,9 @@
|
|
|
1279
1755
|
"source": "OpenAI official docs",
|
|
1280
1756
|
"canonical_name": "gpt-5-turbo",
|
|
1281
1757
|
"aliases": [],
|
|
1282
|
-
"max_tokens": 200000
|
|
1758
|
+
"max_tokens": 200000,
|
|
1759
|
+
"video_support": false,
|
|
1760
|
+
"video_input_mode": "frames"
|
|
1283
1761
|
},
|
|
1284
1762
|
"gpt-5-pro": {
|
|
1285
1763
|
"max_output_tokens": 16384,
|
|
@@ -1293,7 +1771,9 @@
|
|
|
1293
1771
|
"source": "OpenAI official docs",
|
|
1294
1772
|
"canonical_name": "gpt-5-pro",
|
|
1295
1773
|
"aliases": [],
|
|
1296
|
-
"max_tokens": 200000
|
|
1774
|
+
"max_tokens": 200000,
|
|
1775
|
+
"video_support": false,
|
|
1776
|
+
"video_input_mode": "frames"
|
|
1297
1777
|
},
|
|
1298
1778
|
"gpt-5-mini": {
|
|
1299
1779
|
"max_output_tokens": 8192,
|
|
@@ -1307,7 +1787,9 @@
|
|
|
1307
1787
|
"source": "OpenAI official docs",
|
|
1308
1788
|
"canonical_name": "gpt-5-mini",
|
|
1309
1789
|
"aliases": [],
|
|
1310
|
-
"max_tokens": 200000
|
|
1790
|
+
"max_tokens": 200000,
|
|
1791
|
+
"video_support": false,
|
|
1792
|
+
"video_input_mode": "frames"
|
|
1311
1793
|
},
|
|
1312
1794
|
"gpt-5-vision": {
|
|
1313
1795
|
"max_output_tokens": 8192,
|
|
@@ -1324,7 +1806,9 @@
|
|
|
1324
1806
|
"source": "OpenAI official docs",
|
|
1325
1807
|
"canonical_name": "gpt-5-vision",
|
|
1326
1808
|
"aliases": [],
|
|
1327
|
-
"max_tokens": 200000
|
|
1809
|
+
"max_tokens": 200000,
|
|
1810
|
+
"video_support": false,
|
|
1811
|
+
"video_input_mode": "frames"
|
|
1328
1812
|
},
|
|
1329
1813
|
"qwen3-8b": {
|
|
1330
1814
|
"max_output_tokens": 8192,
|
|
@@ -1334,11 +1818,15 @@
|
|
|
1334
1818
|
"vision_support": false,
|
|
1335
1819
|
"audio_support": false,
|
|
1336
1820
|
"thinking_support": true,
|
|
1821
|
+
"thinking_control": "/no_think",
|
|
1337
1822
|
"notes": "Qwen3 8B model with thinking capabilities",
|
|
1338
1823
|
"source": "Alibaba Qwen3 technical report",
|
|
1339
1824
|
"canonical_name": "qwen3-8b",
|
|
1340
1825
|
"aliases": [],
|
|
1341
|
-
"max_tokens": 131072
|
|
1826
|
+
"max_tokens": 131072,
|
|
1827
|
+
"video_support": false,
|
|
1828
|
+
"max_tools": -1,
|
|
1829
|
+
"video_input_mode": "none"
|
|
1342
1830
|
},
|
|
1343
1831
|
"qwen3-235b-a22b": {
|
|
1344
1832
|
"max_output_tokens": 8192,
|
|
@@ -1348,11 +1836,15 @@
|
|
|
1348
1836
|
"vision_support": false,
|
|
1349
1837
|
"audio_support": false,
|
|
1350
1838
|
"thinking_support": true,
|
|
1839
|
+
"thinking_control": "/no_think",
|
|
1351
1840
|
"notes": "Qwen3 MoE model with 4-bit precision, 235B total/22B active parameters",
|
|
1352
1841
|
"source": "Alibaba Qwen3 technical report",
|
|
1353
1842
|
"canonical_name": "qwen3-235b-a22b",
|
|
1354
1843
|
"aliases": [],
|
|
1355
|
-
"max_tokens": 40960
|
|
1844
|
+
"max_tokens": 40960,
|
|
1845
|
+
"video_support": false,
|
|
1846
|
+
"max_tools": -1,
|
|
1847
|
+
"video_input_mode": "none"
|
|
1356
1848
|
},
|
|
1357
1849
|
"qwen3-vl": {
|
|
1358
1850
|
"max_output_tokens": 8192,
|
|
@@ -1372,7 +1864,9 @@
|
|
|
1372
1864
|
"source": "Alibaba Qwen3-VL technical report",
|
|
1373
1865
|
"canonical_name": "qwen3-vl",
|
|
1374
1866
|
"aliases": [],
|
|
1375
|
-
"max_tokens": 131072
|
|
1867
|
+
"max_tokens": 131072,
|
|
1868
|
+
"max_tools": -1,
|
|
1869
|
+
"video_input_mode": "native"
|
|
1376
1870
|
},
|
|
1377
1871
|
"qwen2.5-vl-7b": {
|
|
1378
1872
|
"max_output_tokens": 8192,
|
|
@@ -1400,8 +1894,11 @@
|
|
|
1400
1894
|
"qwen/qwen2.5-vl-7b",
|
|
1401
1895
|
"unsloth/Qwen2.5-VL-7B-Instruct-GGUF"
|
|
1402
1896
|
],
|
|
1403
|
-
"max_tokens": 128000
|
|
1404
|
-
|
|
1897
|
+
"max_tokens": 128000,
|
|
1898
|
+
"video_support": false,
|
|
1899
|
+
"max_tools": -1,
|
|
1900
|
+
"video_input_mode": "frames"
|
|
1901
|
+
},
|
|
1405
1902
|
"gemma3-4b": {
|
|
1406
1903
|
"max_output_tokens": 8192,
|
|
1407
1904
|
"tool_support": "native",
|
|
@@ -1429,7 +1926,9 @@
|
|
|
1429
1926
|
"aliases": [
|
|
1430
1927
|
"gemma3:4b"
|
|
1431
1928
|
],
|
|
1432
|
-
"max_tokens": 128000
|
|
1929
|
+
"max_tokens": 128000,
|
|
1930
|
+
"max_tools": -1,
|
|
1931
|
+
"video_input_mode": "frames"
|
|
1433
1932
|
},
|
|
1434
1933
|
"qwen2.5vl:7b": {
|
|
1435
1934
|
"max_output_tokens": 8192,
|
|
@@ -1451,7 +1950,10 @@
|
|
|
1451
1950
|
"aliases": [
|
|
1452
1951
|
"qwen2.5vl"
|
|
1453
1952
|
],
|
|
1454
|
-
"max_tokens": 128000
|
|
1953
|
+
"max_tokens": 128000,
|
|
1954
|
+
"video_support": false,
|
|
1955
|
+
"max_tools": -1,
|
|
1956
|
+
"video_input_mode": "frames"
|
|
1455
1957
|
},
|
|
1456
1958
|
"gemma3:4b-it-qat": {
|
|
1457
1959
|
"max_output_tokens": 8192,
|
|
@@ -1472,7 +1974,9 @@
|
|
|
1472
1974
|
"source": "Ollama model library",
|
|
1473
1975
|
"canonical_name": "gemma3:4b-it-qat",
|
|
1474
1976
|
"aliases": [],
|
|
1475
|
-
"max_tokens": 128000
|
|
1977
|
+
"max_tokens": 128000,
|
|
1978
|
+
"max_tools": -1,
|
|
1979
|
+
"video_input_mode": "frames"
|
|
1476
1980
|
},
|
|
1477
1981
|
"gemma3n:e4b": {
|
|
1478
1982
|
"max_output_tokens": 8192,
|
|
@@ -1501,7 +2005,9 @@
|
|
|
1501
2005
|
"gemma3n:e2b:latest",
|
|
1502
2006
|
"gemma3n:e2b"
|
|
1503
2007
|
],
|
|
1504
|
-
"max_tokens": 32768
|
|
2008
|
+
"max_tokens": 32768,
|
|
2009
|
+
"max_tools": -1,
|
|
2010
|
+
"video_input_mode": "native"
|
|
1505
2011
|
},
|
|
1506
2012
|
"seed-oss": {
|
|
1507
2013
|
"max_output_tokens": 8192,
|
|
@@ -1516,7 +2022,10 @@
|
|
|
1516
2022
|
"source": "ByteDance SEED-OSS documentation",
|
|
1517
2023
|
"canonical_name": "seed-oss",
|
|
1518
2024
|
"aliases": [],
|
|
1519
|
-
"max_tokens": 524288
|
|
2025
|
+
"max_tokens": 524288,
|
|
2026
|
+
"video_support": false,
|
|
2027
|
+
"max_tools": -1,
|
|
2028
|
+
"video_input_mode": "none"
|
|
1520
2029
|
},
|
|
1521
2030
|
"glm-4.5": {
|
|
1522
2031
|
"max_output_tokens": 4096,
|
|
@@ -1530,7 +2039,10 @@
|
|
|
1530
2039
|
"source": "Zhipu AI GLM-4.5 announcement",
|
|
1531
2040
|
"canonical_name": "glm-4.5",
|
|
1532
2041
|
"aliases": [],
|
|
1533
|
-
"max_tokens": 128000
|
|
2042
|
+
"max_tokens": 128000,
|
|
2043
|
+
"video_support": false,
|
|
2044
|
+
"max_tools": -1,
|
|
2045
|
+
"video_input_mode": "none"
|
|
1534
2046
|
},
|
|
1535
2047
|
"glm-4.6": {
|
|
1536
2048
|
"max_output_tokens": 4096,
|
|
@@ -1548,7 +2060,10 @@
|
|
|
1548
2060
|
"zai-org/GLM-4.6-FP8",
|
|
1549
2061
|
"glm-4.6-fp8"
|
|
1550
2062
|
],
|
|
1551
|
-
"max_tokens": 128000
|
|
2063
|
+
"max_tokens": 128000,
|
|
2064
|
+
"video_support": false,
|
|
2065
|
+
"max_tools": -1,
|
|
2066
|
+
"video_input_mode": "none"
|
|
1552
2067
|
},
|
|
1553
2068
|
"glm-4.5-air": {
|
|
1554
2069
|
"max_output_tokens": 4096,
|
|
@@ -1562,7 +2077,55 @@
|
|
|
1562
2077
|
"source": "Zhipu AI GLM-4.5-Air announcement",
|
|
1563
2078
|
"canonical_name": "glm-4.5-air",
|
|
1564
2079
|
"aliases": [],
|
|
1565
|
-
"max_tokens": 128000
|
|
2080
|
+
"max_tokens": 128000,
|
|
2081
|
+
"video_support": false,
|
|
2082
|
+
"max_tools": -1,
|
|
2083
|
+
"video_input_mode": "none"
|
|
2084
|
+
},
|
|
2085
|
+
"glm-4.7-flash": {
|
|
2086
|
+
"max_output_tokens": 131072,
|
|
2087
|
+
"tool_support": "native",
|
|
2088
|
+
"structured_output": "native",
|
|
2089
|
+
"parallel_tools": true,
|
|
2090
|
+
"vision_support": false,
|
|
2091
|
+
"audio_support": false,
|
|
2092
|
+
"thinking_support": true,
|
|
2093
|
+
"thinking_modes": [
|
|
2094
|
+
"interleaved_thinking",
|
|
2095
|
+
"preserved_thinking",
|
|
2096
|
+
"turn_level_thinking"
|
|
2097
|
+
],
|
|
2098
|
+
"architecture": "mixture_of_experts",
|
|
2099
|
+
"total_parameters": "30B",
|
|
2100
|
+
"active_parameters": "3B",
|
|
2101
|
+
"experts": 64,
|
|
2102
|
+
"shared_experts": 1,
|
|
2103
|
+
"experts_activated": 4,
|
|
2104
|
+
"tensor_type": "BF16",
|
|
2105
|
+
"attention_mechanism": "grouped_query_attention",
|
|
2106
|
+
"positional_encoding": "rope",
|
|
2107
|
+
"transformer_layers": 47,
|
|
2108
|
+
"agentic_coding": true,
|
|
2109
|
+
"ui_generation": true,
|
|
2110
|
+
"notes": "GLM-4.7-Flash lightweight MoE model (30B total/3B active, 64 routed experts + 1 shared/4 activated) optimized for high-speed agentic coding and complex reasoning. Features Interleaved Thinking (reasoning before actions), Preserved Thinking (cross-turn consistency), and Turn-level Thinking (per-turn toggle). BF16 precision (~62.5GB). Compatible with vLLM, SGLang, and Hugging Face Transformers. Strong performance on SWE-bench Verified (59.2%), AIME 25 (91.6%), \u03c4\u00b2-Bench (79.5%), GPQA (75.2%), HLE (14.4%). MIT license. Recommended: temp 0.7 for coding, 0 for tool tasks.",
|
|
2111
|
+
"source": "HuggingFace zai-org/GLM-4.7-Flash official model card",
|
|
2112
|
+
"canonical_name": "glm-4.7-flash",
|
|
2113
|
+
"aliases": [
|
|
2114
|
+
"zai-org/glm-4.7-flash",
|
|
2115
|
+
"z-ai/glm-4.7-flash"
|
|
2116
|
+
],
|
|
2117
|
+
"max_tokens": 128000,
|
|
2118
|
+
"license": "MIT",
|
|
2119
|
+
"inference_parameters": {
|
|
2120
|
+
"temperature": 1.0,
|
|
2121
|
+
"top_p": 0.95,
|
|
2122
|
+
"max_new_tokens": 131072,
|
|
2123
|
+
"coding_temp": 0.7,
|
|
2124
|
+
"agentic_temp": 0.0
|
|
2125
|
+
},
|
|
2126
|
+
"video_support": false,
|
|
2127
|
+
"max_tools": -1,
|
|
2128
|
+
"video_input_mode": "none"
|
|
1566
2129
|
},
|
|
1567
2130
|
"llama-4-109b": {
|
|
1568
2131
|
"max_output_tokens": 8192,
|
|
@@ -1575,7 +2138,10 @@
|
|
|
1575
2138
|
"source": "Meta LLaMA 4 announcement",
|
|
1576
2139
|
"canonical_name": "llama-4-109b",
|
|
1577
2140
|
"aliases": [],
|
|
1578
|
-
"max_tokens": 10000000
|
|
2141
|
+
"max_tokens": 10000000,
|
|
2142
|
+
"video_support": false,
|
|
2143
|
+
"max_tools": -1,
|
|
2144
|
+
"video_input_mode": "frames"
|
|
1579
2145
|
},
|
|
1580
2146
|
"granite3.2:2b": {
|
|
1581
2147
|
"max_output_tokens": 8192,
|
|
@@ -1590,7 +2156,10 @@
|
|
|
1590
2156
|
"aliases": [
|
|
1591
2157
|
"granite3.2-2b"
|
|
1592
2158
|
],
|
|
1593
|
-
"max_tokens": 32768
|
|
2159
|
+
"max_tokens": 32768,
|
|
2160
|
+
"video_support": false,
|
|
2161
|
+
"max_tools": -1,
|
|
2162
|
+
"video_input_mode": "none"
|
|
1594
2163
|
},
|
|
1595
2164
|
"granite3.2:8b": {
|
|
1596
2165
|
"max_output_tokens": 8192,
|
|
@@ -1605,7 +2174,10 @@
|
|
|
1605
2174
|
"aliases": [
|
|
1606
2175
|
"granite3.2-8b"
|
|
1607
2176
|
],
|
|
1608
|
-
"max_tokens": 32768
|
|
2177
|
+
"max_tokens": 32768,
|
|
2178
|
+
"video_support": false,
|
|
2179
|
+
"max_tools": -1,
|
|
2180
|
+
"video_input_mode": "none"
|
|
1609
2181
|
},
|
|
1610
2182
|
"granite3.2-vision:2b": {
|
|
1611
2183
|
"max_output_tokens": 8192,
|
|
@@ -1631,7 +2203,9 @@
|
|
|
1631
2203
|
"granite-vision",
|
|
1632
2204
|
"ibm-granite-vision"
|
|
1633
2205
|
],
|
|
1634
|
-
"max_tokens": 32768
|
|
2206
|
+
"max_tokens": 32768,
|
|
2207
|
+
"max_tools": -1,
|
|
2208
|
+
"video_input_mode": "frames"
|
|
1635
2209
|
},
|
|
1636
2210
|
"gemini-2.5-flash": {
|
|
1637
2211
|
"max_output_tokens": 8192,
|
|
@@ -1657,7 +2231,8 @@
|
|
|
1657
2231
|
"aliases": [
|
|
1658
2232
|
"gemini-2.5-flash-001"
|
|
1659
2233
|
],
|
|
1660
|
-
"max_tokens": 1000000
|
|
2234
|
+
"max_tokens": 1000000,
|
|
2235
|
+
"video_input_mode": "native"
|
|
1661
2236
|
},
|
|
1662
2237
|
"gemini-2.5-pro": {
|
|
1663
2238
|
"max_output_tokens": 65536,
|
|
@@ -1673,7 +2248,7 @@
|
|
|
1673
2248
|
"448x448",
|
|
1674
2249
|
"1024x1024"
|
|
1675
2250
|
],
|
|
1676
|
-
"max_image_resolution":
|
|
2251
|
+
"max_image_resolution": "768x768",
|
|
1677
2252
|
"image_tokenization_method": "gemini_vision_encoder",
|
|
1678
2253
|
"thinking_support": true,
|
|
1679
2254
|
"thinking_budget": true,
|
|
@@ -1683,7 +2258,8 @@
|
|
|
1683
2258
|
"aliases": [
|
|
1684
2259
|
"gemini-2.5-pro-001"
|
|
1685
2260
|
],
|
|
1686
|
-
"max_tokens": 1048576
|
|
2261
|
+
"max_tokens": 1048576,
|
|
2262
|
+
"video_input_mode": "native"
|
|
1687
2263
|
},
|
|
1688
2264
|
"granite3.3:2b": {
|
|
1689
2265
|
"max_output_tokens": 8192,
|
|
@@ -1698,7 +2274,10 @@
|
|
|
1698
2274
|
"aliases": [
|
|
1699
2275
|
"granite3.3-2b"
|
|
1700
2276
|
],
|
|
1701
|
-
"max_tokens": 32768
|
|
2277
|
+
"max_tokens": 32768,
|
|
2278
|
+
"video_support": false,
|
|
2279
|
+
"max_tools": -1,
|
|
2280
|
+
"video_input_mode": "none"
|
|
1702
2281
|
},
|
|
1703
2282
|
"granite3.3:8b": {
|
|
1704
2283
|
"max_output_tokens": 8192,
|
|
@@ -1713,7 +2292,10 @@
|
|
|
1713
2292
|
"aliases": [
|
|
1714
2293
|
"granite3.3-8b"
|
|
1715
2294
|
],
|
|
1716
|
-
"max_tokens": 32768
|
|
2295
|
+
"max_tokens": 32768,
|
|
2296
|
+
"video_support": false,
|
|
2297
|
+
"max_tools": -1,
|
|
2298
|
+
"video_input_mode": "none"
|
|
1717
2299
|
},
|
|
1718
2300
|
"embeddinggemma:300m": {
|
|
1719
2301
|
"max_output_tokens": 0,
|
|
@@ -1729,7 +2311,42 @@
|
|
|
1729
2311
|
"google/embeddinggemma-300m"
|
|
1730
2312
|
],
|
|
1731
2313
|
"max_tokens": 8192,
|
|
1732
|
-
"model_type": "embedding"
|
|
2314
|
+
"model_type": "embedding",
|
|
2315
|
+
"video_support": false,
|
|
2316
|
+
"max_tools": 0,
|
|
2317
|
+
"video_input_mode": "none"
|
|
2318
|
+
},
|
|
2319
|
+
"nomic-embed-text-v1.5": {
|
|
2320
|
+
"max_output_tokens": 0,
|
|
2321
|
+
"tool_support": "none",
|
|
2322
|
+
"structured_output": "none",
|
|
2323
|
+
"parallel_tools": false,
|
|
2324
|
+
"vision_support": false,
|
|
2325
|
+
"audio_support": false,
|
|
2326
|
+
"notes": "Nomic Embed v1.5 text embedding model (Matryoshka). Embedding dims commonly used: 768 (and truncation-friendly sizes like 512/256/128/64). Not a text-generative model.",
|
|
2327
|
+
"source": "Nomic AI documentation + HuggingFace model card",
|
|
2328
|
+
"canonical_name": "nomic-embed-text-v1.5",
|
|
2329
|
+
"aliases": [
|
|
2330
|
+
"nomic-ai/nomic-embed-text-v1.5",
|
|
2331
|
+
"nomic-embed-text-v1.5",
|
|
2332
|
+
"text-embedding-nomic-embed-text-v1.5",
|
|
2333
|
+
"text-embedding-nomic-embed-text-v1.5@q6_k",
|
|
2334
|
+
"nomic-embed-text-v1.5@q6_k"
|
|
2335
|
+
],
|
|
2336
|
+
"max_tokens": 8192,
|
|
2337
|
+
"model_type": "embedding",
|
|
2338
|
+
"embedding_dimension": 768,
|
|
2339
|
+
"matryoshka_dims": [
|
|
2340
|
+
768,
|
|
2341
|
+
512,
|
|
2342
|
+
256,
|
|
2343
|
+
128,
|
|
2344
|
+
64
|
|
2345
|
+
],
|
|
2346
|
+
"embedding_support": true,
|
|
2347
|
+
"video_support": false,
|
|
2348
|
+
"max_tools": 0,
|
|
2349
|
+
"video_input_mode": "none"
|
|
1733
2350
|
},
|
|
1734
2351
|
"blip-image-captioning-base": {
|
|
1735
2352
|
"max_output_tokens": 512,
|
|
@@ -1754,7 +2371,9 @@
|
|
|
1754
2371
|
"aliases": [
|
|
1755
2372
|
"Salesforce/blip-image-captioning-base"
|
|
1756
2373
|
],
|
|
1757
|
-
"max_tokens": 2048
|
|
2374
|
+
"max_tokens": 2048,
|
|
2375
|
+
"max_tools": 0,
|
|
2376
|
+
"video_input_mode": "frames"
|
|
1758
2377
|
},
|
|
1759
2378
|
"glyph": {
|
|
1760
2379
|
"max_output_tokens": 8192,
|
|
@@ -1780,7 +2399,7 @@
|
|
|
1780
2399
|
"conversation_template": {
|
|
1781
2400
|
"system_prefix": "<|system|>\n",
|
|
1782
2401
|
"system_suffix": "\n",
|
|
1783
|
-
"user_prefix": "<|user|>\n",
|
|
2402
|
+
"user_prefix": "<|user|>\n",
|
|
1784
2403
|
"user_suffix": "\n",
|
|
1785
2404
|
"assistant_prefix": "<|assistant|>\n",
|
|
1786
2405
|
"assistant_suffix": "\n"
|
|
@@ -1798,7 +2417,9 @@
|
|
|
1798
2417
|
"max_tokens": 131072,
|
|
1799
2418
|
"license": "MIT",
|
|
1800
2419
|
"arxiv": "2510.17800",
|
|
1801
|
-
"repository": "https://github.com/thu-coai/Glyph"
|
|
2420
|
+
"repository": "https://github.com/thu-coai/Glyph",
|
|
2421
|
+
"max_tools": -1,
|
|
2422
|
+
"video_input_mode": "frames"
|
|
1802
2423
|
},
|
|
1803
2424
|
"glm-4.1v-9b-base": {
|
|
1804
2425
|
"max_output_tokens": 8192,
|
|
@@ -1828,7 +2449,9 @@
|
|
|
1828
2449
|
"aliases": [
|
|
1829
2450
|
"zai-org/GLM-4.1V-9B-Base"
|
|
1830
2451
|
],
|
|
1831
|
-
"max_tokens": 131072
|
|
2452
|
+
"max_tokens": 131072,
|
|
2453
|
+
"max_tools": -1,
|
|
2454
|
+
"video_input_mode": "frames"
|
|
1832
2455
|
},
|
|
1833
2456
|
"glm-4.1v-9b-thinking": {
|
|
1834
2457
|
"max_output_tokens": 8192,
|
|
@@ -1865,7 +2488,9 @@
|
|
|
1865
2488
|
"glm4.1v-9b-thinking"
|
|
1866
2489
|
],
|
|
1867
2490
|
"max_tokens": 65536,
|
|
1868
|
-
"arxiv": "2507.01006"
|
|
2491
|
+
"arxiv": "2507.01006",
|
|
2492
|
+
"max_tools": -1,
|
|
2493
|
+
"video_input_mode": "frames"
|
|
1869
2494
|
},
|
|
1870
2495
|
"mistral-small-3.1-24b-instruct": {
|
|
1871
2496
|
"max_output_tokens": 8192,
|
|
@@ -1888,7 +2513,9 @@
|
|
|
1888
2513
|
],
|
|
1889
2514
|
"max_tokens": 131072,
|
|
1890
2515
|
"total_parameters": "24B",
|
|
1891
|
-
"release_date": "2025-03-17"
|
|
2516
|
+
"release_date": "2025-03-17",
|
|
2517
|
+
"max_tools": -1,
|
|
2518
|
+
"video_input_mode": "frames"
|
|
1892
2519
|
},
|
|
1893
2520
|
"mistral-small-3.2-24b-instruct": {
|
|
1894
2521
|
"max_output_tokens": 8192,
|
|
@@ -1913,7 +2540,9 @@
|
|
|
1913
2540
|
],
|
|
1914
2541
|
"max_tokens": 131072,
|
|
1915
2542
|
"total_parameters": "24B",
|
|
1916
|
-
"release_date": "2025-06-01"
|
|
2543
|
+
"release_date": "2025-06-01",
|
|
2544
|
+
"max_tools": -1,
|
|
2545
|
+
"video_input_mode": "frames"
|
|
1917
2546
|
},
|
|
1918
2547
|
"llama-4-scout": {
|
|
1919
2548
|
"max_output_tokens": 8192,
|
|
@@ -1942,7 +2571,9 @@
|
|
|
1942
2571
|
"max_tokens": 10000000,
|
|
1943
2572
|
"release_date": "2025-04-05",
|
|
1944
2573
|
"image_patch_size": 14,
|
|
1945
|
-
"max_image_tokens": 6400
|
|
2574
|
+
"max_image_tokens": 6400,
|
|
2575
|
+
"max_tools": -1,
|
|
2576
|
+
"video_input_mode": "frames"
|
|
1946
2577
|
},
|
|
1947
2578
|
"llama-4-maverick": {
|
|
1948
2579
|
"max_output_tokens": 8192,
|
|
@@ -1970,7 +2601,9 @@
|
|
|
1970
2601
|
"max_tokens": 1000000,
|
|
1971
2602
|
"release_date": "2025-04-05",
|
|
1972
2603
|
"image_patch_size": 14,
|
|
1973
|
-
"max_image_tokens": 6400
|
|
2604
|
+
"max_image_tokens": 6400,
|
|
2605
|
+
"max_tools": -1,
|
|
2606
|
+
"video_input_mode": "frames"
|
|
1974
2607
|
},
|
|
1975
2608
|
"llama-4-behemoth": {
|
|
1976
2609
|
"max_output_tokens": 8192,
|
|
@@ -1999,7 +2632,9 @@
|
|
|
1999
2632
|
"release_date": "2025-04-05",
|
|
2000
2633
|
"status": "announced",
|
|
2001
2634
|
"image_patch_size": 14,
|
|
2002
|
-
"max_image_tokens": 6400
|
|
2635
|
+
"max_image_tokens": 6400,
|
|
2636
|
+
"max_tools": -1,
|
|
2637
|
+
"video_input_mode": "frames"
|
|
2003
2638
|
},
|
|
2004
2639
|
"minimax-m2": {
|
|
2005
2640
|
"max_output_tokens": 131072,
|
|
@@ -2021,7 +2656,6 @@
|
|
|
2021
2656
|
"aliases": [
|
|
2022
2657
|
"MiniMaxAI/MiniMax-M2",
|
|
2023
2658
|
"mlx-community/minimax-m2",
|
|
2024
|
-
"mlx-community/MiniMax-M2",
|
|
2025
2659
|
"unsloth/MiniMax-M2-GGUF",
|
|
2026
2660
|
"minimax-m2-230b",
|
|
2027
2661
|
"minimax-m2-10b-active",
|
|
@@ -2035,7 +2669,9 @@
|
|
|
2035
2669
|
"top_p": 0.95,
|
|
2036
2670
|
"top_k": 40
|
|
2037
2671
|
},
|
|
2038
|
-
"default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax."
|
|
2672
|
+
"default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax.",
|
|
2673
|
+
"max_tools": -1,
|
|
2674
|
+
"video_input_mode": "none"
|
|
2039
2675
|
},
|
|
2040
2676
|
"minimax-m2.1": {
|
|
2041
2677
|
"max_output_tokens": 131072,
|
|
@@ -2058,7 +2694,6 @@
|
|
|
2058
2694
|
"canonical_name": "minimax-m2.1",
|
|
2059
2695
|
"aliases": [
|
|
2060
2696
|
"MiniMaxAI/MiniMax-M2.1",
|
|
2061
|
-
"minimaxai/minimax-m2.1",
|
|
2062
2697
|
"minimax-m2.1-229b",
|
|
2063
2698
|
"minimax-m2.1-10b-active",
|
|
2064
2699
|
"minimax/minimax-m2.1"
|
|
@@ -2095,14 +2730,16 @@
|
|
|
2095
2730
|
"SciCode": 41.0,
|
|
2096
2731
|
"IFBench": 70.0,
|
|
2097
2732
|
"AA-LCR": 62.0,
|
|
2098
|
-
"
|
|
2733
|
+
"\u03c4\u00b2-Bench Telecom": 87.0
|
|
2099
2734
|
},
|
|
2100
2735
|
"inference_parameters": {
|
|
2101
2736
|
"temperature": 1.0,
|
|
2102
2737
|
"top_p": 0.95,
|
|
2103
2738
|
"top_k": 40
|
|
2104
2739
|
},
|
|
2105
|
-
"default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
|
|
2740
|
+
"default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
|
|
2741
|
+
"max_tools": -1,
|
|
2742
|
+
"video_input_mode": "none"
|
|
2106
2743
|
},
|
|
2107
2744
|
"glm-4.6v": {
|
|
2108
2745
|
"max_output_tokens": 16384,
|
|
@@ -2134,7 +2771,10 @@
|
|
|
2134
2771
|
"end": "<|end_of_box|>"
|
|
2135
2772
|
},
|
|
2136
2773
|
"thinking_control": "/nothink",
|
|
2137
|
-
"thinking_tags": [
|
|
2774
|
+
"thinking_tags": [
|
|
2775
|
+
"<think>",
|
|
2776
|
+
"</think>"
|
|
2777
|
+
],
|
|
2138
2778
|
"notes": "GLM-4.6V foundation model (106B params) for cloud deployment. Native multimodal function calling with vision-driven tool use using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Supports interleaved image-text generation, 128K context, multimodal document understanding, and frontend replication from screenshots. Generates reasoning in 'reasoning_content' field or <think></think> tags. Achieves SoTA performance in visual understanding among similar parameter scales. Thinking can be disabled with '/nothink' suffix in user message. See: https://github.com/zai-org/GLM-V",
|
|
2139
2779
|
"source": "HuggingFace zai-org/GLM-4.6V and GLM-V GitHub",
|
|
2140
2780
|
"canonical_name": "glm-4.6v",
|
|
@@ -2147,7 +2787,9 @@
|
|
|
2147
2787
|
"max_tokens": 128000,
|
|
2148
2788
|
"release_date": "2025-05-07",
|
|
2149
2789
|
"arxiv": "2507.01006",
|
|
2150
|
-
"license": "MIT"
|
|
2790
|
+
"license": "MIT",
|
|
2791
|
+
"max_tools": -1,
|
|
2792
|
+
"video_input_mode": "frames"
|
|
2151
2793
|
},
|
|
2152
2794
|
"glm-4.6v-flash": {
|
|
2153
2795
|
"max_output_tokens": 8192,
|
|
@@ -2178,7 +2820,10 @@
|
|
|
2178
2820
|
"end": "<|end_of_box|>"
|
|
2179
2821
|
},
|
|
2180
2822
|
"thinking_control": "/nothink",
|
|
2181
|
-
"thinking_tags": [
|
|
2823
|
+
"thinking_tags": [
|
|
2824
|
+
"<think>",
|
|
2825
|
+
"</think>"
|
|
2826
|
+
],
|
|
2182
2827
|
"notes": "GLM-4.6V-Flash lightweight model (9B params) optimized for local deployment and low-latency applications. Maintains native multimodal function calling using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Generates reasoning in 'reasoning_content' field or <think></think> tags. Ideal for edge and resource-constrained environments while preserving core GLM-4.6V capabilities. Thinking can be disabled with '/nothink' suffix. See: https://github.com/zai-org/GLM-V",
|
|
2183
2828
|
"source": "HuggingFace zai-org/GLM-4.6V-Flash and GLM-V GitHub",
|
|
2184
2829
|
"canonical_name": "glm-4.6v-flash",
|
|
@@ -2191,7 +2836,9 @@
|
|
|
2191
2836
|
"max_tokens": 128000,
|
|
2192
2837
|
"release_date": "2025-05-07",
|
|
2193
2838
|
"arxiv": "2507.01006",
|
|
2194
|
-
"license": "MIT"
|
|
2839
|
+
"license": "MIT",
|
|
2840
|
+
"max_tools": -1,
|
|
2841
|
+
"video_input_mode": "frames"
|
|
2195
2842
|
},
|
|
2196
2843
|
"glm-4.7": {
|
|
2197
2844
|
"max_output_tokens": 32768,
|
|
@@ -2205,14 +2852,18 @@
|
|
|
2205
2852
|
"architecture": "mixture_of_experts",
|
|
2206
2853
|
"total_parameters": "358B",
|
|
2207
2854
|
"thinking_paradigm": "multi_mode",
|
|
2208
|
-
"thinking_modes": [
|
|
2855
|
+
"thinking_modes": [
|
|
2856
|
+
"interleaved_thinking",
|
|
2857
|
+
"preserved_thinking",
|
|
2858
|
+
"turn_level_thinking"
|
|
2859
|
+
],
|
|
2209
2860
|
"native_function_calling": true,
|
|
2210
2861
|
"agentic_coding": true,
|
|
2211
2862
|
"terminal_tasks": true,
|
|
2212
2863
|
"web_browsing": true,
|
|
2213
2864
|
"tool_calling_parser": "glm47",
|
|
2214
2865
|
"reasoning_parser": "glm45",
|
|
2215
|
-
"notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (
|
|
2866
|
+
"notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (\u03c4\u00b2-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
|
|
2216
2867
|
"source": "HuggingFace zai-org/GLM-4.7 and GLM technical blog",
|
|
2217
2868
|
"canonical_name": "glm-4.7",
|
|
2218
2869
|
"aliases": [
|
|
@@ -2240,7 +2891,7 @@
|
|
|
2240
2891
|
"BrowseComp": 52.0,
|
|
2241
2892
|
"BrowseComp (w/ Context Manage)": 67.5,
|
|
2242
2893
|
"BrowseComp-Zh": 66.6,
|
|
2243
|
-
"
|
|
2894
|
+
"\u03c4\u00b2-Bench": 87.4,
|
|
2244
2895
|
"MMLU-Pro": 84.3,
|
|
2245
2896
|
"GPQA-Diamond": 85.7
|
|
2246
2897
|
},
|
|
@@ -2249,7 +2900,9 @@
|
|
|
2249
2900
|
"top_p": 0.95,
|
|
2250
2901
|
"enable_thinking": true,
|
|
2251
2902
|
"clear_thinking": false
|
|
2252
|
-
}
|
|
2903
|
+
},
|
|
2904
|
+
"max_tools": -1,
|
|
2905
|
+
"video_input_mode": "none"
|
|
2253
2906
|
},
|
|
2254
2907
|
"devstral-small-2-24b": {
|
|
2255
2908
|
"max_output_tokens": 16384,
|
|
@@ -2269,12 +2922,17 @@
|
|
|
2269
2922
|
"canonical_name": "devstral-small-2-24b",
|
|
2270
2923
|
"aliases": [
|
|
2271
2924
|
"mistralai/Devstral-Small-2-24B-Instruct-2512",
|
|
2925
|
+
"mistralai/devstral-small-2-2512",
|
|
2926
|
+
"mistralai/devstral-small-2",
|
|
2272
2927
|
"devstral-small-2",
|
|
2928
|
+
"devstral-small-2-2512",
|
|
2273
2929
|
"devstral-small-2-24b-instruct"
|
|
2274
2930
|
],
|
|
2275
2931
|
"max_tokens": 262144,
|
|
2276
2932
|
"release_date": "2025-12",
|
|
2277
|
-
"license": "Apache-2.0"
|
|
2933
|
+
"license": "Apache-2.0",
|
|
2934
|
+
"max_tools": -1,
|
|
2935
|
+
"video_input_mode": "none"
|
|
2278
2936
|
},
|
|
2279
2937
|
"devstral-2-123b": {
|
|
2280
2938
|
"max_output_tokens": 16384,
|
|
@@ -2294,12 +2952,15 @@
|
|
|
2294
2952
|
"canonical_name": "devstral-2-123b",
|
|
2295
2953
|
"aliases": [
|
|
2296
2954
|
"mistralai/Devstral-2-123B-Instruct-2512",
|
|
2955
|
+
"mistralai/devstral-2",
|
|
2297
2956
|
"devstral-2",
|
|
2298
2957
|
"devstral-2-123b-instruct"
|
|
2299
2958
|
],
|
|
2300
2959
|
"max_tokens": 262144,
|
|
2301
2960
|
"release_date": "2025-12",
|
|
2302
|
-
"license": "Modified-MIT"
|
|
2961
|
+
"license": "Modified-MIT",
|
|
2962
|
+
"max_tools": -1,
|
|
2963
|
+
"video_input_mode": "none"
|
|
2303
2964
|
},
|
|
2304
2965
|
"qwen3-235b-a22b-2507": {
|
|
2305
2966
|
"max_output_tokens": 16384,
|
|
@@ -2327,7 +2988,9 @@
|
|
|
2327
2988
|
"max_tokens": 262144,
|
|
2328
2989
|
"release_date": "2025-07",
|
|
2329
2990
|
"arxiv": "2505.09388",
|
|
2330
|
-
"license": "Apache-2.0"
|
|
2991
|
+
"license": "Apache-2.0",
|
|
2992
|
+
"max_tools": -1,
|
|
2993
|
+
"video_input_mode": "none"
|
|
2331
2994
|
},
|
|
2332
2995
|
"qwen3-235b-a22b-2507-fp8": {
|
|
2333
2996
|
"max_output_tokens": 16384,
|
|
@@ -2356,7 +3019,9 @@
|
|
|
2356
3019
|
"max_tokens": 262144,
|
|
2357
3020
|
"release_date": "2025-07",
|
|
2358
3021
|
"arxiv": "2505.09388",
|
|
2359
|
-
"license": "Apache-2.0"
|
|
3022
|
+
"license": "Apache-2.0",
|
|
3023
|
+
"max_tools": -1,
|
|
3024
|
+
"video_input_mode": "none"
|
|
2360
3025
|
},
|
|
2361
3026
|
"granite-4.0-h-tiny": {
|
|
2362
3027
|
"max_output_tokens": 16384,
|
|
@@ -2388,7 +3053,9 @@
|
|
|
2388
3053
|
],
|
|
2389
3054
|
"max_tokens": 131072,
|
|
2390
3055
|
"release_date": "2025-10-02",
|
|
2391
|
-
"license": "Apache-2.0"
|
|
3056
|
+
"license": "Apache-2.0",
|
|
3057
|
+
"max_tools": -1,
|
|
3058
|
+
"video_input_mode": "none"
|
|
2392
3059
|
},
|
|
2393
3060
|
"gpt-oss-20b": {
|
|
2394
3061
|
"max_output_tokens": 8192,
|
|
@@ -2406,7 +3073,11 @@
|
|
|
2406
3073
|
"tensor_type": "BF16+U8",
|
|
2407
3074
|
"quantization_method": "MXFP4",
|
|
2408
3075
|
"response_format": "harmony",
|
|
2409
|
-
"reasoning_levels": [
|
|
3076
|
+
"reasoning_levels": [
|
|
3077
|
+
"low",
|
|
3078
|
+
"medium",
|
|
3079
|
+
"high"
|
|
3080
|
+
],
|
|
2410
3081
|
"agentic_capabilities": true,
|
|
2411
3082
|
"function_calling": true,
|
|
2412
3083
|
"web_browsing": true,
|
|
@@ -2422,7 +3093,9 @@
|
|
|
2422
3093
|
"max_tokens": 128000,
|
|
2423
3094
|
"release_date": "2025-08",
|
|
2424
3095
|
"arxiv": "2508.10925",
|
|
2425
|
-
"license": "Apache-2.0"
|
|
3096
|
+
"license": "Apache-2.0",
|
|
3097
|
+
"max_tools": -1,
|
|
3098
|
+
"video_input_mode": "none"
|
|
2426
3099
|
},
|
|
2427
3100
|
"gpt-oss-120b": {
|
|
2428
3101
|
"max_output_tokens": 8192,
|
|
@@ -2440,7 +3113,11 @@
|
|
|
2440
3113
|
"tensor_type": "BF16+U8",
|
|
2441
3114
|
"quantization_method": "MXFP4",
|
|
2442
3115
|
"response_format": "harmony",
|
|
2443
|
-
"reasoning_levels": [
|
|
3116
|
+
"reasoning_levels": [
|
|
3117
|
+
"low",
|
|
3118
|
+
"medium",
|
|
3119
|
+
"high"
|
|
3120
|
+
],
|
|
2444
3121
|
"agentic_capabilities": true,
|
|
2445
3122
|
"function_calling": true,
|
|
2446
3123
|
"web_browsing": true,
|
|
@@ -2457,7 +3134,9 @@
|
|
|
2457
3134
|
"max_tokens": 128000,
|
|
2458
3135
|
"release_date": "2025-08",
|
|
2459
3136
|
"arxiv": "2508.10925",
|
|
2460
|
-
"license": "Apache-2.0"
|
|
3137
|
+
"license": "Apache-2.0",
|
|
3138
|
+
"max_tools": -1,
|
|
3139
|
+
"video_input_mode": "none"
|
|
2461
3140
|
},
|
|
2462
3141
|
"qwen3-vl-2b": {
|
|
2463
3142
|
"max_output_tokens": 8192,
|
|
@@ -2484,7 +3163,11 @@
|
|
|
2484
3163
|
"spatial_perception": true,
|
|
2485
3164
|
"document_understanding": true,
|
|
2486
3165
|
"ocr_languages": 32,
|
|
2487
|
-
"architecture_updates": [
|
|
3166
|
+
"architecture_updates": [
|
|
3167
|
+
"Interleaved-MRoPE",
|
|
3168
|
+
"DeepStack",
|
|
3169
|
+
"Text-Timestamp-Alignment"
|
|
3170
|
+
],
|
|
2488
3171
|
"notes": "Qwen3-VL 2B dense model with 256K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
|
|
2489
3172
|
"source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
|
|
2490
3173
|
"canonical_name": "qwen3-vl-2b",
|
|
@@ -2495,7 +3178,9 @@
|
|
|
2495
3178
|
"max_tokens": 262144,
|
|
2496
3179
|
"release_date": "2025-05",
|
|
2497
3180
|
"arxiv": "2505.09388",
|
|
2498
|
-
"license": "Apache-2.0"
|
|
3181
|
+
"license": "Apache-2.0",
|
|
3182
|
+
"max_tools": -1,
|
|
3183
|
+
"video_input_mode": "native"
|
|
2499
3184
|
},
|
|
2500
3185
|
"qwen3-vl-4b": {
|
|
2501
3186
|
"max_output_tokens": 8192,
|
|
@@ -2523,7 +3208,11 @@
|
|
|
2523
3208
|
"document_understanding": true,
|
|
2524
3209
|
"ocr_languages": 32,
|
|
2525
3210
|
"total_parameters": "4.83B",
|
|
2526
|
-
"architecture_updates": [
|
|
3211
|
+
"architecture_updates": [
|
|
3212
|
+
"Interleaved-MRoPE",
|
|
3213
|
+
"DeepStack",
|
|
3214
|
+
"Text-Timestamp-Alignment"
|
|
3215
|
+
],
|
|
2527
3216
|
"notes": "Qwen3-VL 4B dense model (4.83B params) with 256K context, optimized for LMStudio. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding. FP8 checkpoints available.",
|
|
2528
3217
|
"source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
|
|
2529
3218
|
"canonical_name": "qwen3-vl-4b",
|
|
@@ -2535,7 +3224,9 @@
|
|
|
2535
3224
|
"max_tokens": 262144,
|
|
2536
3225
|
"release_date": "2025-05",
|
|
2537
3226
|
"arxiv": "2505.09388",
|
|
2538
|
-
"license": "Apache-2.0"
|
|
3227
|
+
"license": "Apache-2.0",
|
|
3228
|
+
"max_tools": -1,
|
|
3229
|
+
"video_input_mode": "native"
|
|
2539
3230
|
},
|
|
2540
3231
|
"qwen3-vl-8b": {
|
|
2541
3232
|
"max_output_tokens": 8192,
|
|
@@ -2563,7 +3254,11 @@
|
|
|
2563
3254
|
"document_understanding": true,
|
|
2564
3255
|
"ocr_languages": 32,
|
|
2565
3256
|
"total_parameters": "8.77B",
|
|
2566
|
-
"architecture_updates": [
|
|
3257
|
+
"architecture_updates": [
|
|
3258
|
+
"Interleaved-MRoPE",
|
|
3259
|
+
"DeepStack",
|
|
3260
|
+
"Text-Timestamp-Alignment"
|
|
3261
|
+
],
|
|
2567
3262
|
"notes": "Qwen3-VL 8B dense model (8.77B params) with 256K context, optimized for LMStudio. Most powerful vision-language model in Qwen series. Visual agent for GUI operation, visual coding, advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. FP8 checkpoints available.",
|
|
2568
3263
|
"source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
|
|
2569
3264
|
"canonical_name": "qwen3-vl-8b",
|
|
@@ -2575,7 +3270,9 @@
|
|
|
2575
3270
|
"max_tokens": 262144,
|
|
2576
3271
|
"release_date": "2025-05",
|
|
2577
3272
|
"arxiv": "2505.09388",
|
|
2578
|
-
"license": "Apache-2.0"
|
|
3273
|
+
"license": "Apache-2.0",
|
|
3274
|
+
"max_tools": -1,
|
|
3275
|
+
"video_input_mode": "native"
|
|
2579
3276
|
},
|
|
2580
3277
|
"qwen3-vl-30b-a3b": {
|
|
2581
3278
|
"max_output_tokens": 8192,
|
|
@@ -2605,7 +3302,11 @@
|
|
|
2605
3302
|
"architecture": "mixture_of_experts",
|
|
2606
3303
|
"total_parameters": "30.5B",
|
|
2607
3304
|
"active_parameters": "3.3B",
|
|
2608
|
-
"architecture_updates": [
|
|
3305
|
+
"architecture_updates": [
|
|
3306
|
+
"Interleaved-MRoPE",
|
|
3307
|
+
"DeepStack",
|
|
3308
|
+
"Text-Timestamp-Alignment"
|
|
3309
|
+
],
|
|
2609
3310
|
"notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model in the series. 128K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
|
|
2610
3311
|
"source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
|
|
2611
3312
|
"canonical_name": "qwen3-vl-30b-a3b",
|
|
@@ -2617,7 +3318,9 @@
|
|
|
2617
3318
|
"max_tokens": 131072,
|
|
2618
3319
|
"release_date": "2025-05",
|
|
2619
3320
|
"arxiv": "2505.09388",
|
|
2620
|
-
"license": "Apache-2.0"
|
|
3321
|
+
"license": "Apache-2.0",
|
|
3322
|
+
"max_tools": -1,
|
|
3323
|
+
"video_input_mode": "native"
|
|
2621
3324
|
},
|
|
2622
3325
|
"qwen3-vl-235b-a22b": {
|
|
2623
3326
|
"max_output_tokens": 8192,
|
|
@@ -2649,7 +3352,11 @@
|
|
|
2649
3352
|
"active_parameters": "22B",
|
|
2650
3353
|
"experts": 128,
|
|
2651
3354
|
"experts_activated": 8,
|
|
2652
|
-
"architecture_updates": [
|
|
3355
|
+
"architecture_updates": [
|
|
3356
|
+
"Interleaved-MRoPE",
|
|
3357
|
+
"DeepStack",
|
|
3358
|
+
"Text-Timestamp-Alignment"
|
|
3359
|
+
],
|
|
2653
3360
|
"notes": "Qwen3-VL 235B MoE model (235B total/22B active, 128 experts/8 activated), flagship vision model. 256K context expandable to 1M. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs. Superior visual perception and reasoning.",
|
|
2654
3361
|
"source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
|
|
2655
3362
|
"canonical_name": "qwen3-vl-235b-a22b",
|
|
@@ -2660,7 +3367,9 @@
|
|
|
2660
3367
|
"max_tokens": 262144,
|
|
2661
3368
|
"release_date": "2025-05",
|
|
2662
3369
|
"arxiv": "2505.09388",
|
|
2663
|
-
"license": "Apache-2.0"
|
|
3370
|
+
"license": "Apache-2.0",
|
|
3371
|
+
"max_tools": -1,
|
|
3372
|
+
"video_input_mode": "native"
|
|
2664
3373
|
},
|
|
2665
3374
|
"nemotron-3-nano-30b-a3b": {
|
|
2666
3375
|
"max_output_tokens": 8192,
|
|
@@ -2686,7 +3395,14 @@
|
|
|
2686
3395
|
"agentic_capabilities": true,
|
|
2687
3396
|
"function_calling": true,
|
|
2688
3397
|
"tool_calling_format": "json",
|
|
2689
|
-
"languages": [
|
|
3398
|
+
"languages": [
|
|
3399
|
+
"English",
|
|
3400
|
+
"German",
|
|
3401
|
+
"Spanish",
|
|
3402
|
+
"French",
|
|
3403
|
+
"Italian",
|
|
3404
|
+
"Japanese"
|
|
3405
|
+
],
|
|
2690
3406
|
"notes": "NVIDIA Nemotron-3-Nano hybrid MoE model (30B total/3.5B active, 128 experts/6 activated + 1 shared). Combines 23 Mamba-2 layers with 6 Attention layers. Unified model for reasoning and non-reasoning tasks with configurable reasoning mode. Generates reasoning trace before final response. 256K context extendable to 1M with YaRN. Strong performance on AIME25 (99.2% with tools), SWE-Bench (38.8%), MiniF2F (50.0% pass@1). Native tool calling via chatml-function-calling format. Commercial use ready.",
|
|
2691
3407
|
"source": "NVIDIA Nemotron HuggingFace and technical report",
|
|
2692
3408
|
"canonical_name": "nemotron-3-nano-30b-a3b",
|
|
@@ -2720,7 +3436,9 @@
|
|
|
2720
3436
|
"Scale AI Multi Challenge": 38.5,
|
|
2721
3437
|
"Arena-Hard-V2 (Hard Prompt)": 72.1,
|
|
2722
3438
|
"Arena-Hard-V2 (Average)": 67.7
|
|
2723
|
-
}
|
|
3439
|
+
},
|
|
3440
|
+
"max_tools": -1,
|
|
3441
|
+
"video_input_mode": "none"
|
|
2724
3442
|
}
|
|
2725
3443
|
},
|
|
2726
3444
|
"tool_support_levels": {
|
|
@@ -2804,7 +3522,9 @@
|
|
|
2804
3522
|
"source": "AbstractCore generic fallback",
|
|
2805
3523
|
"canonical_name": "generic_vision_model",
|
|
2806
3524
|
"aliases": [],
|
|
2807
|
-
"max_tokens": 32768
|
|
3525
|
+
"max_tokens": 32768,
|
|
3526
|
+
"max_tools": -1,
|
|
3527
|
+
"video_input_mode": "frames"
|
|
2808
3528
|
},
|
|
2809
3529
|
"default_capabilities": {
|
|
2810
3530
|
"max_output_tokens": 4096,
|
|
@@ -2817,6 +3537,8 @@
|
|
|
2817
3537
|
"thinking_budget": false,
|
|
2818
3538
|
"video_support": false,
|
|
2819
3539
|
"fim_support": false,
|
|
2820
|
-
"max_tokens": 16384
|
|
3540
|
+
"max_tokens": 16384,
|
|
3541
|
+
"max_tools": 0,
|
|
3542
|
+
"video_input_mode": "none"
|
|
2821
3543
|
}
|
|
2822
3544
|
}
|