abstractcore 2.6.9__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +803 -141
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/__init__.py +2 -2
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +379 -93
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +540 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +116 -30
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +46 -24
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2443 -742
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +894 -159
- abstractcore/tools/registry.py +122 -18
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
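Most of the behavioural surface of this release lives in the rewritten model_capabilities.json, whose per-model entries are diffed hunk by hunk below. For orientation, here is a minimal sketch of how a capabilities file with this shape can be queried; the load_capabilities/resolve helpers, the "models" key fallback, and the file path are illustrative assumptions, not AbstractCore's actual API:

    import json
    from pathlib import Path

    # Assumed location inside the wheel; adjust to wherever the asset is installed.
    CAPS_PATH = Path("abstractcore/assets/model_capabilities.json")

    def load_capabilities(path: Path = CAPS_PATH) -> dict:
        """Load the raw capabilities mapping from disk."""
        return json.loads(path.read_text(encoding="utf-8"))

    def resolve(caps: dict, name: str) -> dict | None:
        """Resolve a model name to its entry via canonical names and aliases."""
        models = caps.get("models", caps)  # assumption: entries may sit under a "models" key
        if name in models:
            return models[name]
        for entry in models.values():
            if isinstance(entry, dict) and name in entry.get("aliases", []):
                return entry
        return None

    caps = load_capabilities()
    entry = resolve(caps, "claude-sonnet-4-5-20250929")
    if entry:
        print(entry["canonical_name"], entry["tool_support"], entry["max_output_tokens"])

The field names used here ("aliases", "canonical_name", "tool_support", "max_output_tokens") all appear in the hunks that follow.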
@@ -124,7 +124,7 @@
     },
     "o1": {
       "max_output_tokens": 32768,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -137,7 +137,7 @@
     },
     "o1-mini": {
       "max_output_tokens": 65536,
-      "tool_support": "
+      "tool_support": "prompted",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
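The o1 and o1-mini hunks above illustrate the two "tool_support" levels this file distinguishes ("native" vs. "prompted"). A hedged sketch of how a caller might branch on that level; prepare_tool_request and the prompt wording are hypothetical, not code from the package:

    def prepare_tool_request(entry: dict, messages: list[dict], tools: list[dict]) -> dict:
        """Dispatch on a capability entry's tool_support level (illustrative only)."""
        level = entry.get("tool_support", "none")
        if level == "native":
            # Provider accepts structured tool schemas directly.
            return {"messages": messages, "tools": tools}
        if level == "prompted":
            # Fall back to describing the tools in a system message.
            tool_text = "\n".join(
                f"- {t['name']}: {t.get('description', '')}" for t in tools
            )
            system = {"role": "system",
                      "content": "You may call these tools by replying with JSON:\n" + tool_text}
            return {"messages": [system, *messages]}
        return {"messages": messages}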
@@ -282,6 +282,26 @@
       "aliases": [],
       "max_tokens": 200000
     },
+    "claude-haiku-4-5": {
+      "max_output_tokens": 64000,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "max_tools": -1,
+      "vision_support": true,
+      "image_resolutions": [
+        "up to 1568x1568"
+      ],
+      "audio_support": false,
+      "notes": "Claude Haiku 4.5 series. Anthropic API enforces a 64K max output token cap (currently 64000).",
+      "source": "Anthropic API error cap (max_tokens <= 64000)",
+      "canonical_name": "claude-haiku-4-5",
+      "aliases": [
+        "claude-haiku-4-5-20251001",
+        "anthropic/claude-haiku-4-5"
+      ],
+      "max_tokens": 200000
+    },
     "claude-4-opus": {
       "max_output_tokens": 4096,
       "tool_support": "native",
@@ -334,7 +354,7 @@
       "max_tokens": 200000
     },
     "claude-4.5-sonnet": {
-      "max_output_tokens":
+      "max_output_tokens": 64000,
       "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": true,
@@ -344,15 +364,39 @@
         "up to 1568x1568"
       ],
       "audio_support": false,
-      "notes": "Claude 4.5 Sonnet
-      "source": "Anthropic
+      "notes": "Claude 4.5 Sonnet. Anthropic API enforces a 64K max output token cap (currently 64000).",
+      "source": "Anthropic API error cap (max_tokens <= 64000)",
       "canonical_name": "claude-4.5-sonnet",
-      "aliases": [
+      "aliases": [
+        "claude-sonnet-4-5",
+        "claude-sonnet-4-5-20250929",
+        "anthropic/claude-sonnet-4-5"
+      ],
+      "max_tokens": 200000
+    },
+    "claude-opus-4-5": {
+      "max_output_tokens": 64000,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "max_tools": -1,
+      "vision_support": true,
+      "image_resolutions": [
+        "up to 1568x1568"
+      ],
+      "audio_support": false,
+      "notes": "Claude Opus 4.5. Anthropic API enforces a 64K max output token cap (currently 64000).",
+      "source": "Anthropic API error cap (max_tokens <= 64000)",
+      "canonical_name": "claude-opus-4-5",
+      "aliases": [
+        "claude-opus-4-5-20251101",
+        "anthropic/claude-opus-4-5"
+      ],
       "max_tokens": 200000
     },
     "llama-3.2-1b": {
       "max_output_tokens": 2048,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -365,7 +409,7 @@
     },
     "llama-3.2-3b": {
       "max_output_tokens": 2048,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -394,7 +438,7 @@
     },
     "llama-3.3-70b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": true,
       "vision_support": false,
@@ -454,15 +498,12 @@
       "notes": "Multimodal with early fusion, 109B total params (MoE)",
       "source": "Meta announcement",
       "canonical_name": "llama-4",
-      "aliases": [
-        "llama4-17b-scout-16e-instruct",
-        "llama-4-17b-scout-16e-instruct"
-      ],
+      "aliases": [],
       "max_tokens": 10000000
     },
     "qwen2.5-0.5b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -475,7 +516,7 @@
     },
     "qwen2.5-1.5b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -488,7 +529,7 @@
     },
     "qwen2.5-3b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -501,7 +542,7 @@
     },
     "qwen2.5-7b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -514,7 +555,7 @@
     },
     "qwen2.5-14b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -527,7 +568,7 @@
     },
     "qwen2.5-32b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -540,7 +581,7 @@
     },
     "qwen2.5-72b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -553,7 +594,7 @@
     },
     "qwen3-0.6b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -567,7 +608,7 @@
     },
     "qwen3-1.7b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -581,7 +622,7 @@
     },
     "qwen3-4b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -595,7 +636,7 @@
     },
     "qwen3-32b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -609,7 +650,7 @@
     },
     "qwen3-30b-a3b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -623,7 +664,7 @@
     },
     "qwen3-30b-a3b-2507": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -638,17 +679,26 @@
       "max_tokens": 262144
     },
     "qwen3-coder-30b": {
-      "max_output_tokens":
+      "max_output_tokens": 65536,
       "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": true,
       "vision_support": false,
       "audio_support": false,
-      "
-      "
+      "architecture": "mixture_of_experts",
+      "total_parameters": "30.5B",
+      "active_parameters": "3.3B",
+      "experts": 128,
+      "experts_activated": 8,
+      "notes": "Code-focused MoE model (30.5B total/3.3B active, 128 experts/8 activated). Native tool support via chatml-function-calling format. Supports up to 1M tokens with YaRN extension.",
+      "source": "Qwen HuggingFace model card 2025",
       "canonical_name": "qwen3-coder-30b",
-      "aliases": [
-
+      "aliases": [
+        "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+        "qwen3-coder-30b-a3b",
+        "qwen3-coder-30b-a3b-instruct"
+      ],
+      "max_tokens": 262144
     },
     "qwen2-vl": {
       "max_output_tokens": 8192,
@@ -772,7 +822,7 @@
     },
     "phi-4": {
       "max_output_tokens": 16000,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -785,7 +835,7 @@
     },
     "mistral-7b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -797,7 +847,7 @@
     },
     "mixtral-8x7b": {
       "max_output_tokens": 32768,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -1162,7 +1212,7 @@
     },
     "qwen3": {
      "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": false,
       "vision_support": false,
@@ -1175,7 +1225,7 @@
     },
     "qwen3-14b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1189,7 +1239,7 @@
     },
     "qwen3-next-80b-a3b": {
       "max_output_tokens": 16384,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": true,
       "vision_support": false,
@@ -1278,7 +1328,7 @@
     },
     "qwen3-8b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1292,7 +1342,7 @@
     },
     "qwen3-235b-a22b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1306,7 +1356,7 @@
     },
     "qwen3-vl": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": true,
@@ -1324,90 +1374,6 @@
       "aliases": [],
       "max_tokens": 131072
     },
-    "qwen3-vl-4b": {
-      "max_output_tokens": 8192,
-      "tool_support": "prompted",
-      "structured_output": "prompted",
-      "parallel_tools": false,
-      "vision_support": true,
-      "video_support": true,
-      "audio_support": false,
-      "image_resolutions": [
-        "64x64 to 4096x4096"
-      ],
-      "max_image_resolution": "4096x4096",
-      "image_patch_size": 16,
-      "max_image_tokens": 24576,
-      "pixel_grouping": "32x32",
-      "image_tokenization_method": "patch_based_adaptive",
-      "adaptive_resolution": true,
-      "min_resolution": 64,
-      "max_resolution": 4096,
-      "vision_encoder": "ViT-based",
-      "notes": "Qwen3-VL 4B dense model with 256K context, optimized for LMStudio. Parameters: 4.83B. FP8 checkpoints available.",
-      "source": "Alibaba Qwen3-VL technical report 2025",
-      "canonical_name": "qwen3-vl-4b",
-      "aliases": [
-        "qwen/qwen3-vl-4b"
-      ],
-      "max_tokens": 262144
-    },
-    "qwen3-vl-8b": {
-      "max_output_tokens": 8192,
-      "tool_support": "prompted",
-      "structured_output": "prompted",
-      "parallel_tools": false,
-      "vision_support": true,
-      "video_support": true,
-      "audio_support": false,
-      "image_resolutions": [
-        "64x64 to 4096x4096"
-      ],
-      "max_image_resolution": "4096x4096",
-      "image_patch_size": 16,
-      "max_image_tokens": 24576,
-      "pixel_grouping": "32x32",
-      "image_tokenization_method": "patch_based_adaptive",
-      "adaptive_resolution": true,
-      "min_resolution": 64,
-      "max_resolution": 4096,
-      "vision_encoder": "ViT-based",
-      "notes": "Qwen3-VL 8B dense model with 256K context, optimized for LMStudio. Parameters: 8.77B. FP8 checkpoints available.",
-      "source": "Alibaba Qwen3-VL technical report 2025",
-      "canonical_name": "qwen3-vl-8b",
-      "aliases": [
-        "qwen/qwen3-vl-8b"
-      ],
-      "max_tokens": 262144
-    },
-    "qwen3-vl-30b": {
-      "max_output_tokens": 8192,
-      "tool_support": "prompted",
-      "structured_output": "prompted",
-      "parallel_tools": false,
-      "vision_support": true,
-      "video_support": true,
-      "audio_support": false,
-      "image_resolutions": [
-        "64x64 to 4096x4096"
-      ],
-      "max_image_resolution": "4096x4096",
-      "image_patch_size": 16,
-      "max_image_tokens": 24576,
-      "pixel_grouping": "32x32",
-      "image_tokenization_method": "patch_based_adaptive",
-      "adaptive_resolution": true,
-      "min_resolution": 64,
-      "max_resolution": 4096,
-      "vision_encoder": "ViT-based",
-      "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model, 128K context",
-      "source": "Alibaba Qwen3-VL technical report 2025",
-      "canonical_name": "qwen3-vl-30b",
-      "aliases": [
-        "qwen/qwen3-vl-30b"
-      ],
-      "max_tokens": 131072
-    },
     "qwen2.5-vl-7b": {
       "max_output_tokens": 8192,
       "tool_support": "prompted",
@@ -1539,7 +1505,7 @@
     },
     "seed-oss": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1554,7 +1520,7 @@
     },
     "glm-4.5": {
       "max_output_tokens": 4096,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1568,7 +1534,7 @@
     },
     "glm-4.6": {
       "max_output_tokens": 4096,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1577,12 +1543,16 @@
       "notes": "GLM-4.6 MoE model with enhanced capabilities",
       "source": "Zhipu AI GLM-4.6 announcement",
       "canonical_name": "glm-4.6",
-      "aliases": [
+      "aliases": [
+        "zai-org/GLM-4.6",
+        "zai-org/GLM-4.6-FP8",
+        "glm-4.6-fp8"
+      ],
       "max_tokens": 128000
     },
     "glm-4.5-air": {
       "max_output_tokens": 4096,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1609,7 +1579,7 @@
     },
     "granite3.2:2b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1624,7 +1594,7 @@
     },
     "granite3.2:8b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1639,7 +1609,7 @@
     },
     "granite3.2-vision:2b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": true,
@@ -1717,7 +1687,7 @@
     },
     "granite3.3:2b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1732,7 +1702,7 @@
     },
     "granite3.3:8b": {
       "max_output_tokens": 8192,
-      "tool_support": "
+      "tool_support": "native",
       "structured_output": "prompted",
       "parallel_tools": false,
       "vision_support": false,
@@ -1758,7 +1728,7 @@
       "aliases": [
         "google/embeddinggemma-300m"
       ],
-      "max_tokens":
+      "max_tokens": 8192,
       "model_type": "embedding"
     },
     "blip-image-captioning-base": {
@@ -1784,7 +1754,7 @@
       "aliases": [
         "Salesforce/blip-image-captioning-base"
       ],
-      "max_tokens":
+      "max_tokens": 2048
     },
     "glyph": {
       "max_output_tokens": 8192,
@@ -2032,7 +2002,7 @@
       "max_image_tokens": 6400
     },
     "minimax-m2": {
-      "max_output_tokens":
+      "max_output_tokens": 131072,
       "tool_support": "native",
       "structured_output": "native",
       "parallel_tools": true,
@@ -2045,8 +2015,8 @@
       "total_parameters": "230B",
       "thinking_paradigm": "interleaved_thinking",
       "thinking_format": "<think>...</think>",
-      "notes": "MiniMax M2 MoE model optimized for coding and agentic workflows. Industry-leading 204K token context window. Uses interleaved thinking with <think> tags for reasoning.
-      "source": "MiniMax official docs (
+      "notes": "MiniMax M2 open-source MoE model (230B total/10B active) optimized for coding and agentic workflows. Industry-leading 204K token context window with 131K output capacity. Uses interleaved thinking with <think> tags for reasoning. Achieves strong performance on SWE-Bench and Terminal-Bench tasks. Ranked #5 on Artificial Analysis Intelligence Index. Efficient deployment at up to 8% cost of comparable models. Supports complete tool calling for agent workflows. Runs seamlessly on 8xH100 setup using vLLM.",
+      "source": "MiniMax official docs (HuggingFace MiniMaxAI/MiniMax-M2, Microsoft Azure AI Foundry blog)",
       "canonical_name": "minimax-m2",
       "aliases": [
         "MiniMaxAI/MiniMax-M2",
@@ -2054,11 +2024,703 @@
         "mlx-community/MiniMax-M2",
         "unsloth/MiniMax-M2-GGUF",
         "minimax-m2-230b",
-        "minimax-m2-10b-active"
+        "minimax-m2-10b-active",
+        "minimax/minimax-m2"
       ],
       "max_tokens": 208896,
       "release_date": "2025-01",
+      "license": "Apache-2.0",
+      "inference_parameters": {
+        "temperature": 1.0,
+        "top_p": 0.95,
+        "top_k": 40
+      },
+      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax."
+    },
+    "minimax-m2.1": {
+      "max_output_tokens": 131072,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "architecture": "mixture_of_experts",
+      "active_parameters": "10B",
+      "total_parameters": "229B",
+      "tensor_type": "FP8",
+      "thinking_paradigm": "interleaved_thinking",
+      "thinking_format": "<think>...</think>",
+      "agentic_coding": true,
+      "notes": "MiniMax M2.1 enhanced MoE model (229B total/10B active) optimized for advanced coding, agentic workflows, and full-stack development. 200K token context window with massive 128K output capacity. Significant improvements over M2 in multilingual software engineering (SWE-bench Multilingual: 72.5%), achieving performance close to Claude Opus 4.5. Excels at full-stack development with VIBE average of 88.6 (Web: 91.5, Android: 89.7). Uses interleaved thinking with <think> tags. Achieves 74.0% on SWE-bench Verified, 47.9% on Terminal-bench 2.0. Supports complete native tool calling for agent workflows.",
+      "source": "MiniMax official docs (platform.minimax.io, HuggingFace MiniMaxAI/MiniMax-M2.1)",
+      "canonical_name": "minimax-m2.1",
+      "aliases": [
+        "MiniMaxAI/MiniMax-M2.1",
+        "minimaxai/minimax-m2.1",
+        "minimax-m2.1-229b",
+        "minimax-m2.1-10b-active",
+        "minimax/minimax-m2.1"
+      ],
+      "max_tokens": 204800,
+      "release_date": "2024-12",
+      "license": "Modified-MIT",
+      "arxiv": "2509.06501",
+      "benchmarks": {
+        "SWE-bench Verified": 74.0,
+        "Multi-SWE-bench": 49.4,
+        "SWE-bench Multilingual": 72.5,
+        "Terminal-bench 2.0": 47.9,
+        "SWE-bench Verified (Droid)": 71.3,
+        "SWE-bench Verified (mini-swe-agent)": 67.0,
+        "SWT-bench": 69.3,
+        "SWE-Perf": 3.1,
+        "SWE-Review": 8.9,
+        "OctoCodingbench": 26.1,
+        "VIBE Average": 88.6,
+        "VIBE-Web": 91.5,
+        "VIBE-Android": 89.7,
+        "VIBE-Simulation": 87.1,
+        "VIBE-iOS": 88.0,
+        "VIBE-Backend": 86.7,
+        "Toolathlon": 43.5,
+        "BrowseComp": 47.4,
+        "BrowseComp (context management)": 62.0,
+        "AIME25": 83.0,
+        "MMLU-Pro": 88.0,
+        "GPQA-D": 83.0,
+        "HLE w/o tools": 22.2,
+        "LCB": 81.0,
+        "SciCode": 41.0,
+        "IFBench": 70.0,
+        "AA-LCR": 62.0,
+        "τ²-Bench Telecom": 87.0
+      },
+      "inference_parameters": {
+        "temperature": 1.0,
+        "top_p": 0.95,
+        "top_k": 40
+      },
+      "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
+    },
+    "glm-4.6v": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "thinking_output_field": "reasoning_content",
+      "image_resolutions": [
+        "up to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "architecture": "mixture_of_experts",
+      "total_parameters": "106B",
+      "image_tokenization_method": "glm_vision_encoder",
+      "adaptive_resolution": true,
+      "aspect_ratio_support": "arbitrary",
+      "native_function_calling": true,
+      "interleaved_generation": true,
+      "document_understanding": true,
+      "frontend_replication": true,
+      "tool_calling_format": "glm_xml",
+      "tool_calling_parser": "glm46v",
+      "output_wrappers": {
+        "start": "<|begin_of_box|>",
+        "end": "<|end_of_box|>"
+      },
+      "thinking_control": "/nothink",
+      "thinking_tags": ["<think>", "</think>"],
+      "notes": "GLM-4.6V foundation model (106B params) for cloud deployment. Native multimodal function calling with vision-driven tool use using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Supports interleaved image-text generation, 128K context, multimodal document understanding, and frontend replication from screenshots. Generates reasoning in 'reasoning_content' field or <think></think> tags. Achieves SoTA performance in visual understanding among similar parameter scales. Thinking can be disabled with '/nothink' suffix in user message. See: https://github.com/zai-org/GLM-V",
+      "source": "HuggingFace zai-org/GLM-4.6V and GLM-V GitHub",
+      "canonical_name": "glm-4.6v",
+      "aliases": [
+        "zai-org/GLM-4.6V",
+        "zai-org/GLM-4.6V-FP8",
+        "glm-4.6v-106b",
+        "glm-4.6v-fp8"
+      ],
+      "max_tokens": 128000,
+      "release_date": "2025-05-07",
+      "arxiv": "2507.01006",
+      "license": "MIT"
+    },
+    "glm-4.6v-flash": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "thinking_output_field": "reasoning_content",
+      "image_resolutions": [
+        "up to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "total_parameters": "9B",
+      "image_tokenization_method": "glm_vision_encoder",
+      "adaptive_resolution": true,
+      "aspect_ratio_support": "arbitrary",
+      "native_function_calling": true,
+      "interleaved_generation": true,
+      "document_understanding": true,
+      "frontend_replication": true,
+      "tool_calling_format": "glm_xml",
+      "tool_calling_parser": "glm46v",
+      "output_wrappers": {
+        "start": "<|begin_of_box|>",
+        "end": "<|end_of_box|>"
+      },
+      "thinking_control": "/nothink",
+      "thinking_tags": ["<think>", "</think>"],
+      "notes": "GLM-4.6V-Flash lightweight model (9B params) optimized for local deployment and low-latency applications. Maintains native multimodal function calling using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Generates reasoning in 'reasoning_content' field or <think></think> tags. Ideal for edge and resource-constrained environments while preserving core GLM-4.6V capabilities. Thinking can be disabled with '/nothink' suffix. See: https://github.com/zai-org/GLM-V",
+      "source": "HuggingFace zai-org/GLM-4.6V-Flash and GLM-V GitHub",
+      "canonical_name": "glm-4.6v-flash",
+      "aliases": [
+        "zai-org/GLM-4.6V-Flash",
+        "zai-org/GLM-4.6V-Flash-FP8",
+        "glm-4.6v-9b",
+        "glm-4.6v-flash-fp8"
+      ],
+      "max_tokens": 128000,
+      "release_date": "2025-05-07",
+      "arxiv": "2507.01006",
+      "license": "MIT"
+    },
+    "glm-4.7": {
+      "max_output_tokens": 32768,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "358B",
+      "thinking_paradigm": "multi_mode",
+      "thinking_modes": ["interleaved_thinking", "preserved_thinking", "turn_level_thinking"],
+      "native_function_calling": true,
+      "agentic_coding": true,
+      "terminal_tasks": true,
+      "web_browsing": true,
+      "tool_calling_parser": "glm47",
+      "reasoning_parser": "glm45",
+      "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (τ²-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
+      "source": "HuggingFace zai-org/GLM-4.7 and GLM technical blog",
+      "canonical_name": "glm-4.7",
+      "aliases": [
+        "zai-org/GLM-4.7",
+        "zai-org/GLM-4.7-FP8",
+        "glm-4.7-fp8",
+        "glm-4.7-358b"
+      ],
+      "max_tokens": 128000,
+      "release_date": "2025-06",
+      "arxiv": "2508.06471",
+      "license": "MIT",
+      "benchmarks": {
+        "SWE-bench Verified": 73.8,
+        "SWE-bench Multilingual": 66.7,
+        "Terminal Bench Hard": 33.3,
+        "Terminal Bench 2.0": 41.0,
+        "HLE": 24.8,
+        "HLE (w/ Tools)": 42.8,
+        "AIME 2025": 95.7,
+        "HMMT Feb. 2025": 97.1,
+        "HMMT Nov. 2025": 93.5,
+        "IMOAnswerBench": 82.0,
+        "LiveCodeBench-v6": 84.9,
+        "BrowseComp": 52.0,
+        "BrowseComp (w/ Context Manage)": 67.5,
+        "BrowseComp-Zh": 66.6,
+        "τ²-Bench": 87.4,
+        "MMLU-Pro": 84.3,
+        "GPQA-Diamond": 85.7
+      },
+      "inference_parameters": {
+        "temperature": 1.0,
+        "top_p": 0.95,
+        "enable_thinking": true,
+        "clear_thinking": false
+      }
+    },
+    "devstral-small-2-24b": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "total_parameters": "24B",
+      "architecture": "mistral3",
+      "tensor_type": "FP8",
+      "agentic_coding": true,
+      "tool_calling_parser": "mistral",
+      "notes": "Devstral Small 2 agentic LLM for software engineering (24B params, FP8). Excels at tool use, codebase exploration, multi-file edits. 256K context. Strong performance on SWE-bench Verified (68.0%), Terminal-Bench (22.5%), and SWE-bench Multilingual (55.7%). Improved generalization over predecessors. Uses Mistral 3 architecture with rope-scaling and Scalable-Softmax.",
+      "source": "Mistral AI Devstral 2 docs and HuggingFace",
+      "canonical_name": "devstral-small-2-24b",
+      "aliases": [
+        "mistralai/Devstral-Small-2-24B-Instruct-2512",
+        "devstral-small-2",
+        "devstral-small-2-24b-instruct"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-12",
+      "license": "Apache-2.0"
+    },
+    "devstral-2-123b": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "total_parameters": "123B",
+      "architecture": "ministral3",
+      "tensor_type": "FP8",
+      "agentic_coding": true,
+      "tool_calling_parser": "mistral",
+      "notes": "Devstral 2 flagship agentic LLM for software engineering (123B params, FP8). Excels at tool use, codebase exploration, multi-file edits. 256K context. Top-tier performance on SWE-bench Verified (72.2%), Terminal-Bench (32.6%), and SWE-bench Multilingual (61.3%). Improved generalization and better performance than predecessors.",
+      "source": "Mistral AI Devstral 2 docs and HuggingFace",
+      "canonical_name": "devstral-2-123b",
+      "aliases": [
+        "mistralai/Devstral-2-123B-Instruct-2512",
+        "devstral-2",
+        "devstral-2-123b-instruct"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-12",
+      "license": "Modified-MIT"
+    },
+    "qwen3-235b-a22b-2507": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": false,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "235B",
+      "active_parameters": "22B",
+      "experts": 128,
+      "experts_activated": 8,
+      "tensor_type": "BF16",
+      "notes": "Qwen3-235B-A22B-Instruct-2507 non-thinking mode (235B total/22B active, 128 experts/8 activated). Significant improvements in instruction following, reasoning, math, science, coding, tool usage. Enhanced 256K long-context understanding, extendable to 1M tokens with DCA+MInference. Substantial gains in multilingual knowledge. Better alignment for subjective tasks.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-235b-a22b-2507",
+      "aliases": [
+        "Qwen/Qwen3-235B-A22B-Instruct-2507",
+        "qwen3-235b-a22b-instruct-2507",
+        "qwen3-235b-2507"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-07",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "qwen3-235b-a22b-2507-fp8": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": false,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "235B",
+      "active_parameters": "22B",
+      "experts": 128,
+      "experts_activated": 8,
+      "tensor_type": "FP8",
+      "quantization_method": "fine_grained_fp8_block128",
+      "notes": "FP8-quantized version of Qwen3-235B-A22B-Instruct-2507. Fine-grained fp8 quantization with block size 128. Same capabilities as BF16 version but more efficient inference. Note: transformers has issues with fine-grained fp8 in distributed inference (may need CUDA_LAUNCH_BLOCKING=1).",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-235b-a22b-2507-fp8",
+      "aliases": [
+        "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
+        "qwen3-235b-a22b-instruct-2507-fp8",
+        "qwen3-235b-2507-fp8"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-07",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "granite-4.0-h-tiny": {
+      "max_output_tokens": 16384,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "architecture": "granitemoehybrid",
+      "total_parameters": "7B",
+      "active_parameters": "1B",
+      "experts": 64,
+      "experts_activated": 6,
+      "expert_hidden_size": 512,
+      "shared_expert_hidden_size": 1024,
+      "attention_layers": 4,
+      "mamba2_layers": 36,
+      "mamba2_state_size": 128,
+      "embedding_size": 1536,
+      "tensor_type": "BF16",
+      "notes": "Granite 4.0-H-Tiny hybrid MoE model (7B total/1B active, 64 experts/6 activated). Combines 4 attention layers with 36 Mamba2 layers. 128K context. Enhanced tool-calling and instruction following. Strong performance on coding, math, and alignment tasks. Optimized for enterprise applications with improved IF capabilities.",
+      "source": "IBM Granite 4.0 HuggingFace and technical report",
+      "canonical_name": "granite-4.0-h-tiny",
+      "aliases": [
+        "ibm-granite/granite-4.0-h-tiny",
+        "granite-4.0-h-tiny-moe",
+        "granite-h-tiny"
+      ],
+      "max_tokens": 131072,
+      "release_date": "2025-10-02",
+      "license": "Apache-2.0"
+    },
+    "gpt-oss-20b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "thinking_budget": true,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "21B",
+      "active_parameters": "3.6B",
+      "tensor_type": "BF16+U8",
+      "quantization_method": "MXFP4",
+      "response_format": "harmony",
+      "reasoning_levels": ["low", "medium", "high"],
+      "agentic_capabilities": true,
+      "function_calling": true,
+      "web_browsing": true,
+      "python_execution": true,
+      "fine_tunable": true,
+      "notes": "OpenAI GPT-OSS 20B open-weight model (21B total/3.6B active). Designed for lower latency, local, and specialized use cases. MXFP4 quantization enables running within 16GB memory. Configurable reasoning effort (low/medium/high). Full chain-of-thought access. Requires harmony response format. Apache 2.0 license for commercial use.",
+      "source": "OpenAI GPT-OSS HuggingFace and arXiv:2508.10925",
+      "canonical_name": "gpt-oss-20b",
+      "aliases": [
+        "openai/gpt-oss-20b",
+        "gpt-oss:20b"
+      ],
+      "max_tokens": 128000,
+      "release_date": "2025-08",
+      "arxiv": "2508.10925",
+      "license": "Apache-2.0"
+    },
+    "gpt-oss-120b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "thinking_budget": true,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "117B",
+      "active_parameters": "5.1B",
+      "tensor_type": "BF16+U8",
+      "quantization_method": "MXFP4",
+      "response_format": "harmony",
+      "reasoning_levels": ["low", "medium", "high"],
+      "agentic_capabilities": true,
+      "function_calling": true,
+      "web_browsing": true,
+      "python_execution": true,
+      "fine_tunable": true,
+      "gpu_memory_required": "80GB",
+      "notes": "OpenAI GPT-OSS 120B open-weight model (117B total/5.1B active). Production-ready for general purpose, high reasoning use cases. MXFP4 quantization enables single 80GB GPU deployment (H100/MI300X). Configurable reasoning effort (low/medium/high). Full chain-of-thought access. Requires harmony response format. Apache 2.0 license for commercial use.",
+      "source": "OpenAI GPT-OSS HuggingFace and arXiv:2508.10925",
+      "canonical_name": "gpt-oss-120b",
+      "aliases": [
+        "openai/gpt-oss-120b",
+        "gpt-oss:120b"
+      ],
+      "max_tokens": 128000,
+      "release_date": "2025-08",
+      "arxiv": "2508.10925",
+      "license": "Apache-2.0"
+    },
+    "qwen3-vl-2b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": true,
+      "image_resolutions": [
+        "64x64 to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "image_patch_size": 16,
+      "max_image_tokens": 24576,
+      "pixel_grouping": "32x32",
+      "image_tokenization_method": "patch_based_adaptive",
+      "adaptive_resolution": true,
+      "min_resolution": 64,
+      "max_resolution": 4096,
+      "vision_encoder": "ViT-based",
+      "visual_agent": true,
+      "visual_coding": true,
+      "spatial_perception": true,
+      "document_understanding": true,
+      "ocr_languages": 32,
+      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "notes": "Qwen3-VL 2B dense model with 256K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-vl-2b",
+      "aliases": [
+        "Qwen/Qwen3-VL-2B-Instruct",
+        "qwen3-vl-2b-instruct"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-05",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "qwen3-vl-4b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": true,
+      "image_resolutions": [
+        "64x64 to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "image_patch_size": 16,
+      "max_image_tokens": 24576,
+      "pixel_grouping": "32x32",
+      "image_tokenization_method": "patch_based_adaptive",
+      "adaptive_resolution": true,
+      "min_resolution": 64,
+      "max_resolution": 4096,
+      "vision_encoder": "ViT-based",
+      "visual_agent": true,
+      "visual_coding": true,
+      "spatial_perception": true,
+      "document_understanding": true,
+      "ocr_languages": 32,
+      "total_parameters": "4.83B",
+      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "notes": "Qwen3-VL 4B dense model (4.83B params) with 256K context, optimized for LMStudio. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding. FP8 checkpoints available.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-vl-4b",
+      "aliases": [
+        "Qwen/Qwen3-VL-4B-Instruct",
+        "qwen3-vl-4b-instruct",
+        "qwen/qwen3-vl-4b"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-05",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "qwen3-vl-8b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": true,
+      "image_resolutions": [
+        "64x64 to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "image_patch_size": 16,
+      "max_image_tokens": 24576,
+      "pixel_grouping": "32x32",
+      "image_tokenization_method": "patch_based_adaptive",
+      "adaptive_resolution": true,
+      "min_resolution": 64,
+      "max_resolution": 4096,
+      "vision_encoder": "ViT-based",
+      "visual_agent": true,
+      "visual_coding": true,
+      "spatial_perception": true,
+      "document_understanding": true,
+      "ocr_languages": 32,
+      "total_parameters": "8.77B",
+      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "notes": "Qwen3-VL 8B dense model (8.77B params) with 256K context, optimized for LMStudio. Most powerful vision-language model in Qwen series. Visual agent for GUI operation, visual coding, advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. FP8 checkpoints available.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-vl-8b",
+      "aliases": [
+        "Qwen/Qwen3-VL-8B-Instruct",
+        "qwen3-vl-8b-instruct",
+        "qwen/qwen3-vl-8b"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-05",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "qwen3-vl-30b-a3b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": true,
+      "image_resolutions": [
+        "64x64 to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "image_patch_size": 16,
+      "max_image_tokens": 24576,
+      "pixel_grouping": "32x32",
+      "image_tokenization_method": "patch_based_adaptive",
+      "adaptive_resolution": true,
+      "min_resolution": 64,
+      "max_resolution": 4096,
+      "vision_encoder": "ViT-based",
+      "visual_agent": true,
+      "visual_coding": true,
+      "spatial_perception": true,
+      "document_understanding": true,
+      "ocr_languages": 32,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "30.5B",
+      "active_parameters": "3.3B",
+      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model in the series. 128K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-vl-30b-a3b",
+      "aliases": [
+        "Qwen/Qwen3-VL-30B-A3B-Instruct",
+        "qwen3-vl-30b-a3b-instruct",
+        "qwen/qwen3-vl-30b"
+      ],
+      "max_tokens": 131072,
+      "release_date": "2025-05",
+      "arxiv": "2505.09388",
+      "license": "Apache-2.0"
+    },
+    "qwen3-vl-235b-a22b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": true,
+      "audio_support": false,
+      "video_support": true,
+      "image_resolutions": [
+        "64x64 to 4096x4096"
+      ],
+      "max_image_resolution": "4096x4096",
+      "image_patch_size": 16,
+      "max_image_tokens": 24576,
+      "pixel_grouping": "32x32",
+      "image_tokenization_method": "patch_based_adaptive",
+      "adaptive_resolution": true,
+      "min_resolution": 64,
+      "max_resolution": 4096,
+      "vision_encoder": "ViT-based",
+      "visual_agent": true,
+      "visual_coding": true,
+      "spatial_perception": true,
+      "document_understanding": true,
+      "ocr_languages": 32,
+      "architecture": "mixture_of_experts",
+      "total_parameters": "235B",
+      "active_parameters": "22B",
+      "experts": 128,
+      "experts_activated": 8,
+      "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
+      "notes": "Qwen3-VL 235B MoE model (235B total/22B active, 128 experts/8 activated), flagship vision model. 256K context expandable to 1M. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs. Superior visual perception and reasoning.",
+      "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
+      "canonical_name": "qwen3-vl-235b-a22b",
+      "aliases": [
+        "Qwen/Qwen3-VL-235B-A22B-Instruct",
+        "qwen3-vl-235b-a22b-instruct"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-05",
+      "arxiv": "2505.09388",
       "license": "Apache-2.0"
+    },
+    "nemotron-3-nano-30b-a3b": {
+      "max_output_tokens": 8192,
+      "tool_support": "native",
+      "structured_output": "native",
+      "parallel_tools": true,
+      "vision_support": false,
+      "audio_support": false,
+      "video_support": false,
+      "thinking_support": true,
+      "thinking_budget": false,
+      "architecture": "nemotron_hybrid_moe",
+      "total_parameters": "30B",
+      "active_parameters": "3.5B",
+      "experts": 128,
+      "experts_activated": 6,
+      "shared_experts": 1,
+      "attention_layers": 6,
+      "mamba2_layers": 23,
+      "tensor_type": "BF16",
+      "reasoning_paradigm": "unified_reasoning_response",
+      "reasoning_configurable": true,
+      "agentic_capabilities": true,
+      "function_calling": true,
+      "tool_calling_format": "json",
+      "languages": ["English", "German", "Spanish", "French", "Italian", "Japanese"],
+      "notes": "NVIDIA Nemotron-3-Nano hybrid MoE model (30B total/3.5B active, 128 experts/6 activated + 1 shared). Combines 23 Mamba-2 layers with 6 Attention layers. Unified model for reasoning and non-reasoning tasks with configurable reasoning mode. Generates reasoning trace before final response. 256K context extendable to 1M with YaRN. Strong performance on AIME25 (99.2% with tools), SWE-Bench (38.8%), MiniF2F (50.0% pass@1). Native tool calling via chatml-function-calling format. Commercial use ready.",
+      "source": "NVIDIA Nemotron HuggingFace and technical report",
+      "canonical_name": "nemotron-3-nano-30b-a3b",
+      "aliases": [
+        "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+        "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+        "nvidia/nemotron-3-nano",
+        "nemotron-3-nano",
+        "nemotron-nano-30b"
+      ],
+      "max_tokens": 262144,
+      "release_date": "2025-12",
+      "license": "nvidia-open-model-license",
+      "benchmarks": {
+        "MMLU-Pro": 78.3,
+        "AIME25 (no tools)": 89.1,
+        "AIME25 (with tools)": 99.2,
+        "GPQA (no tools)": 73.0,
+        "GPQA (with tools)": 75.0,
+        "LiveCodeBench v6": 68.3,
+        "SciCode (subtask)": 33.3,
+        "HLE (no tools)": 10.6,
+        "HLE (with tools)": 15.5,
+        "MiniF2F pass@1": 50.0,
+        "MiniF2F pass@32": 79.9,
+        "Terminal Bench (hard subset)": 8.5,
+        "SWE-Bench (OpenHands)": 38.8,
+        "TauBench V2 (Average)": 49.0,
+        "BFCL v4": 53.8,
+        "IFBench (prompt)": 71.5,
+        "Scale AI Multi Challenge": 38.5,
+        "Arena-Hard-V2 (Hard Prompt)": 72.1,
+        "Arena-Hard-V2 (Average)": 67.7
+      }
     }
   },
   "tool_support_levels": {
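The GLM-4.6V and GLM-4.6V-Flash entries above describe an XML-style tool-call wire format (<tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>). A minimal parser sketch for that shape, written as an assumption from the notes field rather than taken from abstractcore/tools/parser.py:

    import re

    TOOL_CALL_RE = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
    ARG_RE = re.compile(r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>", re.DOTALL)

    def parse_glm_tool_calls(text: str) -> list[dict]:
        """Extract (function name, arguments) pairs from GLM-style XML tool calls."""
        calls = []
        for block in TOOL_CALL_RE.findall(text):
            lines = block.strip().splitlines()
            name = lines[0].strip() if lines else ""
            args = {k.strip(): v.strip() for k, v in ARG_RE.findall(block)}
            calls.append({"name": name, "arguments": args})
        return calls

    sample = "<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>Paris</arg_value>\n</tool_call>"
    print(parse_glm_tool_calls(sample))  # [{'name': 'get_weather', 'arguments': {'city': 'Paris'}}]

Real model output may add extra whitespace or multiple <arg_key>/<arg_value> pairs per call; the regexes above tolerate both.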
@@ -2157,4 +2819,4 @@
     "fim_support": false,
     "max_tokens": 16384
   }
-}
+}