abstractcore 2.6.8__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they were released to a supported registry. It is provided for informational purposes only and reflects each version as it appears in its public registry.
Files changed (45)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +789 -136
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/basic_deepsearch.py +1 -1
  15. abstractcore/processing/basic_summarizer.py +300 -83
  16. abstractcore/providers/anthropic_provider.py +91 -10
  17. abstractcore/providers/base.py +537 -16
  18. abstractcore/providers/huggingface_provider.py +17 -8
  19. abstractcore/providers/lmstudio_provider.py +170 -25
  20. abstractcore/providers/mlx_provider.py +13 -10
  21. abstractcore/providers/ollama_provider.py +42 -26
  22. abstractcore/providers/openai_compatible_provider.py +87 -22
  23. abstractcore/providers/openai_provider.py +12 -9
  24. abstractcore/providers/streaming.py +201 -39
  25. abstractcore/providers/vllm_provider.py +78 -21
  26. abstractcore/server/app.py +65 -28
  27. abstractcore/structured/retry.py +20 -7
  28. abstractcore/tools/__init__.py +5 -4
  29. abstractcore/tools/abstractignore.py +166 -0
  30. abstractcore/tools/arg_canonicalizer.py +61 -0
  31. abstractcore/tools/common_tools.py +2311 -772
  32. abstractcore/tools/core.py +109 -13
  33. abstractcore/tools/handler.py +17 -3
  34. abstractcore/tools/parser.py +798 -155
  35. abstractcore/tools/registry.py +107 -2
  36. abstractcore/tools/syntax_rewriter.py +68 -6
  37. abstractcore/tools/tag_rewriter.py +186 -1
  38. abstractcore/utils/jsonish.py +111 -0
  39. abstractcore/utils/version.py +1 -1
  40. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
  41. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
  42. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  43. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  44. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  45. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
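Most of the delta below is data: per-model entries in abstractcore/assets/model_capabilities.json gain or change fields such as tool_support, max_output_tokens, canonical_name, and aliases. As a rough sketch of how an entry like the ones shown here could be consumed — the loader and the top-level layout below are illustrative assumptions, not abstractcore's actual API — a lookup that honors canonical_name and aliases might look like this:

```python
import json
from importlib import resources

def load_capabilities():
    # Illustrative: read the capabilities database shipped under abstractcore/assets/.
    path = resources.files("abstractcore.assets").joinpath("model_capabilities.json")
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)

def find_model_entry(capabilities, model_name):
    # Entries are keyed by a canonical name and may list provider-prefixed aliases,
    # e.g. "anthropic/claude-haiku-4-5" or "Qwen/Qwen3-VL-8B-Instruct".
    wanted = model_name.lower()
    # The models may sit under a wrapping key (layout assumed here); fall back to the
    # top level and skip non-model sections such as "tool_support_levels".
    models = capabilities.get("models", capabilities)
    for key, entry in models.items():
        if not isinstance(entry, dict) or "tool_support" not in entry:
            continue
        names = [key, entry.get("canonical_name", key), *entry.get("aliases", [])]
        if wanted in (n.lower() for n in names):
            return entry
    return None

caps = load_capabilities()
entry = find_model_entry(caps, "claude-haiku-4-5-20251001")
if entry:
    print(entry["tool_support"], entry["max_output_tokens"])  # e.g. "native", 64000
```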
@@ -124,7 +124,7 @@
124
124
  },
125
125
  "o1": {
126
126
  "max_output_tokens": 32768,
127
- "tool_support": "none",
127
+ "tool_support": "native",
128
128
  "structured_output": "prompted",
129
129
  "parallel_tools": false,
130
130
  "vision_support": false,
@@ -137,7 +137,7 @@
137
137
  },
138
138
  "o1-mini": {
139
139
  "max_output_tokens": 65536,
140
- "tool_support": "none",
140
+ "tool_support": "prompted",
141
141
  "structured_output": "prompted",
142
142
  "parallel_tools": false,
143
143
  "vision_support": false,
@@ -282,6 +282,26 @@
282
282
  "aliases": [],
283
283
  "max_tokens": 200000
284
284
  },
285
+ "claude-haiku-4-5": {
286
+ "max_output_tokens": 64000,
287
+ "tool_support": "native",
288
+ "structured_output": "native",
289
+ "parallel_tools": true,
290
+ "max_tools": -1,
291
+ "vision_support": true,
292
+ "image_resolutions": [
293
+ "up to 1568x1568"
294
+ ],
295
+ "audio_support": false,
296
+ "notes": "Claude Haiku 4.5 series. Anthropic API enforces a 64K max output token cap (currently 64000).",
297
+ "source": "Anthropic API error cap (max_tokens <= 64000)",
298
+ "canonical_name": "claude-haiku-4-5",
299
+ "aliases": [
300
+ "claude-haiku-4-5-20251001",
301
+ "anthropic/claude-haiku-4-5"
302
+ ],
303
+ "max_tokens": 200000
304
+ },
285
305
  "claude-4-opus": {
286
306
  "max_output_tokens": 4096,
287
307
  "tool_support": "native",
@@ -334,7 +354,7 @@
334
354
  "max_tokens": 200000
335
355
  },
336
356
  "claude-4.5-sonnet": {
337
- "max_output_tokens": 8192,
357
+ "max_output_tokens": 64000,
338
358
  "tool_support": "native",
339
359
  "structured_output": "native",
340
360
  "parallel_tools": true,
@@ -344,15 +364,39 @@
344
364
  "up to 1568x1568"
345
365
  ],
346
366
  "audio_support": false,
347
- "notes": "Claude 4.5 Sonnet with enhanced reasoning",
348
- "source": "Anthropic official docs",
367
+ "notes": "Claude 4.5 Sonnet. Anthropic API enforces a 64K max output token cap (currently 64000).",
368
+ "source": "Anthropic API error cap (max_tokens <= 64000)",
349
369
  "canonical_name": "claude-4.5-sonnet",
350
- "aliases": [],
370
+ "aliases": [
371
+ "claude-sonnet-4-5",
372
+ "claude-sonnet-4-5-20250929",
373
+ "anthropic/claude-sonnet-4-5"
374
+ ],
375
+ "max_tokens": 200000
376
+ },
377
+ "claude-opus-4-5": {
378
+ "max_output_tokens": 64000,
379
+ "tool_support": "native",
380
+ "structured_output": "native",
381
+ "parallel_tools": true,
382
+ "max_tools": -1,
383
+ "vision_support": true,
384
+ "image_resolutions": [
385
+ "up to 1568x1568"
386
+ ],
387
+ "audio_support": false,
388
+ "notes": "Claude Opus 4.5. Anthropic API enforces a 64K max output token cap (currently 64000).",
389
+ "source": "Anthropic API error cap (max_tokens <= 64000)",
390
+ "canonical_name": "claude-opus-4-5",
391
+ "aliases": [
392
+ "claude-opus-4-5-20251101",
393
+ "anthropic/claude-opus-4-5"
394
+ ],
351
395
  "max_tokens": 200000
352
396
  },
353
397
  "llama-3.2-1b": {
354
398
  "max_output_tokens": 2048,
355
- "tool_support": "prompted",
399
+ "tool_support": "native",
356
400
  "structured_output": "native",
357
401
  "parallel_tools": false,
358
402
  "vision_support": false,
@@ -365,7 +409,7 @@
365
409
  },
366
410
  "llama-3.2-3b": {
367
411
  "max_output_tokens": 2048,
368
- "tool_support": "prompted",
412
+ "tool_support": "native",
369
413
  "structured_output": "native",
370
414
  "parallel_tools": false,
371
415
  "vision_support": false,
@@ -394,7 +438,7 @@
394
438
  },
395
439
  "llama-3.3-70b": {
396
440
  "max_output_tokens": 8192,
397
- "tool_support": "prompted",
441
+ "tool_support": "native",
398
442
  "structured_output": "native",
399
443
  "parallel_tools": true,
400
444
  "vision_support": false,
@@ -454,15 +498,12 @@
454
498
  "notes": "Multimodal with early fusion, 109B total params (MoE)",
455
499
  "source": "Meta announcement",
456
500
  "canonical_name": "llama-4",
457
- "aliases": [
458
- "llama4-17b-scout-16e-instruct",
459
- "llama-4-17b-scout-16e-instruct"
460
- ],
501
+ "aliases": [],
461
502
  "max_tokens": 10000000
462
503
  },
463
504
  "qwen2.5-0.5b": {
464
505
  "max_output_tokens": 8192,
465
- "tool_support": "prompted",
506
+ "tool_support": "native",
466
507
  "structured_output": "native",
467
508
  "parallel_tools": false,
468
509
  "vision_support": false,
@@ -475,7 +516,7 @@
475
516
  },
476
517
  "qwen2.5-1.5b": {
477
518
  "max_output_tokens": 8192,
478
- "tool_support": "prompted",
519
+ "tool_support": "native",
479
520
  "structured_output": "native",
480
521
  "parallel_tools": false,
481
522
  "vision_support": false,
@@ -488,7 +529,7 @@
488
529
  },
489
530
  "qwen2.5-3b": {
490
531
  "max_output_tokens": 8192,
491
- "tool_support": "prompted",
532
+ "tool_support": "native",
492
533
  "structured_output": "native",
493
534
  "parallel_tools": false,
494
535
  "vision_support": false,
@@ -501,7 +542,7 @@
501
542
  },
502
543
  "qwen2.5-7b": {
503
544
  "max_output_tokens": 8192,
504
- "tool_support": "prompted",
545
+ "tool_support": "native",
505
546
  "structured_output": "native",
506
547
  "parallel_tools": false,
507
548
  "vision_support": false,
@@ -514,7 +555,7 @@
514
555
  },
515
556
  "qwen2.5-14b": {
516
557
  "max_output_tokens": 8192,
517
- "tool_support": "prompted",
558
+ "tool_support": "native",
518
559
  "structured_output": "native",
519
560
  "parallel_tools": false,
520
561
  "vision_support": false,
@@ -527,7 +568,7 @@
527
568
  },
528
569
  "qwen2.5-32b": {
529
570
  "max_output_tokens": 8192,
530
- "tool_support": "prompted",
571
+ "tool_support": "native",
531
572
  "structured_output": "native",
532
573
  "parallel_tools": false,
533
574
  "vision_support": false,
@@ -540,7 +581,7 @@
540
581
  },
541
582
  "qwen2.5-72b": {
542
583
  "max_output_tokens": 8192,
543
- "tool_support": "prompted",
584
+ "tool_support": "native",
544
585
  "structured_output": "native",
545
586
  "parallel_tools": false,
546
587
  "vision_support": false,
@@ -553,7 +594,7 @@
553
594
  },
554
595
  "qwen3-0.6b": {
555
596
  "max_output_tokens": 8192,
556
- "tool_support": "prompted",
597
+ "tool_support": "native",
557
598
  "structured_output": "native",
558
599
  "parallel_tools": false,
559
600
  "vision_support": false,
@@ -567,7 +608,7 @@
567
608
  },
568
609
  "qwen3-1.7b": {
569
610
  "max_output_tokens": 8192,
570
- "tool_support": "prompted",
611
+ "tool_support": "native",
571
612
  "structured_output": "native",
572
613
  "parallel_tools": false,
573
614
  "vision_support": false,
@@ -581,7 +622,7 @@
581
622
  },
582
623
  "qwen3-4b": {
583
624
  "max_output_tokens": 8192,
584
- "tool_support": "prompted",
625
+ "tool_support": "native",
585
626
  "structured_output": "native",
586
627
  "parallel_tools": false,
587
628
  "vision_support": false,
@@ -595,7 +636,7 @@
595
636
  },
596
637
  "qwen3-32b": {
597
638
  "max_output_tokens": 8192,
598
- "tool_support": "prompted",
639
+ "tool_support": "native",
599
640
  "structured_output": "native",
600
641
  "parallel_tools": false,
601
642
  "vision_support": false,
@@ -609,7 +650,7 @@
609
650
  },
610
651
  "qwen3-30b-a3b": {
611
652
  "max_output_tokens": 8192,
612
- "tool_support": "prompted",
653
+ "tool_support": "native",
613
654
  "structured_output": "native",
614
655
  "parallel_tools": false,
615
656
  "vision_support": false,
@@ -623,7 +664,7 @@
623
664
  },
624
665
  "qwen3-30b-a3b-2507": {
625
666
  "max_output_tokens": 8192,
626
- "tool_support": "prompted",
667
+ "tool_support": "native",
627
668
  "structured_output": "native",
628
669
  "parallel_tools": false,
629
670
  "vision_support": false,
@@ -781,7 +822,7 @@
781
822
  },
782
823
  "phi-4": {
783
824
  "max_output_tokens": 16000,
784
- "tool_support": "prompted",
825
+ "tool_support": "native",
785
826
  "structured_output": "native",
786
827
  "parallel_tools": false,
787
828
  "vision_support": false,
@@ -794,7 +835,7 @@
794
835
  },
795
836
  "mistral-7b": {
796
837
  "max_output_tokens": 8192,
797
- "tool_support": "prompted",
838
+ "tool_support": "native",
798
839
  "structured_output": "native",
799
840
  "parallel_tools": false,
800
841
  "vision_support": false,
@@ -806,7 +847,7 @@
806
847
  },
807
848
  "mixtral-8x7b": {
808
849
  "max_output_tokens": 32768,
809
- "tool_support": "prompted",
850
+ "tool_support": "native",
810
851
  "structured_output": "native",
811
852
  "parallel_tools": false,
812
853
  "vision_support": false,
@@ -1171,7 +1212,7 @@
1171
1212
  },
1172
1213
  "qwen3": {
1173
1214
  "max_output_tokens": 8192,
1174
- "tool_support": "prompted",
1215
+ "tool_support": "native",
1175
1216
  "structured_output": "native",
1176
1217
  "parallel_tools": false,
1177
1218
  "vision_support": false,
@@ -1184,7 +1225,7 @@
1184
1225
  },
1185
1226
  "qwen3-14b": {
1186
1227
  "max_output_tokens": 8192,
1187
- "tool_support": "prompted",
1228
+ "tool_support": "native",
1188
1229
  "structured_output": "prompted",
1189
1230
  "parallel_tools": false,
1190
1231
  "vision_support": false,
@@ -1198,7 +1239,7 @@
1198
1239
  },
1199
1240
  "qwen3-next-80b-a3b": {
1200
1241
  "max_output_tokens": 16384,
1201
- "tool_support": "prompted",
1242
+ "tool_support": "native",
1202
1243
  "structured_output": "prompted",
1203
1244
  "parallel_tools": true,
1204
1245
  "vision_support": false,
@@ -1287,7 +1328,7 @@
1287
1328
  },
1288
1329
  "qwen3-8b": {
1289
1330
  "max_output_tokens": 8192,
1290
- "tool_support": "prompted",
1331
+ "tool_support": "native",
1291
1332
  "structured_output": "prompted",
1292
1333
  "parallel_tools": false,
1293
1334
  "vision_support": false,
@@ -1301,7 +1342,7 @@
1301
1342
  },
1302
1343
  "qwen3-235b-a22b": {
1303
1344
  "max_output_tokens": 8192,
1304
- "tool_support": "prompted",
1345
+ "tool_support": "native",
1305
1346
  "structured_output": "prompted",
1306
1347
  "parallel_tools": false,
1307
1348
  "vision_support": false,
@@ -1315,7 +1356,7 @@
1315
1356
  },
1316
1357
  "qwen3-vl": {
1317
1358
  "max_output_tokens": 8192,
1318
- "tool_support": "prompted",
1359
+ "tool_support": "native",
1319
1360
  "structured_output": "prompted",
1320
1361
  "parallel_tools": false,
1321
1362
  "vision_support": true,
@@ -1333,90 +1374,6 @@
1333
1374
  "aliases": [],
1334
1375
  "max_tokens": 131072
1335
1376
  },
1336
- "qwen3-vl-4b": {
1337
- "max_output_tokens": 8192,
1338
- "tool_support": "prompted",
1339
- "structured_output": "prompted",
1340
- "parallel_tools": false,
1341
- "vision_support": true,
1342
- "video_support": true,
1343
- "audio_support": false,
1344
- "image_resolutions": [
1345
- "64x64 to 4096x4096"
1346
- ],
1347
- "max_image_resolution": "4096x4096",
1348
- "image_patch_size": 16,
1349
- "max_image_tokens": 24576,
1350
- "pixel_grouping": "32x32",
1351
- "image_tokenization_method": "patch_based_adaptive",
1352
- "adaptive_resolution": true,
1353
- "min_resolution": 64,
1354
- "max_resolution": 4096,
1355
- "vision_encoder": "ViT-based",
1356
- "notes": "Qwen3-VL 4B dense model with 256K context, optimized for LMStudio. Parameters: 4.83B. FP8 checkpoints available.",
1357
- "source": "Alibaba Qwen3-VL technical report 2025",
1358
- "canonical_name": "qwen3-vl-4b",
1359
- "aliases": [
1360
- "qwen/qwen3-vl-4b"
1361
- ],
1362
- "max_tokens": 262144
1363
- },
1364
- "qwen3-vl-8b": {
1365
- "max_output_tokens": 8192,
1366
- "tool_support": "prompted",
1367
- "structured_output": "prompted",
1368
- "parallel_tools": false,
1369
- "vision_support": true,
1370
- "video_support": true,
1371
- "audio_support": false,
1372
- "image_resolutions": [
1373
- "64x64 to 4096x4096"
1374
- ],
1375
- "max_image_resolution": "4096x4096",
1376
- "image_patch_size": 16,
1377
- "max_image_tokens": 24576,
1378
- "pixel_grouping": "32x32",
1379
- "image_tokenization_method": "patch_based_adaptive",
1380
- "adaptive_resolution": true,
1381
- "min_resolution": 64,
1382
- "max_resolution": 4096,
1383
- "vision_encoder": "ViT-based",
1384
- "notes": "Qwen3-VL 8B dense model with 256K context, optimized for LMStudio. Parameters: 8.77B. FP8 checkpoints available.",
1385
- "source": "Alibaba Qwen3-VL technical report 2025",
1386
- "canonical_name": "qwen3-vl-8b",
1387
- "aliases": [
1388
- "qwen/qwen3-vl-8b"
1389
- ],
1390
- "max_tokens": 262144
1391
- },
1392
- "qwen3-vl-30b": {
1393
- "max_output_tokens": 8192,
1394
- "tool_support": "prompted",
1395
- "structured_output": "prompted",
1396
- "parallel_tools": false,
1397
- "vision_support": true,
1398
- "video_support": true,
1399
- "audio_support": false,
1400
- "image_resolutions": [
1401
- "64x64 to 4096x4096"
1402
- ],
1403
- "max_image_resolution": "4096x4096",
1404
- "image_patch_size": 16,
1405
- "max_image_tokens": 24576,
1406
- "pixel_grouping": "32x32",
1407
- "image_tokenization_method": "patch_based_adaptive",
1408
- "adaptive_resolution": true,
1409
- "min_resolution": 64,
1410
- "max_resolution": 4096,
1411
- "vision_encoder": "ViT-based",
1412
- "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model, 128K context",
1413
- "source": "Alibaba Qwen3-VL technical report 2025",
1414
- "canonical_name": "qwen3-vl-30b",
1415
- "aliases": [
1416
- "qwen/qwen3-vl-30b"
1417
- ],
1418
- "max_tokens": 131072
1419
- },
1420
1377
  "qwen2.5-vl-7b": {
1421
1378
  "max_output_tokens": 8192,
1422
1379
  "tool_support": "prompted",
@@ -1548,7 +1505,7 @@
1548
1505
  },
1549
1506
  "seed-oss": {
1550
1507
  "max_output_tokens": 8192,
1551
- "tool_support": "prompted",
1508
+ "tool_support": "native",
1552
1509
  "structured_output": "prompted",
1553
1510
  "parallel_tools": false,
1554
1511
  "vision_support": false,
@@ -1563,7 +1520,7 @@
1563
1520
  },
1564
1521
  "glm-4.5": {
1565
1522
  "max_output_tokens": 4096,
1566
- "tool_support": "prompted",
1523
+ "tool_support": "native",
1567
1524
  "structured_output": "prompted",
1568
1525
  "parallel_tools": false,
1569
1526
  "vision_support": false,
@@ -1577,7 +1534,7 @@
1577
1534
  },
1578
1535
  "glm-4.6": {
1579
1536
  "max_output_tokens": 4096,
1580
- "tool_support": "prompted",
1537
+ "tool_support": "native",
1581
1538
  "structured_output": "prompted",
1582
1539
  "parallel_tools": false,
1583
1540
  "vision_support": false,
@@ -1586,12 +1543,16 @@
1586
1543
  "notes": "GLM-4.6 MoE model with enhanced capabilities",
1587
1544
  "source": "Zhipu AI GLM-4.6 announcement",
1588
1545
  "canonical_name": "glm-4.6",
1589
- "aliases": [],
1546
+ "aliases": [
1547
+ "zai-org/GLM-4.6",
1548
+ "zai-org/GLM-4.6-FP8",
1549
+ "glm-4.6-fp8"
1550
+ ],
1590
1551
  "max_tokens": 128000
1591
1552
  },
1592
1553
  "glm-4.5-air": {
1593
1554
  "max_output_tokens": 4096,
1594
- "tool_support": "prompted",
1555
+ "tool_support": "native",
1595
1556
  "structured_output": "prompted",
1596
1557
  "parallel_tools": false,
1597
1558
  "vision_support": false,
@@ -1618,7 +1579,7 @@
1618
1579
  },
1619
1580
  "granite3.2:2b": {
1620
1581
  "max_output_tokens": 8192,
1621
- "tool_support": "prompted",
1582
+ "tool_support": "native",
1622
1583
  "structured_output": "prompted",
1623
1584
  "parallel_tools": false,
1624
1585
  "vision_support": false,
@@ -1633,7 +1594,7 @@
1633
1594
  },
1634
1595
  "granite3.2:8b": {
1635
1596
  "max_output_tokens": 8192,
1636
- "tool_support": "prompted",
1597
+ "tool_support": "native",
1637
1598
  "structured_output": "prompted",
1638
1599
  "parallel_tools": false,
1639
1600
  "vision_support": false,
@@ -1648,7 +1609,7 @@
1648
1609
  },
1649
1610
  "granite3.2-vision:2b": {
1650
1611
  "max_output_tokens": 8192,
1651
- "tool_support": "prompted",
1612
+ "tool_support": "native",
1652
1613
  "structured_output": "prompted",
1653
1614
  "parallel_tools": false,
1654
1615
  "vision_support": true,
@@ -1726,7 +1687,7 @@
1726
1687
  },
1727
1688
  "granite3.3:2b": {
1728
1689
  "max_output_tokens": 8192,
1729
- "tool_support": "prompted",
1690
+ "tool_support": "native",
1730
1691
  "structured_output": "prompted",
1731
1692
  "parallel_tools": false,
1732
1693
  "vision_support": false,
@@ -1741,7 +1702,7 @@
1741
1702
  },
1742
1703
  "granite3.3:8b": {
1743
1704
  "max_output_tokens": 8192,
1744
- "tool_support": "prompted",
1705
+ "tool_support": "native",
1745
1706
  "structured_output": "prompted",
1746
1707
  "parallel_tools": false,
1747
1708
  "vision_support": false,
@@ -1767,7 +1728,7 @@
1767
1728
  "aliases": [
1768
1729
  "google/embeddinggemma-300m"
1769
1730
  ],
1770
- "max_tokens": 0,
1731
+ "max_tokens": 8192,
1771
1732
  "model_type": "embedding"
1772
1733
  },
1773
1734
  "blip-image-captioning-base": {
@@ -1793,7 +1754,7 @@
1793
1754
  "aliases": [
1794
1755
  "Salesforce/blip-image-captioning-base"
1795
1756
  ],
1796
- "max_tokens": 512
1757
+ "max_tokens": 2048
1797
1758
  },
1798
1759
  "glyph": {
1799
1760
  "max_output_tokens": 8192,
@@ -2041,7 +2002,7 @@
2041
2002
  "max_image_tokens": 6400
2042
2003
  },
2043
2004
  "minimax-m2": {
2044
- "max_output_tokens": 8192,
2005
+ "max_output_tokens": 131072,
2045
2006
  "tool_support": "native",
2046
2007
  "structured_output": "native",
2047
2008
  "parallel_tools": true,
@@ -2054,8 +2015,8 @@
2054
2015
  "total_parameters": "230B",
2055
2016
  "thinking_paradigm": "interleaved_thinking",
2056
2017
  "thinking_format": "<think>...</think>",
2057
- "notes": "MiniMax M2 MoE model optimized for coding and agentic workflows. Industry-leading 204K token context window. Uses interleaved thinking with <think> tags for reasoning. 10B active parameters from 230B total. Achieves strong performance on SWE-Bench and Terminal-Bench tasks. Supports complete tool calling for agent workflows.",
2058
- "source": "MiniMax official docs (minimax-m2.org, HuggingFace, GitHub)",
2018
+ "notes": "MiniMax M2 open-source MoE model (230B total/10B active) optimized for coding and agentic workflows. Industry-leading 204K token context window with 131K output capacity. Uses interleaved thinking with <think> tags for reasoning. Achieves strong performance on SWE-Bench and Terminal-Bench tasks. Ranked #5 on Artificial Analysis Intelligence Index. Efficient deployment at up to 8% cost of comparable models. Supports complete tool calling for agent workflows. Runs seamlessly on 8xH100 setup using vLLM.",
2019
+ "source": "MiniMax official docs (HuggingFace MiniMaxAI/MiniMax-M2, Microsoft Azure AI Foundry blog)",
2059
2020
  "canonical_name": "minimax-m2",
2060
2021
  "aliases": [
2061
2022
  "MiniMaxAI/MiniMax-M2",
@@ -2063,11 +2024,703 @@
2063
2024
  "mlx-community/MiniMax-M2",
2064
2025
  "unsloth/MiniMax-M2-GGUF",
2065
2026
  "minimax-m2-230b",
2066
- "minimax-m2-10b-active"
2027
+ "minimax-m2-10b-active",
2028
+ "minimax/minimax-m2"
2067
2029
  ],
2068
2030
  "max_tokens": 208896,
2069
2031
  "release_date": "2025-01",
2032
+ "license": "Apache-2.0",
2033
+ "inference_parameters": {
2034
+ "temperature": 1.0,
2035
+ "top_p": 0.95,
2036
+ "top_k": 40
2037
+ },
2038
+ "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax."
2039
+ },
2040
+ "minimax-m2.1": {
2041
+ "max_output_tokens": 131072,
2042
+ "tool_support": "native",
2043
+ "structured_output": "native",
2044
+ "parallel_tools": true,
2045
+ "vision_support": false,
2046
+ "audio_support": false,
2047
+ "video_support": false,
2048
+ "thinking_support": true,
2049
+ "architecture": "mixture_of_experts",
2050
+ "active_parameters": "10B",
2051
+ "total_parameters": "229B",
2052
+ "tensor_type": "FP8",
2053
+ "thinking_paradigm": "interleaved_thinking",
2054
+ "thinking_format": "<think>...</think>",
2055
+ "agentic_coding": true,
2056
+ "notes": "MiniMax M2.1 enhanced MoE model (229B total/10B active) optimized for advanced coding, agentic workflows, and full-stack development. 200K token context window with massive 128K output capacity. Significant improvements over M2 in multilingual software engineering (SWE-bench Multilingual: 72.5%), achieving performance close to Claude Opus 4.5. Excels at full-stack development with VIBE average of 88.6 (Web: 91.5, Android: 89.7). Uses interleaved thinking with <think> tags. Achieves 74.0% on SWE-bench Verified, 47.9% on Terminal-bench 2.0. Supports complete native tool calling for agent workflows.",
2057
+ "source": "MiniMax official docs (platform.minimax.io, HuggingFace MiniMaxAI/MiniMax-M2.1)",
2058
+ "canonical_name": "minimax-m2.1",
2059
+ "aliases": [
2060
+ "MiniMaxAI/MiniMax-M2.1",
2061
+ "minimaxai/minimax-m2.1",
2062
+ "minimax-m2.1-229b",
2063
+ "minimax-m2.1-10b-active",
2064
+ "minimax/minimax-m2.1"
2065
+ ],
2066
+ "max_tokens": 204800,
2067
+ "release_date": "2024-12",
2068
+ "license": "Modified-MIT",
2069
+ "arxiv": "2509.06501",
2070
+ "benchmarks": {
2071
+ "SWE-bench Verified": 74.0,
2072
+ "Multi-SWE-bench": 49.4,
2073
+ "SWE-bench Multilingual": 72.5,
2074
+ "Terminal-bench 2.0": 47.9,
2075
+ "SWE-bench Verified (Droid)": 71.3,
2076
+ "SWE-bench Verified (mini-swe-agent)": 67.0,
2077
+ "SWT-bench": 69.3,
2078
+ "SWE-Perf": 3.1,
2079
+ "SWE-Review": 8.9,
2080
+ "OctoCodingbench": 26.1,
2081
+ "VIBE Average": 88.6,
2082
+ "VIBE-Web": 91.5,
2083
+ "VIBE-Android": 89.7,
2084
+ "VIBE-Simulation": 87.1,
2085
+ "VIBE-iOS": 88.0,
2086
+ "VIBE-Backend": 86.7,
2087
+ "Toolathlon": 43.5,
2088
+ "BrowseComp": 47.4,
2089
+ "BrowseComp (context management)": 62.0,
2090
+ "AIME25": 83.0,
2091
+ "MMLU-Pro": 88.0,
2092
+ "GPQA-D": 83.0,
2093
+ "HLE w/o tools": 22.2,
2094
+ "LCB": 81.0,
2095
+ "SciCode": 41.0,
2096
+ "IFBench": 70.0,
2097
+ "AA-LCR": 62.0,
2098
+ "τ²-Bench Telecom": 87.0
2099
+ },
2100
+ "inference_parameters": {
2101
+ "temperature": 1.0,
2102
+ "top_p": 0.95,
2103
+ "top_k": 40
2104
+ },
2105
+ "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
2106
+ },
2107
+ "glm-4.6v": {
2108
+ "max_output_tokens": 16384,
2109
+ "tool_support": "native",
2110
+ "structured_output": "native",
2111
+ "parallel_tools": true,
2112
+ "vision_support": true,
2113
+ "audio_support": false,
2114
+ "video_support": false,
2115
+ "thinking_support": true,
2116
+ "thinking_output_field": "reasoning_content",
2117
+ "image_resolutions": [
2118
+ "up to 4096x4096"
2119
+ ],
2120
+ "max_image_resolution": "4096x4096",
2121
+ "architecture": "mixture_of_experts",
2122
+ "total_parameters": "106B",
2123
+ "image_tokenization_method": "glm_vision_encoder",
2124
+ "adaptive_resolution": true,
2125
+ "aspect_ratio_support": "arbitrary",
2126
+ "native_function_calling": true,
2127
+ "interleaved_generation": true,
2128
+ "document_understanding": true,
2129
+ "frontend_replication": true,
2130
+ "tool_calling_format": "glm_xml",
2131
+ "tool_calling_parser": "glm46v",
2132
+ "output_wrappers": {
2133
+ "start": "<|begin_of_box|>",
2134
+ "end": "<|end_of_box|>"
2135
+ },
2136
+ "thinking_control": "/nothink",
2137
+ "thinking_tags": ["<think>", "</think>"],
2138
+ "notes": "GLM-4.6V foundation model (106B params) for cloud deployment. Native multimodal function calling with vision-driven tool use using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Supports interleaved image-text generation, 128K context, multimodal document understanding, and frontend replication from screenshots. Generates reasoning in 'reasoning_content' field or <think></think> tags. Achieves SoTA performance in visual understanding among similar parameter scales. Thinking can be disabled with '/nothink' suffix in user message. See: https://github.com/zai-org/GLM-V",
2139
+ "source": "HuggingFace zai-org/GLM-4.6V and GLM-V GitHub",
2140
+ "canonical_name": "glm-4.6v",
2141
+ "aliases": [
2142
+ "zai-org/GLM-4.6V",
2143
+ "zai-org/GLM-4.6V-FP8",
2144
+ "glm-4.6v-106b",
2145
+ "glm-4.6v-fp8"
2146
+ ],
2147
+ "max_tokens": 128000,
2148
+ "release_date": "2025-05-07",
2149
+ "arxiv": "2507.01006",
2150
+ "license": "MIT"
2151
+ },
2152
+ "glm-4.6v-flash": {
2153
+ "max_output_tokens": 8192,
2154
+ "tool_support": "native",
2155
+ "structured_output": "native",
2156
+ "parallel_tools": true,
2157
+ "vision_support": true,
2158
+ "audio_support": false,
2159
+ "video_support": false,
2160
+ "thinking_support": true,
2161
+ "thinking_output_field": "reasoning_content",
2162
+ "image_resolutions": [
2163
+ "up to 4096x4096"
2164
+ ],
2165
+ "max_image_resolution": "4096x4096",
2166
+ "total_parameters": "9B",
2167
+ "image_tokenization_method": "glm_vision_encoder",
2168
+ "adaptive_resolution": true,
2169
+ "aspect_ratio_support": "arbitrary",
2170
+ "native_function_calling": true,
2171
+ "interleaved_generation": true,
2172
+ "document_understanding": true,
2173
+ "frontend_replication": true,
2174
+ "tool_calling_format": "glm_xml",
2175
+ "tool_calling_parser": "glm46v",
2176
+ "output_wrappers": {
2177
+ "start": "<|begin_of_box|>",
2178
+ "end": "<|end_of_box|>"
2179
+ },
2180
+ "thinking_control": "/nothink",
2181
+ "thinking_tags": ["<think>", "</think>"],
2182
+ "notes": "GLM-4.6V-Flash lightweight model (9B params) optimized for local deployment and low-latency applications. Maintains native multimodal function calling using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Generates reasoning in 'reasoning_content' field or <think></think> tags. Ideal for edge and resource-constrained environments while preserving core GLM-4.6V capabilities. Thinking can be disabled with '/nothink' suffix. See: https://github.com/zai-org/GLM-V",
2183
+ "source": "HuggingFace zai-org/GLM-4.6V-Flash and GLM-V GitHub",
2184
+ "canonical_name": "glm-4.6v-flash",
2185
+ "aliases": [
2186
+ "zai-org/GLM-4.6V-Flash",
2187
+ "zai-org/GLM-4.6V-Flash-FP8",
2188
+ "glm-4.6v-9b",
2189
+ "glm-4.6v-flash-fp8"
2190
+ ],
2191
+ "max_tokens": 128000,
2192
+ "release_date": "2025-05-07",
2193
+ "arxiv": "2507.01006",
2194
+ "license": "MIT"
2195
+ },
2196
+ "glm-4.7": {
2197
+ "max_output_tokens": 32768,
2198
+ "tool_support": "native",
2199
+ "structured_output": "native",
2200
+ "parallel_tools": true,
2201
+ "vision_support": false,
2202
+ "audio_support": false,
2203
+ "video_support": false,
2204
+ "thinking_support": true,
2205
+ "architecture": "mixture_of_experts",
2206
+ "total_parameters": "358B",
2207
+ "thinking_paradigm": "multi_mode",
2208
+ "thinking_modes": ["interleaved_thinking", "preserved_thinking", "turn_level_thinking"],
2209
+ "native_function_calling": true,
2210
+ "agentic_coding": true,
2211
+ "terminal_tasks": true,
2212
+ "web_browsing": true,
2213
+ "tool_calling_parser": "glm47",
2214
+ "reasoning_parser": "glm45",
2215
+ "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (τ²-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
2216
+ "source": "HuggingFace zai-org/GLM-4.7 and GLM technical blog",
2217
+ "canonical_name": "glm-4.7",
2218
+ "aliases": [
2219
+ "zai-org/GLM-4.7",
2220
+ "zai-org/GLM-4.7-FP8",
2221
+ "glm-4.7-fp8",
2222
+ "glm-4.7-358b"
2223
+ ],
2224
+ "max_tokens": 128000,
2225
+ "release_date": "2025-06",
2226
+ "arxiv": "2508.06471",
2227
+ "license": "MIT",
2228
+ "benchmarks": {
2229
+ "SWE-bench Verified": 73.8,
2230
+ "SWE-bench Multilingual": 66.7,
2231
+ "Terminal Bench Hard": 33.3,
2232
+ "Terminal Bench 2.0": 41.0,
2233
+ "HLE": 24.8,
2234
+ "HLE (w/ Tools)": 42.8,
2235
+ "AIME 2025": 95.7,
2236
+ "HMMT Feb. 2025": 97.1,
2237
+ "HMMT Nov. 2025": 93.5,
2238
+ "IMOAnswerBench": 82.0,
2239
+ "LiveCodeBench-v6": 84.9,
2240
+ "BrowseComp": 52.0,
2241
+ "BrowseComp (w/ Context Manage)": 67.5,
2242
+ "BrowseComp-Zh": 66.6,
2243
+ "τ²-Bench": 87.4,
2244
+ "MMLU-Pro": 84.3,
2245
+ "GPQA-Diamond": 85.7
2246
+ },
2247
+ "inference_parameters": {
2248
+ "temperature": 1.0,
2249
+ "top_p": 0.95,
2250
+ "enable_thinking": true,
2251
+ "clear_thinking": false
2252
+ }
2253
+ },
2254
+ "devstral-small-2-24b": {
2255
+ "max_output_tokens": 16384,
2256
+ "tool_support": "native",
2257
+ "structured_output": "native",
2258
+ "parallel_tools": true,
2259
+ "vision_support": false,
2260
+ "audio_support": false,
2261
+ "video_support": false,
2262
+ "total_parameters": "24B",
2263
+ "architecture": "mistral3",
2264
+ "tensor_type": "FP8",
2265
+ "agentic_coding": true,
2266
+ "tool_calling_parser": "mistral",
2267
+ "notes": "Devstral Small 2 agentic LLM for software engineering (24B params, FP8). Excels at tool use, codebase exploration, multi-file edits. 256K context. Strong performance on SWE-bench Verified (68.0%), Terminal-Bench (22.5%), and SWE-bench Multilingual (55.7%). Improved generalization over predecessors. Uses Mistral 3 architecture with rope-scaling and Scalable-Softmax.",
2268
+ "source": "Mistral AI Devstral 2 docs and HuggingFace",
2269
+ "canonical_name": "devstral-small-2-24b",
2270
+ "aliases": [
2271
+ "mistralai/Devstral-Small-2-24B-Instruct-2512",
2272
+ "devstral-small-2",
2273
+ "devstral-small-2-24b-instruct"
2274
+ ],
2275
+ "max_tokens": 262144,
2276
+ "release_date": "2025-12",
2277
+ "license": "Apache-2.0"
2278
+ },
2279
+ "devstral-2-123b": {
2280
+ "max_output_tokens": 16384,
2281
+ "tool_support": "native",
2282
+ "structured_output": "native",
2283
+ "parallel_tools": true,
2284
+ "vision_support": false,
2285
+ "audio_support": false,
2286
+ "video_support": false,
2287
+ "total_parameters": "123B",
2288
+ "architecture": "ministral3",
2289
+ "tensor_type": "FP8",
2290
+ "agentic_coding": true,
2291
+ "tool_calling_parser": "mistral",
2292
+ "notes": "Devstral 2 flagship agentic LLM for software engineering (123B params, FP8). Excels at tool use, codebase exploration, multi-file edits. 256K context. Top-tier performance on SWE-bench Verified (72.2%), Terminal-Bench (32.6%), and SWE-bench Multilingual (61.3%). Improved generalization and better performance than predecessors.",
2293
+ "source": "Mistral AI Devstral 2 docs and HuggingFace",
2294
+ "canonical_name": "devstral-2-123b",
2295
+ "aliases": [
2296
+ "mistralai/Devstral-2-123B-Instruct-2512",
2297
+ "devstral-2",
2298
+ "devstral-2-123b-instruct"
2299
+ ],
2300
+ "max_tokens": 262144,
2301
+ "release_date": "2025-12",
2302
+ "license": "Modified-MIT"
2303
+ },
2304
+ "qwen3-235b-a22b-2507": {
2305
+ "max_output_tokens": 16384,
2306
+ "tool_support": "native",
2307
+ "structured_output": "native",
2308
+ "parallel_tools": true,
2309
+ "vision_support": false,
2310
+ "audio_support": false,
2311
+ "video_support": false,
2312
+ "thinking_support": false,
2313
+ "architecture": "mixture_of_experts",
2314
+ "total_parameters": "235B",
2315
+ "active_parameters": "22B",
2316
+ "experts": 128,
2317
+ "experts_activated": 8,
2318
+ "tensor_type": "BF16",
2319
+ "notes": "Qwen3-235B-A22B-Instruct-2507 non-thinking mode (235B total/22B active, 128 experts/8 activated). Significant improvements in instruction following, reasoning, math, science, coding, tool usage. Enhanced 256K long-context understanding, extendable to 1M tokens with DCA+MInference. Substantial gains in multilingual knowledge. Better alignment for subjective tasks.",
2320
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2321
+ "canonical_name": "qwen3-235b-a22b-2507",
2322
+ "aliases": [
2323
+ "Qwen/Qwen3-235B-A22B-Instruct-2507",
2324
+ "qwen3-235b-a22b-instruct-2507",
2325
+ "qwen3-235b-2507"
2326
+ ],
2327
+ "max_tokens": 262144,
2328
+ "release_date": "2025-07",
2329
+ "arxiv": "2505.09388",
2330
+ "license": "Apache-2.0"
2331
+ },
2332
+ "qwen3-235b-a22b-2507-fp8": {
2333
+ "max_output_tokens": 16384,
2334
+ "tool_support": "native",
2335
+ "structured_output": "native",
2336
+ "parallel_tools": true,
2337
+ "vision_support": false,
2338
+ "audio_support": false,
2339
+ "video_support": false,
2340
+ "thinking_support": false,
2341
+ "architecture": "mixture_of_experts",
2342
+ "total_parameters": "235B",
2343
+ "active_parameters": "22B",
2344
+ "experts": 128,
2345
+ "experts_activated": 8,
2346
+ "tensor_type": "FP8",
2347
+ "quantization_method": "fine_grained_fp8_block128",
2348
+ "notes": "FP8-quantized version of Qwen3-235B-A22B-Instruct-2507. Fine-grained fp8 quantization with block size 128. Same capabilities as BF16 version but more efficient inference. Note: transformers has issues with fine-grained fp8 in distributed inference (may need CUDA_LAUNCH_BLOCKING=1).",
2349
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2350
+ "canonical_name": "qwen3-235b-a22b-2507-fp8",
2351
+ "aliases": [
2352
+ "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
2353
+ "qwen3-235b-a22b-instruct-2507-fp8",
2354
+ "qwen3-235b-2507-fp8"
2355
+ ],
2356
+ "max_tokens": 262144,
2357
+ "release_date": "2025-07",
2358
+ "arxiv": "2505.09388",
2359
+ "license": "Apache-2.0"
2360
+ },
2361
+ "granite-4.0-h-tiny": {
2362
+ "max_output_tokens": 16384,
2363
+ "tool_support": "native",
2364
+ "structured_output": "native",
2365
+ "parallel_tools": true,
2366
+ "vision_support": false,
2367
+ "audio_support": false,
2368
+ "video_support": false,
2369
+ "architecture": "granitemoehybrid",
2370
+ "total_parameters": "7B",
2371
+ "active_parameters": "1B",
2372
+ "experts": 64,
2373
+ "experts_activated": 6,
2374
+ "expert_hidden_size": 512,
2375
+ "shared_expert_hidden_size": 1024,
2376
+ "attention_layers": 4,
2377
+ "mamba2_layers": 36,
2378
+ "mamba2_state_size": 128,
2379
+ "embedding_size": 1536,
2380
+ "tensor_type": "BF16",
2381
+ "notes": "Granite 4.0-H-Tiny hybrid MoE model (7B total/1B active, 64 experts/6 activated). Combines 4 attention layers with 36 Mamba2 layers. 128K context. Enhanced tool-calling and instruction following. Strong performance on coding, math, and alignment tasks. Optimized for enterprise applications with improved IF capabilities.",
2382
+ "source": "IBM Granite 4.0 HuggingFace and technical report",
2383
+ "canonical_name": "granite-4.0-h-tiny",
2384
+ "aliases": [
2385
+ "ibm-granite/granite-4.0-h-tiny",
2386
+ "granite-4.0-h-tiny-moe",
2387
+ "granite-h-tiny"
2388
+ ],
2389
+ "max_tokens": 131072,
2390
+ "release_date": "2025-10-02",
2391
+ "license": "Apache-2.0"
2392
+ },
2393
+ "gpt-oss-20b": {
2394
+ "max_output_tokens": 8192,
2395
+ "tool_support": "native",
2396
+ "structured_output": "native",
2397
+ "parallel_tools": true,
2398
+ "vision_support": false,
2399
+ "audio_support": false,
2400
+ "video_support": false,
2401
+ "thinking_support": true,
2402
+ "thinking_budget": true,
2403
+ "architecture": "mixture_of_experts",
2404
+ "total_parameters": "21B",
2405
+ "active_parameters": "3.6B",
2406
+ "tensor_type": "BF16+U8",
2407
+ "quantization_method": "MXFP4",
2408
+ "response_format": "harmony",
2409
+ "reasoning_levels": ["low", "medium", "high"],
2410
+ "agentic_capabilities": true,
2411
+ "function_calling": true,
2412
+ "web_browsing": true,
2413
+ "python_execution": true,
2414
+ "fine_tunable": true,
2415
+ "notes": "OpenAI GPT-OSS 20B open-weight model (21B total/3.6B active). Designed for lower latency, local, and specialized use cases. MXFP4 quantization enables running within 16GB memory. Configurable reasoning effort (low/medium/high). Full chain-of-thought access. Requires harmony response format. Apache 2.0 license for commercial use.",
2416
+ "source": "OpenAI GPT-OSS HuggingFace and arXiv:2508.10925",
2417
+ "canonical_name": "gpt-oss-20b",
2418
+ "aliases": [
2419
+ "openai/gpt-oss-20b",
2420
+ "gpt-oss:20b"
2421
+ ],
2422
+ "max_tokens": 128000,
2423
+ "release_date": "2025-08",
2424
+ "arxiv": "2508.10925",
2425
+ "license": "Apache-2.0"
2426
+ },
2427
+ "gpt-oss-120b": {
2428
+ "max_output_tokens": 8192,
2429
+ "tool_support": "native",
2430
+ "structured_output": "native",
2431
+ "parallel_tools": true,
2432
+ "vision_support": false,
2433
+ "audio_support": false,
2434
+ "video_support": false,
2435
+ "thinking_support": true,
2436
+ "thinking_budget": true,
2437
+ "architecture": "mixture_of_experts",
2438
+ "total_parameters": "117B",
2439
+ "active_parameters": "5.1B",
2440
+ "tensor_type": "BF16+U8",
2441
+ "quantization_method": "MXFP4",
2442
+ "response_format": "harmony",
2443
+ "reasoning_levels": ["low", "medium", "high"],
2444
+ "agentic_capabilities": true,
2445
+ "function_calling": true,
2446
+ "web_browsing": true,
2447
+ "python_execution": true,
2448
+ "fine_tunable": true,
2449
+ "gpu_memory_required": "80GB",
2450
+ "notes": "OpenAI GPT-OSS 120B open-weight model (117B total/5.1B active). Production-ready for general purpose, high reasoning use cases. MXFP4 quantization enables single 80GB GPU deployment (H100/MI300X). Configurable reasoning effort (low/medium/high). Full chain-of-thought access. Requires harmony response format. Apache 2.0 license for commercial use.",
2451
+ "source": "OpenAI GPT-OSS HuggingFace and arXiv:2508.10925",
2452
+ "canonical_name": "gpt-oss-120b",
2453
+ "aliases": [
2454
+ "openai/gpt-oss-120b",
2455
+ "gpt-oss:120b"
2456
+ ],
2457
+ "max_tokens": 128000,
2458
+ "release_date": "2025-08",
2459
+ "arxiv": "2508.10925",
2460
+ "license": "Apache-2.0"
2461
+ },
2462
+ "qwen3-vl-2b": {
2463
+ "max_output_tokens": 8192,
2464
+ "tool_support": "native",
2465
+ "structured_output": "native",
2466
+ "parallel_tools": true,
2467
+ "vision_support": true,
2468
+ "audio_support": false,
2469
+ "video_support": true,
2470
+ "image_resolutions": [
2471
+ "64x64 to 4096x4096"
2472
+ ],
2473
+ "max_image_resolution": "4096x4096",
2474
+ "image_patch_size": 16,
2475
+ "max_image_tokens": 24576,
2476
+ "pixel_grouping": "32x32",
2477
+ "image_tokenization_method": "patch_based_adaptive",
2478
+ "adaptive_resolution": true,
2479
+ "min_resolution": 64,
2480
+ "max_resolution": 4096,
2481
+ "vision_encoder": "ViT-based",
2482
+ "visual_agent": true,
2483
+ "visual_coding": true,
2484
+ "spatial_perception": true,
2485
+ "document_understanding": true,
2486
+ "ocr_languages": 32,
2487
+ "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
2488
+ "notes": "Qwen3-VL 2B dense model with 256K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
2489
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2490
+ "canonical_name": "qwen3-vl-2b",
2491
+ "aliases": [
2492
+ "Qwen/Qwen3-VL-2B-Instruct",
2493
+ "qwen3-vl-2b-instruct"
2494
+ ],
2495
+ "max_tokens": 262144,
2496
+ "release_date": "2025-05",
2497
+ "arxiv": "2505.09388",
2498
+ "license": "Apache-2.0"
2499
+ },
2500
+ "qwen3-vl-4b": {
2501
+ "max_output_tokens": 8192,
2502
+ "tool_support": "native",
2503
+ "structured_output": "native",
2504
+ "parallel_tools": true,
2505
+ "vision_support": true,
2506
+ "audio_support": false,
2507
+ "video_support": true,
2508
+ "image_resolutions": [
2509
+ "64x64 to 4096x4096"
2510
+ ],
2511
+ "max_image_resolution": "4096x4096",
2512
+ "image_patch_size": 16,
2513
+ "max_image_tokens": 24576,
2514
+ "pixel_grouping": "32x32",
2515
+ "image_tokenization_method": "patch_based_adaptive",
2516
+ "adaptive_resolution": true,
2517
+ "min_resolution": 64,
2518
+ "max_resolution": 4096,
2519
+ "vision_encoder": "ViT-based",
2520
+ "visual_agent": true,
2521
+ "visual_coding": true,
2522
+ "spatial_perception": true,
2523
+ "document_understanding": true,
2524
+ "ocr_languages": 32,
2525
+ "total_parameters": "4.83B",
2526
+ "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
2527
+ "notes": "Qwen3-VL 4B dense model (4.83B params) with 256K context, optimized for LMStudio. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding. FP8 checkpoints available.",
2528
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2529
+ "canonical_name": "qwen3-vl-4b",
2530
+ "aliases": [
2531
+ "Qwen/Qwen3-VL-4B-Instruct",
2532
+ "qwen3-vl-4b-instruct",
2533
+ "qwen/qwen3-vl-4b"
2534
+ ],
2535
+ "max_tokens": 262144,
2536
+ "release_date": "2025-05",
2537
+ "arxiv": "2505.09388",
2070
2538
  "license": "Apache-2.0"
2539
+ },
2540
+ "qwen3-vl-8b": {
2541
+ "max_output_tokens": 8192,
2542
+ "tool_support": "native",
2543
+ "structured_output": "native",
2544
+ "parallel_tools": true,
2545
+ "vision_support": true,
2546
+ "audio_support": false,
2547
+ "video_support": true,
2548
+ "image_resolutions": [
2549
+ "64x64 to 4096x4096"
2550
+ ],
2551
+ "max_image_resolution": "4096x4096",
2552
+ "image_patch_size": 16,
2553
+ "max_image_tokens": 24576,
2554
+ "pixel_grouping": "32x32",
2555
+ "image_tokenization_method": "patch_based_adaptive",
2556
+ "adaptive_resolution": true,
2557
+ "min_resolution": 64,
2558
+ "max_resolution": 4096,
2559
+ "vision_encoder": "ViT-based",
2560
+ "visual_agent": true,
2561
+ "visual_coding": true,
2562
+ "spatial_perception": true,
2563
+ "document_understanding": true,
2564
+ "ocr_languages": 32,
2565
+ "total_parameters": "8.77B",
2566
+ "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
2567
+ "notes": "Qwen3-VL 8B dense model (8.77B params) with 256K context, optimized for LMStudio. Most powerful vision-language model in Qwen series. Visual agent for GUI operation, visual coding, advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. FP8 checkpoints available.",
2568
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2569
+ "canonical_name": "qwen3-vl-8b",
2570
+ "aliases": [
2571
+ "Qwen/Qwen3-VL-8B-Instruct",
2572
+ "qwen3-vl-8b-instruct",
2573
+ "qwen/qwen3-vl-8b"
2574
+ ],
2575
+ "max_tokens": 262144,
2576
+ "release_date": "2025-05",
2577
+ "arxiv": "2505.09388",
2578
+ "license": "Apache-2.0"
2579
+ },
2580
+ "qwen3-vl-30b-a3b": {
2581
+ "max_output_tokens": 8192,
2582
+ "tool_support": "native",
2583
+ "structured_output": "native",
2584
+ "parallel_tools": true,
2585
+ "vision_support": true,
2586
+ "audio_support": false,
2587
+ "video_support": true,
2588
+ "image_resolutions": [
2589
+ "64x64 to 4096x4096"
2590
+ ],
2591
+ "max_image_resolution": "4096x4096",
2592
+ "image_patch_size": 16,
2593
+ "max_image_tokens": 24576,
2594
+ "pixel_grouping": "32x32",
2595
+ "image_tokenization_method": "patch_based_adaptive",
2596
+ "adaptive_resolution": true,
2597
+ "min_resolution": 64,
2598
+ "max_resolution": 4096,
2599
+ "vision_encoder": "ViT-based",
2600
+ "visual_agent": true,
2601
+ "visual_coding": true,
2602
+ "spatial_perception": true,
2603
+ "document_understanding": true,
2604
+ "ocr_languages": 32,
2605
+ "architecture": "mixture_of_experts",
2606
+ "total_parameters": "30.5B",
2607
+ "active_parameters": "3.3B",
2608
+ "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
2609
+ "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model in the series. 128K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
2610
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2611
+ "canonical_name": "qwen3-vl-30b-a3b",
2612
+ "aliases": [
2613
+ "Qwen/Qwen3-VL-30B-A3B-Instruct",
2614
+ "qwen3-vl-30b-a3b-instruct",
2615
+ "qwen/qwen3-vl-30b"
2616
+ ],
2617
+ "max_tokens": 131072,
2618
+ "release_date": "2025-05",
2619
+ "arxiv": "2505.09388",
2620
+ "license": "Apache-2.0"
2621
+ },
2622
+ "qwen3-vl-235b-a22b": {
2623
+ "max_output_tokens": 8192,
2624
+ "tool_support": "native",
2625
+ "structured_output": "native",
2626
+ "parallel_tools": true,
2627
+ "vision_support": true,
2628
+ "audio_support": false,
2629
+ "video_support": true,
2630
+ "image_resolutions": [
2631
+ "64x64 to 4096x4096"
2632
+ ],
2633
+ "max_image_resolution": "4096x4096",
2634
+ "image_patch_size": 16,
2635
+ "max_image_tokens": 24576,
2636
+ "pixel_grouping": "32x32",
2637
+ "image_tokenization_method": "patch_based_adaptive",
2638
+ "adaptive_resolution": true,
2639
+ "min_resolution": 64,
2640
+ "max_resolution": 4096,
2641
+ "vision_encoder": "ViT-based",
2642
+ "visual_agent": true,
2643
+ "visual_coding": true,
2644
+ "spatial_perception": true,
2645
+ "document_understanding": true,
2646
+ "ocr_languages": 32,
2647
+ "architecture": "mixture_of_experts",
2648
+ "total_parameters": "235B",
2649
+ "active_parameters": "22B",
2650
+ "experts": 128,
2651
+ "experts_activated": 8,
2652
+ "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
2653
+ "notes": "Qwen3-VL 235B MoE model (235B total/22B active, 128 experts/8 activated), flagship vision model. 256K context expandable to 1M. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs. Superior visual perception and reasoning.",
2654
+ "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2655
+ "canonical_name": "qwen3-vl-235b-a22b",
2656
+ "aliases": [
2657
+ "Qwen/Qwen3-VL-235B-A22B-Instruct",
2658
+ "qwen3-vl-235b-a22b-instruct"
2659
+ ],
2660
+ "max_tokens": 262144,
2661
+ "release_date": "2025-05",
2662
+ "arxiv": "2505.09388",
2663
+ "license": "Apache-2.0"
2664
+ },
2665
+ "nemotron-3-nano-30b-a3b": {
2666
+ "max_output_tokens": 8192,
2667
+ "tool_support": "native",
2668
+ "structured_output": "native",
2669
+ "parallel_tools": true,
2670
+ "vision_support": false,
2671
+ "audio_support": false,
2672
+ "video_support": false,
2673
+ "thinking_support": true,
2674
+ "thinking_budget": false,
2675
+ "architecture": "nemotron_hybrid_moe",
2676
+ "total_parameters": "30B",
2677
+ "active_parameters": "3.5B",
2678
+ "experts": 128,
2679
+ "experts_activated": 6,
2680
+ "shared_experts": 1,
2681
+ "attention_layers": 6,
2682
+ "mamba2_layers": 23,
2683
+ "tensor_type": "BF16",
2684
+ "reasoning_paradigm": "unified_reasoning_response",
2685
+ "reasoning_configurable": true,
2686
+ "agentic_capabilities": true,
2687
+ "function_calling": true,
2688
+ "tool_calling_format": "json",
2689
+ "languages": ["English", "German", "Spanish", "French", "Italian", "Japanese"],
2690
+ "notes": "NVIDIA Nemotron-3-Nano hybrid MoE model (30B total/3.5B active, 128 experts/6 activated + 1 shared). Combines 23 Mamba-2 layers with 6 Attention layers. Unified model for reasoning and non-reasoning tasks with configurable reasoning mode. Generates reasoning trace before final response. 256K context extendable to 1M with YaRN. Strong performance on AIME25 (99.2% with tools), SWE-Bench (38.8%), MiniF2F (50.0% pass@1). Native tool calling via chatml-function-calling format. Commercial use ready.",
2691
+ "source": "NVIDIA Nemotron HuggingFace and technical report",
2692
+ "canonical_name": "nemotron-3-nano-30b-a3b",
2693
+ "aliases": [
2694
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
2695
+ "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
2696
+ "nvidia/nemotron-3-nano",
2697
+ "nemotron-3-nano",
2698
+ "nemotron-nano-30b"
2699
+ ],
2700
+ "max_tokens": 262144,
2701
+ "release_date": "2025-12",
2702
+ "license": "nvidia-open-model-license",
2703
+ "benchmarks": {
2704
+ "MMLU-Pro": 78.3,
2705
+ "AIME25 (no tools)": 89.1,
2706
+ "AIME25 (with tools)": 99.2,
2707
+ "GPQA (no tools)": 73.0,
2708
+ "GPQA (with tools)": 75.0,
2709
+ "LiveCodeBench v6": 68.3,
2710
+ "SciCode (subtask)": 33.3,
2711
+ "HLE (no tools)": 10.6,
2712
+ "HLE (with tools)": 15.5,
2713
+ "MiniF2F pass@1": 50.0,
2714
+ "MiniF2F pass@32": 79.9,
2715
+ "Terminal Bench (hard subset)": 8.5,
2716
+ "SWE-Bench (OpenHands)": 38.8,
2717
+ "TauBench V2 (Average)": 49.0,
2718
+ "BFCL v4": 53.8,
2719
+ "IFBench (prompt)": 71.5,
2720
+ "Scale AI Multi Challenge": 38.5,
2721
+ "Arena-Hard-V2 (Hard Prompt)": 72.1,
2722
+ "Arena-Hard-V2 (Average)": 67.7
2723
+ }
2071
2724
  }
2072
2725
  },
2073
2726
  "tool_support_levels": {
@@ -2166,4 +2819,4 @@
2166
2819
  "fim_support": false,
2167
2820
  "max_tokens": 16384
2168
2821
  }
2169
- }
2822
+ }
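The most common change in this release is the tool_support level of individual models moving from "prompted" to "native" (and, for the o1 family, from "none" to "native" or "prompted"). A minimal sketch of how a caller might branch on that level — the helper is illustrative and assumes only the three values that appear in this file:

```python
def plan_tool_strategy(entry):
    # "native": the provider API accepts structured tool definitions directly.
    # "prompted": tools are described in the prompt and calls are parsed from the reply.
    # "none": the model is not expected to call tools at all.
    level = entry.get("tool_support", "none")
    if level == "native":
        return "pass tool schemas through the provider's tool-calling API"
    if level == "prompted":
        return "inject tool descriptions into the prompt and parse tool calls from text"
    return "disable tools for this model"

print(plan_tool_strategy({"tool_support": "native"}))   # native tool-calling path
print(plan_tool_strategy({"tool_support": "prompted"})) # prompt-based fallback
```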