abstractcore 2.9.0__py3-none-any.whl → 2.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.0.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.0.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.0.dist-info/METADATA +0 -1189
  81. abstractcore-2.9.0.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.0.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.0.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,9 @@
11
11
  "source": "OpenAI official docs",
12
12
  "canonical_name": "gpt-4",
13
13
  "aliases": [],
14
- "max_tokens": 128000
14
+ "max_tokens": 128000,
15
+ "video_support": false,
16
+ "video_input_mode": "none"
15
17
  },
16
18
  "gpt-4-turbo": {
17
19
  "max_output_tokens": 4096,
@@ -30,7 +32,9 @@
30
32
  "aliases": [
31
33
  "gpt-4-turbo-preview"
32
34
  ],
33
- "max_tokens": 128000
35
+ "max_tokens": 128000,
36
+ "video_support": false,
37
+ "video_input_mode": "frames"
34
38
  },
35
39
  "gpt-4-turbo-with-vision": {
36
40
  "max_output_tokens": 4096,
@@ -50,7 +54,9 @@
50
54
  "gpt-4-turbo-vision",
51
55
  "gpt-4-vision-preview"
52
56
  ],
53
- "max_tokens": 128000
57
+ "max_tokens": 128000,
58
+ "video_support": false,
59
+ "video_input_mode": "frames"
54
60
  },
55
61
  "gpt-4o": {
56
62
  "max_output_tokens": 16384,
@@ -59,8 +65,8 @@
59
65
  "parallel_tools": true,
60
66
  "max_tools": -1,
61
67
  "vision_support": true,
62
- "audio_support": true,
63
- "video_support": true,
68
+ "audio_support": false,
69
+ "video_support": false,
64
70
  "image_resolutions": [
65
71
  "variable"
66
72
  ],
@@ -80,7 +86,8 @@
80
86
  "source": "OpenAI official docs 2025",
81
87
  "canonical_name": "gpt-4o",
82
88
  "aliases": [],
83
- "max_tokens": 128000
89
+ "max_tokens": 128000,
90
+ "video_input_mode": "frames"
84
91
  },
85
92
  "gpt-4o-long-output": {
86
93
  "max_output_tokens": 64000,
@@ -89,12 +96,14 @@
89
96
  "parallel_tools": true,
90
97
  "max_tools": -1,
91
98
  "vision_support": true,
92
- "audio_support": true,
99
+ "audio_support": false,
93
100
  "notes": "16x output capacity variant",
94
101
  "source": "OpenAI official docs",
95
102
  "canonical_name": "gpt-4o-long-output",
96
103
  "aliases": [],
97
- "max_tokens": 128000
104
+ "max_tokens": 128000,
105
+ "video_support": false,
106
+ "video_input_mode": "frames"
98
107
  },
99
108
  "gpt-4o-mini": {
100
109
  "max_output_tokens": 16000,
@@ -103,11 +112,13 @@
103
112
  "parallel_tools": true,
104
113
  "max_tools": -1,
105
114
  "vision_support": true,
106
- "audio_support": true,
115
+ "audio_support": false,
107
116
  "source": "OpenAI official docs",
108
117
  "canonical_name": "gpt-4o-mini",
109
118
  "aliases": [],
110
- "max_tokens": 128000
119
+ "max_tokens": 128000,
120
+ "video_support": false,
121
+ "video_input_mode": "frames"
111
122
  },
112
123
  "gpt-3.5-turbo": {
113
124
  "max_output_tokens": 4096,
@@ -120,7 +131,9 @@
120
131
  "source": "OpenAI official docs",
121
132
  "canonical_name": "gpt-3.5-turbo",
122
133
  "aliases": [],
123
- "max_tokens": 16385
134
+ "max_tokens": 16385,
135
+ "video_support": false,
136
+ "video_input_mode": "none"
124
137
  },
125
138
  "o1": {
126
139
  "max_output_tokens": 32768,
@@ -133,7 +146,10 @@
133
146
  "source": "OpenAI official docs",
134
147
  "canonical_name": "o1",
135
148
  "aliases": [],
136
- "max_tokens": 128000
149
+ "max_tokens": 128000,
150
+ "video_support": false,
151
+ "max_tools": -1,
152
+ "video_input_mode": "none"
137
153
  },
138
154
  "o1-mini": {
139
155
  "max_output_tokens": 65536,
@@ -145,7 +161,10 @@
145
161
  "source": "OpenAI official docs",
146
162
  "canonical_name": "o1-mini",
147
163
  "aliases": [],
148
- "max_tokens": 128000
164
+ "max_tokens": 128000,
165
+ "video_support": false,
166
+ "max_tools": -1,
167
+ "video_input_mode": "none"
149
168
  },
150
169
  "o3": {
151
170
  "max_output_tokens": 32768,
@@ -159,7 +178,9 @@
159
178
  "source": "OpenAI official docs",
160
179
  "canonical_name": "o3",
161
180
  "aliases": [],
162
- "max_tokens": 128000
181
+ "max_tokens": 128000,
182
+ "video_support": false,
183
+ "video_input_mode": "none"
163
184
  },
164
185
  "o3-mini": {
165
186
  "max_output_tokens": 32768,
@@ -173,7 +194,9 @@
173
194
  "source": "OpenAI official docs",
174
195
  "canonical_name": "o3-mini",
175
196
  "aliases": [],
176
- "max_tokens": 128000
197
+ "max_tokens": 128000,
198
+ "video_support": false,
199
+ "video_input_mode": "none"
177
200
  },
178
201
  "claude-3.5-sonnet": {
179
202
  "max_output_tokens": 8192,
@@ -196,7 +219,9 @@
196
219
  "source": "Anthropic official docs",
197
220
  "canonical_name": "claude-3.5-sonnet",
198
221
  "aliases": [],
199
- "max_tokens": 200000
222
+ "max_tokens": 200000,
223
+ "video_support": false,
224
+ "video_input_mode": "frames"
200
225
  },
201
226
  "claude-3.7-sonnet": {
202
227
  "max_output_tokens": 128000,
@@ -213,7 +238,9 @@
213
238
  "source": "Anthropic official docs",
214
239
  "canonical_name": "claude-3.7-sonnet",
215
240
  "aliases": [],
216
- "max_tokens": 200000
241
+ "max_tokens": 200000,
242
+ "video_support": false,
243
+ "video_input_mode": "frames"
217
244
  },
218
245
  "claude-3.5-haiku": {
219
246
  "max_output_tokens": 8192,
@@ -232,7 +259,9 @@
232
259
  "aliases": [
233
260
  "claude-3-5-haiku-20241022"
234
261
  ],
235
- "max_tokens": 200000
262
+ "max_tokens": 200000,
263
+ "video_support": false,
264
+ "video_input_mode": "frames"
236
265
  },
237
266
  "claude-3-opus": {
238
267
  "max_output_tokens": 4096,
@@ -248,7 +277,9 @@
248
277
  "source": "Anthropic official docs",
249
278
  "canonical_name": "claude-3-opus",
250
279
  "aliases": [],
251
- "max_tokens": 200000
280
+ "max_tokens": 200000,
281
+ "video_support": false,
282
+ "video_input_mode": "frames"
252
283
  },
253
284
  "claude-3-sonnet": {
254
285
  "max_output_tokens": 4096,
@@ -264,7 +295,9 @@
264
295
  "source": "Anthropic official docs",
265
296
  "canonical_name": "claude-3-sonnet",
266
297
  "aliases": [],
267
- "max_tokens": 200000
298
+ "max_tokens": 200000,
299
+ "video_support": false,
300
+ "video_input_mode": "frames"
268
301
  },
269
302
  "claude-3-haiku": {
270
303
  "max_output_tokens": 4096,
@@ -280,7 +313,9 @@
280
313
  "source": "Anthropic official docs",
281
314
  "canonical_name": "claude-3-haiku",
282
315
  "aliases": [],
283
- "max_tokens": 200000
316
+ "max_tokens": 200000,
317
+ "video_support": false,
318
+ "video_input_mode": "frames"
284
319
  },
285
320
  "claude-haiku-4-5": {
286
321
  "max_output_tokens": 64000,
@@ -300,7 +335,9 @@
300
335
  "claude-haiku-4-5-20251001",
301
336
  "anthropic/claude-haiku-4-5"
302
337
  ],
303
- "max_tokens": 200000
338
+ "max_tokens": 200000,
339
+ "video_support": false,
340
+ "video_input_mode": "frames"
304
341
  },
305
342
  "claude-4-opus": {
306
343
  "max_output_tokens": 4096,
@@ -317,7 +354,9 @@
317
354
  "source": "Anthropic official docs",
318
355
  "canonical_name": "claude-4-opus",
319
356
  "aliases": [],
320
- "max_tokens": 200000
357
+ "max_tokens": 200000,
358
+ "video_support": false,
359
+ "video_input_mode": "frames"
321
360
  },
322
361
  "claude-4.1-opus": {
323
362
  "max_output_tokens": 4096,
@@ -334,7 +373,9 @@
334
373
  "source": "Anthropic official docs",
335
374
  "canonical_name": "claude-4.1-opus",
336
375
  "aliases": [],
337
- "max_tokens": 200000
376
+ "max_tokens": 200000,
377
+ "video_support": false,
378
+ "video_input_mode": "frames"
338
379
  },
339
380
  "claude-4-sonnet": {
340
381
  "max_output_tokens": 8192,
@@ -351,7 +392,9 @@
351
392
  "source": "Anthropic official docs",
352
393
  "canonical_name": "claude-4-sonnet",
353
394
  "aliases": [],
354
- "max_tokens": 200000
395
+ "max_tokens": 200000,
396
+ "video_support": false,
397
+ "video_input_mode": "frames"
355
398
  },
356
399
  "claude-4.5-sonnet": {
357
400
  "max_output_tokens": 64000,
@@ -372,7 +415,9 @@
372
415
  "claude-sonnet-4-5-20250929",
373
416
  "anthropic/claude-sonnet-4-5"
374
417
  ],
375
- "max_tokens": 200000
418
+ "max_tokens": 200000,
419
+ "video_support": false,
420
+ "video_input_mode": "frames"
376
421
  },
377
422
  "claude-opus-4-5": {
378
423
  "max_output_tokens": 64000,
@@ -392,7 +437,9 @@
392
437
  "claude-opus-4-5-20251101",
393
438
  "anthropic/claude-opus-4-5"
394
439
  ],
395
- "max_tokens": 200000
440
+ "max_tokens": 200000,
441
+ "video_support": false,
442
+ "video_input_mode": "frames"
396
443
  },
397
444
  "llama-3.2-1b": {
398
445
  "max_output_tokens": 2048,
@@ -405,7 +452,10 @@
405
452
  "source": "Meta official docs",
406
453
  "canonical_name": "llama-3.2-1b",
407
454
  "aliases": [],
408
- "max_tokens": 8192
455
+ "max_tokens": 8192,
456
+ "video_support": false,
457
+ "max_tools": -1,
458
+ "video_input_mode": "none"
409
459
  },
410
460
  "llama-3.2-3b": {
411
461
  "max_output_tokens": 2048,
@@ -418,7 +468,10 @@
418
468
  "source": "Meta official docs",
419
469
  "canonical_name": "llama-3.2-3b",
420
470
  "aliases": [],
421
- "max_tokens": 8192
471
+ "max_tokens": 8192,
472
+ "video_support": false,
473
+ "max_tools": -1,
474
+ "video_input_mode": "none"
422
475
  },
423
476
  "llama-3.2-11b-vision": {
424
477
  "max_output_tokens": 2048,
@@ -434,7 +487,10 @@
434
487
  "source": "Meta official docs",
435
488
  "canonical_name": "llama-3.2-11b-vision",
436
489
  "aliases": [],
437
- "max_tokens": 128000
490
+ "max_tokens": 128000,
491
+ "video_support": false,
492
+ "max_tools": -1,
493
+ "video_input_mode": "frames"
438
494
  },
439
495
  "llama-3.3-70b": {
440
496
  "max_output_tokens": 8192,
@@ -447,7 +503,10 @@
447
503
  "source": "Meta official docs",
448
504
  "canonical_name": "llama-3.3-70b",
449
505
  "aliases": [],
450
- "max_tokens": 128000
506
+ "max_tokens": 128000,
507
+ "video_support": false,
508
+ "max_tools": -1,
509
+ "video_input_mode": "none"
451
510
  },
452
511
  "llama-3.1-8b": {
453
512
  "max_output_tokens": 8192,
@@ -460,7 +519,10 @@
460
519
  "source": "Meta official docs",
461
520
  "canonical_name": "llama-3.1-8b",
462
521
  "aliases": [],
463
- "max_tokens": 128000
522
+ "max_tokens": 128000,
523
+ "video_support": false,
524
+ "max_tools": -1,
525
+ "video_input_mode": "none"
464
526
  },
465
527
  "llama-3.1-70b": {
466
528
  "max_output_tokens": 8192,
@@ -473,7 +535,10 @@
473
535
  "source": "Meta official docs",
474
536
  "canonical_name": "llama-3.1-70b",
475
537
  "aliases": [],
476
- "max_tokens": 128000
538
+ "max_tokens": 128000,
539
+ "video_support": false,
540
+ "max_tools": -1,
541
+ "video_input_mode": "none"
477
542
  },
478
543
  "llama-3.1-405b": {
479
544
  "max_output_tokens": 8192,
@@ -486,7 +551,10 @@
486
551
  "source": "Meta official docs",
487
552
  "canonical_name": "llama-3.1-405b",
488
553
  "aliases": [],
489
- "max_tokens": 128000
554
+ "max_tokens": 128000,
555
+ "video_support": false,
556
+ "max_tools": -1,
557
+ "video_input_mode": "none"
490
558
  },
491
559
  "llama-4": {
492
560
  "max_output_tokens": 8192,
@@ -499,7 +567,29 @@
499
567
  "source": "Meta announcement",
500
568
  "canonical_name": "llama-4",
501
569
  "aliases": [],
502
- "max_tokens": 10000000
570
+ "max_tokens": 10000000,
571
+ "video_support": false,
572
+ "max_tools": -1,
573
+ "video_input_mode": "frames"
574
+ },
575
+ "llava-next-video-7b-hf": {
576
+ "max_output_tokens": 1024,
577
+ "tool_support": "prompted",
578
+ "structured_output": "prompted",
579
+ "parallel_tools": false,
580
+ "vision_support": true,
581
+ "video_support": true,
582
+ "audio_support": false,
583
+ "notes": "LLaVA-NeXT-Video 7B (HuggingFace Transformers video-input VLM)",
584
+ "source": "HuggingFace model card / Transformers docs",
585
+ "canonical_name": "llava-next-video-7b-hf",
586
+ "aliases": [
587
+ "llava-hf/LLaVA-NeXT-Video-7B-hf",
588
+ "LLaVA-NeXT-Video-7B-hf"
589
+ ],
590
+ "max_tokens": 10250,
591
+ "max_tools": -1,
592
+ "video_input_mode": "native"
503
593
  },
504
594
  "qwen2.5-0.5b": {
505
595
  "max_output_tokens": 8192,
@@ -512,7 +602,10 @@
512
602
  "source": "Alibaba official docs",
513
603
  "canonical_name": "qwen2.5-0.5b",
514
604
  "aliases": [],
515
- "max_tokens": 32768
605
+ "max_tokens": 32768,
606
+ "video_support": false,
607
+ "max_tools": -1,
608
+ "video_input_mode": "none"
516
609
  },
517
610
  "qwen2.5-1.5b": {
518
611
  "max_output_tokens": 8192,
@@ -525,7 +618,10 @@
525
618
  "source": "Alibaba official docs",
526
619
  "canonical_name": "qwen2.5-1.5b",
527
620
  "aliases": [],
528
- "max_tokens": 32768
621
+ "max_tokens": 32768,
622
+ "video_support": false,
623
+ "max_tools": -1,
624
+ "video_input_mode": "none"
529
625
  },
530
626
  "qwen2.5-3b": {
531
627
  "max_output_tokens": 8192,
@@ -538,7 +634,10 @@
538
634
  "source": "Alibaba official docs",
539
635
  "canonical_name": "qwen2.5-3b",
540
636
  "aliases": [],
541
- "max_tokens": 32768
637
+ "max_tokens": 32768,
638
+ "video_support": false,
639
+ "max_tools": -1,
640
+ "video_input_mode": "none"
542
641
  },
543
642
  "qwen2.5-7b": {
544
643
  "max_output_tokens": 8192,
@@ -551,7 +650,10 @@
551
650
  "source": "Alibaba official docs",
552
651
  "canonical_name": "qwen2.5-7b",
553
652
  "aliases": [],
554
- "max_tokens": 131072
653
+ "max_tokens": 131072,
654
+ "video_support": false,
655
+ "max_tools": -1,
656
+ "video_input_mode": "none"
555
657
  },
556
658
  "qwen2.5-14b": {
557
659
  "max_output_tokens": 8192,
@@ -564,7 +666,10 @@
564
666
  "source": "Alibaba official docs",
565
667
  "canonical_name": "qwen2.5-14b",
566
668
  "aliases": [],
567
- "max_tokens": 131072
669
+ "max_tokens": 131072,
670
+ "video_support": false,
671
+ "max_tools": -1,
672
+ "video_input_mode": "none"
568
673
  },
569
674
  "qwen2.5-32b": {
570
675
  "max_output_tokens": 8192,
@@ -577,7 +682,10 @@
577
682
  "source": "Alibaba official docs",
578
683
  "canonical_name": "qwen2.5-32b",
579
684
  "aliases": [],
580
- "max_tokens": 131072
685
+ "max_tokens": 131072,
686
+ "video_support": false,
687
+ "max_tools": -1,
688
+ "video_input_mode": "none"
581
689
  },
582
690
  "qwen2.5-72b": {
583
691
  "max_output_tokens": 8192,
@@ -590,7 +698,10 @@
590
698
  "source": "Alibaba official docs",
591
699
  "canonical_name": "qwen2.5-72b",
592
700
  "aliases": [],
593
- "max_tokens": 131072
701
+ "max_tokens": 131072,
702
+ "video_support": false,
703
+ "max_tools": -1,
704
+ "video_input_mode": "none"
594
705
  },
595
706
  "qwen3-0.6b": {
596
707
  "max_output_tokens": 8192,
@@ -600,11 +711,15 @@
600
711
  "vision_support": false,
601
712
  "audio_support": false,
602
713
  "thinking_support": true,
714
+ "thinking_control": "/no_think",
603
715
  "notes": "Qwen3 base model with thinking capabilities",
604
716
  "source": "Alibaba Qwen3 technical report",
605
717
  "canonical_name": "qwen3-0.6b",
606
718
  "aliases": [],
607
- "max_tokens": 32768
719
+ "max_tokens": 32768,
720
+ "video_support": false,
721
+ "max_tools": -1,
722
+ "video_input_mode": "none"
608
723
  },
609
724
  "qwen3-1.7b": {
610
725
  "max_output_tokens": 8192,
@@ -614,11 +729,15 @@
614
729
  "vision_support": false,
615
730
  "audio_support": false,
616
731
  "thinking_support": true,
732
+ "thinking_control": "/no_think",
617
733
  "notes": "Qwen3 1.7B model with thinking capabilities",
618
734
  "source": "Alibaba Qwen3 technical report",
619
735
  "canonical_name": "qwen3-1.7b",
620
736
  "aliases": [],
621
- "max_tokens": 32768
737
+ "max_tokens": 32768,
738
+ "video_support": false,
739
+ "max_tools": -1,
740
+ "video_input_mode": "none"
622
741
  },
623
742
  "qwen3-4b": {
624
743
  "max_output_tokens": 8192,
@@ -628,11 +747,54 @@
628
747
  "vision_support": false,
629
748
  "audio_support": false,
630
749
  "thinking_support": true,
750
+ "thinking_control": "/no_think",
631
751
  "notes": "Qwen3 4B model with extended context via YaRN scaling",
632
752
  "source": "Alibaba Qwen3 technical report",
633
753
  "canonical_name": "qwen3-4b",
634
754
  "aliases": [],
635
- "max_tokens": 131072
755
+ "max_tokens": 131072,
756
+ "video_support": false,
757
+ "max_tools": -1,
758
+ "video_input_mode": "none"
759
+ },
760
+ "qwen3-4b-2507": {
761
+ "max_output_tokens": 8192,
762
+ "tool_support": "native",
763
+ "structured_output": "native",
764
+ "parallel_tools": false,
765
+ "vision_support": false,
766
+ "audio_support": false,
767
+ "thinking_support": false,
768
+ "notes": "Qwen3-4B-2507 non-thinking instruct variant. Supports only non-thinking mode; does not generate <think></think> blocks.",
769
+ "source": "LM Studio model card (Qwen/Qwen3-4B-2507) and Qwen3 2507 release notes",
770
+ "canonical_name": "qwen3-4b-2507",
771
+ "aliases": [
772
+ "qwen/qwen3-4b-2507"
773
+ ],
774
+ "max_tokens": 262144,
775
+ "video_support": false,
776
+ "max_tools": -1,
777
+ "video_input_mode": "none"
778
+ },
779
+ "qwen3-4b-thinking-2507": {
780
+ "max_output_tokens": 8192,
781
+ "tool_support": "native",
782
+ "structured_output": "native",
783
+ "parallel_tools": false,
784
+ "vision_support": false,
785
+ "audio_support": false,
786
+ "thinking_support": true,
787
+ "thinking_output_field": "reasoning_content",
788
+ "notes": "Qwen3-4B-Thinking-2507 thinking-only variant. The decoded output often contains only the closing </think> tag, with the opening <think> supplied by the chat template.",
789
+ "source": "LM Studio model card (Qwen/Qwen3-4B-Thinking-2507) and Qwen3 Thinking 2507 docs",
790
+ "canonical_name": "qwen3-4b-thinking-2507",
791
+ "aliases": [
792
+ "qwen/qwen3-4b-thinking-2507"
793
+ ],
794
+ "max_tokens": 262144,
795
+ "video_support": false,
796
+ "max_tools": -1,
797
+ "video_input_mode": "none"
636
798
  },
637
799
  "qwen3-32b": {
638
800
  "max_output_tokens": 8192,
@@ -642,11 +804,107 @@
642
804
  "vision_support": false,
643
805
  "audio_support": false,
644
806
  "thinking_support": true,
807
+ "thinking_control": "/no_think",
645
808
  "notes": "Qwen3 32B model with advanced thinking capabilities",
646
809
  "source": "Alibaba Qwen3 technical report",
647
810
  "canonical_name": "qwen3-32b",
648
811
  "aliases": [],
649
- "max_tokens": 131072
812
+ "max_tokens": 131072,
813
+ "video_support": false,
814
+ "max_tools": -1,
815
+ "video_input_mode": "none"
816
+ },
817
+ "sera-32b": {
818
+ "max_output_tokens": 8192,
819
+ "tool_support": "prompted",
820
+ "structured_output": "prompted",
821
+ "parallel_tools": false,
822
+ "vision_support": false,
823
+ "audio_support": false,
824
+ "thinking_support": true,
825
+ "thinking_control": "/no_think",
826
+ "notes": "AllenAI SERA-32B coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
827
+ "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B)",
828
+ "canonical_name": "sera-32b",
829
+ "aliases": [
830
+ "allenai/SERA-32B",
831
+ "SERA-32B",
832
+ "SERA32B",
833
+ "SERA_32B"
834
+ ],
835
+ "max_tokens": 32768,
836
+ "video_support": false,
837
+ "max_tools": -1,
838
+ "video_input_mode": "none"
839
+ },
840
+ "sera-32b-ga": {
841
+ "max_output_tokens": 8192,
842
+ "tool_support": "prompted",
843
+ "structured_output": "prompted",
844
+ "parallel_tools": false,
845
+ "vision_support": false,
846
+ "audio_support": false,
847
+ "thinking_support": true,
848
+ "thinking_control": "/no_think",
849
+ "notes": "AllenAI SERA-32B-GA coding agent model (Qwen3-32B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
850
+ "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-32B-GA)",
851
+ "canonical_name": "sera-32b-ga",
852
+ "aliases": [
853
+ "allenai/SERA-32B-GA",
854
+ "SERA-32B-GA",
855
+ "SERA32BGA",
856
+ "SERA_32B_GA"
857
+ ],
858
+ "max_tokens": 32768,
859
+ "video_support": false,
860
+ "max_tools": -1,
861
+ "video_input_mode": "none"
862
+ },
863
+ "sera-8b": {
864
+ "max_output_tokens": 8192,
865
+ "tool_support": "prompted",
866
+ "structured_output": "prompted",
867
+ "parallel_tools": false,
868
+ "vision_support": false,
869
+ "audio_support": false,
870
+ "thinking_support": true,
871
+ "thinking_control": "/no_think",
872
+ "notes": "AllenAI SERA-8B coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
873
+ "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B)",
874
+ "canonical_name": "sera-8b",
875
+ "aliases": [
876
+ "allenai/SERA-8B",
877
+ "SERA-8B",
878
+ "SERA8B",
879
+ "SERA_8B"
880
+ ],
881
+ "max_tokens": 32768,
882
+ "video_support": false,
883
+ "max_tools": -1,
884
+ "video_input_mode": "none"
885
+ },
886
+ "sera-8b-ga": {
887
+ "max_output_tokens": 8192,
888
+ "tool_support": "prompted",
889
+ "structured_output": "prompted",
890
+ "parallel_tools": false,
891
+ "vision_support": false,
892
+ "audio_support": false,
893
+ "thinking_support": true,
894
+ "thinking_control": "/no_think",
895
+ "notes": "AllenAI SERA-8B-GA coding agent model (Qwen3-8B-based). 32K max sequence length; tool calls are emitted in <tool_call>...</tool_call> blocks.",
896
+ "source": "SERA paper (sera.pdf) and AllenAI model card (huggingface.co/allenai/SERA-8B-GA)",
897
+ "canonical_name": "sera-8b-ga",
898
+ "aliases": [
899
+ "allenai/SERA-8B-GA",
900
+ "SERA-8B-GA",
901
+ "SERA8BGA",
902
+ "SERA_8B_GA"
903
+ ],
904
+ "max_tokens": 32768,
905
+ "video_support": false,
906
+ "max_tools": -1,
907
+ "video_input_mode": "none"
650
908
  },
651
909
  "qwen3-30b-a3b": {
652
910
  "max_output_tokens": 8192,
@@ -656,11 +914,15 @@
656
914
  "vision_support": false,
657
915
  "audio_support": false,
658
916
  "thinking_support": true,
917
+ "thinking_control": "/no_think",
659
918
  "notes": "Qwen3 MoE model with 4-bit precision, 30B total/3B active parameters",
660
919
  "source": "Alibaba Qwen3 technical report",
661
920
  "canonical_name": "qwen3-30b-a3b",
662
921
  "aliases": [],
663
- "max_tokens": 40960
922
+ "max_tokens": 40960,
923
+ "video_support": false,
924
+ "max_tools": -1,
925
+ "video_input_mode": "none"
664
926
  },
665
927
  "qwen3-30b-a3b-2507": {
666
928
  "max_output_tokens": 8192,
@@ -676,7 +938,10 @@
676
938
  "aliases": [
677
939
  "qwen/qwen3-30b-a3b-2507"
678
940
  ],
679
- "max_tokens": 262144
941
+ "max_tokens": 262144,
942
+ "video_support": false,
943
+ "max_tools": -1,
944
+ "video_input_mode": "none"
680
945
  },
681
946
  "qwen3-coder-30b": {
682
947
  "max_output_tokens": 65536,
@@ -698,7 +963,10 @@
698
963
  "qwen3-coder-30b-a3b",
699
964
  "qwen3-coder-30b-a3b-instruct"
700
965
  ],
701
- "max_tokens": 262144
966
+ "max_tokens": 262144,
967
+ "video_support": false,
968
+ "max_tools": -1,
969
+ "video_input_mode": "none"
702
970
  },
703
971
  "qwen2-vl": {
704
972
  "max_output_tokens": 8192,
@@ -713,7 +981,10 @@
713
981
  "source": "Alibaba official docs",
714
982
  "canonical_name": "qwen2-vl",
715
983
  "aliases": [],
716
- "max_tokens": 32768
984
+ "max_tokens": 32768,
985
+ "video_support": false,
986
+ "max_tools": -1,
987
+ "video_input_mode": "frames"
717
988
  },
718
989
  "qwen2.5-vl": {
719
990
  "max_output_tokens": 8192,
@@ -729,7 +1000,10 @@
729
1000
  "source": "Alibaba official docs",
730
1001
  "canonical_name": "qwen2.5-vl",
731
1002
  "aliases": [],
732
- "max_tokens": 128000
1003
+ "max_tokens": 128000,
1004
+ "video_support": false,
1005
+ "max_tools": -1,
1006
+ "video_input_mode": "frames"
733
1007
  },
734
1008
  "phi-2": {
735
1009
  "max_output_tokens": 2048,
@@ -741,7 +1015,10 @@
741
1015
  "source": "Microsoft official docs",
742
1016
  "canonical_name": "phi-2",
743
1017
  "aliases": [],
744
- "max_tokens": 2048
1018
+ "max_tokens": 2048,
1019
+ "video_support": false,
1020
+ "max_tools": 0,
1021
+ "video_input_mode": "none"
745
1022
  },
746
1023
  "phi-3-mini": {
747
1024
  "max_output_tokens": 4096,
@@ -754,7 +1031,10 @@
754
1031
  "source": "Microsoft official docs",
755
1032
  "canonical_name": "phi-3-mini",
756
1033
  "aliases": [],
757
- "max_tokens": 4096
1034
+ "max_tokens": 4096,
1035
+ "video_support": false,
1036
+ "max_tools": -1,
1037
+ "video_input_mode": "none"
758
1038
  },
759
1039
  "phi-3-small": {
760
1040
  "max_output_tokens": 8192,
@@ -766,7 +1046,10 @@
766
1046
  "source": "Microsoft official docs",
767
1047
  "canonical_name": "phi-3-small",
768
1048
  "aliases": [],
769
- "max_tokens": 8192
1049
+ "max_tokens": 8192,
1050
+ "video_support": false,
1051
+ "max_tools": -1,
1052
+ "video_input_mode": "none"
770
1053
  },
771
1054
  "phi-3-medium": {
772
1055
  "max_output_tokens": 4096,
@@ -778,7 +1061,10 @@
778
1061
  "source": "Microsoft official docs",
779
1062
  "canonical_name": "phi-3-medium",
780
1063
  "aliases": [],
781
- "max_tokens": 128000
1064
+ "max_tokens": 128000,
1065
+ "video_support": false,
1066
+ "max_tools": -1,
1067
+ "video_input_mode": "none"
782
1068
  },
783
1069
  "phi-3.5-mini": {
784
1070
  "max_output_tokens": 4096,
@@ -790,7 +1076,10 @@
790
1076
  "source": "Microsoft official docs",
791
1077
  "canonical_name": "phi-3.5-mini",
792
1078
  "aliases": [],
793
- "max_tokens": 128000
1079
+ "max_tokens": 128000,
1080
+ "video_support": false,
1081
+ "max_tools": -1,
1082
+ "video_input_mode": "none"
794
1083
  },
795
1084
  "phi-3.5-moe": {
796
1085
  "max_output_tokens": 4096,
@@ -803,7 +1092,10 @@
803
1092
  "source": "Microsoft official docs",
804
1093
  "canonical_name": "phi-3.5-moe",
805
1094
  "aliases": [],
806
- "max_tokens": 128000
1095
+ "max_tokens": 128000,
1096
+ "video_support": false,
1097
+ "max_tools": -1,
1098
+ "video_input_mode": "none"
807
1099
  },
808
1100
  "phi-3-vision": {
809
1101
  "max_output_tokens": 4096,
@@ -818,7 +1110,10 @@
818
1110
  "source": "Microsoft official docs",
819
1111
  "canonical_name": "phi-3-vision",
820
1112
  "aliases": [],
821
- "max_tokens": 128000
1113
+ "max_tokens": 128000,
1114
+ "video_support": false,
1115
+ "max_tools": -1,
1116
+ "video_input_mode": "frames"
822
1117
  },
823
1118
  "phi-4": {
824
1119
  "max_output_tokens": 16000,
@@ -831,7 +1126,10 @@
831
1126
  "source": "Microsoft official docs",
832
1127
  "canonical_name": "phi-4",
833
1128
  "aliases": [],
834
- "max_tokens": 16000
1129
+ "max_tokens": 16000,
1130
+ "video_support": false,
1131
+ "max_tools": -1,
1132
+ "video_input_mode": "none"
835
1133
  },
836
1134
  "mistral-7b": {
837
1135
  "max_output_tokens": 8192,
@@ -843,7 +1141,10 @@
843
1141
  "source": "Mistral AI docs",
844
1142
  "canonical_name": "mistral-7b",
845
1143
  "aliases": [],
846
- "max_tokens": 8192
1144
+ "max_tokens": 8192,
1145
+ "video_support": false,
1146
+ "max_tools": -1,
1147
+ "video_input_mode": "none"
847
1148
  },
848
1149
  "mixtral-8x7b": {
849
1150
  "max_output_tokens": 32768,
@@ -856,7 +1157,10 @@
856
1157
  "source": "Mistral AI docs",
857
1158
  "canonical_name": "mixtral-8x7b",
858
1159
  "aliases": [],
859
- "max_tokens": 32768
1160
+ "max_tokens": 32768,
1161
+ "video_support": false,
1162
+ "max_tools": -1,
1163
+ "video_input_mode": "none"
860
1164
  },
861
1165
  "mixtral-8x22b": {
862
1166
  "max_output_tokens": 65536,
@@ -868,7 +1172,10 @@
868
1172
  "source": "Mistral AI docs",
869
1173
  "canonical_name": "mixtral-8x22b",
870
1174
  "aliases": [],
871
- "max_tokens": 65536
1175
+ "max_tokens": 65536,
1176
+ "video_support": false,
1177
+ "max_tools": -1,
1178
+ "video_input_mode": "none"
872
1179
  },
873
1180
  "mistral-small": {
874
1181
  "max_output_tokens": 32768,
@@ -880,7 +1187,10 @@
880
1187
  "source": "Mistral AI docs",
881
1188
  "canonical_name": "mistral-small",
882
1189
  "aliases": [],
883
- "max_tokens": 32768
1190
+ "max_tokens": 32768,
1191
+ "video_support": false,
1192
+ "max_tools": -1,
1193
+ "video_input_mode": "none"
884
1194
  },
885
1195
  "mistral-medium": {
886
1196
  "max_output_tokens": 32768,
@@ -892,7 +1202,10 @@
892
1202
  "source": "Mistral AI docs",
893
1203
  "canonical_name": "mistral-medium",
894
1204
  "aliases": [],
895
- "max_tokens": 32768
1205
+ "max_tokens": 32768,
1206
+ "video_support": false,
1207
+ "max_tools": -1,
1208
+ "video_input_mode": "none"
896
1209
  },
897
1210
  "mistral-large": {
898
1211
  "max_output_tokens": 128000,
@@ -904,7 +1217,10 @@
904
1217
  "source": "Mistral AI docs",
905
1218
  "canonical_name": "mistral-large",
906
1219
  "aliases": [],
907
- "max_tokens": 128000
1220
+ "max_tokens": 128000,
1221
+ "video_support": false,
1222
+ "max_tools": -1,
1223
+ "video_input_mode": "none"
908
1224
  },
909
1225
  "codestral": {
910
1226
  "max_output_tokens": 32768,
@@ -917,7 +1233,10 @@
917
1233
  "source": "Mistral AI docs",
918
1234
  "canonical_name": "codestral",
919
1235
  "aliases": [],
920
- "max_tokens": 32768
1236
+ "max_tokens": 32768,
1237
+ "video_support": false,
1238
+ "max_tools": -1,
1239
+ "video_input_mode": "none"
921
1240
  },
922
1241
  "magistral-small-2509": {
923
1242
  "max_output_tokens": 8192,
@@ -937,7 +1256,9 @@
937
1256
  "aliases": [
938
1257
  "mistralai/magistral-small-2509"
939
1258
  ],
940
- "max_tokens": 128000
1259
+ "max_tokens": 128000,
1260
+ "max_tools": -1,
1261
+ "video_input_mode": "frames"
941
1262
  },
942
1263
  "Qwen/Qwen3-VL-8B-Instruct-FP8": {
943
1264
  "max_output_tokens": 8192,
@@ -961,7 +1282,9 @@
961
1282
  "qwen3-vl-8b-fp8",
962
1283
  "qwen3-vl-8b-instruct-fp8"
963
1284
  ],
964
- "max_tokens": 262144
1285
+ "max_tokens": 262144,
1286
+ "max_tools": -1,
1287
+ "video_input_mode": "frames"
965
1288
  },
966
1289
  "llama3.2-vision:11b": {
967
1290
  "max_output_tokens": 4096,
@@ -1012,7 +1335,9 @@
1012
1335
  "llama3.2-vision-11b",
1013
1336
  "llama-3.2-vision:11b"
1014
1337
  ],
1015
- "max_tokens": 131072
1338
+ "max_tokens": 131072,
1339
+ "max_tools": -1,
1340
+ "video_input_mode": "frames"
1016
1341
  },
1017
1342
  "llama3.2-vision:70b": {
1018
1343
  "max_output_tokens": 4096,
@@ -1038,7 +1363,9 @@
1038
1363
  "llama3.2-vision-70b",
1039
1364
  "llama-3.2-vision:70b"
1040
1365
  ],
1041
- "max_tokens": 131072
1366
+ "max_tokens": 131072,
1367
+ "max_tools": -1,
1368
+ "video_input_mode": "frames"
1042
1369
  },
1043
1370
  "llama3.2-vision:90b": {
1044
1371
  "max_output_tokens": 4096,
@@ -1064,7 +1391,9 @@
1064
1391
  "llama3.2-vision-90b",
1065
1392
  "llama-3.2-vision:90b"
1066
1393
  ],
1067
- "max_tokens": 131072
1394
+ "max_tokens": 131072,
1395
+ "max_tools": -1,
1396
+ "video_input_mode": "frames"
1068
1397
  },
1069
1398
  "gemma-2b": {
1070
1399
  "max_output_tokens": 8192,
@@ -1076,7 +1405,10 @@
1076
1405
  "source": "Google docs",
1077
1406
  "canonical_name": "gemma-2b",
1078
1407
  "aliases": [],
1079
- "max_tokens": 8192
1408
+ "max_tokens": 8192,
1409
+ "video_support": false,
1410
+ "max_tools": 0,
1411
+ "video_input_mode": "none"
1080
1412
  },
1081
1413
  "gemma-7b": {
1082
1414
  "max_output_tokens": 8192,
@@ -1088,7 +1420,10 @@
1088
1420
  "source": "Google docs",
1089
1421
  "canonical_name": "gemma-7b",
1090
1422
  "aliases": [],
1091
- "max_tokens": 8192
1423
+ "max_tokens": 8192,
1424
+ "video_support": false,
1425
+ "max_tools": 0,
1426
+ "video_input_mode": "none"
1092
1427
  },
1093
1428
  "gemma2-9b": {
1094
1429
  "max_output_tokens": 8192,
@@ -1100,7 +1435,10 @@
1100
1435
  "source": "Google docs",
1101
1436
  "canonical_name": "gemma2-9b",
1102
1437
  "aliases": [],
1103
- "max_tokens": 8192
1438
+ "max_tokens": 8192,
1439
+ "video_support": false,
1440
+ "max_tools": -1,
1441
+ "video_input_mode": "none"
1104
1442
  },
1105
1443
  "gemma2-27b": {
1106
1444
  "max_output_tokens": 8192,
@@ -1112,7 +1450,10 @@
1112
1450
  "source": "Google docs",
1113
1451
  "canonical_name": "gemma2-27b",
1114
1452
  "aliases": [],
1115
- "max_tokens": 8192
1453
+ "max_tokens": 8192,
1454
+ "video_support": false,
1455
+ "max_tools": -1,
1456
+ "video_input_mode": "none"
1116
1457
  },
1117
1458
  "gemma3": {
1118
1459
  "max_output_tokens": 8192,
@@ -1125,7 +1466,10 @@
1125
1466
  "source": "Google docs",
1126
1467
  "canonical_name": "gemma3",
1127
1468
  "aliases": [],
1128
- "max_tokens": 128000
1469
+ "max_tokens": 128000,
1470
+ "video_support": false,
1471
+ "max_tools": -1,
1472
+ "video_input_mode": "none"
1129
1473
  },
1130
1474
  "codegemma": {
1131
1475
  "max_output_tokens": 8192,
@@ -1138,7 +1482,10 @@
1138
1482
  "source": "Google docs",
1139
1483
  "canonical_name": "codegemma",
1140
1484
  "aliases": [],
1141
- "max_tokens": 8192
1485
+ "max_tokens": 8192,
1486
+ "video_support": false,
1487
+ "max_tools": 0,
1488
+ "video_input_mode": "none"
1142
1489
  },
1143
1490
  "paligemma": {
1144
1491
  "max_output_tokens": 1024,
@@ -1156,7 +1503,10 @@
1156
1503
  "source": "Google docs",
1157
1504
  "canonical_name": "paligemma",
1158
1505
  "aliases": [],
1159
- "max_tokens": 8192
1506
+ "max_tokens": 8192,
1507
+ "video_support": false,
1508
+ "max_tools": 0,
1509
+ "video_input_mode": "frames"
1160
1510
  },
1161
1511
  "glm-4": {
1162
1512
  "max_output_tokens": 4096,
@@ -1169,7 +1519,10 @@
1169
1519
  "source": "Model documentation",
1170
1520
  "canonical_name": "glm-4",
1171
1521
  "aliases": [],
1172
- "max_tokens": 128000
1522
+ "max_tokens": 128000,
1523
+ "video_support": false,
1524
+ "max_tools": -1,
1525
+ "video_input_mode": "none"
1173
1526
  },
1174
1527
  "glm-4-9b": {
1175
1528
  "max_output_tokens": 4096,
@@ -1182,7 +1535,10 @@
1182
1535
  "source": "Model documentation",
1183
1536
  "canonical_name": "glm-4-9b",
1184
1537
  "aliases": [],
1185
- "max_tokens": 128000
1538
+ "max_tokens": 128000,
1539
+ "video_support": false,
1540
+ "max_tools": -1,
1541
+ "video_input_mode": "none"
1186
1542
  },
1187
1543
  "glm-4-9b-0414-4bit": {
1188
1544
  "max_output_tokens": 4096,
@@ -1195,7 +1551,10 @@
1195
1551
  "source": "Model documentation",
1196
1552
  "canonical_name": "glm-4-9b-0414-4bit",
1197
1553
  "aliases": [],
1198
- "max_tokens": 128000
1554
+ "max_tokens": 128000,
1555
+ "video_support": false,
1556
+ "max_tools": -1,
1557
+ "video_input_mode": "none"
1199
1558
  },
1200
1559
  "deepseek-r1": {
1201
1560
  "max_output_tokens": 8192,
@@ -1208,7 +1567,10 @@
1208
1567
  "source": "MLX community",
1209
1568
  "canonical_name": "deepseek-r1",
1210
1569
  "aliases": [],
1211
- "max_tokens": 32768
1570
+ "max_tokens": 32768,
1571
+ "video_support": false,
1572
+ "max_tools": -1,
1573
+ "video_input_mode": "none"
1212
1574
  },
1213
1575
  "qwen3": {
1214
1576
  "max_output_tokens": 8192,
@@ -1221,7 +1583,10 @@
1221
1583
  "source": "MLX community",
1222
1584
  "canonical_name": "qwen3",
1223
1585
  "aliases": [],
1224
- "max_tokens": 32768
1586
+ "max_tokens": 32768,
1587
+ "video_support": false,
1588
+ "max_tools": -1,
1589
+ "video_input_mode": "none"
1225
1590
  },
1226
1591
  "qwen3-14b": {
1227
1592
  "max_output_tokens": 8192,
@@ -1231,11 +1596,15 @@
1231
1596
  "vision_support": false,
1232
1597
  "audio_support": false,
1233
1598
  "thinking_support": true,
1599
+ "thinking_control": "/no_think",
1234
1600
  "notes": "Qwen3 14B model with thinking capabilities",
1235
1601
  "source": "Alibaba Qwen3 technical report",
1236
1602
  "canonical_name": "qwen3-14b",
1237
1603
  "aliases": [],
1238
- "max_tokens": 131072
1604
+ "max_tokens": 131072,
1605
+ "video_support": false,
1606
+ "max_tools": -1,
1607
+ "video_input_mode": "none"
1239
1608
  },
1240
1609
  "qwen3-next-80b-a3b": {
1241
1610
  "max_output_tokens": 16384,
@@ -1249,9 +1618,13 @@
1249
1618
  "source": "Alibaba Qwen3-Next technical report",
1250
1619
  "canonical_name": "qwen3-next-80b-a3b",
1251
1620
  "aliases": [
1252
- "qwen/qwen3-next-80b"
1621
+ "qwen/qwen3-next-80b",
1622
+ "qwen3-next-80b"
1253
1623
  ],
1254
- "max_tokens": 262144
1624
+ "max_tokens": 262144,
1625
+ "video_support": false,
1626
+ "max_tools": -1,
1627
+ "video_input_mode": "none"
1255
1628
  },
1256
1629
  "gpt-5": {
1257
1630
  "max_output_tokens": 8192,
@@ -1265,7 +1638,9 @@
1265
1638
  "source": "OpenAI official docs",
1266
1639
  "canonical_name": "gpt-5",
1267
1640
  "aliases": [],
1268
- "max_tokens": 200000
1641
+ "max_tokens": 200000,
1642
+ "video_support": false,
1643
+ "video_input_mode": "frames"
1269
1644
  },
1270
1645
  "gpt-5-turbo": {
1271
1646
  "max_output_tokens": 4096,
@@ -1279,7 +1654,9 @@
1279
1654
  "source": "OpenAI official docs",
1280
1655
  "canonical_name": "gpt-5-turbo",
1281
1656
  "aliases": [],
1282
- "max_tokens": 200000
1657
+ "max_tokens": 200000,
1658
+ "video_support": false,
1659
+ "video_input_mode": "frames"
1283
1660
  },
1284
1661
  "gpt-5-pro": {
1285
1662
  "max_output_tokens": 16384,
@@ -1293,7 +1670,9 @@
1293
1670
  "source": "OpenAI official docs",
1294
1671
  "canonical_name": "gpt-5-pro",
1295
1672
  "aliases": [],
1296
- "max_tokens": 200000
1673
+ "max_tokens": 200000,
1674
+ "video_support": false,
1675
+ "video_input_mode": "frames"
1297
1676
  },
1298
1677
  "gpt-5-mini": {
1299
1678
  "max_output_tokens": 8192,
@@ -1307,7 +1686,9 @@
1307
1686
  "source": "OpenAI official docs",
1308
1687
  "canonical_name": "gpt-5-mini",
1309
1688
  "aliases": [],
1310
- "max_tokens": 200000
1689
+ "max_tokens": 200000,
1690
+ "video_support": false,
1691
+ "video_input_mode": "frames"
1311
1692
  },
1312
1693
  "gpt-5-vision": {
1313
1694
  "max_output_tokens": 8192,
@@ -1324,7 +1705,9 @@
1324
1705
  "source": "OpenAI official docs",
1325
1706
  "canonical_name": "gpt-5-vision",
1326
1707
  "aliases": [],
1327
- "max_tokens": 200000
1708
+ "max_tokens": 200000,
1709
+ "video_support": false,
1710
+ "video_input_mode": "frames"
1328
1711
  },
1329
1712
  "qwen3-8b": {
1330
1713
  "max_output_tokens": 8192,
@@ -1334,11 +1717,15 @@
1334
1717
  "vision_support": false,
1335
1718
  "audio_support": false,
1336
1719
  "thinking_support": true,
1720
+ "thinking_control": "/no_think",
1337
1721
  "notes": "Qwen3 8B model with thinking capabilities",
1338
1722
  "source": "Alibaba Qwen3 technical report",
1339
1723
  "canonical_name": "qwen3-8b",
1340
1724
  "aliases": [],
1341
- "max_tokens": 131072
1725
+ "max_tokens": 131072,
1726
+ "video_support": false,
1727
+ "max_tools": -1,
1728
+ "video_input_mode": "none"
1342
1729
  },
1343
1730
  "qwen3-235b-a22b": {
1344
1731
  "max_output_tokens": 8192,
@@ -1348,11 +1735,15 @@
1348
1735
  "vision_support": false,
1349
1736
  "audio_support": false,
1350
1737
  "thinking_support": true,
1738
+ "thinking_control": "/no_think",
1351
1739
  "notes": "Qwen3 MoE model with 4-bit precision, 235B total/22B active parameters",
1352
1740
  "source": "Alibaba Qwen3 technical report",
1353
1741
  "canonical_name": "qwen3-235b-a22b",
1354
1742
  "aliases": [],
1355
- "max_tokens": 40960
1743
+ "max_tokens": 40960,
1744
+ "video_support": false,
1745
+ "max_tools": -1,
1746
+ "video_input_mode": "none"
1356
1747
  },
1357
1748
  "qwen3-vl": {
1358
1749
  "max_output_tokens": 8192,
@@ -1372,7 +1763,9 @@
1372
1763
  "source": "Alibaba Qwen3-VL technical report",
1373
1764
  "canonical_name": "qwen3-vl",
1374
1765
  "aliases": [],
1375
- "max_tokens": 131072
1766
+ "max_tokens": 131072,
1767
+ "max_tools": -1,
1768
+ "video_input_mode": "native"
1376
1769
  },
1377
1770
  "qwen2.5-vl-7b": {
1378
1771
  "max_output_tokens": 8192,
@@ -1400,8 +1793,11 @@
1400
1793
  "qwen/qwen2.5-vl-7b",
1401
1794
  "unsloth/Qwen2.5-VL-7B-Instruct-GGUF"
1402
1795
  ],
1403
- "max_tokens": 128000
1404
- },
1796
+ "max_tokens": 128000,
1797
+ "video_support": false,
1798
+ "max_tools": -1,
1799
+ "video_input_mode": "frames"
1800
+ },
1405
1801
  "gemma3-4b": {
1406
1802
  "max_output_tokens": 8192,
1407
1803
  "tool_support": "native",
@@ -1429,7 +1825,9 @@
1429
1825
  "aliases": [
1430
1826
  "gemma3:4b"
1431
1827
  ],
1432
- "max_tokens": 128000
1828
+ "max_tokens": 128000,
1829
+ "max_tools": -1,
1830
+ "video_input_mode": "frames"
1433
1831
  },
1434
1832
  "qwen2.5vl:7b": {
1435
1833
  "max_output_tokens": 8192,
@@ -1451,7 +1849,10 @@
1451
1849
  "aliases": [
1452
1850
  "qwen2.5vl"
1453
1851
  ],
1454
- "max_tokens": 128000
1852
+ "max_tokens": 128000,
1853
+ "video_support": false,
1854
+ "max_tools": -1,
1855
+ "video_input_mode": "frames"
1455
1856
  },
1456
1857
  "gemma3:4b-it-qat": {
1457
1858
  "max_output_tokens": 8192,
@@ -1472,7 +1873,9 @@
1472
1873
  "source": "Ollama model library",
1473
1874
  "canonical_name": "gemma3:4b-it-qat",
1474
1875
  "aliases": [],
1475
- "max_tokens": 128000
1876
+ "max_tokens": 128000,
1877
+ "max_tools": -1,
1878
+ "video_input_mode": "frames"
1476
1879
  },
1477
1880
  "gemma3n:e4b": {
1478
1881
  "max_output_tokens": 8192,
@@ -1501,7 +1904,9 @@
1501
1904
  "gemma3n:e2b:latest",
1502
1905
  "gemma3n:e2b"
1503
1906
  ],
1504
- "max_tokens": 32768
1907
+ "max_tokens": 32768,
1908
+ "max_tools": -1,
1909
+ "video_input_mode": "native"
1505
1910
  },
1506
1911
  "seed-oss": {
1507
1912
  "max_output_tokens": 8192,
@@ -1516,7 +1921,10 @@
1516
1921
  "source": "ByteDance SEED-OSS documentation",
1517
1922
  "canonical_name": "seed-oss",
1518
1923
  "aliases": [],
1519
- "max_tokens": 524288
1924
+ "max_tokens": 524288,
1925
+ "video_support": false,
1926
+ "max_tools": -1,
1927
+ "video_input_mode": "none"
1520
1928
  },
1521
1929
  "glm-4.5": {
1522
1930
  "max_output_tokens": 4096,
@@ -1530,7 +1938,10 @@
1530
1938
  "source": "Zhipu AI GLM-4.5 announcement",
1531
1939
  "canonical_name": "glm-4.5",
1532
1940
  "aliases": [],
1533
- "max_tokens": 128000
1941
+ "max_tokens": 128000,
1942
+ "video_support": false,
1943
+ "max_tools": -1,
1944
+ "video_input_mode": "none"
1534
1945
  },
1535
1946
  "glm-4.6": {
1536
1947
  "max_output_tokens": 4096,
@@ -1548,7 +1959,10 @@
1548
1959
  "zai-org/GLM-4.6-FP8",
1549
1960
  "glm-4.6-fp8"
1550
1961
  ],
1551
- "max_tokens": 128000
1962
+ "max_tokens": 128000,
1963
+ "video_support": false,
1964
+ "max_tools": -1,
1965
+ "video_input_mode": "none"
1552
1966
  },
1553
1967
  "glm-4.5-air": {
1554
1968
  "max_output_tokens": 4096,
@@ -1562,7 +1976,55 @@
1562
1976
  "source": "Zhipu AI GLM-4.5-Air announcement",
1563
1977
  "canonical_name": "glm-4.5-air",
1564
1978
  "aliases": [],
1565
- "max_tokens": 128000
1979
+ "max_tokens": 128000,
1980
+ "video_support": false,
1981
+ "max_tools": -1,
1982
+ "video_input_mode": "none"
1983
+ },
1984
+ "glm-4.7-flash": {
1985
+ "max_output_tokens": 131072,
1986
+ "tool_support": "native",
1987
+ "structured_output": "native",
1988
+ "parallel_tools": true,
1989
+ "vision_support": false,
1990
+ "audio_support": false,
1991
+ "thinking_support": true,
1992
+ "thinking_modes": [
1993
+ "interleaved_thinking",
1994
+ "preserved_thinking",
1995
+ "turn_level_thinking"
1996
+ ],
1997
+ "architecture": "mixture_of_experts",
1998
+ "total_parameters": "30B",
1999
+ "active_parameters": "3B",
2000
+ "experts": 64,
2001
+ "shared_experts": 1,
2002
+ "experts_activated": 4,
2003
+ "tensor_type": "BF16",
2004
+ "attention_mechanism": "grouped_query_attention",
2005
+ "positional_encoding": "rope",
2006
+ "transformer_layers": 47,
2007
+ "agentic_coding": true,
2008
+ "ui_generation": true,
2009
+ "notes": "GLM-4.7-Flash lightweight MoE model (30B total/3B active, 64 routed experts + 1 shared/4 activated) optimized for high-speed agentic coding and complex reasoning. Features Interleaved Thinking (reasoning before actions), Preserved Thinking (cross-turn consistency), and Turn-level Thinking (per-turn toggle). BF16 precision (~62.5GB). Compatible with vLLM, SGLang, and Hugging Face Transformers. Strong performance on SWE-bench Verified (59.2%), AIME 25 (91.6%), \u03c4\u00b2-Bench (79.5%), GPQA (75.2%), HLE (14.4%). MIT license. Recommended: temp 0.7 for coding, 0 for tool tasks.",
2010
+ "source": "HuggingFace zai-org/GLM-4.7-Flash official model card",
2011
+ "canonical_name": "glm-4.7-flash",
2012
+ "aliases": [
2013
+ "zai-org/glm-4.7-flash",
2014
+ "z-ai/glm-4.7-flash"
2015
+ ],
2016
+ "max_tokens": 128000,
2017
+ "license": "MIT",
2018
+ "inference_parameters": {
2019
+ "temperature": 1.0,
2020
+ "top_p": 0.95,
2021
+ "max_new_tokens": 131072,
2022
+ "coding_temp": 0.7,
2023
+ "agentic_temp": 0.0
2024
+ },
2025
+ "video_support": false,
2026
+ "max_tools": -1,
2027
+ "video_input_mode": "none"
1566
2028
  },
1567
2029
  "llama-4-109b": {
1568
2030
  "max_output_tokens": 8192,
@@ -1575,7 +2037,10 @@
1575
2037
  "source": "Meta LLaMA 4 announcement",
1576
2038
  "canonical_name": "llama-4-109b",
1577
2039
  "aliases": [],
1578
- "max_tokens": 10000000
2040
+ "max_tokens": 10000000,
2041
+ "video_support": false,
2042
+ "max_tools": -1,
2043
+ "video_input_mode": "frames"
1579
2044
  },
1580
2045
  "granite3.2:2b": {
1581
2046
  "max_output_tokens": 8192,
@@ -1590,7 +2055,10 @@
1590
2055
  "aliases": [
1591
2056
  "granite3.2-2b"
1592
2057
  ],
1593
- "max_tokens": 32768
2058
+ "max_tokens": 32768,
2059
+ "video_support": false,
2060
+ "max_tools": -1,
2061
+ "video_input_mode": "none"
1594
2062
  },
1595
2063
  "granite3.2:8b": {
1596
2064
  "max_output_tokens": 8192,
@@ -1605,7 +2073,10 @@
1605
2073
  "aliases": [
1606
2074
  "granite3.2-8b"
1607
2075
  ],
1608
- "max_tokens": 32768
2076
+ "max_tokens": 32768,
2077
+ "video_support": false,
2078
+ "max_tools": -1,
2079
+ "video_input_mode": "none"
1609
2080
  },
1610
2081
  "granite3.2-vision:2b": {
1611
2082
  "max_output_tokens": 8192,
@@ -1631,7 +2102,9 @@
1631
2102
  "granite-vision",
1632
2103
  "ibm-granite-vision"
1633
2104
  ],
1634
- "max_tokens": 32768
2105
+ "max_tokens": 32768,
2106
+ "max_tools": -1,
2107
+ "video_input_mode": "frames"
1635
2108
  },
1636
2109
  "gemini-2.5-flash": {
1637
2110
  "max_output_tokens": 8192,
@@ -1657,7 +2130,8 @@
1657
2130
  "aliases": [
1658
2131
  "gemini-2.5-flash-001"
1659
2132
  ],
1660
- "max_tokens": 1000000
2133
+ "max_tokens": 1000000,
2134
+ "video_input_mode": "native"
1661
2135
  },
1662
2136
  "gemini-2.5-pro": {
1663
2137
  "max_output_tokens": 65536,
@@ -1673,7 +2147,7 @@
1673
2147
  "448x448",
1674
2148
  "1024x1024"
1675
2149
  ],
1676
- "max_image_resolution": "768x768",
2150
+ "max_image_resolution": "768x768",
1677
2151
  "image_tokenization_method": "gemini_vision_encoder",
1678
2152
  "thinking_support": true,
1679
2153
  "thinking_budget": true,
@@ -1683,7 +2157,8 @@
1683
2157
  "aliases": [
1684
2158
  "gemini-2.5-pro-001"
1685
2159
  ],
1686
- "max_tokens": 1048576
2160
+ "max_tokens": 1048576,
2161
+ "video_input_mode": "native"
1687
2162
  },
1688
2163
  "granite3.3:2b": {
1689
2164
  "max_output_tokens": 8192,
@@ -1698,7 +2173,10 @@
1698
2173
  "aliases": [
1699
2174
  "granite3.3-2b"
1700
2175
  ],
1701
- "max_tokens": 32768
2176
+ "max_tokens": 32768,
2177
+ "video_support": false,
2178
+ "max_tools": -1,
2179
+ "video_input_mode": "none"
1702
2180
  },
1703
2181
  "granite3.3:8b": {
1704
2182
  "max_output_tokens": 8192,
@@ -1713,7 +2191,10 @@
1713
2191
  "aliases": [
1714
2192
  "granite3.3-8b"
1715
2193
  ],
1716
- "max_tokens": 32768
2194
+ "max_tokens": 32768,
2195
+ "video_support": false,
2196
+ "max_tools": -1,
2197
+ "video_input_mode": "none"
1717
2198
  },
1718
2199
  "embeddinggemma:300m": {
1719
2200
  "max_output_tokens": 0,
@@ -1729,7 +2210,42 @@
1729
2210
  "google/embeddinggemma-300m"
1730
2211
  ],
1731
2212
  "max_tokens": 8192,
1732
- "model_type": "embedding"
2213
+ "model_type": "embedding",
2214
+ "video_support": false,
2215
+ "max_tools": 0,
2216
+ "video_input_mode": "none"
2217
+ },
2218
+ "nomic-embed-text-v1.5": {
2219
+ "max_output_tokens": 0,
2220
+ "tool_support": "none",
2221
+ "structured_output": "none",
2222
+ "parallel_tools": false,
2223
+ "vision_support": false,
2224
+ "audio_support": false,
2225
+ "notes": "Nomic Embed v1.5 text embedding model (Matryoshka). Embedding dims commonly used: 768 (and truncation-friendly sizes like 512/256/128/64). Not a text-generative model.",
2226
+ "source": "Nomic AI documentation + HuggingFace model card",
2227
+ "canonical_name": "nomic-embed-text-v1.5",
2228
+ "aliases": [
2229
+ "nomic-ai/nomic-embed-text-v1.5",
2230
+ "nomic-embed-text-v1.5",
2231
+ "text-embedding-nomic-embed-text-v1.5",
2232
+ "text-embedding-nomic-embed-text-v1.5@q6_k",
2233
+ "nomic-embed-text-v1.5@q6_k"
2234
+ ],
2235
+ "max_tokens": 8192,
2236
+ "model_type": "embedding",
2237
+ "embedding_dimension": 768,
2238
+ "matryoshka_dims": [
2239
+ 768,
2240
+ 512,
2241
+ 256,
2242
+ 128,
2243
+ 64
2244
+ ],
2245
+ "embedding_support": true,
2246
+ "video_support": false,
2247
+ "max_tools": 0,
2248
+ "video_input_mode": "none"
1733
2249
  },
1734
2250
  "blip-image-captioning-base": {
1735
2251
  "max_output_tokens": 512,
@@ -1754,7 +2270,9 @@
1754
2270
  "aliases": [
1755
2271
  "Salesforce/blip-image-captioning-base"
1756
2272
  ],
1757
- "max_tokens": 2048
2273
+ "max_tokens": 2048,
2274
+ "max_tools": 0,
2275
+ "video_input_mode": "frames"
1758
2276
  },
1759
2277
  "glyph": {
1760
2278
  "max_output_tokens": 8192,
@@ -1780,7 +2298,7 @@
1780
2298
  "conversation_template": {
1781
2299
  "system_prefix": "<|system|>\n",
1782
2300
  "system_suffix": "\n",
1783
- "user_prefix": "<|user|>\n",
2301
+ "user_prefix": "<|user|>\n",
1784
2302
  "user_suffix": "\n",
1785
2303
  "assistant_prefix": "<|assistant|>\n",
1786
2304
  "assistant_suffix": "\n"
@@ -1798,7 +2316,9 @@
1798
2316
  "max_tokens": 131072,
1799
2317
  "license": "MIT",
1800
2318
  "arxiv": "2510.17800",
1801
- "repository": "https://github.com/thu-coai/Glyph"
2319
+ "repository": "https://github.com/thu-coai/Glyph",
2320
+ "max_tools": -1,
2321
+ "video_input_mode": "frames"
1802
2322
  },
1803
2323
  "glm-4.1v-9b-base": {
1804
2324
  "max_output_tokens": 8192,
@@ -1828,7 +2348,9 @@
1828
2348
  "aliases": [
1829
2349
  "zai-org/GLM-4.1V-9B-Base"
1830
2350
  ],
1831
- "max_tokens": 131072
2351
+ "max_tokens": 131072,
2352
+ "max_tools": -1,
2353
+ "video_input_mode": "frames"
1832
2354
  },
1833
2355
  "glm-4.1v-9b-thinking": {
1834
2356
  "max_output_tokens": 8192,
@@ -1865,7 +2387,9 @@
1865
2387
  "glm4.1v-9b-thinking"
1866
2388
  ],
1867
2389
  "max_tokens": 65536,
1868
- "arxiv": "2507.01006"
2390
+ "arxiv": "2507.01006",
2391
+ "max_tools": -1,
2392
+ "video_input_mode": "frames"
1869
2393
  },
1870
2394
  "mistral-small-3.1-24b-instruct": {
1871
2395
  "max_output_tokens": 8192,
@@ -1888,7 +2412,9 @@
1888
2412
  ],
1889
2413
  "max_tokens": 131072,
1890
2414
  "total_parameters": "24B",
1891
- "release_date": "2025-03-17"
2415
+ "release_date": "2025-03-17",
2416
+ "max_tools": -1,
2417
+ "video_input_mode": "frames"
1892
2418
  },
1893
2419
  "mistral-small-3.2-24b-instruct": {
1894
2420
  "max_output_tokens": 8192,
@@ -1913,7 +2439,9 @@
1913
2439
  ],
1914
2440
  "max_tokens": 131072,
1915
2441
  "total_parameters": "24B",
1916
- "release_date": "2025-06-01"
2442
+ "release_date": "2025-06-01",
2443
+ "max_tools": -1,
2444
+ "video_input_mode": "frames"
1917
2445
  },
1918
2446
  "llama-4-scout": {
1919
2447
  "max_output_tokens": 8192,
@@ -1942,7 +2470,9 @@
1942
2470
  "max_tokens": 10000000,
1943
2471
  "release_date": "2025-04-05",
1944
2472
  "image_patch_size": 14,
1945
- "max_image_tokens": 6400
2473
+ "max_image_tokens": 6400,
2474
+ "max_tools": -1,
2475
+ "video_input_mode": "frames"
1946
2476
  },
1947
2477
  "llama-4-maverick": {
1948
2478
  "max_output_tokens": 8192,
@@ -1970,7 +2500,9 @@
1970
2500
  "max_tokens": 1000000,
1971
2501
  "release_date": "2025-04-05",
1972
2502
  "image_patch_size": 14,
1973
- "max_image_tokens": 6400
2503
+ "max_image_tokens": 6400,
2504
+ "max_tools": -1,
2505
+ "video_input_mode": "frames"
1974
2506
  },
1975
2507
  "llama-4-behemoth": {
1976
2508
  "max_output_tokens": 8192,
@@ -1999,7 +2531,9 @@
1999
2531
  "release_date": "2025-04-05",
2000
2532
  "status": "announced",
2001
2533
  "image_patch_size": 14,
2002
- "max_image_tokens": 6400
2534
+ "max_image_tokens": 6400,
2535
+ "max_tools": -1,
2536
+ "video_input_mode": "frames"
2003
2537
  },
2004
2538
  "minimax-m2": {
2005
2539
  "max_output_tokens": 131072,
@@ -2021,7 +2555,6 @@
2021
2555
  "aliases": [
2022
2556
  "MiniMaxAI/MiniMax-M2",
2023
2557
  "mlx-community/minimax-m2",
2024
- "mlx-community/MiniMax-M2",
2025
2558
  "unsloth/MiniMax-M2-GGUF",
2026
2559
  "minimax-m2-230b",
2027
2560
  "minimax-m2-10b-active",
@@ -2035,7 +2568,9 @@
2035
2568
  "top_p": 0.95,
2036
2569
  "top_k": 40
2037
2570
  },
2038
- "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax."
2571
+ "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2 and is built by MiniMax.",
2572
+ "max_tools": -1,
2573
+ "video_input_mode": "none"
2039
2574
  },
2040
2575
  "minimax-m2.1": {
2041
2576
  "max_output_tokens": 131072,
@@ -2058,7 +2593,6 @@
2058
2593
  "canonical_name": "minimax-m2.1",
2059
2594
  "aliases": [
2060
2595
  "MiniMaxAI/MiniMax-M2.1",
2061
- "minimaxai/minimax-m2.1",
2062
2596
  "minimax-m2.1-229b",
2063
2597
  "minimax-m2.1-10b-active",
2064
2598
  "minimax/minimax-m2.1"
@@ -2095,14 +2629,16 @@
2095
2629
  "SciCode": 41.0,
2096
2630
  "IFBench": 70.0,
2097
2631
  "AA-LCR": 62.0,
2098
- "τ²-Bench Telecom": 87.0
2632
+ "\u03c4\u00b2-Bench Telecom": 87.0
2099
2633
  },
2100
2634
  "inference_parameters": {
2101
2635
  "temperature": 1.0,
2102
2636
  "top_p": 0.95,
2103
2637
  "top_k": 40
2104
2638
  },
2105
- "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax."
2639
+ "default_system_prompt": "You are a helpful assistant. Your name is MiniMax-M2.1 and is built by MiniMax.",
2640
+ "max_tools": -1,
2641
+ "video_input_mode": "none"
2106
2642
  },
2107
2643
  "glm-4.6v": {
2108
2644
  "max_output_tokens": 16384,
@@ -2134,7 +2670,10 @@
2134
2670
  "end": "<|end_of_box|>"
2135
2671
  },
2136
2672
  "thinking_control": "/nothink",
2137
- "thinking_tags": ["<think>", "</think>"],
2673
+ "thinking_tags": [
2674
+ "<think>",
2675
+ "</think>"
2676
+ ],
2138
2677
  "notes": "GLM-4.6V foundation model (106B params) for cloud deployment. Native multimodal function calling with vision-driven tool use using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Supports interleaved image-text generation, 128K context, multimodal document understanding, and frontend replication from screenshots. Generates reasoning in 'reasoning_content' field or <think></think> tags. Achieves SoTA performance in visual understanding among similar parameter scales. Thinking can be disabled with '/nothink' suffix in user message. See: https://github.com/zai-org/GLM-V",
2139
2678
  "source": "HuggingFace zai-org/GLM-4.6V and GLM-V GitHub",
2140
2679
  "canonical_name": "glm-4.6v",
@@ -2147,7 +2686,9 @@
2147
2686
  "max_tokens": 128000,
2148
2687
  "release_date": "2025-05-07",
2149
2688
  "arxiv": "2507.01006",
2150
- "license": "MIT"
2689
+ "license": "MIT",
2690
+ "max_tools": -1,
2691
+ "video_input_mode": "frames"
2151
2692
  },
2152
2693
  "glm-4.6v-flash": {
2153
2694
  "max_output_tokens": 8192,
@@ -2178,7 +2719,10 @@
2178
2719
  "end": "<|end_of_box|>"
2179
2720
  },
2180
2721
  "thinking_control": "/nothink",
2181
- "thinking_tags": ["<think>", "</think>"],
2722
+ "thinking_tags": [
2723
+ "<think>",
2724
+ "</think>"
2725
+ ],
2182
2726
  "notes": "GLM-4.6V-Flash lightweight model (9B params) optimized for local deployment and low-latency applications. Maintains native multimodal function calling using XML format: <tool_call>function_name\\n<arg_key>key</arg_key>\\n<arg_value>value</arg_value>\\n</tool_call>. Generates reasoning in 'reasoning_content' field or <think></think> tags. Ideal for edge and resource-constrained environments while preserving core GLM-4.6V capabilities. Thinking can be disabled with '/nothink' suffix. See: https://github.com/zai-org/GLM-V",
2183
2727
  "source": "HuggingFace zai-org/GLM-4.6V-Flash and GLM-V GitHub",
2184
2728
  "canonical_name": "glm-4.6v-flash",
@@ -2191,7 +2735,9 @@
2191
2735
  "max_tokens": 128000,
2192
2736
  "release_date": "2025-05-07",
2193
2737
  "arxiv": "2507.01006",
2194
- "license": "MIT"
2738
+ "license": "MIT",
2739
+ "max_tools": -1,
2740
+ "video_input_mode": "frames"
2195
2741
  },
2196
2742
  "glm-4.7": {
2197
2743
  "max_output_tokens": 32768,
@@ -2205,14 +2751,18 @@
2205
2751
  "architecture": "mixture_of_experts",
2206
2752
  "total_parameters": "358B",
2207
2753
  "thinking_paradigm": "multi_mode",
2208
- "thinking_modes": ["interleaved_thinking", "preserved_thinking", "turn_level_thinking"],
2754
+ "thinking_modes": [
2755
+ "interleaved_thinking",
2756
+ "preserved_thinking",
2757
+ "turn_level_thinking"
2758
+ ],
2209
2759
  "native_function_calling": true,
2210
2760
  "agentic_coding": true,
2211
2761
  "terminal_tasks": true,
2212
2762
  "web_browsing": true,
2213
2763
  "tool_calling_parser": "glm47",
2214
2764
  "reasoning_parser": "glm45",
2215
- "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (τ²-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
2765
+ "notes": "GLM-4.7 latest MoE model (358B params) with enhanced coding, reasoning, and agentic capabilities. Achieves 73.8% on SWE-bench Verified, 66.7% on SWE-bench Multilingual, and 41% on Terminal Bench 2.0. Supports advanced thinking modes: Interleaved (think before actions), Preserved (cross-turn consistency), and Turn-level. Excels at tool using (\u03c4\u00b2-Bench: 87.4%), web browsing (BrowseComp: 52%), and complex reasoning (HLE w/ Tools: 42.8%, AIME 2025: 95.7%). 128K context window with 32K output capacity. Optimized for modern coding environments including Claude Code, Kilo Code, Cline, Roo Code.",
2216
2766
  "source": "HuggingFace zai-org/GLM-4.7 and GLM technical blog",
2217
2767
  "canonical_name": "glm-4.7",
2218
2768
  "aliases": [
@@ -2240,7 +2790,7 @@
2240
2790
  "BrowseComp": 52.0,
2241
2791
  "BrowseComp (w/ Context Manage)": 67.5,
2242
2792
  "BrowseComp-Zh": 66.6,
2243
- "τ²-Bench": 87.4,
2793
+ "\u03c4\u00b2-Bench": 87.4,
2244
2794
  "MMLU-Pro": 84.3,
2245
2795
  "GPQA-Diamond": 85.7
2246
2796
  },
@@ -2249,7 +2799,9 @@
2249
2799
  "top_p": 0.95,
2250
2800
  "enable_thinking": true,
2251
2801
  "clear_thinking": false
2252
- }
2802
+ },
2803
+ "max_tools": -1,
2804
+ "video_input_mode": "none"
2253
2805
  },
2254
2806
  "devstral-small-2-24b": {
2255
2807
  "max_output_tokens": 16384,
@@ -2269,12 +2821,17 @@
2269
2821
  "canonical_name": "devstral-small-2-24b",
2270
2822
  "aliases": [
2271
2823
  "mistralai/Devstral-Small-2-24B-Instruct-2512",
2824
+ "mistralai/devstral-small-2-2512",
2825
+ "mistralai/devstral-small-2",
2272
2826
  "devstral-small-2",
2827
+ "devstral-small-2-2512",
2273
2828
  "devstral-small-2-24b-instruct"
2274
2829
  ],
2275
2830
  "max_tokens": 262144,
2276
2831
  "release_date": "2025-12",
2277
- "license": "Apache-2.0"
2832
+ "license": "Apache-2.0",
2833
+ "max_tools": -1,
2834
+ "video_input_mode": "none"
2278
2835
  },
2279
2836
  "devstral-2-123b": {
2280
2837
  "max_output_tokens": 16384,
@@ -2294,12 +2851,15 @@
2294
2851
  "canonical_name": "devstral-2-123b",
2295
2852
  "aliases": [
2296
2853
  "mistralai/Devstral-2-123B-Instruct-2512",
2854
+ "mistralai/devstral-2",
2297
2855
  "devstral-2",
2298
2856
  "devstral-2-123b-instruct"
2299
2857
  ],
2300
2858
  "max_tokens": 262144,
2301
2859
  "release_date": "2025-12",
2302
- "license": "Modified-MIT"
2860
+ "license": "Modified-MIT",
2861
+ "max_tools": -1,
2862
+ "video_input_mode": "none"
2303
2863
  },
2304
2864
  "qwen3-235b-a22b-2507": {
2305
2865
  "max_output_tokens": 16384,
@@ -2327,7 +2887,9 @@
2327
2887
  "max_tokens": 262144,
2328
2888
  "release_date": "2025-07",
2329
2889
  "arxiv": "2505.09388",
2330
- "license": "Apache-2.0"
2890
+ "license": "Apache-2.0",
2891
+ "max_tools": -1,
2892
+ "video_input_mode": "none"
2331
2893
  },
2332
2894
  "qwen3-235b-a22b-2507-fp8": {
2333
2895
  "max_output_tokens": 16384,
@@ -2356,7 +2918,9 @@
2356
2918
  "max_tokens": 262144,
2357
2919
  "release_date": "2025-07",
2358
2920
  "arxiv": "2505.09388",
2359
- "license": "Apache-2.0"
2921
+ "license": "Apache-2.0",
2922
+ "max_tools": -1,
2923
+ "video_input_mode": "none"
2360
2924
  },
2361
2925
  "granite-4.0-h-tiny": {
2362
2926
  "max_output_tokens": 16384,
@@ -2388,7 +2952,9 @@
2388
2952
  ],
2389
2953
  "max_tokens": 131072,
2390
2954
  "release_date": "2025-10-02",
2391
- "license": "Apache-2.0"
2955
+ "license": "Apache-2.0",
2956
+ "max_tools": -1,
2957
+ "video_input_mode": "none"
2392
2958
  },
2393
2959
  "gpt-oss-20b": {
2394
2960
  "max_output_tokens": 8192,
@@ -2406,7 +2972,11 @@
2406
2972
  "tensor_type": "BF16+U8",
2407
2973
  "quantization_method": "MXFP4",
2408
2974
  "response_format": "harmony",
2409
- "reasoning_levels": ["low", "medium", "high"],
2975
+ "reasoning_levels": [
2976
+ "low",
2977
+ "medium",
2978
+ "high"
2979
+ ],
2410
2980
  "agentic_capabilities": true,
2411
2981
  "function_calling": true,
2412
2982
  "web_browsing": true,
@@ -2422,7 +2992,9 @@
2422
2992
  "max_tokens": 128000,
2423
2993
  "release_date": "2025-08",
2424
2994
  "arxiv": "2508.10925",
2425
- "license": "Apache-2.0"
2995
+ "license": "Apache-2.0",
2996
+ "max_tools": -1,
2997
+ "video_input_mode": "none"
2426
2998
  },
2427
2999
  "gpt-oss-120b": {
2428
3000
  "max_output_tokens": 8192,
@@ -2440,7 +3012,11 @@
2440
3012
  "tensor_type": "BF16+U8",
2441
3013
  "quantization_method": "MXFP4",
2442
3014
  "response_format": "harmony",
2443
- "reasoning_levels": ["low", "medium", "high"],
3015
+ "reasoning_levels": [
3016
+ "low",
3017
+ "medium",
3018
+ "high"
3019
+ ],
2444
3020
  "agentic_capabilities": true,
2445
3021
  "function_calling": true,
2446
3022
  "web_browsing": true,
@@ -2457,7 +3033,9 @@
2457
3033
  "max_tokens": 128000,
2458
3034
  "release_date": "2025-08",
2459
3035
  "arxiv": "2508.10925",
2460
- "license": "Apache-2.0"
3036
+ "license": "Apache-2.0",
3037
+ "max_tools": -1,
3038
+ "video_input_mode": "none"
2461
3039
  },
2462
3040
  "qwen3-vl-2b": {
2463
3041
  "max_output_tokens": 8192,
@@ -2484,7 +3062,11 @@
2484
3062
  "spatial_perception": true,
2485
3063
  "document_understanding": true,
2486
3064
  "ocr_languages": 32,
2487
- "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
3065
+ "architecture_updates": [
3066
+ "Interleaved-MRoPE",
3067
+ "DeepStack",
3068
+ "Text-Timestamp-Alignment"
3069
+ ],
2488
3070
  "notes": "Qwen3-VL 2B dense model with 256K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
2489
3071
  "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2490
3072
  "canonical_name": "qwen3-vl-2b",
@@ -2495,7 +3077,9 @@
2495
3077
  "max_tokens": 262144,
2496
3078
  "release_date": "2025-05",
2497
3079
  "arxiv": "2505.09388",
2498
- "license": "Apache-2.0"
3080
+ "license": "Apache-2.0",
3081
+ "max_tools": -1,
3082
+ "video_input_mode": "native"
2499
3083
  },
2500
3084
  "qwen3-vl-4b": {
2501
3085
  "max_output_tokens": 8192,
@@ -2523,7 +3107,11 @@
2523
3107
  "document_understanding": true,
2524
3108
  "ocr_languages": 32,
2525
3109
  "total_parameters": "4.83B",
2526
- "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
3110
+ "architecture_updates": [
3111
+ "Interleaved-MRoPE",
3112
+ "DeepStack",
3113
+ "Text-Timestamp-Alignment"
3114
+ ],
2527
3115
  "notes": "Qwen3-VL 4B dense model (4.83B params) with 256K context, optimized for LMStudio. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding. FP8 checkpoints available.",
2528
3116
  "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2529
3117
  "canonical_name": "qwen3-vl-4b",
@@ -2535,7 +3123,9 @@
2535
3123
  "max_tokens": 262144,
2536
3124
  "release_date": "2025-05",
2537
3125
  "arxiv": "2505.09388",
2538
- "license": "Apache-2.0"
3126
+ "license": "Apache-2.0",
3127
+ "max_tools": -1,
3128
+ "video_input_mode": "native"
2539
3129
  },
2540
3130
  "qwen3-vl-8b": {
2541
3131
  "max_output_tokens": 8192,
@@ -2563,7 +3153,11 @@
2563
3153
  "document_understanding": true,
2564
3154
  "ocr_languages": 32,
2565
3155
  "total_parameters": "8.77B",
2566
- "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
3156
+ "architecture_updates": [
3157
+ "Interleaved-MRoPE",
3158
+ "DeepStack",
3159
+ "Text-Timestamp-Alignment"
3160
+ ],
2567
3161
  "notes": "Qwen3-VL 8B dense model (8.77B params) with 256K context, optimized for LMStudio. Most powerful vision-language model in Qwen series. Visual agent for GUI operation, visual coding, advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. FP8 checkpoints available.",
2568
3162
  "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2569
3163
  "canonical_name": "qwen3-vl-8b",
@@ -2575,7 +3169,9 @@
2575
3169
  "max_tokens": 262144,
2576
3170
  "release_date": "2025-05",
2577
3171
  "arxiv": "2505.09388",
2578
- "license": "Apache-2.0"
3172
+ "license": "Apache-2.0",
3173
+ "max_tools": -1,
3174
+ "video_input_mode": "native"
2579
3175
  },
2580
3176
  "qwen3-vl-30b-a3b": {
2581
3177
  "max_output_tokens": 8192,
@@ -2605,7 +3201,11 @@
2605
3201
  "architecture": "mixture_of_experts",
2606
3202
  "total_parameters": "30.5B",
2607
3203
  "active_parameters": "3.3B",
2608
- "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
3204
+ "architecture_updates": [
3205
+ "Interleaved-MRoPE",
3206
+ "DeepStack",
3207
+ "Text-Timestamp-Alignment"
3208
+ ],
2609
3209
  "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model in the series. 128K context. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs.",
2610
3210
  "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2611
3211
  "canonical_name": "qwen3-vl-30b-a3b",
@@ -2617,7 +3217,9 @@
2617
3217
  "max_tokens": 131072,
2618
3218
  "release_date": "2025-05",
2619
3219
  "arxiv": "2505.09388",
2620
- "license": "Apache-2.0"
3220
+ "license": "Apache-2.0",
3221
+ "max_tools": -1,
3222
+ "video_input_mode": "native"
2621
3223
  },
2622
3224
  "qwen3-vl-235b-a22b": {
2623
3225
  "max_output_tokens": 8192,
@@ -2649,7 +3251,11 @@
2649
3251
  "active_parameters": "22B",
2650
3252
  "experts": 128,
2651
3253
  "experts_activated": 8,
2652
- "architecture_updates": ["Interleaved-MRoPE", "DeepStack", "Text-Timestamp-Alignment"],
3254
+ "architecture_updates": [
3255
+ "Interleaved-MRoPE",
3256
+ "DeepStack",
3257
+ "Text-Timestamp-Alignment"
3258
+ ],
2653
3259
  "notes": "Qwen3-VL 235B MoE model (235B total/22B active, 128 experts/8 activated), flagship vision model. 256K context expandable to 1M. Visual agent for GUI operation, visual coding (Draw.io/HTML/CSS/JS), advanced spatial perception with 2D/3D grounding. Enhanced OCR (32 languages), long video understanding with second-level indexing. Text understanding on par with pure LLMs. Superior visual perception and reasoning.",
2654
3260
  "source": "Qwen HuggingFace and Qwen3 technical report arXiv:2505.09388",
2655
3261
  "canonical_name": "qwen3-vl-235b-a22b",
@@ -2660,7 +3266,9 @@
2660
3266
  "max_tokens": 262144,
2661
3267
  "release_date": "2025-05",
2662
3268
  "arxiv": "2505.09388",
2663
- "license": "Apache-2.0"
3269
+ "license": "Apache-2.0",
3270
+ "max_tools": -1,
3271
+ "video_input_mode": "native"
2664
3272
  },
2665
3273
  "nemotron-3-nano-30b-a3b": {
2666
3274
  "max_output_tokens": 8192,
@@ -2686,7 +3294,14 @@
2686
3294
  "agentic_capabilities": true,
2687
3295
  "function_calling": true,
2688
3296
  "tool_calling_format": "json",
2689
- "languages": ["English", "German", "Spanish", "French", "Italian", "Japanese"],
3297
+ "languages": [
3298
+ "English",
3299
+ "German",
3300
+ "Spanish",
3301
+ "French",
3302
+ "Italian",
3303
+ "Japanese"
3304
+ ],
2690
3305
  "notes": "NVIDIA Nemotron-3-Nano hybrid MoE model (30B total/3.5B active, 128 experts/6 activated + 1 shared). Combines 23 Mamba-2 layers with 6 Attention layers. Unified model for reasoning and non-reasoning tasks with configurable reasoning mode. Generates reasoning trace before final response. 256K context extendable to 1M with YaRN. Strong performance on AIME25 (99.2% with tools), SWE-Bench (38.8%), MiniF2F (50.0% pass@1). Native tool calling via chatml-function-calling format. Commercial use ready.",
2691
3306
  "source": "NVIDIA Nemotron HuggingFace and technical report",
2692
3307
  "canonical_name": "nemotron-3-nano-30b-a3b",
@@ -2720,7 +3335,9 @@
2720
3335
  "Scale AI Multi Challenge": 38.5,
2721
3336
  "Arena-Hard-V2 (Hard Prompt)": 72.1,
2722
3337
  "Arena-Hard-V2 (Average)": 67.7
2723
- }
3338
+ },
3339
+ "max_tools": -1,
3340
+ "video_input_mode": "none"
2724
3341
  }
2725
3342
  },
2726
3343
  "tool_support_levels": {
@@ -2804,7 +3421,9 @@
2804
3421
  "source": "AbstractCore generic fallback",
2805
3422
  "canonical_name": "generic_vision_model",
2806
3423
  "aliases": [],
2807
- "max_tokens": 32768
3424
+ "max_tokens": 32768,
3425
+ "max_tools": -1,
3426
+ "video_input_mode": "frames"
2808
3427
  },
2809
3428
  "default_capabilities": {
2810
3429
  "max_output_tokens": 4096,
@@ -2817,6 +3436,8 @@
2817
3436
  "thinking_budget": false,
2818
3437
  "video_support": false,
2819
3438
  "fim_support": false,
2820
- "max_tokens": 16384
3439
+ "max_tokens": 16384,
3440
+ "max_tools": 0,
3441
+ "video_input_mode": "none"
2821
3442
  }
2822
3443
  }