pi-free 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1301 @@
1
+ // Auto-generated benchmark data chunk 5
2
+ // Models: glm-4.5-air .. seed-oss-36b-instruct (68 entries)
3
+ // Last updated: 2026-06-01
4
+ // DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
5
+
6
+ import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
7
+
8
+ export const BENCHMARKS_CHUNK_5: Record<string, HardcodedBenchmark> = {
9
+ "glm-4.5-air": {
10
+ // AA specific benchmarks
11
+ codingIndex: 23.8,
12
+ mathIndex: 80.7,
13
+
14
+ // Academic benchmarks
15
+ mmluPro: 0.815,
16
+ gpqa: 0.733,
17
+ hle: 0.068,
18
+
19
+ // Capabilities
20
+ contextWindow: 8192,
21
+ supportsReasoning: false,
22
+ supportsVision: false,
23
+
24
+ // Metadata
25
+ lastUpdated: "2026-06-01",
26
+ originalModel: "GLM-4.5-Air",
27
+ },
28
+ "glm-4.6v-reasoning": {
29
+ // AA specific benchmarks
30
+ codingIndex: 19.7,
31
+ mathIndex: 85.3,
32
+
33
+ // Academic benchmarks
34
+ mmluPro: 0.799,
35
+ gpqa: 0.719,
36
+ hle: 0.089,
37
+
38
+ // Capabilities
39
+ contextWindow: 8192,
40
+ supportsReasoning: false,
41
+ supportsVision: false,
42
+
43
+ // Metadata
44
+ lastUpdated: "2026-06-01",
45
+ originalModel: "GLM-4.6V (Reasoning)",
46
+ },
47
+ "glm-4.5v-non-reasoning": {
48
+ // AA specific benchmarks
49
+ codingIndex: 10.8,
50
+ mathIndex: 15.3,
51
+
52
+ // Academic benchmarks
53
+ mmluPro: 0.751,
54
+ gpqa: 0.573,
55
+ hle: 0.036,
56
+
57
+ // Capabilities
58
+ contextWindow: 8192,
59
+ supportsReasoning: false,
60
+ supportsVision: false,
61
+
62
+ // Metadata
63
+ lastUpdated: "2026-06-01",
64
+ originalModel: "GLM-4.5V (Non-reasoning)",
65
+ },
66
+ "command-r+-apr-24": {
67
+ // AA specific benchmarks
68
+ codingIndex: undefined,
69
+ mathIndex: undefined,
70
+
71
+ // Academic benchmarks
72
+ mmluPro: 0.432,
73
+ gpqa: 0.323,
74
+ hle: 0.045,
75
+
76
+ // Capabilities
77
+ contextWindow: 8192,
78
+ supportsReasoning: false,
79
+ supportsVision: false,
80
+
81
+ // Metadata
82
+ lastUpdated: "2026-06-01",
83
+ originalModel: "Command-R+ (Apr '24)",
84
+ },
85
+ "command-r-mar-24": {
86
+ // AA specific benchmarks
87
+ codingIndex: undefined,
88
+ mathIndex: undefined,
89
+
90
+ // Academic benchmarks
91
+ mmluPro: 0.338,
92
+ gpqa: 0.284,
93
+ hle: 0.048,
94
+
95
+ // Capabilities
96
+ contextWindow: 8192,
97
+ supportsReasoning: false,
98
+ supportsVision: false,
99
+
100
+ // Metadata
101
+ lastUpdated: "2026-06-01",
102
+ originalModel: "Command-R (Mar '24)",
103
+ },
104
+ "apriel-v1.5-15b-thinker": {
105
+ // AA specific benchmarks
106
+ codingIndex: 18.7,
107
+ mathIndex: 87.5,
108
+
109
+ // Academic benchmarks
110
+ mmluPro: 0.773,
111
+ gpqa: 0.713,
112
+ hle: 0.12,
113
+
114
+ // Capabilities
115
+ contextWindow: 8192,
116
+ supportsReasoning: false,
117
+ supportsVision: false,
118
+
119
+ // Metadata
120
+ lastUpdated: "2026-06-01",
121
+ originalModel: "Apriel-v1.5-15B-Thinker",
122
+ },
123
+ "jamba-1.5-large": {
124
+ // AA specific benchmarks
125
+ codingIndex: undefined,
126
+ mathIndex: undefined,
127
+
128
+ // Academic benchmarks
129
+ mmluPro: 0.572,
130
+ gpqa: 0.427,
131
+ hle: 0.04,
132
+
133
+ // Capabilities
134
+ contextWindow: 8192,
135
+ supportsReasoning: false,
136
+ supportsVision: false,
137
+
138
+ // Metadata
139
+ lastUpdated: "2026-06-01",
140
+ originalModel: "Jamba 1.5 Large",
141
+ },
142
+ "jamba-1.5-mini": {
143
+ // AA specific benchmarks
144
+ codingIndex: undefined,
145
+ mathIndex: undefined,
146
+
147
+ // Academic benchmarks
148
+ mmluPro: 0.371,
149
+ gpqa: 0.302,
150
+ hle: 0.051,
151
+
152
+ // Capabilities
153
+ contextWindow: 8192,
154
+ supportsReasoning: false,
155
+ supportsVision: false,
156
+
157
+ // Metadata
158
+ lastUpdated: "2026-06-01",
159
+ originalModel: "Jamba 1.5 Mini",
160
+ },
161
+ "jamba-1.6-mini": {
162
+ // AA specific benchmarks
163
+ codingIndex: undefined,
164
+ mathIndex: undefined,
165
+
166
+ // Academic benchmarks
167
+ mmluPro: 0.367,
168
+ gpqa: 0.3,
169
+ hle: 0.046,
170
+
171
+ // Capabilities
172
+ contextWindow: 8192,
173
+ supportsReasoning: false,
174
+ supportsVision: false,
175
+
176
+ // Metadata
177
+ lastUpdated: "2026-06-01",
178
+ originalModel: "Jamba 1.6 Mini",
179
+ },
180
+ "jamba-1.6-large": {
181
+ // AA specific benchmarks
182
+ codingIndex: undefined,
183
+ mathIndex: undefined,
184
+
185
+ // Academic benchmarks
186
+ mmluPro: 0.565,
187
+ gpqa: 0.387,
188
+ hle: 0.04,
189
+
190
+ // Capabilities
191
+ contextWindow: 8192,
192
+ supportsReasoning: false,
193
+ supportsVision: false,
194
+
195
+ // Metadata
196
+ lastUpdated: "2026-06-01",
197
+ originalModel: "Jamba 1.6 Large",
198
+ },
199
+ "arctic-instruct": {
200
+ // AA specific benchmarks
201
+ codingIndex: undefined,
202
+ mathIndex: undefined,
203
+
204
+ // Academic benchmarks
205
+ mmluPro: undefined,
206
+ gpqa: undefined,
207
+ hle: undefined,
208
+
209
+ // Capabilities
210
+ contextWindow: 8192,
211
+ supportsReasoning: false,
212
+ supportsVision: false,
213
+
214
+ // Metadata
215
+ lastUpdated: "2026-06-01",
216
+ originalModel: "Arctic Instruct",
217
+ },
218
+ "qwen2.5-max": {
219
+ // AA specific benchmarks
220
+ codingIndex: undefined,
221
+ mathIndex: undefined,
222
+
223
+ // Academic benchmarks
224
+ mmluPro: 0.762,
225
+ gpqa: 0.587,
226
+ hle: 0.045,
227
+
228
+ // Capabilities
229
+ contextWindow: 8192,
230
+ supportsReasoning: false,
231
+ supportsVision: false,
232
+
233
+ // Metadata
234
+ lastUpdated: "2026-06-01",
235
+ originalModel: "Qwen2.5 Max",
236
+ },
237
+ "qwen2.5-instruct-72b": {
238
+ // AA specific benchmarks
239
+ codingIndex: 11.9,
240
+ mathIndex: 14,
241
+
242
+ // Academic benchmarks
243
+ mmluPro: 0.72,
244
+ gpqa: 0.491,
245
+ hle: 0.042,
246
+
247
+ // Capabilities
248
+ contextWindow: 8192,
249
+ supportsReasoning: false,
250
+ supportsVision: false,
251
+
252
+ // Metadata
253
+ lastUpdated: "2026-06-01",
254
+ originalModel: "Qwen2.5 Instruct 72B",
255
+ },
256
+ "qwen2.5-coder-instruct-32b": {
257
+ // AA specific benchmarks
258
+ codingIndex: undefined,
259
+ mathIndex: undefined,
260
+
261
+ // Academic benchmarks
262
+ mmluPro: 0.635,
263
+ gpqa: 0.417,
264
+ hle: 0.038,
265
+
266
+ // Capabilities
267
+ contextWindow: 8192,
268
+ supportsReasoning: false,
269
+ supportsVision: false,
270
+
271
+ // Metadata
272
+ lastUpdated: "2026-06-01",
273
+ originalModel: "Qwen2.5 Coder Instruct 32B",
274
+ },
275
+ "qwen2.5-turbo": {
276
+ // AA specific benchmarks
277
+ codingIndex: undefined,
278
+ mathIndex: undefined,
279
+
280
+ // Academic benchmarks
281
+ mmluPro: 0.633,
282
+ gpqa: 0.41,
283
+ hle: 0.042,
284
+
285
+ // Capabilities
286
+ contextWindow: 8192,
287
+ supportsReasoning: false,
288
+ supportsVision: false,
289
+
290
+ // Metadata
291
+ lastUpdated: "2026-06-01",
292
+ originalModel: "Qwen2.5 Turbo",
293
+ },
294
+ "qwen2-instruct-72b": {
295
+ // AA specific benchmarks
296
+ codingIndex: undefined,
297
+ mathIndex: undefined,
298
+
299
+ // Academic benchmarks
300
+ mmluPro: 0.622,
301
+ gpqa: 0.371,
302
+ hle: 0.037,
303
+
304
+ // Capabilities
305
+ contextWindow: 8192,
306
+ supportsReasoning: false,
307
+ supportsVision: false,
308
+
309
+ // Metadata
310
+ lastUpdated: "2026-06-01",
311
+ originalModel: "Qwen2 Instruct 72B",
312
+ },
313
+ "qwq-32b": {
314
+ // AA specific benchmarks
315
+ codingIndex: undefined,
316
+ mathIndex: 29,
317
+
318
+ // Academic benchmarks
319
+ mmluPro: 0.764,
320
+ gpqa: 0.593,
321
+ hle: 0.082,
322
+
323
+ // Capabilities
324
+ contextWindow: 8192,
325
+ supportsReasoning: false,
326
+ supportsVision: false,
327
+
328
+ // Metadata
329
+ lastUpdated: "2026-06-01",
330
+ originalModel: "QwQ 32B",
331
+ },
332
+ "qwen3-vl-235b-a22b-instruct": {
333
+ // AA specific benchmarks
334
+ codingIndex: 16.5,
335
+ mathIndex: 70.7,
336
+
337
+ // Academic benchmarks
338
+ mmluPro: 0.823,
339
+ gpqa: 0.712,
340
+ hle: 0.063,
341
+
342
+ // Capabilities
343
+ contextWindow: 8192,
344
+ supportsReasoning: false,
345
+ supportsVision: false,
346
+
347
+ // Metadata
348
+ lastUpdated: "2026-06-01",
349
+ originalModel: "Qwen3 VL 235B A22B Instruct",
350
+ },
351
+ "qwen3-coder-30b-a3b-instruct": {
352
+ // AA specific benchmarks
353
+ codingIndex: 19.4,
354
+ mathIndex: 29,
355
+
356
+ // Academic benchmarks
357
+ mmluPro: 0.706,
358
+ gpqa: 0.516,
359
+ hle: 0.04,
360
+
361
+ // Capabilities
362
+ contextWindow: 8192,
363
+ supportsReasoning: false,
364
+ supportsVision: false,
365
+
366
+ // Metadata
367
+ lastUpdated: "2026-06-01",
368
+ originalModel: "Qwen3 Coder 30B A3B Instruct",
369
+ },
370
+ "qwen3-4b-non-reasoning": {
371
+ // AA specific benchmarks
372
+ codingIndex: undefined,
373
+ mathIndex: undefined,
374
+
375
+ // Academic benchmarks
376
+ mmluPro: 0.586,
377
+ gpqa: 0.398,
378
+ hle: 0.037,
379
+
380
+ // Capabilities
381
+ contextWindow: 8192,
382
+ supportsReasoning: false,
383
+ supportsVision: false,
384
+
385
+ // Metadata
386
+ lastUpdated: "2026-06-01",
387
+ originalModel: "Qwen3 4B (Non-reasoning)",
388
+ },
389
+ "qwen3-235b-a22b-2507-reasoning": {
390
+ // AA specific benchmarks
391
+ codingIndex: 23.2,
392
+ mathIndex: 91,
393
+
394
+ // Academic benchmarks
395
+ mmluPro: 0.843,
396
+ gpqa: 0.79,
397
+ hle: 0.15,
398
+
399
+ // Capabilities
400
+ contextWindow: 8192,
401
+ supportsReasoning: false,
402
+ supportsVision: false,
403
+
404
+ // Metadata
405
+ lastUpdated: "2026-06-01",
406
+ originalModel: "Qwen3 235B A22B 2507 (Reasoning)",
407
+ },
408
+ "qwen3-235b-a22b-2507-instruct": {
409
+ // AA specific benchmarks
410
+ codingIndex: 22.1,
411
+ mathIndex: 71.7,
412
+
413
+ // Academic benchmarks
414
+ mmluPro: 0.828,
415
+ gpqa: 0.753,
416
+ hle: 0.106,
417
+
418
+ // Capabilities
419
+ contextWindow: 8192,
420
+ supportsReasoning: false,
421
+ supportsVision: false,
422
+
423
+ // Metadata
424
+ lastUpdated: "2026-06-01",
425
+ originalModel: "Qwen3 235B A22B 2507 Instruct",
426
+ },
427
+ "qwen3-coder-480b-a35b-instruct": {
428
+ // AA specific benchmarks
429
+ codingIndex: 24.6,
430
+ mathIndex: 39.3,
431
+
432
+ // Academic benchmarks
433
+ mmluPro: 0.788,
434
+ gpqa: 0.618,
435
+ hle: 0.044,
436
+
437
+ // Capabilities
438
+ contextWindow: 8192,
439
+ supportsReasoning: false,
440
+ supportsVision: false,
441
+
442
+ // Metadata
443
+ lastUpdated: "2026-06-01",
444
+ originalModel: "Qwen3 Coder 480B A35B Instruct",
445
+ },
446
+ "qwen3.5-27b-reasoning": {
447
+ // AA specific benchmarks
448
+ codingIndex: 34.9,
449
+ mathIndex: undefined,
450
+
451
+ // Academic benchmarks
452
+ mmluPro: undefined,
453
+ gpqa: 0.858,
454
+ hle: 0.222,
455
+
456
+ // Capabilities
457
+ contextWindow: 8192,
458
+ supportsReasoning: false,
459
+ supportsVision: false,
460
+
461
+ // Metadata
462
+ lastUpdated: "2026-06-01",
463
+ originalModel: "Qwen3.5 27B (Reasoning)",
464
+ },
465
+ "qwen3-235b-a22b-reasoning": {
466
+ // AA specific benchmarks
467
+ codingIndex: 17.4,
468
+ mathIndex: 82,
469
+
470
+ // Academic benchmarks
471
+ mmluPro: 0.828,
472
+ gpqa: 0.7,
473
+ hle: 0.117,
474
+
475
+ // Capabilities
476
+ contextWindow: 8192,
477
+ supportsReasoning: false,
478
+ supportsVision: false,
479
+
480
+ // Metadata
481
+ lastUpdated: "2026-06-01",
482
+ originalModel: "Qwen3 235B A22B (Reasoning)",
483
+ },
484
+ "qwen3-235b-a22b-non-reasoning": {
485
+ // AA specific benchmarks
486
+ codingIndex: 14,
487
+ mathIndex: 23.7,
488
+
489
+ // Academic benchmarks
490
+ mmluPro: 0.762,
491
+ gpqa: 0.613,
492
+ hle: 0.047,
493
+
494
+ // Capabilities
495
+ contextWindow: 8192,
496
+ supportsReasoning: false,
497
+ supportsVision: false,
498
+
499
+ // Metadata
500
+ lastUpdated: "2026-06-01",
501
+ originalModel: "Qwen3 235B A22B (Non-reasoning)",
502
+ },
503
+ "qwen3.5-35b-a3b-reasoning": {
504
+ // AA specific benchmarks
505
+ codingIndex: 30.3,
506
+ mathIndex: undefined,
507
+
508
+ // Academic benchmarks
509
+ mmluPro: undefined,
510
+ gpqa: 0.845,
511
+ hle: 0.197,
512
+
513
+ // Capabilities
514
+ contextWindow: 8192,
515
+ supportsReasoning: false,
516
+ supportsVision: false,
517
+
518
+ // Metadata
519
+ lastUpdated: "2026-06-01",
520
+ originalModel: "Qwen3.5 35B A3B (Reasoning)",
521
+ },
522
+ "qwen3-32b-non-reasoning": {
523
+ // AA specific benchmarks
524
+ codingIndex: undefined,
525
+ mathIndex: 19.7,
526
+
527
+ // Academic benchmarks
528
+ mmluPro: 0.727,
529
+ gpqa: 0.535,
530
+ hle: 0.043,
531
+
532
+ // Capabilities
533
+ contextWindow: 8192,
534
+ supportsReasoning: false,
535
+ supportsVision: false,
536
+
537
+ // Metadata
538
+ lastUpdated: "2026-06-01",
539
+ originalModel: "Qwen3 32B (Non-reasoning)",
540
+ },
541
+ "qwq-32b-preview": {
542
+ // AA specific benchmarks
543
+ codingIndex: undefined,
544
+ mathIndex: undefined,
545
+
546
+ // Academic benchmarks
547
+ mmluPro: 0.648,
548
+ gpqa: 0.557,
549
+ hle: 0.048,
550
+
551
+ // Capabilities
552
+ contextWindow: 8192,
553
+ supportsReasoning: false,
554
+ supportsVision: false,
555
+
556
+ // Metadata
557
+ lastUpdated: "2026-06-01",
558
+ originalModel: "QwQ 32B-Preview",
559
+ },
560
+ "qwen3-vl-4b-reasoning": {
561
+ // AA specific benchmarks
562
+ codingIndex: 6.7,
563
+ mathIndex: 25.7,
564
+
565
+ // Academic benchmarks
566
+ mmluPro: 0.7,
567
+ gpqa: 0.494,
568
+ hle: 0.044,
569
+
570
+ // Capabilities
571
+ contextWindow: 8192,
572
+ supportsReasoning: false,
573
+ supportsVision: false,
574
+
575
+ // Metadata
576
+ lastUpdated: "2026-06-01",
577
+ originalModel: "Qwen3 VL 4B (Reasoning)",
578
+ },
579
+ "qwen3-vl-4b-instruct": {
580
+ // AA specific benchmarks
581
+ codingIndex: 4.6,
582
+ mathIndex: 37,
583
+
584
+ // Academic benchmarks
585
+ mmluPro: 0.634,
586
+ gpqa: 0.371,
587
+ hle: 0.037,
588
+
589
+ // Capabilities
590
+ contextWindow: 8192,
591
+ supportsReasoning: false,
592
+ supportsVision: false,
593
+
594
+ // Metadata
595
+ lastUpdated: "2026-06-01",
596
+ originalModel: "Qwen3 VL 4B Instruct",
597
+ },
598
+ "qwen3-30b-a3b-non-reasoning": {
599
+ // AA specific benchmarks
600
+ codingIndex: 13.3,
601
+ mathIndex: 21.7,
602
+
603
+ // Academic benchmarks
604
+ mmluPro: 0.71,
605
+ gpqa: 0.515,
606
+ hle: 0.046,
607
+
608
+ // Capabilities
609
+ contextWindow: 8192,
610
+ supportsReasoning: false,
611
+ supportsVision: false,
612
+
613
+ // Metadata
614
+ lastUpdated: "2026-06-01",
615
+ originalModel: "Qwen3 30B A3B (Non-reasoning)",
616
+ },
617
+ "qwen3-14b-non-reasoning": {
618
+ // AA specific benchmarks
619
+ codingIndex: 12.4,
620
+ mathIndex: 58,
621
+
622
+ // Academic benchmarks
623
+ mmluPro: 0.675,
624
+ gpqa: 0.47,
625
+ hle: 0.042,
626
+
627
+ // Capabilities
628
+ contextWindow: 8192,
629
+ supportsReasoning: false,
630
+ supportsVision: false,
631
+
632
+ // Metadata
633
+ lastUpdated: "2026-06-01",
634
+ originalModel: "Qwen3 14B (Non-reasoning)",
635
+ },
636
+ "qwen3-32b-reasoning": {
637
+ // AA specific benchmarks
638
+ codingIndex: 13.8,
639
+ mathIndex: 73,
640
+
641
+ // Academic benchmarks
642
+ mmluPro: 0.798,
643
+ gpqa: 0.668,
644
+ hle: 0.083,
645
+
646
+ // Capabilities
647
+ contextWindow: 8192,
648
+ supportsReasoning: false,
649
+ supportsVision: false,
650
+
651
+ // Metadata
652
+ lastUpdated: "2026-06-01",
653
+ originalModel: "Qwen3 32B (Reasoning)",
654
+ },
655
+ "qwen3-vl-8b-reasoning": {
656
+ // AA specific benchmarks
657
+ codingIndex: 9.8,
658
+ mathIndex: 30.7,
659
+
660
+ // Academic benchmarks
661
+ mmluPro: 0.749,
662
+ gpqa: 0.579,
663
+ hle: 0.033,
664
+
665
+ // Capabilities
666
+ contextWindow: 8192,
667
+ supportsReasoning: false,
668
+ supportsVision: false,
669
+
670
+ // Metadata
671
+ lastUpdated: "2026-06-01",
672
+ originalModel: "Qwen3 VL 8B (Reasoning)",
673
+ },
674
+ "qwen3-vl-30b-a3b-reasoning": {
675
+ // AA specific benchmarks
676
+ codingIndex: 13.1,
677
+ mathIndex: 82.3,
678
+
679
+ // Academic benchmarks
680
+ mmluPro: 0.807,
681
+ gpqa: 0.72,
682
+ hle: 0.087,
683
+
684
+ // Capabilities
685
+ contextWindow: 8192,
686
+ supportsReasoning: false,
687
+ supportsVision: false,
688
+
689
+ // Metadata
690
+ lastUpdated: "2026-06-01",
691
+ originalModel: "Qwen3 VL 30B A3B (Reasoning)",
692
+ },
693
+ "qwen3-vl-235b-a22b-reasoning": {
694
+ // AA specific benchmarks
695
+ codingIndex: 20.9,
696
+ mathIndex: 88.3,
697
+
698
+ // Academic benchmarks
699
+ mmluPro: 0.836,
700
+ gpqa: 0.772,
701
+ hle: 0.101,
702
+
703
+ // Capabilities
704
+ contextWindow: 8192,
705
+ supportsReasoning: false,
706
+ supportsVision: false,
707
+
708
+ // Metadata
709
+ lastUpdated: "2026-06-01",
710
+ originalModel: "Qwen3 VL 235B A22B (Reasoning)",
711
+ },
712
+ "qwen3-8b-non-reasoning": {
713
+ // AA specific benchmarks
714
+ codingIndex: 7.1,
715
+ mathIndex: 24.3,
716
+
717
+ // Academic benchmarks
718
+ mmluPro: 0.643,
719
+ gpqa: 0.452,
720
+ hle: 0.028,
721
+
722
+ // Capabilities
723
+ contextWindow: 8192,
724
+ supportsReasoning: false,
725
+ supportsVision: false,
726
+
727
+ // Metadata
728
+ lastUpdated: "2026-06-01",
729
+ originalModel: "Qwen3 8B (Non-reasoning)",
730
+ },
731
+ "qwen3-8b-reasoning": {
732
+ // AA specific benchmarks
733
+ codingIndex: 9,
734
+ mathIndex: 19,
735
+
736
+ // Academic benchmarks
737
+ mmluPro: 0.743,
738
+ gpqa: 0.589,
739
+ hle: 0.042,
740
+
741
+ // Capabilities
742
+ contextWindow: 8192,
743
+ supportsReasoning: false,
744
+ supportsVision: false,
745
+
746
+ // Metadata
747
+ lastUpdated: "2026-06-01",
748
+ originalModel: "Qwen3 8B (Reasoning)",
749
+ },
750
+ "qwen3-max-thinking-preview": {
751
+ // AA specific benchmarks
752
+ codingIndex: 24.5,
753
+ mathIndex: 82.3,
754
+
755
+ // Academic benchmarks
756
+ mmluPro: 0.824,
757
+ gpqa: 0.776,
758
+ hle: 0.12,
759
+
760
+ // Capabilities
761
+ contextWindow: 8192,
762
+ supportsReasoning: false,
763
+ supportsVision: false,
764
+
765
+ // Metadata
766
+ lastUpdated: "2026-06-01",
767
+ originalModel: "Qwen3 Max Thinking (Preview)",
768
+ },
769
+ "qwen1.5-chat-110b": {
770
+ // AA specific benchmarks
771
+ codingIndex: undefined,
772
+ mathIndex: undefined,
773
+
774
+ // Academic benchmarks
775
+ mmluPro: undefined,
776
+ gpqa: 0.289,
777
+ hle: undefined,
778
+
779
+ // Capabilities
780
+ contextWindow: 8192,
781
+ supportsReasoning: false,
782
+ supportsVision: false,
783
+
784
+ // Metadata
785
+ lastUpdated: "2026-06-01",
786
+ originalModel: "Qwen1.5 Chat 110B",
787
+ },
788
+ "qwen2.5-instruct-32b": {
789
+ // AA specific benchmarks
790
+ codingIndex: undefined,
791
+ mathIndex: undefined,
792
+
793
+ // Academic benchmarks
794
+ mmluPro: 0.697,
795
+ gpqa: 0.466,
796
+ hle: 0.038,
797
+
798
+ // Capabilities
799
+ contextWindow: 8192,
800
+ supportsReasoning: false,
801
+ supportsVision: false,
802
+
803
+ // Metadata
804
+ lastUpdated: "2026-06-01",
805
+ originalModel: "Qwen2.5 Instruct 32B",
806
+ },
807
+ "qwen3-vl-30b-a3b-instruct": {
808
+ // AA specific benchmarks
809
+ codingIndex: 14.3,
810
+ mathIndex: 72.3,
811
+
812
+ // Academic benchmarks
813
+ mmluPro: 0.764,
814
+ gpqa: 0.695,
815
+ hle: 0.064,
816
+
817
+ // Capabilities
818
+ contextWindow: 8192,
819
+ supportsReasoning: false,
820
+ supportsVision: false,
821
+
822
+ // Metadata
823
+ lastUpdated: "2026-06-01",
824
+ originalModel: "Qwen3 VL 30B A3B Instruct",
825
+ },
826
+ "qwen3.5-27b-non-reasoning": {
827
+ // AA specific benchmarks
828
+ codingIndex: 33.4,
829
+ mathIndex: undefined,
830
+
831
+ // Academic benchmarks
832
+ mmluPro: undefined,
833
+ gpqa: 0.842,
834
+ hle: 0.132,
835
+
836
+ // Capabilities
837
+ contextWindow: 8192,
838
+ supportsReasoning: false,
839
+ supportsVision: false,
840
+
841
+ // Metadata
842
+ lastUpdated: "2026-06-01",
843
+ originalModel: "Qwen3.5 27B (Non-reasoning)",
844
+ },
845
+ "qwen3-14b-reasoning": {
846
+ // AA specific benchmarks
847
+ codingIndex: 13.1,
848
+ mathIndex: 55.7,
849
+
850
+ // Academic benchmarks
851
+ mmluPro: 0.774,
852
+ gpqa: 0.604,
853
+ hle: 0.043,
854
+
855
+ // Capabilities
856
+ contextWindow: 8192,
857
+ supportsReasoning: false,
858
+ supportsVision: false,
859
+
860
+ // Metadata
861
+ lastUpdated: "2026-06-01",
862
+ originalModel: "Qwen3 14B (Reasoning)",
863
+ },
864
+ "qwen3-30b-a3b-reasoning": {
865
+ // AA specific benchmarks
866
+ codingIndex: 11,
867
+ mathIndex: 72.3,
868
+
869
+ // Academic benchmarks
870
+ mmluPro: 0.777,
871
+ gpqa: 0.616,
872
+ hle: 0.066,
873
+
874
+ // Capabilities
875
+ contextWindow: 8192,
876
+ supportsReasoning: false,
877
+ supportsVision: false,
878
+
879
+ // Metadata
880
+ lastUpdated: "2026-06-01",
881
+ originalModel: "Qwen3 30B A3B (Reasoning)",
882
+ },
883
+ "qwen3-0.6b-non-reasoning": {
884
+ // AA specific benchmarks
885
+ codingIndex: 1.4,
886
+ mathIndex: 10.3,
887
+
888
+ // Academic benchmarks
889
+ mmluPro: 0.231,
890
+ gpqa: 0.231,
891
+ hle: 0.052,
892
+
893
+ // Capabilities
894
+ contextWindow: 8192,
895
+ supportsReasoning: false,
896
+ supportsVision: false,
897
+
898
+ // Metadata
899
+ lastUpdated: "2026-06-01",
900
+ originalModel: "Qwen3 0.6B (Non-reasoning)",
901
+ },
902
+ "qwen3-vl-8b-instruct": {
903
+ // AA specific benchmarks
904
+ codingIndex: 7.3,
905
+ mathIndex: 27.3,
906
+
907
+ // Academic benchmarks
908
+ mmluPro: 0.686,
909
+ gpqa: 0.427,
910
+ hle: 0.029,
911
+
912
+ // Capabilities
913
+ contextWindow: 8192,
914
+ supportsReasoning: false,
915
+ supportsVision: false,
916
+
917
+ // Metadata
918
+ lastUpdated: "2026-06-01",
919
+ originalModel: "Qwen3 VL 8B Instruct",
920
+ },
921
+ "qwen3-4b-reasoning": {
922
+ // AA specific benchmarks
923
+ codingIndex: undefined,
924
+ mathIndex: 22.3,
925
+
926
+ // Academic benchmarks
927
+ mmluPro: 0.696,
928
+ gpqa: 0.522,
929
+ hle: 0.051,
930
+
931
+ // Capabilities
932
+ contextWindow: 8192,
933
+ supportsReasoning: false,
934
+ supportsVision: false,
935
+
936
+ // Metadata
937
+ lastUpdated: "2026-06-01",
938
+ originalModel: "Qwen3 4B (Reasoning)",
939
+ },
940
+ "qwen3-1.7b-non-reasoning": {
941
+ // AA specific benchmarks
942
+ codingIndex: 2.3,
943
+ mathIndex: 7.3,
944
+
945
+ // Academic benchmarks
946
+ mmluPro: 0.411,
947
+ gpqa: 0.283,
948
+ hle: 0.052,
949
+
950
+ // Capabilities
951
+ contextWindow: 8192,
952
+ supportsReasoning: false,
953
+ supportsVision: false,
954
+
955
+ // Metadata
956
+ lastUpdated: "2026-06-01",
957
+ originalModel: "Qwen3 1.7B (Non-reasoning)",
958
+ },
959
+ "qwen3-vl-32b-reasoning": {
960
+ // AA specific benchmarks
961
+ codingIndex: 14.5,
962
+ mathIndex: 84.7,
963
+
964
+ // Academic benchmarks
965
+ mmluPro: 0.818,
966
+ gpqa: 0.733,
967
+ hle: 0.096,
968
+
969
+ // Capabilities
970
+ contextWindow: 8192,
971
+ supportsReasoning: false,
972
+ supportsVision: false,
973
+
974
+ // Metadata
975
+ lastUpdated: "2026-06-01",
976
+ originalModel: "Qwen3 VL 32B (Reasoning)",
977
+ },
978
+ "qwen3-30b-a3b-2507-instruct": {
979
+ // AA specific benchmarks
980
+ codingIndex: 14.2,
981
+ mathIndex: 66.3,
982
+
983
+ // Academic benchmarks
984
+ mmluPro: 0.777,
985
+ gpqa: 0.659,
986
+ hle: 0.068,
987
+
988
+ // Capabilities
989
+ contextWindow: 8192,
990
+ supportsReasoning: false,
991
+ supportsVision: false,
992
+
993
+ // Metadata
994
+ lastUpdated: "2026-06-01",
995
+ originalModel: "Qwen3 30B A3B 2507 Instruct",
996
+ },
997
+ "qwen3-30b-a3b-2507-reasoning": {
998
+ // AA specific benchmarks
999
+ codingIndex: 14.6,
1000
+ mathIndex: 56.3,
1001
+
1002
+ // Academic benchmarks
1003
+ mmluPro: 0.805,
1004
+ gpqa: 0.707,
1005
+ hle: 0.098,
1006
+
1007
+ // Capabilities
1008
+ contextWindow: 8192,
1009
+ supportsReasoning: false,
1010
+ supportsVision: false,
1011
+
1012
+ // Metadata
1013
+ lastUpdated: "2026-06-01",
1014
+ originalModel: "Qwen3 30B A3B 2507 (Reasoning)",
1015
+ },
1016
+ "qwen-chat-72b": {
1017
+ // AA specific benchmarks
1018
+ codingIndex: undefined,
1019
+ mathIndex: undefined,
1020
+
1021
+ // Academic benchmarks
1022
+ mmluPro: undefined,
1023
+ gpqa: undefined,
1024
+ hle: undefined,
1025
+
1026
+ // Capabilities
1027
+ contextWindow: 8192,
1028
+ supportsReasoning: false,
1029
+ supportsVision: false,
1030
+
1031
+ // Metadata
1032
+ lastUpdated: "2026-06-01",
1033
+ originalModel: "Qwen Chat 72B",
1034
+ },
1035
+ "qwen3-vl-32b-instruct": {
1036
+ // AA specific benchmarks
1037
+ codingIndex: 15.6,
1038
+ mathIndex: 68.3,
1039
+
1040
+ // Academic benchmarks
1041
+ mmluPro: 0.791,
1042
+ gpqa: 0.671,
1043
+ hle: 0.063,
1044
+
1045
+ // Capabilities
1046
+ contextWindow: 8192,
1047
+ supportsReasoning: false,
1048
+ supportsVision: false,
1049
+
1050
+ // Metadata
1051
+ lastUpdated: "2026-06-01",
1052
+ originalModel: "Qwen3 VL 32B Instruct",
1053
+ },
1054
+ "qwen3-0.6b-reasoning": {
1055
+ // AA specific benchmarks
1056
+ codingIndex: 0.9,
1057
+ mathIndex: 18,
1058
+
1059
+ // Academic benchmarks
1060
+ mmluPro: 0.347,
1061
+ gpqa: 0.239,
1062
+ hle: 0.057,
1063
+
1064
+ // Capabilities
1065
+ contextWindow: 8192,
1066
+ supportsReasoning: false,
1067
+ supportsVision: false,
1068
+
1069
+ // Metadata
1070
+ lastUpdated: "2026-06-01",
1071
+ originalModel: "Qwen3 0.6B (Reasoning)",
1072
+ },
1073
+ "qwen3-1.7b-reasoning": {
1074
+ // AA specific benchmarks
1075
+ codingIndex: 1.4,
1076
+ mathIndex: 38.7,
1077
+
1078
+ // Academic benchmarks
1079
+ mmluPro: 0.57,
1080
+ gpqa: 0.356,
1081
+ hle: 0.048,
1082
+
1083
+ // Capabilities
1084
+ contextWindow: 8192,
1085
+ supportsReasoning: false,
1086
+ supportsVision: false,
1087
+
1088
+ // Metadata
1089
+ lastUpdated: "2026-06-01",
1090
+ originalModel: "Qwen3 1.7B (Reasoning)",
1091
+ },
1092
+ "qwen3-max-preview": {
1093
+ // AA specific benchmarks
1094
+ codingIndex: 25.5,
1095
+ mathIndex: 75,
1096
+
1097
+ // Academic benchmarks
1098
+ mmluPro: 0.838,
1099
+ gpqa: 0.764,
1100
+ hle: 0.093,
1101
+
1102
+ // Capabilities
1103
+ contextWindow: 8192,
1104
+ supportsReasoning: false,
1105
+ supportsVision: false,
1106
+
1107
+ // Metadata
1108
+ lastUpdated: "2026-06-01",
1109
+ originalModel: "Qwen3 Max (Preview)",
1110
+ },
1111
+ "qwen3.6-max-preview": {
1112
+ // AA specific benchmarks
1113
+ codingIndex: 44.9,
1114
+ mathIndex: undefined,
1115
+
1116
+ // Academic benchmarks
1117
+ mmluPro: undefined,
1118
+ gpqa: 0.888,
1119
+ hle: 0.289,
1120
+
1121
+ // Capabilities
1122
+ contextWindow: 8192,
1123
+ supportsReasoning: false,
1124
+ supportsVision: false,
1125
+
1126
+ // Metadata
1127
+ lastUpdated: "2026-06-01",
1128
+ originalModel: "Qwen3.6 Max Preview",
1129
+ },
1130
+ "qwen3-max": {
1131
+ // AA specific benchmarks
1132
+ codingIndex: 26.4,
1133
+ mathIndex: 80.7,
1134
+
1135
+ // Academic benchmarks
1136
+ mmluPro: 0.841,
1137
+ gpqa: 0.764,
1138
+ hle: 0.111,
1139
+
1140
+ // Capabilities
1141
+ contextWindow: 8192,
1142
+ supportsReasoning: false,
1143
+ supportsVision: false,
1144
+
1145
+ // Metadata
1146
+ lastUpdated: "2026-06-01",
1147
+ originalModel: "Qwen3 Max",
1148
+ },
1149
+ "qwen3-max-thinking": {
1150
+ // AA specific benchmarks
1151
+ codingIndex: 30.5,
1152
+ mathIndex: undefined,
1153
+
1154
+ // Academic benchmarks
1155
+ mmluPro: undefined,
1156
+ gpqa: 0.861,
1157
+ hle: 0.262,
1158
+
1159
+ // Capabilities
1160
+ contextWindow: 8192,
1161
+ supportsReasoning: false,
1162
+ supportsVision: false,
1163
+
1164
+ // Metadata
1165
+ lastUpdated: "2026-06-01",
1166
+ originalModel: "Qwen3 Max Thinking",
1167
+ },
1168
+ "qwen3-4b-2507-reasoning": {
1169
+ // AA specific benchmarks
1170
+ codingIndex: 9.5,
1171
+ mathIndex: 82.7,
1172
+
1173
+ // Academic benchmarks
1174
+ mmluPro: 0.743,
1175
+ gpqa: 0.667,
1176
+ hle: 0.059,
1177
+
1178
+ // Capabilities
1179
+ contextWindow: 8192,
1180
+ supportsReasoning: false,
1181
+ supportsVision: false,
1182
+
1183
+ // Metadata
1184
+ lastUpdated: "2026-06-01",
1185
+ originalModel: "Qwen3 4B 2507 (Reasoning)",
1186
+ },
1187
+ "qwen3-4b-2507-instruct": {
1188
+ // AA specific benchmarks
1189
+ codingIndex: 9,
1190
+ mathIndex: 52.3,
1191
+
1192
+ // Academic benchmarks
1193
+ mmluPro: 0.672,
1194
+ gpqa: 0.517,
1195
+ hle: 0.047,
1196
+
1197
+ // Capabilities
1198
+ contextWindow: 8192,
1199
+ supportsReasoning: false,
1200
+ supportsVision: false,
1201
+
1202
+ // Metadata
1203
+ lastUpdated: "2026-06-01",
1204
+ originalModel: "Qwen3 4B 2507 Instruct",
1205
+ },
1206
+ "qwen2.5-coder-instruct-7b": {
1207
+ // AA specific benchmarks
1208
+ codingIndex: undefined,
1209
+ mathIndex: undefined,
1210
+
1211
+ // Academic benchmarks
1212
+ mmluPro: 0.473,
1213
+ gpqa: 0.339,
1214
+ hle: 0.048,
1215
+
1216
+ // Capabilities
1217
+ contextWindow: 8192,
1218
+ supportsReasoning: false,
1219
+ supportsVision: false,
1220
+
1221
+ // Metadata
1222
+ lastUpdated: "2026-06-01",
1223
+ originalModel: "Qwen2.5 Coder Instruct 7B ",
1224
+ },
1225
+ "ling-1t": {
1226
+ // AA specific benchmarks
1227
+ codingIndex: 18.8,
1228
+ mathIndex: 71.3,
1229
+
1230
+ // Academic benchmarks
1231
+ mmluPro: 0.822,
1232
+ gpqa: 0.719,
1233
+ hle: 0.072,
1234
+
1235
+ // Capabilities
1236
+ contextWindow: 8192,
1237
+ supportsReasoning: false,
1238
+ supportsVision: false,
1239
+
1240
+ // Metadata
1241
+ lastUpdated: "2026-06-01",
1242
+ originalModel: "Ling-1T",
1243
+ },
1244
+ "ling-flash-2.0": {
1245
+ // AA specific benchmarks
1246
+ codingIndex: 16.7,
1247
+ mathIndex: 65.3,
1248
+
1249
+ // Academic benchmarks
1250
+ mmluPro: 0.777,
1251
+ gpqa: 0.657,
1252
+ hle: 0.063,
1253
+
1254
+ // Capabilities
1255
+ contextWindow: 8192,
1256
+ supportsReasoning: false,
1257
+ supportsVision: false,
1258
+
1259
+ // Metadata
1260
+ lastUpdated: "2026-06-01",
1261
+ originalModel: "Ling-flash-2.0",
1262
+ },
1263
+ "ring-1t": {
1264
+ // AA specific benchmarks
1265
+ codingIndex: 16.8,
1266
+ mathIndex: 89.3,
1267
+
1268
+ // Academic benchmarks
1269
+ mmluPro: 0.806,
1270
+ gpqa: 0.774,
1271
+ hle: 0.102,
1272
+
1273
+ // Capabilities
1274
+ contextWindow: 8192,
1275
+ supportsReasoning: false,
1276
+ supportsVision: false,
1277
+
1278
+ // Metadata
1279
+ lastUpdated: "2026-06-01",
1280
+ originalModel: "Ring-1T",
1281
+ },
1282
+ "seed-oss-36b-instruct": {
1283
+ // AA specific benchmarks
1284
+ codingIndex: 16.7,
1285
+ mathIndex: 84.7,
1286
+
1287
+ // Academic benchmarks
1288
+ mmluPro: 0.815,
1289
+ gpqa: 0.726,
1290
+ hle: 0.091,
1291
+
1292
+ // Capabilities
1293
+ contextWindow: 8192,
1294
+ supportsReasoning: false,
1295
+ supportsVision: false,
1296
+
1297
+ // Metadata
1298
+ lastUpdated: "2026-06-01",
1299
+ originalModel: "Seed-OSS-36B-Instruct",
1300
+ },
1301
+ };