pi-free 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,20 @@
1
1
  // Auto-generated benchmark data chunk 4
2
- // Models: lfm-40b .. seed-oss-36b-instruct (89 entries)
2
+ // Models: mistral-medium-3.1 .. glm-4.5v-reasoning (90 entries)
3
+ // Last updated: 2026-06-01
3
4
  // DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
4
5
 
5
6
  import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
6
7
 
7
8
  export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
8
- "lfm-40b": {
9
- // AA Intelligence Index (composite score)
10
- intelligenceIndex: 8.8,
11
- normalizedScore: 13,
12
-
9
+ "mistral-medium-3.1": {
13
10
  // AA specific benchmarks
14
- codingIndex: undefined,
15
- mathIndex: undefined,
11
+ codingIndex: 18.3,
12
+ mathIndex: 38.3,
16
13
 
17
14
  // Academic benchmarks
18
- mmluPro: 0.425,
19
- gpqa: 0.327,
20
- hle: 0.049,
15
+ mmluPro: 0.683,
16
+ gpqa: 0.588,
17
+ hle: 0.044,
21
18
 
22
19
  // Capabilities
23
20
  contextWindow: 8192,
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
25
22
  supportsVision: false,
26
23
 
27
24
  // Metadata
28
- lastUpdated: "2026-04-06",
25
+ lastUpdated: "2026-06-01",
26
+ originalModel: "Mistral Medium 3.1",
29
27
  },
30
- "lfm2-1.2b": {
31
- // AA Intelligence Index (composite score)
32
- intelligenceIndex: 6.3,
33
- normalizedScore: 9,
34
-
28
+ "deepseek-r1-distill-llama-70b": {
35
29
  // AA specific benchmarks
36
- codingIndex: 0.8,
37
- mathIndex: 3.3,
30
+ codingIndex: 11.4,
31
+ mathIndex: 53.7,
38
32
 
39
33
  // Academic benchmarks
40
- mmluPro: 0.257,
41
- gpqa: 0.228,
42
- hle: 0.057,
34
+ mmluPro: 0.795,
35
+ gpqa: 0.402,
36
+ hle: 0.061,
43
37
 
44
38
  // Capabilities
45
39
  contextWindow: 8192,
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
47
41
  supportsVision: false,
48
42
 
49
43
  // Metadata
50
- lastUpdated: "2026-04-06",
44
+ lastUpdated: "2026-06-01",
45
+ originalModel: "DeepSeek R1 Distill Llama 70B",
51
46
  },
52
- "solar-mini": {
53
- // AA Intelligence Index (composite score)
54
- intelligenceIndex: 11.9,
55
- normalizedScore: 17,
56
-
47
+ "deepseek-r1-distill-qwen-32b": {
57
48
  // AA specific benchmarks
58
49
  codingIndex: undefined,
59
- mathIndex: undefined,
50
+ mathIndex: 63,
60
51
 
61
52
  // Academic benchmarks
62
- mmluPro: undefined,
63
- gpqa: undefined,
64
- hle: undefined,
53
+ mmluPro: 0.739,
54
+ gpqa: 0.615,
55
+ hle: 0.055,
65
56
 
66
57
  // Capabilities
67
58
  contextWindow: 8192,
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
69
60
  supportsVision: false,
70
61
 
71
62
  // Metadata
72
- lastUpdated: "2026-04-06",
63
+ lastUpdated: "2026-06-01",
64
+ originalModel: "DeepSeek R1 Distill Qwen 32B",
73
65
  },
74
- "solar-pro-2-preview-non-reasoning": {
75
- // AA Intelligence Index (composite score)
76
- intelligenceIndex: 16,
77
- normalizedScore: 23,
78
-
66
+ "deepseek-v3-dec-24": {
79
67
  // AA specific benchmarks
80
- codingIndex: undefined,
81
- mathIndex: undefined,
68
+ codingIndex: 16.4,
69
+ mathIndex: 26,
82
70
 
83
71
  // Academic benchmarks
84
- mmluPro: 0.725,
85
- gpqa: 0.544,
86
- hle: 0.038,
72
+ mmluPro: 0.752,
73
+ gpqa: 0.557,
74
+ hle: 0.036,
87
75
 
88
76
  // Capabilities
89
77
  contextWindow: 8192,
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
91
79
  supportsVision: false,
92
80
 
93
81
  // Metadata
94
- lastUpdated: "2026-04-06",
82
+ lastUpdated: "2026-06-01",
83
+ originalModel: "DeepSeek V3 (Dec '24)",
95
84
  },
96
- "solar-pro-2-preview-reasoning": {
97
- // AA Intelligence Index (composite score)
98
- intelligenceIndex: 18.8,
99
- normalizedScore: 27,
100
-
85
+ "deepseek-r1-distill-qwen-14b": {
101
86
  // AA specific benchmarks
102
87
  codingIndex: undefined,
103
- mathIndex: undefined,
88
+ mathIndex: 55.7,
104
89
 
105
90
  // Academic benchmarks
106
- mmluPro: 0.768,
107
- gpqa: 0.578,
108
- hle: 0.057,
91
+ mmluPro: 0.74,
92
+ gpqa: 0.484,
93
+ hle: 0.044,
109
94
 
110
95
  // Capabilities
111
96
  contextWindow: 8192,
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
113
98
  supportsVision: false,
114
99
 
115
100
  // Metadata
116
- lastUpdated: "2026-04-06",
101
+ lastUpdated: "2026-06-01",
102
+ originalModel: "DeepSeek R1 Distill Qwen 14B",
117
103
  },
118
- "dbrx-instruct": {
119
- // AA Intelligence Index (composite score)
120
- intelligenceIndex: 8.3,
121
- normalizedScore: 12,
122
-
104
+ "deepseek-v2.5-dec-24": {
123
105
  // AA specific benchmarks
124
106
  codingIndex: undefined,
125
107
  mathIndex: undefined,
126
108
 
127
109
  // Academic benchmarks
128
- mmluPro: 0.397,
129
- gpqa: 0.331,
130
- hle: 0.066,
110
+ mmluPro: undefined,
111
+ gpqa: undefined,
112
+ hle: undefined,
131
113
 
132
114
  // Capabilities
133
115
  contextWindow: 8192,
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
135
117
  supportsVision: false,
136
118
 
137
119
  // Metadata
138
- lastUpdated: "2026-04-06",
120
+ lastUpdated: "2026-06-01",
121
+ originalModel: "DeepSeek-V2.5 (Dec '24)",
139
122
  },
140
- "minimax-m2.5": {
141
- // AA Intelligence Index (composite score)
142
- intelligenceIndex: 41.9,
143
- normalizedScore: 60,
144
-
123
+ "deepseek-coder-v2": {
145
124
  // AA specific benchmarks
146
- codingIndex: 37.4,
125
+ codingIndex: undefined,
147
126
  mathIndex: undefined,
148
127
 
149
128
  // Academic benchmarks
150
129
  mmluPro: undefined,
151
- gpqa: 0.848,
152
- hle: 0.191,
130
+ gpqa: undefined,
131
+ hle: undefined,
153
132
 
154
133
  // Capabilities
155
134
  contextWindow: 8192,
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
157
136
  supportsVision: false,
158
137
 
159
138
  // Metadata
160
- lastUpdated: "2026-04-06",
139
+ lastUpdated: "2026-06-01",
140
+ originalModel: "DeepSeek-Coder-V2",
161
141
  },
162
- "minimax-m2.1": {
163
- // AA Intelligence Index (composite score)
164
- intelligenceIndex: 39.4,
165
- normalizedScore: 56,
166
-
142
+ "deepseek-r1-distill-llama-8b": {
167
143
  // AA specific benchmarks
168
- codingIndex: 32.8,
169
- mathIndex: 82.7,
144
+ codingIndex: undefined,
145
+ mathIndex: 41.3,
170
146
 
171
147
  // Academic benchmarks
172
- mmluPro: 0.875,
173
- gpqa: 0.83,
174
- hle: 0.222,
148
+ mmluPro: 0.543,
149
+ gpqa: 0.302,
150
+ hle: 0.042,
175
151
 
176
152
  // Capabilities
177
153
  contextWindow: 8192,
@@ -179,21 +155,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
179
155
  supportsVision: false,
180
156
 
181
157
  // Metadata
182
- lastUpdated: "2026-04-06",
158
+ lastUpdated: "2026-06-01",
159
+ originalModel: "DeepSeek R1 Distill Llama 8B",
183
160
  },
184
- "minimax-m1-80k": {
185
- // AA Intelligence Index (composite score)
186
- intelligenceIndex: 24.4,
187
- normalizedScore: 35,
188
-
161
+ "deepseek-llm-67b-chat-v1": {
189
162
  // AA specific benchmarks
190
- codingIndex: 14.5,
191
- mathIndex: 61,
163
+ codingIndex: undefined,
164
+ mathIndex: undefined,
192
165
 
193
166
  // Academic benchmarks
194
- mmluPro: 0.816,
195
- gpqa: 0.697,
196
- hle: 0.082,
167
+ mmluPro: undefined,
168
+ gpqa: undefined,
169
+ hle: undefined,
197
170
 
198
171
  // Capabilities
199
172
  contextWindow: 8192,
@@ -201,21 +174,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
201
174
  supportsVision: false,
202
175
 
203
176
  // Metadata
204
- lastUpdated: "2026-04-06",
177
+ lastUpdated: "2026-06-01",
178
+ originalModel: "DeepSeek LLM 67B Chat (V1)",
205
179
  },
206
- "minimax-m2": {
207
- // AA Intelligence Index (composite score)
208
- intelligenceIndex: 36.1,
209
- normalizedScore: 52,
210
-
180
+ "deepseek-r1-distill-qwen-1.5b": {
211
181
  // AA specific benchmarks
212
- codingIndex: 29.2,
213
- mathIndex: 78.3,
182
+ codingIndex: undefined,
183
+ mathIndex: 22,
214
184
 
215
185
  // Academic benchmarks
216
- mmluPro: 0.82,
217
- gpqa: 0.777,
218
- hle: 0.125,
186
+ mmluPro: 0.269,
187
+ gpqa: 0.098,
188
+ hle: 0.033,
219
189
 
220
190
  // Capabilities
221
191
  contextWindow: 8192,
@@ -223,21 +193,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
223
193
  supportsVision: false,
224
194
 
225
195
  // Metadata
226
- lastUpdated: "2026-04-06",
196
+ lastUpdated: "2026-06-01",
197
+ originalModel: "DeepSeek R1 Distill Qwen 1.5B",
227
198
  },
228
- "minimax-m1-40k": {
229
- // AA Intelligence Index (composite score)
230
- intelligenceIndex: 20.9,
231
- normalizedScore: 30,
232
-
199
+ "deepseek-v3-0324": {
233
200
  // AA specific benchmarks
234
- codingIndex: 14.1,
235
- mathIndex: 13.7,
201
+ codingIndex: 22,
202
+ mathIndex: 41,
236
203
 
237
204
  // Academic benchmarks
238
- mmluPro: 0.808,
239
- gpqa: 0.682,
240
- hle: 0.075,
205
+ mmluPro: 0.819,
206
+ gpqa: 0.655,
207
+ hle: 0.052,
241
208
 
242
209
  // Capabilities
243
210
  contextWindow: 8192,
@@ -245,21 +212,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
245
212
  supportsVision: false,
246
213
 
247
214
  // Metadata
248
- lastUpdated: "2026-04-06",
215
+ lastUpdated: "2026-06-01",
216
+ originalModel: "DeepSeek V3 0324",
249
217
  },
250
- "kimi-k2-thinking": {
251
- // AA Intelligence Index (composite score)
252
- intelligenceIndex: 40.9,
253
- normalizedScore: 58,
254
-
218
+ "deepseek-v3.2-reasoning": {
255
219
  // AA specific benchmarks
256
- codingIndex: 34.8,
257
- mathIndex: 94.7,
220
+ codingIndex: 36.7,
221
+ mathIndex: 92,
258
222
 
259
223
  // Academic benchmarks
260
- mmluPro: 0.848,
261
- gpqa: 0.838,
262
- hle: 0.223,
224
+ mmluPro: 0.862,
225
+ gpqa: 0.84,
226
+ hle: 0.222,
263
227
 
264
228
  // Capabilities
265
229
  contextWindow: 8192,
@@ -267,21 +231,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
267
231
  supportsVision: false,
268
232
 
269
233
  // Metadata
270
- lastUpdated: "2026-04-06",
234
+ lastUpdated: "2026-06-01",
235
+ originalModel: "DeepSeek V3.2 (Reasoning)",
271
236
  },
272
- "kimi-k2-0905": {
273
- // AA Intelligence Index (composite score)
274
- intelligenceIndex: 30.9,
275
- normalizedScore: 44,
276
-
237
+ "deepseek-v3.2-non-reasoning": {
277
238
  // AA specific benchmarks
278
- codingIndex: 25.9,
279
- mathIndex: 57.3,
239
+ codingIndex: 34.6,
240
+ mathIndex: 59,
280
241
 
281
242
  // Academic benchmarks
282
- mmluPro: 0.819,
283
- gpqa: 0.767,
284
- hle: 0.063,
243
+ mmluPro: 0.837,
244
+ gpqa: 0.751,
245
+ hle: 0.105,
285
246
 
286
247
  // Capabilities
287
248
  contextWindow: 8192,
@@ -289,21 +250,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
289
250
  supportsVision: false,
290
251
 
291
252
  // Metadata
292
- lastUpdated: "2026-04-06",
253
+ lastUpdated: "2026-06-01",
254
+ originalModel: "DeepSeek V3.2 (Non-reasoning)",
293
255
  },
294
- "kimi-k2": {
295
- // AA Intelligence Index (composite score)
296
- intelligenceIndex: 26.3,
297
- normalizedScore: 38,
298
-
256
+ "deepseek-r1-jan-25": {
299
257
  // AA specific benchmarks
300
- codingIndex: 22.1,
301
- mathIndex: 57,
258
+ codingIndex: 15.9,
259
+ mathIndex: 68,
302
260
 
303
261
  // Academic benchmarks
304
- mmluPro: 0.824,
305
- gpqa: 0.766,
306
- hle: 0.07,
262
+ mmluPro: 0.844,
263
+ gpqa: 0.708,
264
+ hle: 0.093,
307
265
 
308
266
  // Capabilities
309
267
  contextWindow: 8192,
@@ -311,21 +269,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
311
269
  supportsVision: false,
312
270
 
313
271
  // Metadata
314
- lastUpdated: "2026-04-06",
272
+ lastUpdated: "2026-06-01",
273
+ originalModel: "DeepSeek R1 (Jan '25)",
315
274
  },
316
- "llama-3.1-tulu3-405b": {
317
- // AA Intelligence Index (composite score)
318
- intelligenceIndex: 14.1,
319
- normalizedScore: 20,
320
-
275
+ "deepseek-v3.1-non-reasoning": {
321
276
  // AA specific benchmarks
322
- codingIndex: undefined,
323
- mathIndex: undefined,
277
+ codingIndex: 28.4,
278
+ mathIndex: 49.7,
324
279
 
325
280
  // Academic benchmarks
326
- mmluPro: 0.716,
327
- gpqa: 0.516,
328
- hle: 0.035,
281
+ mmluPro: 0.833,
282
+ gpqa: 0.735,
283
+ hle: 0.063,
329
284
 
330
285
  // Capabilities
331
286
  contextWindow: 8192,
@@ -333,21 +288,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
333
288
  supportsVision: false,
334
289
 
335
290
  // Metadata
336
- lastUpdated: "2026-04-06",
291
+ lastUpdated: "2026-06-01",
292
+ originalModel: "DeepSeek V3.1 (Non-reasoning)",
337
293
  },
338
- "olmo-2-7b": {
339
- // AA Intelligence Index (composite score)
340
- intelligenceIndex: 9.3,
341
- normalizedScore: 13,
342
-
294
+ "deepseek-r1-0528-may-25": {
343
295
  // AA specific benchmarks
344
- codingIndex: 1.2,
345
- mathIndex: 0.7,
296
+ codingIndex: 24,
297
+ mathIndex: 76,
346
298
 
347
299
  // Academic benchmarks
348
- mmluPro: 0.282,
349
- gpqa: 0.288,
350
- hle: 0.055,
300
+ mmluPro: 0.849,
301
+ gpqa: 0.813,
302
+ hle: 0.149,
351
303
 
352
304
  // Capabilities
353
305
  contextWindow: 8192,
@@ -355,21 +307,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
355
307
  supportsVision: false,
356
308
 
357
309
  // Metadata
358
- lastUpdated: "2026-04-06",
310
+ lastUpdated: "2026-06-01",
311
+ originalModel: "DeepSeek R1 0528 (May '25)",
359
312
  },
360
- "olmo-2-32b": {
361
- // AA Intelligence Index (composite score)
362
- intelligenceIndex: 10.6,
363
- normalizedScore: 15,
364
-
313
+ "deepseek-v3.1-terminus-non-reasoning": {
365
314
  // AA specific benchmarks
366
- codingIndex: 2.7,
367
- mathIndex: 3.3,
315
+ codingIndex: 31.9,
316
+ mathIndex: 53.7,
368
317
 
369
318
  // Academic benchmarks
370
- mmluPro: 0.511,
371
- gpqa: 0.328,
372
- hle: 0.037,
319
+ mmluPro: 0.836,
320
+ gpqa: 0.751,
321
+ hle: 0.084,
373
322
 
374
323
  // Capabilities
375
324
  contextWindow: 8192,
@@ -377,21 +326,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
377
326
  supportsVision: false,
378
327
 
379
328
  // Metadata
380
- lastUpdated: "2026-04-06",
329
+ lastUpdated: "2026-06-01",
330
+ originalModel: "DeepSeek V3.1 Terminus (Non-reasoning)",
381
331
  },
382
- "olmo-3-32b-think": {
383
- // AA Intelligence Index (composite score)
384
- intelligenceIndex: 12.1,
385
- normalizedScore: 17,
386
-
332
+ "deepseek-v3.1-reasoning": {
387
333
  // AA specific benchmarks
388
- codingIndex: 10.5,
389
- mathIndex: 73.7,
334
+ codingIndex: 29.7,
335
+ mathIndex: 89.7,
390
336
 
391
337
  // Academic benchmarks
392
- mmluPro: 0.759,
393
- gpqa: 0.61,
394
- hle: 0.059,
338
+ mmluPro: 0.851,
339
+ gpqa: 0.779,
340
+ hle: 0.13,
395
341
 
396
342
  // Capabilities
397
343
  contextWindow: 8192,
@@ -399,21 +345,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
399
345
  supportsVision: false,
400
346
 
401
347
  // Metadata
402
- lastUpdated: "2026-04-06",
348
+ lastUpdated: "2026-06-01",
349
+ originalModel: "DeepSeek V3.1 (Reasoning)",
403
350
  },
404
- "granite-3.3-8b-non-reasoning": {
405
- // AA Intelligence Index (composite score)
406
- intelligenceIndex: 7,
407
- normalizedScore: 10,
408
-
351
+ "deepseek-v3.1-terminus-reasoning": {
409
352
  // AA specific benchmarks
410
- codingIndex: 3.4,
411
- mathIndex: 6.7,
353
+ codingIndex: 33.7,
354
+ mathIndex: 89.7,
412
355
 
413
356
  // Academic benchmarks
414
- mmluPro: 0.468,
415
- gpqa: 0.338,
416
- hle: 0.042,
357
+ mmluPro: 0.851,
358
+ gpqa: 0.792,
359
+ hle: 0.152,
417
360
 
418
361
  // Capabilities
419
362
  contextWindow: 8192,
@@ -421,13 +364,29 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
421
364
  supportsVision: false,
422
365
 
423
366
  // Metadata
424
- lastUpdated: "2026-04-06",
367
+ lastUpdated: "2026-06-01",
368
+ originalModel: "DeepSeek V3.1 Terminus (Reasoning)",
425
369
  },
426
- "reka-flash-sep-24": {
427
- // AA Intelligence Index (composite score)
428
- intelligenceIndex: 12,
429
- normalizedScore: 17,
370
+ "deepseek-v3.2-exp-non-reasoning": {
371
+ // AA specific benchmarks
372
+ codingIndex: 30,
373
+ mathIndex: 57.7,
374
+
375
+ // Academic benchmarks
376
+ mmluPro: 0.836,
377
+ gpqa: 0.738,
378
+ hle: 0.086,
379
+
380
+ // Capabilities
381
+ contextWindow: 8192,
382
+ supportsReasoning: false,
383
+ supportsVision: false,
430
384
 
385
+ // Metadata
386
+ lastUpdated: "2026-06-01",
387
+ originalModel: "DeepSeek V3.2 Exp (Non-reasoning)",
388
+ },
389
+ "deepseek-v2.5": {
431
390
  // AA specific benchmarks
432
391
  codingIndex: undefined,
433
392
  mathIndex: undefined,
@@ -443,21 +402,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
443
402
  supportsVision: false,
444
403
 
445
404
  // Metadata
446
- lastUpdated: "2026-04-06",
405
+ lastUpdated: "2026-06-01",
406
+ originalModel: "DeepSeek-V2.5",
447
407
  },
448
- "hermes-3---llama-3.1-70b": {
449
- // AA Intelligence Index (composite score)
450
- intelligenceIndex: 10.6,
451
- normalizedScore: 15,
452
-
408
+ "deepseek-coder-v2-lite-instruct": {
453
409
  // AA specific benchmarks
454
410
  codingIndex: undefined,
455
411
  mathIndex: undefined,
456
412
 
457
413
  // Academic benchmarks
458
- mmluPro: 0.571,
459
- gpqa: 0.401,
460
- hle: 0.041,
414
+ mmluPro: 0.429,
415
+ gpqa: 0.319,
416
+ hle: 0.053,
461
417
 
462
418
  // Capabilities
463
419
  contextWindow: 8192,
@@ -465,21 +421,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
465
421
  supportsVision: false,
466
422
 
467
423
  // Metadata
468
- lastUpdated: "2026-04-06",
424
+ lastUpdated: "2026-06-01",
425
+ originalModel: "DeepSeek Coder V2 Lite Instruct",
469
426
  },
470
- "mimo-v2-flash-reasoning": {
471
- // AA Intelligence Index (composite score)
472
- intelligenceIndex: 39.2,
473
- normalizedScore: 56,
474
-
427
+ "deepseek-r1-0528-qwen3-8b": {
475
428
  // AA specific benchmarks
476
- codingIndex: 31.8,
477
- mathIndex: 96.3,
429
+ codingIndex: 7.8,
430
+ mathIndex: 63.7,
478
431
 
479
432
  // Academic benchmarks
480
- mmluPro: 0.843,
481
- gpqa: 0.846,
482
- hle: 0.211,
433
+ mmluPro: 0.739,
434
+ gpqa: 0.612,
435
+ hle: 0.056,
483
436
 
484
437
  // Capabilities
485
438
  contextWindow: 8192,
@@ -487,21 +440,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
487
440
  supportsVision: false,
488
441
 
489
442
  // Metadata
490
- lastUpdated: "2026-04-06",
443
+ lastUpdated: "2026-06-01",
444
+ originalModel: "DeepSeek R1 0528 Qwen3 8B",
491
445
  },
492
- "sarvam-m-reasoning": {
493
- // AA Intelligence Index (composite score)
494
- intelligenceIndex: 8.4,
495
- normalizedScore: 12,
496
-
446
+ "deepseek-v3.2-exp-reasoning": {
497
447
  // AA specific benchmarks
498
- codingIndex: 7.5,
499
- mathIndex: undefined,
448
+ codingIndex: 33.3,
449
+ mathIndex: 87.7,
500
450
 
501
451
  // Academic benchmarks
502
- mmluPro: 0.696,
503
- gpqa: 0.416,
504
- hle: 0.033,
452
+ mmluPro: 0.85,
453
+ gpqa: 0.797,
454
+ hle: 0.138,
505
455
 
506
456
  // Capabilities
507
457
  contextWindow: 8192,
@@ -509,21 +459,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
509
459
  supportsVision: false,
510
460
 
511
461
  // Metadata
512
- lastUpdated: "2026-04-06",
462
+ lastUpdated: "2026-06-01",
463
+ originalModel: "DeepSeek V3.2 Exp (Reasoning)",
513
464
  },
514
- "glm-4.6-non-reasoning": {
515
- // AA Intelligence Index (composite score)
516
- intelligenceIndex: 30.2,
517
- normalizedScore: 43,
518
-
465
+ "deepseek-v3.2-speciale": {
519
466
  // AA specific benchmarks
520
- codingIndex: 30.2,
521
- mathIndex: 44.3,
467
+ codingIndex: 37.9,
468
+ mathIndex: 96.7,
522
469
 
523
470
  // Academic benchmarks
524
- mmluPro: 0.784,
525
- gpqa: 0.632,
526
- hle: 0.052,
471
+ mmluPro: 0.863,
472
+ gpqa: 0.871,
473
+ hle: 0.261,
527
474
 
528
475
  // Capabilities
529
476
  contextWindow: 8192,
@@ -531,21 +478,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
531
478
  supportsVision: false,
532
479
 
533
480
  // Metadata
534
- lastUpdated: "2026-04-06",
481
+ lastUpdated: "2026-06-01",
482
+ originalModel: "DeepSeek V3.2 Speciale",
535
483
  },
536
- "glm-4.7-reasoning": {
537
- // AA Intelligence Index (composite score)
538
- intelligenceIndex: 42.1,
539
- normalizedScore: 60,
540
-
484
+ "deepseek-v2-chat": {
541
485
  // AA specific benchmarks
542
- codingIndex: 36.3,
543
- mathIndex: 95,
486
+ codingIndex: undefined,
487
+ mathIndex: undefined,
544
488
 
545
489
  // Academic benchmarks
546
- mmluPro: 0.856,
547
- gpqa: 0.859,
548
- hle: 0.251,
490
+ mmluPro: undefined,
491
+ gpqa: undefined,
492
+ hle: undefined,
549
493
 
550
494
  // Capabilities
551
495
  contextWindow: 8192,
@@ -553,21 +497,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
553
497
  supportsVision: false,
554
498
 
555
499
  // Metadata
556
- lastUpdated: "2026-04-06",
500
+ lastUpdated: "2026-06-01",
501
+ originalModel: "DeepSeek-V2-Chat",
557
502
  },
558
- "glm-4.7-flash-non-reasoning": {
559
- // AA Intelligence Index (composite score)
560
- intelligenceIndex: 22.1,
561
- normalizedScore: 32,
562
-
503
+ "sonar-pro": {
563
504
  // AA specific benchmarks
564
- codingIndex: 11,
505
+ codingIndex: undefined,
565
506
  mathIndex: undefined,
566
507
 
567
508
  // Academic benchmarks
568
- mmluPro: undefined,
569
- gpqa: 0.452,
570
- hle: 0.049,
509
+ mmluPro: 0.755,
510
+ gpqa: 0.578,
511
+ hle: 0.079,
571
512
 
572
513
  // Capabilities
573
514
  contextWindow: 8192,
@@ -575,21 +516,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
575
516
  supportsVision: false,
576
517
 
577
518
  // Metadata
578
- lastUpdated: "2026-04-06",
519
+ lastUpdated: "2026-06-01",
520
+ originalModel: "Sonar Pro",
579
521
  },
580
- "glm-4.7-non-reasoning": {
581
- // AA Intelligence Index (composite score)
582
- intelligenceIndex: 34.2,
583
- normalizedScore: 49,
584
-
522
+ "sonar-reasoning-pro": {
585
523
  // AA specific benchmarks
586
- codingIndex: 32,
587
- mathIndex: 48,
524
+ codingIndex: undefined,
525
+ mathIndex: undefined,
588
526
 
589
527
  // Academic benchmarks
590
- mmluPro: 0.794,
591
- gpqa: 0.664,
592
- hle: 0.061,
528
+ mmluPro: undefined,
529
+ gpqa: undefined,
530
+ hle: undefined,
593
531
 
594
532
  // Capabilities
595
533
  contextWindow: 8192,
@@ -597,21 +535,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
597
535
  supportsVision: false,
598
536
 
599
537
  // Metadata
600
- lastUpdated: "2026-04-06",
538
+ lastUpdated: "2026-06-01",
539
+ originalModel: "Sonar Reasoning Pro",
601
540
  },
602
- "glm-4.5v-non-reasoning": {
603
- // AA Intelligence Index (composite score)
604
- intelligenceIndex: 12.7,
605
- normalizedScore: 18,
606
-
541
+ "sonar-reasoning": {
607
542
  // AA specific benchmarks
608
- codingIndex: 10.8,
609
- mathIndex: 15.3,
543
+ codingIndex: undefined,
544
+ mathIndex: undefined,
610
545
 
611
546
  // Academic benchmarks
612
- mmluPro: 0.751,
613
- gpqa: 0.573,
614
- hle: 0.036,
547
+ mmluPro: undefined,
548
+ gpqa: 0.623,
549
+ hle: undefined,
615
550
 
616
551
  // Capabilities
617
552
  contextWindow: 8192,
@@ -619,21 +554,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
619
554
  supportsVision: false,
620
555
 
621
556
  // Metadata
622
- lastUpdated: "2026-04-06",
557
+ lastUpdated: "2026-06-01",
558
+ originalModel: "Sonar Reasoning",
623
559
  },
624
- "glm-4.5-reasoning": {
625
- // AA Intelligence Index (composite score)
626
- intelligenceIndex: 26.4,
627
- normalizedScore: 38,
628
-
560
+ "sonar": {
629
561
  // AA specific benchmarks
630
- codingIndex: 26.3,
631
- mathIndex: 73.7,
562
+ codingIndex: undefined,
563
+ mathIndex: undefined,
632
564
 
633
565
  // Academic benchmarks
634
- mmluPro: 0.835,
635
- gpqa: 0.782,
636
- hle: 0.122,
566
+ mmluPro: 0.689,
567
+ gpqa: 0.471,
568
+ hle: 0.073,
637
569
 
638
570
  // Capabilities
639
571
  contextWindow: 8192,
@@ -641,21 +573,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
641
573
  supportsVision: false,
642
574
 
643
575
  // Metadata
644
- lastUpdated: "2026-04-06",
576
+ lastUpdated: "2026-06-01",
577
+ originalModel: "Sonar",
645
578
  },
646
- "glm-4.6-reasoning": {
647
- // AA Intelligence Index (composite score)
648
- intelligenceIndex: 32.5,
649
- normalizedScore: 46,
650
-
579
+ "grok-beta": {
651
580
  // AA specific benchmarks
652
- codingIndex: 29.5,
653
- mathIndex: 86,
581
+ codingIndex: undefined,
582
+ mathIndex: undefined,
654
583
 
655
584
  // Academic benchmarks
656
- mmluPro: 0.829,
657
- gpqa: 0.78,
658
- hle: 0.133,
585
+ mmluPro: 0.703,
586
+ gpqa: 0.471,
587
+ hle: 0.047,
659
588
 
660
589
  // Capabilities
661
590
  contextWindow: 8192,
@@ -663,21 +592,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
663
592
  supportsVision: false,
664
593
 
665
594
  // Metadata
666
- lastUpdated: "2026-04-06",
595
+ lastUpdated: "2026-06-01",
596
+ originalModel: "Grok Beta",
667
597
  },
668
- "glm-4.7-flash-reasoning": {
669
- // AA Intelligence Index (composite score)
670
- intelligenceIndex: 30.1,
671
- normalizedScore: 43,
672
-
598
+ "grok-3": {
673
599
  // AA specific benchmarks
674
- codingIndex: 25.9,
675
- mathIndex: undefined,
600
+ codingIndex: 19.8,
601
+ mathIndex: 58,
676
602
 
677
603
  // Academic benchmarks
678
- mmluPro: undefined,
679
- gpqa: 0.581,
680
- hle: 0.071,
604
+ mmluPro: 0.799,
605
+ gpqa: 0.693,
606
+ hle: 0.051,
681
607
 
682
608
  // Capabilities
683
609
  contextWindow: 8192,
@@ -685,21 +611,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
685
611
  supportsVision: false,
686
612
 
687
613
  // Metadata
688
- lastUpdated: "2026-04-06",
614
+ lastUpdated: "2026-06-01",
615
+ originalModel: "Grok 3",
689
616
  },
690
- "glm-4.5v-reasoning": {
691
- // AA Intelligence Index (composite score)
692
- intelligenceIndex: 15.1,
693
- normalizedScore: 22,
694
-
617
+ "grok-3-mini-reasoning-high": {
695
618
  // AA specific benchmarks
696
- codingIndex: 10.9,
697
- mathIndex: 73,
619
+ codingIndex: 25.2,
620
+ mathIndex: 84.7,
698
621
 
699
622
  // Academic benchmarks
700
- mmluPro: 0.788,
701
- gpqa: 0.684,
702
- hle: 0.059,
623
+ mmluPro: 0.828,
624
+ gpqa: 0.791,
625
+ hle: 0.111,
703
626
 
704
627
  // Capabilities
705
628
  contextWindow: 8192,
@@ -707,21 +630,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
707
630
  supportsVision: false,
708
631
 
709
632
  // Metadata
710
- lastUpdated: "2026-04-06",
633
+ lastUpdated: "2026-06-01",
634
+ originalModel: "Grok 3 mini Reasoning (high)",
711
635
  },
712
- "glm-4.5-air": {
713
- // AA Intelligence Index (composite score)
714
- intelligenceIndex: 23.2,
715
- normalizedScore: 33,
716
-
636
+ "grok-4.20-0309-v2-reasoning": {
717
637
  // AA specific benchmarks
718
- codingIndex: 23.8,
719
- mathIndex: 80.7,
638
+ codingIndex: 40.5,
639
+ mathIndex: undefined,
720
640
 
721
641
  // Academic benchmarks
722
- mmluPro: 0.815,
723
- gpqa: 0.733,
724
- hle: 0.068,
642
+ mmluPro: undefined,
643
+ gpqa: 0.911,
644
+ hle: 0.322,
725
645
 
726
646
  // Capabilities
727
647
  contextWindow: 8192,
@@ -729,21 +649,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
729
649
  supportsVision: false,
730
650
 
731
651
  // Metadata
732
- lastUpdated: "2026-04-06",
652
+ lastUpdated: "2026-06-01",
653
+ originalModel: "Grok 4.20 0309 v2 (Reasoning)",
733
654
  },
734
- "command-r-apr-24": {
735
- // AA Intelligence Index (composite score)
736
- intelligenceIndex: 8.3,
737
- normalizedScore: 12,
738
-
655
+ "grok-4": {
739
656
  // AA specific benchmarks
740
- codingIndex: undefined,
741
- mathIndex: undefined,
657
+ codingIndex: 40.5,
658
+ mathIndex: 92.7,
742
659
 
743
660
  // Academic benchmarks
744
- mmluPro: 0.432,
745
- gpqa: 0.323,
746
- hle: 0.045,
661
+ mmluPro: 0.866,
662
+ gpqa: 0.877,
663
+ hle: 0.239,
747
664
 
748
665
  // Capabilities
749
666
  contextWindow: 8192,
@@ -751,21 +668,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
751
668
  supportsVision: false,
752
669
 
753
670
  // Metadata
754
- lastUpdated: "2026-04-06",
671
+ lastUpdated: "2026-06-01",
672
+ originalModel: "Grok 4",
755
673
  },
756
- "command-r-mar-24": {
757
- // AA Intelligence Index (composite score)
758
- intelligenceIndex: 7.4,
759
- normalizedScore: 11,
760
-
674
+ "grok-4-fast-non-reasoning": {
761
675
  // AA specific benchmarks
762
- codingIndex: undefined,
763
- mathIndex: undefined,
676
+ codingIndex: 19,
677
+ mathIndex: 41.3,
764
678
 
765
679
  // Academic benchmarks
766
- mmluPro: 0.338,
767
- gpqa: 0.284,
768
- hle: 0.048,
680
+ mmluPro: 0.73,
681
+ gpqa: 0.606,
682
+ hle: 0.05,
769
683
 
770
684
  // Capabilities
771
685
  contextWindow: 8192,
@@ -773,21 +687,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
773
687
  supportsVision: false,
774
688
 
775
689
  // Metadata
776
- lastUpdated: "2026-04-06",
690
+ lastUpdated: "2026-06-01",
691
+ originalModel: "Grok 4 Fast (Non-reasoning)",
777
692
  },
778
- "apriel-v1.5-15b-thinker": {
779
- // AA Intelligence Index (composite score)
780
- intelligenceIndex: 28.3,
781
- normalizedScore: 40,
782
-
693
+ "grok-4-fast-reasoning": {
783
694
  // AA specific benchmarks
784
- codingIndex: 18.7,
785
- mathIndex: 87.5,
695
+ codingIndex: 27.4,
696
+ mathIndex: 89.7,
786
697
 
787
698
  // Academic benchmarks
788
- mmluPro: 0.773,
789
- gpqa: 0.713,
790
- hle: 0.12,
699
+ mmluPro: 0.85,
700
+ gpqa: 0.847,
701
+ hle: 0.17,
791
702
 
792
703
  // Capabilities
793
704
  contextWindow: 8192,
@@ -795,21 +706,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
795
706
  supportsVision: false,
796
707
 
797
708
  // Metadata
798
- lastUpdated: "2026-04-06",
709
+ lastUpdated: "2026-06-01",
710
+ originalModel: "Grok 4 Fast (Reasoning)",
799
711
  },
800
- "jamba-1.6-large": {
801
- // AA Intelligence Index (composite score)
802
- intelligenceIndex: 10.6,
803
- normalizedScore: 15,
804
-
712
+ "grok-code-fast-1": {
805
713
  // AA specific benchmarks
806
- codingIndex: undefined,
807
- mathIndex: undefined,
714
+ codingIndex: 23.7,
715
+ mathIndex: 43.3,
808
716
 
809
717
  // Academic benchmarks
810
- mmluPro: 0.565,
811
- gpqa: 0.387,
812
- hle: 0.04,
718
+ mmluPro: 0.793,
719
+ gpqa: 0.727,
720
+ hle: 0.075,
813
721
 
814
722
  // Capabilities
815
723
  contextWindow: 8192,
@@ -817,21 +725,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
817
725
  supportsVision: false,
818
726
 
819
727
  // Metadata
820
- lastUpdated: "2026-04-06",
728
+ lastUpdated: "2026-06-01",
729
+ originalModel: "Grok Code Fast 1",
821
730
  },
822
- "jamba-1.5-large": {
823
- // AA Intelligence Index (composite score)
824
- intelligenceIndex: 10.7,
825
- normalizedScore: 15,
826
-
731
+ "grok-3-reasoning-beta": {
827
732
  // AA specific benchmarks
828
733
  codingIndex: undefined,
829
734
  mathIndex: undefined,
830
735
 
831
736
  // Academic benchmarks
832
- mmluPro: 0.572,
833
- gpqa: 0.427,
834
- hle: 0.04,
737
+ mmluPro: undefined,
738
+ gpqa: undefined,
739
+ hle: undefined,
835
740
 
836
741
  // Capabilities
837
742
  contextWindow: 8192,
@@ -839,21 +744,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
839
744
  supportsVision: false,
840
745
 
841
746
  // Metadata
842
- lastUpdated: "2026-04-06",
747
+ lastUpdated: "2026-06-01",
748
+ originalModel: "Grok 3 Reasoning Beta",
843
749
  },
844
- "jamba-1.5-mini": {
845
- // AA Intelligence Index (composite score)
846
- intelligenceIndex: 8,
847
- normalizedScore: 11,
848
-
750
+ "grok-4.20-0309-reasoning": {
849
751
  // AA specific benchmarks
850
- codingIndex: undefined,
752
+ codingIndex: 42.2,
851
753
  mathIndex: undefined,
852
754
 
853
755
  // Academic benchmarks
854
- mmluPro: 0.371,
855
- gpqa: 0.302,
856
- hle: 0.051,
756
+ mmluPro: undefined,
757
+ gpqa: 0.885,
758
+ hle: 0.3,
857
759
 
858
760
  // Capabilities
859
761
  contextWindow: 8192,
@@ -861,21 +763,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
861
763
  supportsVision: false,
862
764
 
863
765
  // Metadata
864
- lastUpdated: "2026-04-06",
766
+ lastUpdated: "2026-06-01",
767
+ originalModel: "Grok 4.20 0309 (Reasoning)",
865
768
  },
866
- "jamba-1.6-mini": {
867
- // AA Intelligence Index (composite score)
868
- intelligenceIndex: 7.9,
869
- normalizedScore: 11,
870
-
769
+ "grok-4.1-fast-reasoning": {
871
770
  // AA specific benchmarks
872
- codingIndex: undefined,
873
- mathIndex: undefined,
771
+ codingIndex: 30.9,
772
+ mathIndex: 89.3,
874
773
 
875
774
  // Academic benchmarks
876
- mmluPro: 0.367,
877
- gpqa: 0.3,
878
- hle: 0.046,
775
+ mmluPro: 0.854,
776
+ gpqa: 0.853,
777
+ hle: 0.176,
879
778
 
880
779
  // Capabilities
881
780
  contextWindow: 8192,
@@ -883,21 +782,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
883
782
  supportsVision: false,
884
783
 
885
784
  // Metadata
886
- lastUpdated: "2026-04-06",
785
+ lastUpdated: "2026-06-01",
786
+ originalModel: "Grok 4.1 Fast (Reasoning)",
887
787
  },
888
- "arctic-instruct": {
889
- // AA Intelligence Index (composite score)
890
- intelligenceIndex: 8.8,
891
- normalizedScore: 13,
892
-
788
+ "grok-2-dec-24": {
893
789
  // AA specific benchmarks
894
790
  codingIndex: undefined,
895
791
  mathIndex: undefined,
896
792
 
897
793
  // Academic benchmarks
898
- mmluPro: undefined,
899
- gpqa: undefined,
900
- hle: undefined,
794
+ mmluPro: 0.709,
795
+ gpqa: 0.51,
796
+ hle: 0.038,
901
797
 
902
798
  // Capabilities
903
799
  contextWindow: 8192,
@@ -905,21 +801,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
905
801
  supportsVision: false,
906
802
 
907
803
  // Metadata
908
- lastUpdated: "2026-04-06",
804
+ lastUpdated: "2026-06-01",
805
+ originalModel: "Grok 2 (Dec '24)",
909
806
  },
910
- "qwen2.5-max": {
911
- // AA Intelligence Index (composite score)
912
- intelligenceIndex: 16.3,
913
- normalizedScore: 23,
914
-
807
+ "grok-4.1-fast-non-reasoning": {
915
808
  // AA specific benchmarks
916
- codingIndex: undefined,
917
- mathIndex: undefined,
809
+ codingIndex: 19.5,
810
+ mathIndex: 34.3,
918
811
 
919
812
  // Academic benchmarks
920
- mmluPro: 0.762,
921
- gpqa: 0.587,
922
- hle: 0.045,
813
+ mmluPro: 0.743,
814
+ gpqa: 0.637,
815
+ hle: 0.05,
923
816
 
924
817
  // Capabilities
925
818
  contextWindow: 8192,
@@ -927,21 +820,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
927
820
  supportsVision: false,
928
821
 
929
822
  // Metadata
930
- lastUpdated: "2026-04-06",
823
+ lastUpdated: "2026-06-01",
824
+ originalModel: "Grok 4.1 Fast (Non-reasoning)",
931
825
  },
932
- "qwen2.5-instruct-72b": {
933
- // AA Intelligence Index (composite score)
934
- intelligenceIndex: 15.6,
935
- normalizedScore: 22,
936
-
826
+ "grok-4.20-0309-non-reasoning": {
937
827
  // AA specific benchmarks
938
- codingIndex: 11.9,
939
- mathIndex: 14,
828
+ codingIndex: 25.4,
829
+ mathIndex: undefined,
940
830
 
941
831
  // Academic benchmarks
942
- mmluPro: 0.72,
943
- gpqa: 0.491,
944
- hle: 0.042,
832
+ mmluPro: undefined,
833
+ gpqa: 0.785,
834
+ hle: 0.225,
945
835
 
946
836
  // Capabilities
947
837
  contextWindow: 8192,
@@ -949,21 +839,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
949
839
  supportsVision: false,
950
840
 
951
841
  // Metadata
952
- lastUpdated: "2026-04-06",
842
+ lastUpdated: "2026-06-01",
843
+ originalModel: "Grok 4.20 0309 (Non-reasoning)",
953
844
  },
954
- "qwen2.5-coder-instruct-32b": {
955
- // AA Intelligence Index (composite score)
956
- intelligenceIndex: 12.9,
957
- normalizedScore: 18,
958
-
845
+ "grok-4.20-0309-v2-non-reasoning": {
959
846
  // AA specific benchmarks
960
- codingIndex: undefined,
847
+ codingIndex: 22,
961
848
  mathIndex: undefined,
962
849
 
963
850
  // Academic benchmarks
964
- mmluPro: 0.635,
965
- gpqa: 0.417,
966
- hle: 0.038,
851
+ mmluPro: undefined,
852
+ gpqa: 0.776,
853
+ hle: 0.242,
967
854
 
968
855
  // Capabilities
969
856
  contextWindow: 8192,
@@ -971,21 +858,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
971
858
  supportsVision: false,
972
859
 
973
860
  // Metadata
974
- lastUpdated: "2026-04-06",
861
+ lastUpdated: "2026-06-01",
862
+ originalModel: "Grok 4.20 0309 v2 (Non-reasoning)",
975
863
  },
976
- "qwen2.5-turbo": {
977
- // AA Intelligence Index (composite score)
978
- intelligenceIndex: 12,
979
- normalizedScore: 17,
980
-
864
+ "openchat-3.5-1210": {
981
865
  // AA specific benchmarks
982
866
  codingIndex: undefined,
983
867
  mathIndex: undefined,
984
868
 
985
869
  // Academic benchmarks
986
- mmluPro: 0.633,
987
- gpqa: 0.41,
988
- hle: 0.042,
870
+ mmluPro: 0.31,
871
+ gpqa: 0.23,
872
+ hle: 0.048,
989
873
 
990
874
  // Capabilities
991
875
  contextWindow: 8192,
@@ -993,21 +877,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
993
877
  supportsVision: false,
994
878
 
995
879
  // Metadata
996
- lastUpdated: "2026-04-06",
880
+ lastUpdated: "2026-06-01",
881
+ originalModel: "OpenChat 3.5 (1210)",
997
882
  },
998
- "qwen2-instruct-72b": {
999
- // AA Intelligence Index (composite score)
1000
- intelligenceIndex: 11.7,
1001
- normalizedScore: 17,
1002
-
883
+ "nova-pro": {
1003
884
  // AA specific benchmarks
1004
- codingIndex: undefined,
1005
- mathIndex: undefined,
885
+ codingIndex: 11,
886
+ mathIndex: 7,
1006
887
 
1007
888
  // Academic benchmarks
1008
- mmluPro: 0.622,
1009
- gpqa: 0.371,
1010
- hle: 0.037,
889
+ mmluPro: 0.691,
890
+ gpqa: 0.499,
891
+ hle: 0.034,
1011
892
 
1012
893
  // Capabilities
1013
894
  contextWindow: 8192,
@@ -1015,21 +896,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1015
896
  supportsVision: false,
1016
897
 
1017
898
  // Metadata
1018
- lastUpdated: "2026-04-06",
899
+ lastUpdated: "2026-06-01",
900
+ originalModel: "Nova Pro",
1019
901
  },
1020
- "qwen3-vl-30b-a3b-instruct": {
1021
- // AA Intelligence Index (composite score)
1022
- intelligenceIndex: 16.1,
1023
- normalizedScore: 23,
1024
-
902
+ "nova-lite": {
1025
903
  // AA specific benchmarks
1026
- codingIndex: 14.3,
1027
- mathIndex: 72.3,
904
+ codingIndex: 5.1,
905
+ mathIndex: 7,
1028
906
 
1029
907
  // Academic benchmarks
1030
- mmluPro: 0.764,
1031
- gpqa: 0.695,
1032
- hle: 0.064,
908
+ mmluPro: 0.59,
909
+ gpqa: 0.433,
910
+ hle: 0.046,
1033
911
 
1034
912
  // Capabilities
1035
913
  contextWindow: 8192,
@@ -1037,21 +915,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1037
915
  supportsVision: false,
1038
916
 
1039
917
  // Metadata
1040
- lastUpdated: "2026-04-06",
918
+ lastUpdated: "2026-06-01",
919
+ originalModel: "Nova Lite",
1041
920
  },
1042
- "qwen3-235b-a22b-2507-instruct": {
1043
- // AA Intelligence Index (composite score)
1044
- intelligenceIndex: 25,
1045
- normalizedScore: 36,
1046
-
921
+ "phi-3-mini-instruct-3.8b": {
1047
922
  // AA specific benchmarks
1048
- codingIndex: 22.1,
1049
- mathIndex: 71.7,
923
+ codingIndex: 3,
924
+ mathIndex: 0.3,
1050
925
 
1051
926
  // Academic benchmarks
1052
- mmluPro: 0.828,
1053
- gpqa: 0.753,
1054
- hle: 0.106,
927
+ mmluPro: 0.435,
928
+ gpqa: 0.319,
929
+ hle: 0.044,
1055
930
 
1056
931
  // Capabilities
1057
932
  contextWindow: 8192,
@@ -1059,21 +934,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1059
934
  supportsVision: false,
1060
935
 
1061
936
  // Metadata
1062
- lastUpdated: "2026-04-06",
937
+ lastUpdated: "2026-06-01",
938
+ originalModel: "Phi-3 Mini Instruct 3.8B",
1063
939
  },
1064
- "qwen3-32b-non-reasoning": {
1065
- // AA Intelligence Index (composite score)
1066
- intelligenceIndex: 14.5,
1067
- normalizedScore: 21,
1068
-
940
+ "lfm-40b": {
1069
941
  // AA specific benchmarks
1070
942
  codingIndex: undefined,
1071
- mathIndex: 19.7,
943
+ mathIndex: undefined,
1072
944
 
1073
945
  // Academic benchmarks
1074
- mmluPro: 0.727,
1075
- gpqa: 0.535,
1076
- hle: 0.043,
946
+ mmluPro: 0.425,
947
+ gpqa: 0.327,
948
+ hle: 0.049,
1077
949
 
1078
950
  // Capabilities
1079
951
  contextWindow: 8192,
@@ -1081,21 +953,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1081
953
  supportsVision: false,
1082
954
 
1083
955
  // Metadata
1084
- lastUpdated: "2026-04-06",
956
+ lastUpdated: "2026-06-01",
957
+ originalModel: "LFM 40B",
1085
958
  },
1086
- "qwen3-235b-a22b-reasoning": {
1087
- // AA Intelligence Index (composite score)
1088
- intelligenceIndex: 19.8,
1089
- normalizedScore: 28,
1090
-
959
+ "lfm2-1.2b": {
1091
960
  // AA specific benchmarks
1092
- codingIndex: 17.4,
1093
- mathIndex: 82,
961
+ codingIndex: 0.8,
962
+ mathIndex: 3.3,
1094
963
 
1095
964
  // Academic benchmarks
1096
- mmluPro: 0.828,
1097
- gpqa: 0.7,
1098
- hle: 0.117,
965
+ mmluPro: 0.257,
966
+ gpqa: 0.228,
967
+ hle: 0.057,
1099
968
 
1100
969
  // Capabilities
1101
970
  contextWindow: 8192,
@@ -1103,21 +972,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1103
972
  supportsVision: false,
1104
973
 
1105
974
  // Metadata
1106
- lastUpdated: "2026-04-06",
975
+ lastUpdated: "2026-06-01",
976
+ originalModel: "LFM2 1.2B",
1107
977
  },
1108
- "qwen3-235b-a22b-non-reasoning": {
1109
- // AA Intelligence Index (composite score)
1110
- intelligenceIndex: 17,
1111
- normalizedScore: 24,
1112
-
978
+ "solar-mini": {
1113
979
  // AA specific benchmarks
1114
- codingIndex: 14,
1115
- mathIndex: 23.7,
980
+ codingIndex: undefined,
981
+ mathIndex: undefined,
1116
982
 
1117
983
  // Academic benchmarks
1118
- mmluPro: 0.762,
1119
- gpqa: 0.613,
1120
- hle: 0.047,
984
+ mmluPro: undefined,
985
+ gpqa: undefined,
986
+ hle: undefined,
1121
987
 
1122
988
  // Capabilities
1123
989
  contextWindow: 8192,
@@ -1125,21 +991,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1125
991
  supportsVision: false,
1126
992
 
1127
993
  // Metadata
1128
- lastUpdated: "2026-04-06",
994
+ lastUpdated: "2026-06-01",
995
+ originalModel: "Solar Mini",
1129
996
  },
1130
- "qwen3-30b-a3b-2507-reasoning": {
1131
- // AA Intelligence Index (composite score)
1132
- intelligenceIndex: 22.4,
1133
- normalizedScore: 32,
1134
-
997
+ "solar-pro-2-preview-reasoning": {
1135
998
  // AA specific benchmarks
1136
- codingIndex: 14.7,
1137
- mathIndex: 56.3,
999
+ codingIndex: undefined,
1000
+ mathIndex: undefined,
1138
1001
 
1139
1002
  // Academic benchmarks
1140
- mmluPro: 0.805,
1141
- gpqa: 0.707,
1142
- hle: 0.098,
1003
+ mmluPro: 0.768,
1004
+ gpqa: 0.578,
1005
+ hle: 0.057,
1143
1006
 
1144
1007
  // Capabilities
1145
1008
  contextWindow: 8192,
@@ -1147,21 +1010,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1147
1010
  supportsVision: false,
1148
1011
 
1149
1012
  // Metadata
1150
- lastUpdated: "2026-04-06",
1013
+ lastUpdated: "2026-06-01",
1014
+ originalModel: "Solar Pro 2 (Preview) (Reasoning)",
1151
1015
  },
1152
- "qwen3-vl-235b-a22b-instruct": {
1153
- // AA Intelligence Index (composite score)
1154
- intelligenceIndex: 20.8,
1155
- normalizedScore: 30,
1156
-
1016
+ "solar-pro-2-preview-non-reasoning": {
1157
1017
  // AA specific benchmarks
1158
- codingIndex: 16.5,
1159
- mathIndex: 70.7,
1018
+ codingIndex: undefined,
1019
+ mathIndex: undefined,
1160
1020
 
1161
1021
  // Academic benchmarks
1162
- mmluPro: 0.823,
1163
- gpqa: 0.712,
1164
- hle: 0.063,
1022
+ mmluPro: 0.725,
1023
+ gpqa: 0.544,
1024
+ hle: 0.038,
1165
1025
 
1166
1026
  // Capabilities
1167
1027
  contextWindow: 8192,
@@ -1169,21 +1029,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1169
1029
  supportsVision: false,
1170
1030
 
1171
1031
  // Metadata
1172
- lastUpdated: "2026-04-06",
1032
+ lastUpdated: "2026-06-01",
1033
+ originalModel: "Solar Pro 2 (Preview) (Non-reasoning)",
1173
1034
  },
1174
- "qwen3-0.6b-reasoning": {
1175
- // AA Intelligence Index (composite score)
1176
- intelligenceIndex: 6.5,
1177
- normalizedScore: 9,
1178
-
1035
+ "dbrx-instruct": {
1179
1036
  // AA specific benchmarks
1180
- codingIndex: 0.9,
1181
- mathIndex: 18,
1037
+ codingIndex: undefined,
1038
+ mathIndex: undefined,
1182
1039
 
1183
1040
  // Academic benchmarks
1184
- mmluPro: 0.347,
1185
- gpqa: 0.239,
1186
- hle: 0.057,
1041
+ mmluPro: 0.397,
1042
+ gpqa: 0.331,
1043
+ hle: 0.066,
1187
1044
 
1188
1045
  // Capabilities
1189
1046
  contextWindow: 8192,
@@ -1191,21 +1048,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1191
1048
  supportsVision: false,
1192
1049
 
1193
1050
  // Metadata
1194
- lastUpdated: "2026-04-06",
1051
+ lastUpdated: "2026-06-01",
1052
+ originalModel: "DBRX Instruct",
1195
1053
  },
1196
- "qwen3-235b-a22b-2507-reasoning": {
1197
- // AA Intelligence Index (composite score)
1198
- intelligenceIndex: 29.5,
1199
- normalizedScore: 42,
1200
-
1054
+ "minimax-m2.1": {
1201
1055
  // AA specific benchmarks
1202
- codingIndex: 23.2,
1203
- mathIndex: 91,
1056
+ codingIndex: 32.8,
1057
+ mathIndex: 82.7,
1204
1058
 
1205
1059
  // Academic benchmarks
1206
- mmluPro: 0.843,
1207
- gpqa: 0.79,
1208
- hle: 0.15,
1060
+ mmluPro: 0.875,
1061
+ gpqa: 0.83,
1062
+ hle: 0.222,
1209
1063
 
1210
1064
  // Capabilities
1211
1065
  contextWindow: 8192,
@@ -1213,21 +1067,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1213
1067
  supportsVision: false,
1214
1068
 
1215
1069
  // Metadata
1216
- lastUpdated: "2026-04-06",
1070
+ lastUpdated: "2026-06-01",
1071
+ originalModel: "MiniMax-M2.1",
1217
1072
  },
1218
- "qwen3-8b-non-reasoning": {
1219
- // AA Intelligence Index (composite score)
1220
- intelligenceIndex: 10.6,
1221
- normalizedScore: 15,
1222
-
1073
+ "minimax-m2.5": {
1223
1074
  // AA specific benchmarks
1224
- codingIndex: 7.1,
1225
- mathIndex: 24.3,
1075
+ codingIndex: 37.4,
1076
+ mathIndex: undefined,
1226
1077
 
1227
1078
  // Academic benchmarks
1228
- mmluPro: 0.643,
1229
- gpqa: 0.452,
1230
- hle: 0.028,
1079
+ mmluPro: undefined,
1080
+ gpqa: 0.848,
1081
+ hle: 0.191,
1231
1082
 
1232
1083
  // Capabilities
1233
1084
  contextWindow: 8192,
@@ -1235,21 +1086,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1235
1086
  supportsVision: false,
1236
1087
 
1237
1088
  // Metadata
1238
- lastUpdated: "2026-04-06",
1089
+ lastUpdated: "2026-06-01",
1090
+ originalModel: "MiniMax-M2.5",
1239
1091
  },
1240
- "qwen3-4b-2507-reasoning": {
1241
- // AA Intelligence Index (composite score)
1242
- intelligenceIndex: 18.2,
1243
- normalizedScore: 26,
1244
-
1092
+ "minimax-m2": {
1245
1093
  // AA specific benchmarks
1246
- codingIndex: 9.5,
1247
- mathIndex: 82.7,
1094
+ codingIndex: 29.2,
1095
+ mathIndex: 78.3,
1248
1096
 
1249
1097
  // Academic benchmarks
1250
- mmluPro: 0.743,
1251
- gpqa: 0.667,
1252
- hle: 0.059,
1098
+ mmluPro: 0.82,
1099
+ gpqa: 0.777,
1100
+ hle: 0.125,
1253
1101
 
1254
1102
  // Capabilities
1255
1103
  contextWindow: 8192,
@@ -1257,20 +1105,17 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1257
1105
  supportsVision: false,
1258
1106
 
1259
1107
  // Metadata
1260
- lastUpdated: "2026-04-06",
1108
+ lastUpdated: "2026-06-01",
1109
+ originalModel: "MiniMax-M2",
1261
1110
  },
1262
- "qwq-32b": {
1263
- // AA Intelligence Index (composite score)
1264
- intelligenceIndex: 19.7,
1265
- normalizedScore: 28,
1266
-
1111
+ "minimax-m1-80k": {
1267
1112
  // AA specific benchmarks
1268
- codingIndex: undefined,
1269
- mathIndex: 29,
1113
+ codingIndex: 14.5,
1114
+ mathIndex: 61,
1270
1115
 
1271
1116
  // Academic benchmarks
1272
- mmluPro: 0.764,
1273
- gpqa: 0.593,
1117
+ mmluPro: 0.816,
1118
+ gpqa: 0.697,
1274
1119
  hle: 0.082,
1275
1120
 
1276
1121
  // Capabilities
@@ -1279,21 +1124,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1279
1124
  supportsVision: false,
1280
1125
 
1281
1126
  // Metadata
1282
- lastUpdated: "2026-04-06",
1127
+ lastUpdated: "2026-06-01",
1128
+ originalModel: "MiniMax M1 80k",
1283
1129
  },
1284
- "qwen3-32b-reasoning": {
1285
- // AA Intelligence Index (composite score)
1286
- intelligenceIndex: 16.5,
1287
- normalizedScore: 24,
1288
-
1130
+ "minimax-m1-40k": {
1289
1131
  // AA specific benchmarks
1290
- codingIndex: 13.8,
1291
- mathIndex: 73,
1132
+ codingIndex: 14.1,
1133
+ mathIndex: 13.7,
1292
1134
 
1293
1135
  // Academic benchmarks
1294
- mmluPro: 0.798,
1295
- gpqa: 0.668,
1296
- hle: 0.083,
1136
+ mmluPro: 0.808,
1137
+ gpqa: 0.682,
1138
+ hle: 0.075,
1297
1139
 
1298
1140
  // Capabilities
1299
1141
  contextWindow: 8192,
@@ -1301,21 +1143,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1301
1143
  supportsVision: false,
1302
1144
 
1303
1145
  // Metadata
1304
- lastUpdated: "2026-04-06",
1146
+ lastUpdated: "2026-06-01",
1147
+ originalModel: "MiniMax M1 40k",
1305
1148
  },
1306
- "qwen3-4b-2507-instruct": {
1307
- // AA Intelligence Index (composite score)
1308
- intelligenceIndex: 12.9,
1309
- normalizedScore: 18,
1310
-
1149
+ "llama-3.3-nemotron-super-49b-v1-reasoning": {
1311
1150
  // AA specific benchmarks
1312
- codingIndex: 9.1,
1313
- mathIndex: 52.3,
1151
+ codingIndex: 9.4,
1152
+ mathIndex: 54.7,
1314
1153
 
1315
1154
  // Academic benchmarks
1316
- mmluPro: 0.672,
1317
- gpqa: 0.517,
1318
- hle: 0.047,
1155
+ mmluPro: 0.785,
1156
+ gpqa: 0.643,
1157
+ hle: 0.065,
1319
1158
 
1320
1159
  // Capabilities
1321
1160
  contextWindow: 8192,
@@ -1323,21 +1162,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1323
1162
  supportsVision: false,
1324
1163
 
1325
1164
  // Metadata
1326
- lastUpdated: "2026-04-06",
1165
+ lastUpdated: "2026-06-01",
1166
+ originalModel: "Llama 3.3 Nemotron Super 49B v1 (Reasoning)",
1327
1167
  },
1328
- "qwen3-30b-a3b-2507-instruct": {
1329
- // AA Intelligence Index (composite score)
1330
- intelligenceIndex: 15,
1331
- normalizedScore: 21,
1332
-
1168
+ "llama-3.3-nemotron-super-49b-v1-non-reasoning": {
1333
1169
  // AA specific benchmarks
1334
- codingIndex: 14.2,
1335
- mathIndex: 66.3,
1170
+ codingIndex: 7.6,
1171
+ mathIndex: 7.7,
1336
1172
 
1337
1173
  // Academic benchmarks
1338
- mmluPro: 0.777,
1339
- gpqa: 0.659,
1340
- hle: 0.068,
1174
+ mmluPro: 0.698,
1175
+ gpqa: 0.517,
1176
+ hle: 0.035,
1341
1177
 
1342
1178
  // Capabilities
1343
1179
  contextWindow: 8192,
@@ -1345,21 +1181,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1345
1181
  supportsVision: false,
1346
1182
 
1347
1183
  // Metadata
1348
- lastUpdated: "2026-04-06",
1184
+ lastUpdated: "2026-06-01",
1185
+ originalModel: "Llama 3.3 Nemotron Super 49B v1 (Non-reasoning)",
1349
1186
  },
1350
- "qwen3-14b-non-reasoning": {
1351
- // AA Intelligence Index (composite score)
1352
- intelligenceIndex: 12.8,
1353
- normalizedScore: 18,
1354
-
1187
+ "llama-3.1-nemotron-nano-4b-v1.1-reasoning": {
1355
1188
  // AA specific benchmarks
1356
- codingIndex: 12.4,
1357
- mathIndex: 58,
1189
+ codingIndex: undefined,
1190
+ mathIndex: 50,
1358
1191
 
1359
1192
  // Academic benchmarks
1360
- mmluPro: 0.675,
1361
- gpqa: 0.47,
1362
- hle: 0.042,
1193
+ mmluPro: 0.556,
1194
+ gpqa: 0.408,
1195
+ hle: 0.051,
1363
1196
 
1364
1197
  // Capabilities
1365
1198
  contextWindow: 8192,
@@ -1367,21 +1200,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1367
1200
  supportsVision: false,
1368
1201
 
1369
1202
  // Metadata
1370
- lastUpdated: "2026-04-06",
1203
+ lastUpdated: "2026-06-01",
1204
+ originalModel: "Llama 3.1 Nemotron Nano 4B v1.1 (Reasoning)",
1371
1205
  },
1372
- "qwen3-vl-4b-instruct": {
1373
- // AA Intelligence Index (composite score)
1374
- intelligenceIndex: 9.6,
1375
- normalizedScore: 14,
1376
-
1206
+ "kimi-k2.5-reasoning": {
1377
1207
  // AA specific benchmarks
1378
- codingIndex: 4.5,
1379
- mathIndex: 37,
1208
+ codingIndex: 39.6,
1209
+ mathIndex: undefined,
1380
1210
 
1381
1211
  // Academic benchmarks
1382
- mmluPro: 0.634,
1383
- gpqa: 0.371,
1384
- hle: 0.037,
1212
+ mmluPro: undefined,
1213
+ gpqa: 0.879,
1214
+ hle: 0.294,
1385
1215
 
1386
1216
  // Capabilities
1387
1217
  contextWindow: 8192,
@@ -1389,21 +1219,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1389
1219
  supportsVision: false,
1390
1220
 
1391
1221
  // Metadata
1392
- lastUpdated: "2026-04-06",
1222
+ lastUpdated: "2026-06-01",
1223
+ originalModel: "Kimi K2.5 (Reasoning)",
1393
1224
  },
1394
- "qwen2.5-coder-instruct-7b": {
1395
- // AA Intelligence Index (composite score)
1396
- intelligenceIndex: 10,
1397
- normalizedScore: 14,
1398
-
1225
+ "kimi-k2-0905": {
1399
1226
  // AA specific benchmarks
1400
- codingIndex: undefined,
1401
- mathIndex: undefined,
1227
+ codingIndex: 25.9,
1228
+ mathIndex: 57.3,
1402
1229
 
1403
1230
  // Academic benchmarks
1404
- mmluPro: 0.473,
1405
- gpqa: 0.339,
1406
- hle: 0.048,
1231
+ mmluPro: 0.819,
1232
+ gpqa: 0.767,
1233
+ hle: 0.063,
1407
1234
 
1408
1235
  // Capabilities
1409
1236
  contextWindow: 8192,
@@ -1411,21 +1238,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1411
1238
  supportsVision: false,
1412
1239
 
1413
1240
  // Metadata
1414
- lastUpdated: "2026-04-06",
1241
+ lastUpdated: "2026-06-01",
1242
+ originalModel: "Kimi K2 0905",
1415
1243
  },
1416
- "qwen3-30b-a3b-reasoning": {
1417
- // AA Intelligence Index (composite score)
1418
- intelligenceIndex: 15.3,
1419
- normalizedScore: 22,
1420
-
1244
+ "kimi-k2": {
1421
1245
  // AA specific benchmarks
1422
- codingIndex: 11,
1423
- mathIndex: 72.3,
1246
+ codingIndex: 22.1,
1247
+ mathIndex: 57,
1424
1248
 
1425
1249
  // Academic benchmarks
1426
- mmluPro: 0.777,
1427
- gpqa: 0.616,
1428
- hle: 0.066,
1250
+ mmluPro: 0.824,
1251
+ gpqa: 0.766,
1252
+ hle: 0.07,
1429
1253
 
1430
1254
  // Capabilities
1431
1255
  contextWindow: 8192,
@@ -1433,21 +1257,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1433
1257
  supportsVision: false,
1434
1258
 
1435
1259
  // Metadata
1436
- lastUpdated: "2026-04-06",
1260
+ lastUpdated: "2026-06-01",
1261
+ originalModel: "Kimi K2",
1437
1262
  },
1438
- "qwq-32b-preview": {
1439
- // AA Intelligence Index (composite score)
1440
- intelligenceIndex: 15.2,
1441
- normalizedScore: 22,
1442
-
1263
+ "kimi-k2-thinking": {
1443
1264
  // AA specific benchmarks
1444
- codingIndex: undefined,
1445
- mathIndex: undefined,
1265
+ codingIndex: 34.8,
1266
+ mathIndex: 94.7,
1446
1267
 
1447
1268
  // Academic benchmarks
1448
- mmluPro: 0.648,
1449
- gpqa: 0.557,
1450
- hle: 0.048,
1269
+ mmluPro: 0.848,
1270
+ gpqa: 0.838,
1271
+ hle: 0.223,
1451
1272
 
1452
1273
  // Capabilities
1453
1274
  contextWindow: 8192,
@@ -1455,21 +1276,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1455
1276
  supportsVision: false,
1456
1277
 
1457
1278
  // Metadata
1458
- lastUpdated: "2026-04-06",
1279
+ lastUpdated: "2026-06-01",
1280
+ originalModel: "Kimi K2 Thinking",
1459
1281
  },
1460
- "qwen3-vl-32b-instruct": {
1461
- // AA Intelligence Index (composite score)
1462
- intelligenceIndex: 17.2,
1463
- normalizedScore: 25,
1464
-
1282
+ "kimi-k2.5-non-reasoning": {
1465
1283
  // AA specific benchmarks
1466
- codingIndex: 15.6,
1467
- mathIndex: 68.3,
1284
+ codingIndex: 25.8,
1285
+ mathIndex: undefined,
1468
1286
 
1469
1287
  // Academic benchmarks
1470
- mmluPro: 0.791,
1471
- gpqa: 0.671,
1472
- hle: 0.063,
1288
+ mmluPro: undefined,
1289
+ gpqa: 0.789,
1290
+ hle: 0.123,
1473
1291
 
1474
1292
  // Capabilities
1475
1293
  contextWindow: 8192,
@@ -1477,21 +1295,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1477
1295
  supportsVision: false,
1478
1296
 
1479
1297
  // Metadata
1480
- lastUpdated: "2026-04-06",
1298
+ lastUpdated: "2026-06-01",
1299
+ originalModel: "Kimi K2.5 (Non-reasoning)",
1481
1300
  },
1482
- "qwen3-1.7b-non-reasoning": {
1483
- // AA Intelligence Index (composite score)
1484
- intelligenceIndex: 6.8,
1485
- normalizedScore: 10,
1486
-
1301
+ "step-3.5-flash": {
1487
1302
  // AA specific benchmarks
1488
- codingIndex: 2.3,
1489
- mathIndex: 7.3,
1303
+ codingIndex: 31.6,
1304
+ mathIndex: undefined,
1490
1305
 
1491
1306
  // Academic benchmarks
1492
- mmluPro: 0.411,
1493
- gpqa: 0.283,
1494
- hle: 0.052,
1307
+ mmluPro: undefined,
1308
+ gpqa: 0.831,
1309
+ hle: 0.191,
1495
1310
 
1496
1311
  // Capabilities
1497
1312
  contextWindow: 8192,
@@ -1499,21 +1314,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1499
1314
  supportsVision: false,
1500
1315
 
1501
1316
  // Metadata
1502
- lastUpdated: "2026-04-06",
1317
+ lastUpdated: "2026-06-01",
1318
+ originalModel: "Step 3.5 Flash",
1503
1319
  },
1504
- "qwen2.5-instruct-32b": {
1505
- // AA Intelligence Index (composite score)
1506
- intelligenceIndex: 13.2,
1507
- normalizedScore: 19,
1508
-
1320
+ "llama-3.1-tulu3-405b": {
1509
1321
  // AA specific benchmarks
1510
1322
  codingIndex: undefined,
1511
1323
  mathIndex: undefined,
1512
1324
 
1513
1325
  // Academic benchmarks
1514
- mmluPro: 0.697,
1515
- gpqa: 0.466,
1516
- hle: 0.038,
1326
+ mmluPro: 0.716,
1327
+ gpqa: 0.516,
1328
+ hle: 0.035,
1517
1329
 
1518
1330
  // Capabilities
1519
1331
  contextWindow: 8192,
@@ -1521,21 +1333,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1521
1333
  supportsVision: false,
1522
1334
 
1523
1335
  // Metadata
1524
- lastUpdated: "2026-04-06",
1336
+ lastUpdated: "2026-06-01",
1337
+ originalModel: "Llama 3.1 Tulu3 405B",
1525
1338
  },
1526
- "qwen3-4b-non-reasoning": {
1527
- // AA Intelligence Index (composite score)
1528
- intelligenceIndex: 12.5,
1529
- normalizedScore: 18,
1530
-
1339
+ "olmo-2-7b": {
1531
1340
  // AA specific benchmarks
1532
- codingIndex: undefined,
1533
- mathIndex: undefined,
1341
+ codingIndex: 1.2,
1342
+ mathIndex: 0.7,
1534
1343
 
1535
1344
  // Academic benchmarks
1536
- mmluPro: 0.586,
1537
- gpqa: 0.398,
1538
- hle: 0.037,
1345
+ mmluPro: 0.282,
1346
+ gpqa: 0.288,
1347
+ hle: 0.055,
1539
1348
 
1540
1349
  // Capabilities
1541
1350
  contextWindow: 8192,
@@ -1543,21 +1352,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1543
1352
  supportsVision: false,
1544
1353
 
1545
1354
  // Metadata
1546
- lastUpdated: "2026-04-06",
1355
+ lastUpdated: "2026-06-01",
1356
+ originalModel: "OLMo 2 7B",
1547
1357
  },
1548
- "qwen3-vl-30b-a3b-reasoning": {
1549
- // AA Intelligence Index (composite score)
1550
- intelligenceIndex: 19.7,
1551
- normalizedScore: 28,
1552
-
1358
+ "olmo-3-32b-think": {
1553
1359
  // AA specific benchmarks
1554
- codingIndex: 13.1,
1555
- mathIndex: 82.3,
1360
+ codingIndex: 10.5,
1361
+ mathIndex: 73.7,
1556
1362
 
1557
1363
  // Academic benchmarks
1558
- mmluPro: 0.807,
1559
- gpqa: 0.72,
1560
- hle: 0.087,
1364
+ mmluPro: 0.759,
1365
+ gpqa: 0.61,
1366
+ hle: 0.059,
1561
1367
 
1562
1368
  // Capabilities
1563
1369
  contextWindow: 8192,
@@ -1565,21 +1371,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1565
1371
  supportsVision: false,
1566
1372
 
1567
1373
  // Metadata
1568
- lastUpdated: "2026-04-06",
1374
+ lastUpdated: "2026-06-01",
1375
+ originalModel: "Olmo 3 32B Think",
1569
1376
  },
1570
- "qwen3-vl-8b-reasoning": {
1571
- // AA Intelligence Index (composite score)
1572
- intelligenceIndex: 16.7,
1573
- normalizedScore: 24,
1574
-
1377
+ "olmo-2-32b": {
1575
1378
  // AA specific benchmarks
1576
- codingIndex: 9.8,
1577
- mathIndex: 30.7,
1379
+ codingIndex: 2.7,
1380
+ mathIndex: 3.3,
1578
1381
 
1579
1382
  // Academic benchmarks
1580
- mmluPro: 0.749,
1581
- gpqa: 0.579,
1582
- hle: 0.033,
1383
+ mmluPro: 0.511,
1384
+ gpqa: 0.328,
1385
+ hle: 0.037,
1583
1386
 
1584
1387
  // Capabilities
1585
1388
  contextWindow: 8192,
@@ -1587,20 +1390,17 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1587
1390
  supportsVision: false,
1588
1391
 
1589
1392
  // Metadata
1590
- lastUpdated: "2026-04-06",
1393
+ lastUpdated: "2026-06-01",
1394
+ originalModel: "OLMo 2 32B",
1591
1395
  },
1592
- "qwen3-8b-reasoning": {
1593
- // AA Intelligence Index (composite score)
1594
- intelligenceIndex: 13.2,
1595
- normalizedScore: 19,
1596
-
1396
+ "granite-3.3-8b-non-reasoning": {
1597
1397
  // AA specific benchmarks
1598
- codingIndex: 9,
1599
- mathIndex: 19,
1398
+ codingIndex: 3.4,
1399
+ mathIndex: 6.7,
1600
1400
 
1601
1401
  // Academic benchmarks
1602
- mmluPro: 0.743,
1603
- gpqa: 0.589,
1402
+ mmluPro: 0.468,
1403
+ gpqa: 0.338,
1604
1404
  hle: 0.042,
1605
1405
 
1606
1406
  // Capabilities
@@ -1609,21 +1409,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1609
1409
  supportsVision: false,
1610
1410
 
1611
1411
  // Metadata
1612
- lastUpdated: "2026-04-06",
1412
+ lastUpdated: "2026-06-01",
1413
+ originalModel: "Granite 3.3 8B (Non-reasoning)",
1613
1414
  },
1614
- "qwen3-vl-235b-a22b-reasoning": {
1615
- // AA Intelligence Index (composite score)
1616
- intelligenceIndex: 27.6,
1617
- normalizedScore: 39,
1618
-
1415
+ "reka-flash-sep-24": {
1619
1416
  // AA specific benchmarks
1620
- codingIndex: 20.9,
1621
- mathIndex: 88.3,
1417
+ codingIndex: undefined,
1418
+ mathIndex: undefined,
1622
1419
 
1623
1420
  // Academic benchmarks
1624
- mmluPro: 0.836,
1625
- gpqa: 0.772,
1626
- hle: 0.101,
1421
+ mmluPro: undefined,
1422
+ gpqa: undefined,
1423
+ hle: undefined,
1627
1424
 
1628
1425
  // Capabilities
1629
1426
  contextWindow: 8192,
@@ -1631,21 +1428,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1631
1428
  supportsVision: false,
1632
1429
 
1633
1430
  // Metadata
1634
- lastUpdated: "2026-04-06",
1431
+ lastUpdated: "2026-06-01",
1432
+ originalModel: "Reka Flash (Sep '24)",
1635
1433
  },
1636
- "qwen3-4b-reasoning": {
1637
- // AA Intelligence Index (composite score)
1638
- intelligenceIndex: 14.2,
1639
- normalizedScore: 20,
1640
-
1434
+ "hermes-3---llama-3.1-70b": {
1641
1435
  // AA specific benchmarks
1642
1436
  codingIndex: undefined,
1643
- mathIndex: 22.3,
1437
+ mathIndex: undefined,
1644
1438
 
1645
1439
  // Academic benchmarks
1646
- mmluPro: 0.696,
1647
- gpqa: 0.522,
1648
- hle: 0.051,
1440
+ mmluPro: 0.571,
1441
+ gpqa: 0.401,
1442
+ hle: 0.041,
1649
1443
 
1650
1444
  // Capabilities
1651
1445
  contextWindow: 8192,
@@ -1653,21 +1447,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1653
1447
  supportsVision: false,
1654
1448
 
1655
1449
  // Metadata
1656
- lastUpdated: "2026-04-06",
1450
+ lastUpdated: "2026-06-01",
1451
+ originalModel: "Hermes 3 - Llama-3.1 70B",
1657
1452
  },
1658
- "qwen3-0.6b-non-reasoning": {
1659
- // AA Intelligence Index (composite score)
1660
- intelligenceIndex: 5.7,
1661
- normalizedScore: 8,
1662
-
1453
+ "mimo-v2-flash-reasoning": {
1663
1454
  // AA specific benchmarks
1664
- codingIndex: 1.4,
1665
- mathIndex: 10.3,
1455
+ codingIndex: 31.8,
1456
+ mathIndex: 96.3,
1666
1457
 
1667
1458
  // Academic benchmarks
1668
- mmluPro: 0.231,
1669
- gpqa: 0.231,
1670
- hle: 0.052,
1459
+ mmluPro: 0.843,
1460
+ gpqa: 0.846,
1461
+ hle: 0.211,
1671
1462
 
1672
1463
  // Capabilities
1673
1464
  contextWindow: 8192,
@@ -1675,21 +1466,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1675
1466
  supportsVision: false,
1676
1467
 
1677
1468
  // Metadata
1678
- lastUpdated: "2026-04-06",
1469
+ lastUpdated: "2026-06-01",
1470
+ originalModel: "MiMo-V2-Flash (Reasoning)",
1679
1471
  },
1680
- "qwen3-30b-a3b-non-reasoning": {
1681
- // AA Intelligence Index (composite score)
1682
- intelligenceIndex: 12.5,
1683
- normalizedScore: 18,
1684
-
1472
+ "mimo-v2-pro": {
1685
1473
  // AA specific benchmarks
1686
- codingIndex: 13.3,
1687
- mathIndex: 21.7,
1474
+ codingIndex: 41.4,
1475
+ mathIndex: undefined,
1688
1476
 
1689
1477
  // Academic benchmarks
1690
- mmluPro: 0.71,
1691
- gpqa: 0.515,
1692
- hle: 0.046,
1478
+ mmluPro: undefined,
1479
+ gpqa: 0.87,
1480
+ hle: 0.283,
1693
1481
 
1694
1482
  // Capabilities
1695
1483
  contextWindow: 8192,
@@ -1697,21 +1485,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1697
1485
  supportsVision: false,
1698
1486
 
1699
1487
  // Metadata
1700
- lastUpdated: "2026-04-06",
1488
+ lastUpdated: "2026-06-01",
1489
+ originalModel: "MiMo-V2-Pro",
1701
1490
  },
1702
- "qwen3-14b-reasoning": {
1703
- // AA Intelligence Index (composite score)
1704
- intelligenceIndex: 16.2,
1705
- normalizedScore: 23,
1706
-
1491
+ "sarvam-m-reasoning": {
1707
1492
  // AA specific benchmarks
1708
- codingIndex: 13.1,
1709
- mathIndex: 55.7,
1493
+ codingIndex: 7.5,
1494
+ mathIndex: undefined,
1710
1495
 
1711
1496
  // Academic benchmarks
1712
- mmluPro: 0.774,
1713
- gpqa: 0.604,
1714
- hle: 0.043,
1497
+ mmluPro: 0.696,
1498
+ gpqa: 0.416,
1499
+ hle: 0.033,
1715
1500
 
1716
1501
  // Capabilities
1717
1502
  contextWindow: 8192,
@@ -1719,21 +1504,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1719
1504
  supportsVision: false,
1720
1505
 
1721
1506
  // Metadata
1722
- lastUpdated: "2026-04-06",
1507
+ lastUpdated: "2026-06-01",
1508
+ originalModel: "Sarvam M (Reasoning)",
1723
1509
  },
1724
- "qwen3-1.7b-reasoning": {
1725
- // AA Intelligence Index (composite score)
1726
- intelligenceIndex: 8,
1727
- normalizedScore: 11,
1728
-
1510
+ "glm-4.6-non-reasoning": {
1729
1511
  // AA specific benchmarks
1730
- codingIndex: 1.4,
1731
- mathIndex: 38.7,
1512
+ codingIndex: 30.2,
1513
+ mathIndex: 44.3,
1732
1514
 
1733
1515
  // Academic benchmarks
1734
- mmluPro: 0.57,
1735
- gpqa: 0.356,
1736
- hle: 0.048,
1516
+ mmluPro: 0.784,
1517
+ gpqa: 0.632,
1518
+ hle: 0.052,
1737
1519
 
1738
1520
  // Capabilities
1739
1521
  contextWindow: 8192,
@@ -1741,21 +1523,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1741
1523
  supportsVision: false,
1742
1524
 
1743
1525
  // Metadata
1744
- lastUpdated: "2026-04-06",
1526
+ lastUpdated: "2026-06-01",
1527
+ originalModel: "GLM-4.6 (Non-reasoning)",
1745
1528
  },
1746
- "qwen3-max": {
1747
- // AA Intelligence Index (composite score)
1748
- intelligenceIndex: 31.4,
1749
- normalizedScore: 45,
1750
-
1529
+ "glm-4.7-reasoning": {
1751
1530
  // AA specific benchmarks
1752
- codingIndex: 26.4,
1753
- mathIndex: 80.7,
1531
+ codingIndex: 36.3,
1532
+ mathIndex: 95,
1754
1533
 
1755
1534
  // Academic benchmarks
1756
- mmluPro: 0.841,
1757
- gpqa: 0.764,
1758
- hle: 0.111,
1535
+ mmluPro: 0.856,
1536
+ gpqa: 0.859,
1537
+ hle: 0.251,
1759
1538
 
1760
1539
  // Capabilities
1761
1540
  contextWindow: 8192,
@@ -1763,21 +1542,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1763
1542
  supportsVision: false,
1764
1543
 
1765
1544
  // Metadata
1766
- lastUpdated: "2026-04-06",
1545
+ lastUpdated: "2026-06-01",
1546
+ originalModel: "GLM-4.7 (Reasoning)",
1767
1547
  },
1768
- "qwen3-coder-30b-a3b-instruct": {
1769
- // AA Intelligence Index (composite score)
1770
- intelligenceIndex: 20,
1771
- normalizedScore: 29,
1772
-
1548
+ "glm-5-reasoning": {
1773
1549
  // AA specific benchmarks
1774
- codingIndex: 19.4,
1775
- mathIndex: 29,
1550
+ codingIndex: 44.2,
1551
+ mathIndex: undefined,
1776
1552
 
1777
1553
  // Academic benchmarks
1778
- mmluPro: 0.706,
1779
- gpqa: 0.516,
1780
- hle: 0.04,
1554
+ mmluPro: undefined,
1555
+ gpqa: 0.82,
1556
+ hle: 0.272,
1781
1557
 
1782
1558
  // Capabilities
1783
1559
  contextWindow: 8192,
@@ -1785,21 +1561,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1785
1561
  supportsVision: false,
1786
1562
 
1787
1563
  // Metadata
1788
- lastUpdated: "2026-04-06",
1564
+ lastUpdated: "2026-06-01",
1565
+ originalModel: "GLM-5 (Reasoning)",
1789
1566
  },
1790
- "qwen3-vl-8b-instruct": {
1791
- // AA Intelligence Index (composite score)
1792
- intelligenceIndex: 14.3,
1793
- normalizedScore: 20,
1794
-
1567
+ "glm-4.6-reasoning": {
1795
1568
  // AA specific benchmarks
1796
- codingIndex: 7.3,
1797
- mathIndex: 27.3,
1569
+ codingIndex: 29.5,
1570
+ mathIndex: 86,
1798
1571
 
1799
1572
  // Academic benchmarks
1800
- mmluPro: 0.686,
1801
- gpqa: 0.427,
1802
- hle: 0.029,
1573
+ mmluPro: 0.829,
1574
+ gpqa: 0.78,
1575
+ hle: 0.133,
1803
1576
 
1804
1577
  // Capabilities
1805
1578
  contextWindow: 8192,
@@ -1807,21 +1580,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1807
1580
  supportsVision: false,
1808
1581
 
1809
1582
  // Metadata
1810
- lastUpdated: "2026-04-06",
1583
+ lastUpdated: "2026-06-01",
1584
+ originalModel: "GLM-4.6 (Reasoning)",
1811
1585
  },
1812
- "qwen1.5-chat-110b": {
1813
- // AA Intelligence Index (composite score)
1814
- intelligenceIndex: 9.5,
1815
- normalizedScore: 14,
1816
-
1586
+ "glm-4.7-non-reasoning": {
1817
1587
  // AA specific benchmarks
1818
- codingIndex: undefined,
1819
- mathIndex: undefined,
1588
+ codingIndex: 32,
1589
+ mathIndex: 48,
1820
1590
 
1821
1591
  // Academic benchmarks
1822
- mmluPro: undefined,
1823
- gpqa: 0.289,
1824
- hle: undefined,
1592
+ mmluPro: 0.794,
1593
+ gpqa: 0.664,
1594
+ hle: 0.061,
1825
1595
 
1826
1596
  // Capabilities
1827
1597
  contextWindow: 8192,
@@ -1829,21 +1599,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1829
1599
  supportsVision: false,
1830
1600
 
1831
1601
  // Metadata
1832
- lastUpdated: "2026-04-06",
1602
+ lastUpdated: "2026-06-01",
1603
+ originalModel: "GLM-4.7 (Non-reasoning)",
1833
1604
  },
1834
- "qwen3-max-preview": {
1835
- // AA Intelligence Index (composite score)
1836
- intelligenceIndex: 26.1,
1837
- normalizedScore: 37,
1838
-
1605
+ "glm-4.5-reasoning": {
1839
1606
  // AA specific benchmarks
1840
- codingIndex: 25.5,
1841
- mathIndex: 75,
1607
+ codingIndex: 26.3,
1608
+ mathIndex: 73.7,
1842
1609
 
1843
1610
  // Academic benchmarks
1844
- mmluPro: 0.838,
1845
- gpqa: 0.764,
1846
- hle: 0.093,
1611
+ mmluPro: 0.835,
1612
+ gpqa: 0.782,
1613
+ hle: 0.122,
1847
1614
 
1848
1615
  // Capabilities
1849
1616
  contextWindow: 8192,
@@ -1851,21 +1618,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1851
1618
  supportsVision: false,
1852
1619
 
1853
1620
  // Metadata
1854
- lastUpdated: "2026-04-06",
1621
+ lastUpdated: "2026-06-01",
1622
+ originalModel: "GLM-4.5 (Reasoning)",
1855
1623
  },
1856
- "qwen3-vl-4b-reasoning": {
1857
- // AA Intelligence Index (composite score)
1858
- intelligenceIndex: 13.7,
1859
- normalizedScore: 20,
1860
-
1624
+ "glm-4.7-flash-reasoning": {
1861
1625
  // AA specific benchmarks
1862
- codingIndex: 6.7,
1863
- mathIndex: 25.7,
1626
+ codingIndex: 25.9,
1627
+ mathIndex: undefined,
1864
1628
 
1865
1629
  // Academic benchmarks
1866
- mmluPro: 0.7,
1867
- gpqa: 0.494,
1868
- hle: 0.044,
1630
+ mmluPro: undefined,
1631
+ gpqa: 0.581,
1632
+ hle: 0.071,
1869
1633
 
1870
1634
  // Capabilities
1871
1635
  contextWindow: 8192,
@@ -1873,21 +1637,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1873
1637
  supportsVision: false,
1874
1638
 
1875
1639
  // Metadata
1876
- lastUpdated: "2026-04-06",
1640
+ lastUpdated: "2026-06-01",
1641
+ originalModel: "GLM-4.7-Flash (Reasoning)",
1877
1642
  },
1878
- "qwen3-max-thinking-preview": {
1879
- // AA Intelligence Index (composite score)
1880
- intelligenceIndex: 32.5,
1881
- normalizedScore: 46,
1882
-
1643
+ "glm-5-non-reasoning": {
1883
1644
  // AA specific benchmarks
1884
- codingIndex: 24.5,
1885
- mathIndex: 82.3,
1645
+ codingIndex: 39,
1646
+ mathIndex: undefined,
1886
1647
 
1887
1648
  // Academic benchmarks
1888
- mmluPro: 0.824,
1889
- gpqa: 0.776,
1890
- hle: 0.12,
1649
+ mmluPro: undefined,
1650
+ gpqa: 0.666,
1651
+ hle: 0.072,
1891
1652
 
1892
1653
  // Capabilities
1893
1654
  contextWindow: 8192,
@@ -1895,21 +1656,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1895
1656
  supportsVision: false,
1896
1657
 
1897
1658
  // Metadata
1898
- lastUpdated: "2026-04-06",
1659
+ lastUpdated: "2026-06-01",
1660
+ originalModel: "GLM-5 (Non-reasoning)",
1899
1661
  },
1900
- "qwen3-vl-32b-reasoning": {
1901
- // AA Intelligence Index (composite score)
1902
- intelligenceIndex: 24.7,
1903
- normalizedScore: 35,
1904
-
1662
+ "glm-4.7-flash-non-reasoning": {
1905
1663
  // AA specific benchmarks
1906
- codingIndex: 14.5,
1907
- mathIndex: 84.7,
1664
+ codingIndex: 11,
1665
+ mathIndex: undefined,
1908
1666
 
1909
1667
  // Academic benchmarks
1910
- mmluPro: 0.818,
1911
- gpqa: 0.733,
1912
- hle: 0.096,
1668
+ mmluPro: undefined,
1669
+ gpqa: 0.452,
1670
+ hle: 0.049,
1913
1671
 
1914
1672
  // Capabilities
1915
1673
  contextWindow: 8192,
@@ -1917,21 +1675,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1917
1675
  supportsVision: false,
1918
1676
 
1919
1677
  // Metadata
1920
- lastUpdated: "2026-04-06",
1678
+ lastUpdated: "2026-06-01",
1679
+ originalModel: "GLM-4.7-Flash (Non-reasoning)",
1921
1680
  },
1922
- "qwen-chat-72b": {
1923
- // AA Intelligence Index (composite score)
1924
- intelligenceIndex: 8.8,
1925
- normalizedScore: 13,
1926
-
1681
+ "glm-4.6v-non-reasoning": {
1927
1682
  // AA specific benchmarks
1928
- codingIndex: undefined,
1929
- mathIndex: undefined,
1683
+ codingIndex: 11.1,
1684
+ mathIndex: 26.3,
1930
1685
 
1931
1686
  // Academic benchmarks
1932
- mmluPro: undefined,
1933
- gpqa: undefined,
1934
- hle: undefined,
1687
+ mmluPro: 0.752,
1688
+ gpqa: 0.566,
1689
+ hle: 0.037,
1935
1690
 
1936
1691
  // Capabilities
1937
1692
  contextWindow: 8192,
@@ -1939,21 +1694,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1939
1694
  supportsVision: false,
1940
1695
 
1941
1696
  // Metadata
1942
- lastUpdated: "2026-04-06",
1697
+ lastUpdated: "2026-06-01",
1698
+ originalModel: "GLM-4.6V (Non-reasoning)",
1943
1699
  },
1944
- "seed-oss-36b-instruct": {
1945
- // AA Intelligence Index (composite score)
1946
- intelligenceIndex: 25.2,
1947
- normalizedScore: 36,
1948
-
1700
+ "glm-4.5v-reasoning": {
1949
1701
  // AA specific benchmarks
1950
- codingIndex: 16.7,
1951
- mathIndex: 84.7,
1702
+ codingIndex: 10.9,
1703
+ mathIndex: 73,
1952
1704
 
1953
1705
  // Academic benchmarks
1954
- mmluPro: 0.815,
1955
- gpqa: 0.726,
1956
- hle: 0.091,
1706
+ mmluPro: 0.788,
1707
+ gpqa: 0.684,
1708
+ hle: 0.059,
1957
1709
 
1958
1710
  // Capabilities
1959
1711
  contextWindow: 8192,
@@ -1961,9 +1713,7 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
1961
1713
  supportsVision: false,
1962
1714
 
1963
1715
  // Metadata
1964
- lastUpdated: "2026-04-06",
1716
+ lastUpdated: "2026-06-01",
1717
+ originalModel: "GLM-4.5V (Reasoning)",
1965
1718
  },
1966
-
1967
-
1968
-
1969
1719
  };