pi-free 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,20 @@
1
1
  // Auto-generated benchmark data chunk 1
2
- // Models: solar-pro-2-reasoning .. qwen3-coder-480b-a35b-instruct (90 entries)
2
+ // Models: lfm2-2.6b .. minicpm-v-4.6-1.3b (90 entries)
3
+ // Last updated: 2026-06-01
3
4
  // DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
4
5
 
5
6
  import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
6
7
 
7
8
  export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
8
- "solar-pro-2-reasoning": {
9
- // AA Intelligence Index (composite score)
10
- intelligenceIndex: 14.9,
11
- normalizedScore: 21,
12
-
9
+ "lfm2-2.6b": {
13
10
  // AA specific benchmarks
14
- codingIndex: 12.1,
15
- mathIndex: 61.3,
11
+ codingIndex: 1.4,
12
+ mathIndex: 8.3,
16
13
 
17
14
  // Academic benchmarks
18
- mmluPro: 0.805,
19
- gpqa: 0.687,
20
- hle: 0.07,
15
+ mmluPro: 0.298,
16
+ gpqa: 0.306,
17
+ hle: 0.052,
21
18
 
22
19
  // Capabilities
23
20
  contextWindow: 8192,
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
25
22
  supportsVision: false,
26
23
 
27
24
  // Metadata
28
- lastUpdated: "2026-04-06",
25
+ lastUpdated: "2026-06-01",
26
+ originalModel: "LFM2 2.6B",
29
27
  },
30
- "solar-pro-2-non-reasoning": {
31
- // AA Intelligence Index (composite score)
32
- intelligenceIndex: 13.6,
33
- normalizedScore: 19,
34
-
28
+ "lfm2.5-1.2b-thinking": {
35
29
  // AA specific benchmarks
36
- codingIndex: 11.3,
37
- mathIndex: 30,
30
+ codingIndex: 1.4,
31
+ mathIndex: undefined,
38
32
 
39
33
  // Academic benchmarks
40
- mmluPro: 0.75,
41
- gpqa: 0.561,
42
- hle: 0.038,
34
+ mmluPro: undefined,
35
+ gpqa: 0.339,
36
+ hle: 0.061,
43
37
 
44
38
  // Capabilities
45
39
  contextWindow: 8192,
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
47
41
  supportsVision: false,
48
42
 
49
43
  // Metadata
50
- lastUpdated: "2026-04-06",
44
+ lastUpdated: "2026-06-01",
45
+ originalModel: "LFM2.5-1.2B-Thinking",
51
46
  },
52
- "minimax-m2.7": {
53
- // AA Intelligence Index (composite score)
54
- intelligenceIndex: 49.6,
55
- normalizedScore: 71,
56
-
47
+ "lfm2.5-1.2b-instruct": {
57
48
  // AA specific benchmarks
58
- codingIndex: 41.9,
49
+ codingIndex: 0.8,
59
50
  mathIndex: undefined,
60
51
 
61
52
  // Academic benchmarks
62
53
  mmluPro: undefined,
63
- gpqa: 0.874,
64
- hle: 0.281,
54
+ gpqa: 0.326,
55
+ hle: 0.068,
65
56
 
66
57
  // Capabilities
67
58
  contextWindow: 8192,
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
69
60
  supportsVision: false,
70
61
 
71
62
  // Metadata
72
- lastUpdated: "2026-04-06",
63
+ lastUpdated: "2026-06-01",
64
+ originalModel: "LFM2.5-1.2B-Instruct",
73
65
  },
74
- "llama-3.1-nemotron-instruct-70b": {
75
- // AA Intelligence Index (composite score)
76
- intelligenceIndex: 13.4,
77
- normalizedScore: 19,
78
-
66
+ "solar-pro-3": {
79
67
  // AA specific benchmarks
80
- codingIndex: 10.8,
81
- mathIndex: 11,
68
+ codingIndex: 13.3,
69
+ mathIndex: undefined,
82
70
 
83
71
  // Academic benchmarks
84
- mmluPro: 0.69,
85
- gpqa: 0.465,
86
- hle: 0.046,
72
+ mmluPro: undefined,
73
+ gpqa: 0.724,
74
+ hle: 0.101,
87
75
 
88
76
  // Capabilities
89
77
  contextWindow: 8192,
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
91
79
  supportsVision: false,
92
80
 
93
81
  // Metadata
94
- lastUpdated: "2026-04-06",
82
+ lastUpdated: "2026-06-01",
83
+ originalModel: "Solar Pro 3",
95
84
  },
96
- "nvidia-nemotron-nano-9b-v2-reasoning": {
97
- // AA Intelligence Index (composite score)
98
- intelligenceIndex: 14.8,
99
- normalizedScore: 21,
100
-
85
+ "solar-open-100b-reasoning": {
101
86
  // AA specific benchmarks
102
- codingIndex: 8.3,
103
- mathIndex: 69.7,
87
+ codingIndex: 10.5,
88
+ mathIndex: undefined,
104
89
 
105
90
  // Academic benchmarks
106
- mmluPro: 0.742,
107
- gpqa: 0.57,
108
- hle: 0.046,
91
+ mmluPro: undefined,
92
+ gpqa: 0.657,
93
+ hle: 0.092,
109
94
 
110
95
  // Capabilities
111
96
  contextWindow: 8192,
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
113
98
  supportsVision: false,
114
99
 
115
100
  // Metadata
116
- lastUpdated: "2026-04-06",
101
+ lastUpdated: "2026-06-01",
102
+ originalModel: "Solar Open 100B (Reasoning)",
117
103
  },
118
- "nvidia-nemotron-nano-12b-v2-vl-non-reasoning": {
119
- // AA Intelligence Index (composite score)
120
- intelligenceIndex: 10.1,
121
- normalizedScore: 14,
122
-
104
+ "solar-pro-2-non-reasoning": {
123
105
  // AA specific benchmarks
124
- codingIndex: 5.9,
125
- mathIndex: 26.7,
106
+ codingIndex: 11.3,
107
+ mathIndex: 30,
126
108
 
127
109
  // Academic benchmarks
128
- mmluPro: 0.649,
129
- gpqa: 0.439,
130
- hle: 0.045,
110
+ mmluPro: 0.75,
111
+ gpqa: 0.561,
112
+ hle: 0.038,
131
113
 
132
114
  // Capabilities
133
115
  contextWindow: 8192,
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
135
117
  supportsVision: false,
136
118
 
137
119
  // Metadata
138
- lastUpdated: "2026-04-06",
120
+ lastUpdated: "2026-06-01",
121
+ originalModel: "Solar Pro 2 (Non-reasoning)",
139
122
  },
140
- "llama-nemotron-super-49b-v1.5-reasoning": {
141
- // AA Intelligence Index (composite score)
142
- intelligenceIndex: 18.7,
143
- normalizedScore: 27,
144
-
123
+ "solar-pro-2-reasoning": {
145
124
  // AA specific benchmarks
146
- codingIndex: 15.2,
147
- mathIndex: 76.7,
125
+ codingIndex: 12.1,
126
+ mathIndex: 61.3,
148
127
 
149
128
  // Academic benchmarks
150
- mmluPro: 0.814,
151
- gpqa: 0.748,
152
- hle: 0.068,
129
+ mmluPro: 0.805,
130
+ gpqa: 0.687,
131
+ hle: 0.07,
153
132
 
154
133
  // Capabilities
155
134
  contextWindow: 8192,
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
157
136
  supportsVision: false,
158
137
 
159
138
  // Metadata
160
- lastUpdated: "2026-04-06",
139
+ lastUpdated: "2026-06-01",
140
+ originalModel: "Solar Pro 2 (Reasoning)",
161
141
  },
162
- "nemotron-cascade-2-30b-a3b": {
163
- // AA Intelligence Index (composite score)
164
- intelligenceIndex: 27.7,
165
- normalizedScore: 40,
166
-
142
+ "minimax-m2.7": {
167
143
  // AA specific benchmarks
168
- codingIndex: 25.1,
144
+ codingIndex: 41.9,
169
145
  mathIndex: undefined,
170
146
 
171
147
  // Academic benchmarks
172
148
  mmluPro: undefined,
173
- gpqa: 0.763,
174
- hle: 0.114,
149
+ gpqa: 0.874,
150
+ hle: 0.281,
175
151
 
176
152
  // Capabilities
177
153
  contextWindow: 8192,
@@ -179,13 +155,48 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
179
155
  supportsVision: false,
180
156
 
181
157
  // Metadata
182
- lastUpdated: "2026-04-06",
158
+ lastUpdated: "2026-06-01",
159
+ originalModel: "MiniMax-M2.7",
183
160
  },
184
- "nvidia-nemotron-3-super-120b-a12b-reasoning": {
185
- // AA Intelligence Index (composite score)
186
- intelligenceIndex: 36,
187
- normalizedScore: 51,
161
+ "llama-3.1-nemotron-instruct-70b": {
162
+ // AA specific benchmarks
163
+ codingIndex: 10.8,
164
+ mathIndex: 11,
165
+
166
+ // Academic benchmarks
167
+ mmluPro: 0.69,
168
+ gpqa: 0.465,
169
+ hle: 0.046,
170
+
171
+ // Capabilities
172
+ contextWindow: 8192,
173
+ supportsReasoning: false,
174
+ supportsVision: false,
175
+
176
+ // Metadata
177
+ lastUpdated: "2026-06-01",
178
+ originalModel: "Llama 3.1 Nemotron Instruct 70B",
179
+ },
180
+ "nvidia-nemotron-nano-12b-v2-vl-non-reasoning": {
181
+ // AA specific benchmarks
182
+ codingIndex: 5.9,
183
+ mathIndex: 26.7,
184
+
185
+ // Academic benchmarks
186
+ mmluPro: 0.649,
187
+ gpqa: 0.439,
188
+ hle: 0.045,
189
+
190
+ // Capabilities
191
+ contextWindow: 8192,
192
+ supportsReasoning: false,
193
+ supportsVision: false,
188
194
 
195
+ // Metadata
196
+ lastUpdated: "2026-06-01",
197
+ originalModel: "NVIDIA Nemotron Nano 12B v2 VL (Non-reasoning)",
198
+ },
199
+ "nvidia-nemotron-3-super-120b-a12b-reasoning": {
189
200
  // AA specific benchmarks
190
201
  codingIndex: 31.2,
191
202
  mathIndex: undefined,
@@ -201,13 +212,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
201
212
  supportsVision: false,
202
213
 
203
214
  // Metadata
204
- lastUpdated: "2026-04-06",
215
+ lastUpdated: "2026-06-01",
216
+ originalModel: "NVIDIA Nemotron 3 Super 120B A12B (Reasoning)",
205
217
  },
206
- "nvidia-nemotron-nano-9b-v2-non-reasoning": {
207
- // AA Intelligence Index (composite score)
208
- intelligenceIndex: 13.2,
209
- normalizedScore: 19,
218
+ "nvidia-nemotron-3-nano-30b-a3b-non-reasoning": {
219
+ // AA specific benchmarks
220
+ codingIndex: 15.8,
221
+ mathIndex: 13.3,
222
+
223
+ // Academic benchmarks
224
+ mmluPro: 0.579,
225
+ gpqa: 0.399,
226
+ hle: 0.046,
227
+
228
+ // Capabilities
229
+ contextWindow: 8192,
230
+ supportsReasoning: false,
231
+ supportsVision: false,
210
232
 
233
+ // Metadata
234
+ lastUpdated: "2026-06-01",
235
+ originalModel: "NVIDIA Nemotron 3 Nano 30B A3B (Non-reasoning)",
236
+ },
237
+ "nvidia-nemotron-nano-9b-v2-non-reasoning": {
211
238
  // AA specific benchmarks
212
239
  codingIndex: 7.5,
213
240
  mathIndex: 62.3,
@@ -223,21 +250,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
223
250
  supportsVision: false,
224
251
 
225
252
  // Metadata
226
- lastUpdated: "2026-04-06",
253
+ lastUpdated: "2026-06-01",
254
+ originalModel: "NVIDIA Nemotron Nano 9B V2 (Non-reasoning)",
227
255
  },
228
- "llama-3.1-nemotron-ultra-253b-v1-reasoning": {
229
- // AA Intelligence Index (composite score)
230
- intelligenceIndex: 15,
231
- normalizedScore: 21,
232
-
256
+ "nemotron-3-nano-omni-30b-a3b-reasoning": {
233
257
  // AA specific benchmarks
234
- codingIndex: 13.1,
235
- mathIndex: 63.7,
258
+ codingIndex: 14.8,
259
+ mathIndex: undefined,
236
260
 
237
261
  // Academic benchmarks
238
- mmluPro: 0.825,
239
- gpqa: 0.728,
240
- hle: 0.081,
262
+ mmluPro: undefined,
263
+ gpqa: 0.469,
264
+ hle: 0.053,
241
265
 
242
266
  // Capabilities
243
267
  contextWindow: 8192,
@@ -245,21 +269,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
245
269
  supportsVision: false,
246
270
 
247
271
  // Metadata
248
- lastUpdated: "2026-04-06",
272
+ lastUpdated: "2026-06-01",
273
+ originalModel: "Nemotron 3 Nano Omni 30B A3B Reasoning",
249
274
  },
250
- "llama-3.1-nemotron-nano-4b-v1.1-reasoning": {
251
- // AA Intelligence Index (composite score)
252
- intelligenceIndex: 14.4,
253
- normalizedScore: 21,
254
-
275
+ "llama-nemotron-super-49b-v1.5-non-reasoning": {
255
276
  // AA specific benchmarks
256
- codingIndex: undefined,
257
- mathIndex: 50,
277
+ codingIndex: 10.5,
278
+ mathIndex: 8,
258
279
 
259
280
  // Academic benchmarks
260
- mmluPro: 0.556,
261
- gpqa: 0.408,
262
- hle: 0.051,
281
+ mmluPro: 0.692,
282
+ gpqa: 0.481,
283
+ hle: 0.043,
263
284
 
264
285
  // Capabilities
265
286
  contextWindow: 8192,
@@ -267,21 +288,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
267
288
  supportsVision: false,
268
289
 
269
290
  // Metadata
270
- lastUpdated: "2026-04-06",
291
+ lastUpdated: "2026-06-01",
292
+ originalModel: "Llama Nemotron Super 49B v1.5 (Non-reasoning)",
271
293
  },
272
- "nvidia-nemotron-nano-12b-v2-vl-reasoning": {
273
- // AA Intelligence Index (composite score)
274
- intelligenceIndex: 14.9,
275
- normalizedScore: 21,
276
-
294
+ "nemotron-cascade-2-30b-a3b": {
277
295
  // AA specific benchmarks
278
- codingIndex: 11.8,
279
- mathIndex: 75,
296
+ codingIndex: 25.8,
297
+ mathIndex: undefined,
280
298
 
281
299
  // Academic benchmarks
282
- mmluPro: 0.759,
283
- gpqa: 0.572,
284
- hle: 0.053,
300
+ mmluPro: undefined,
301
+ gpqa: 0.758,
302
+ hle: 0.114,
285
303
 
286
304
  // Capabilities
287
305
  contextWindow: 8192,
@@ -289,21 +307,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
289
307
  supportsVision: false,
290
308
 
291
309
  // Metadata
292
- lastUpdated: "2026-04-06",
310
+ lastUpdated: "2026-06-01",
311
+ originalModel: "Nemotron Cascade 2 30B A3B",
293
312
  },
294
- "nvidia-nemotron-3-nano-30b-a3b-non-reasoning": {
295
- // AA Intelligence Index (composite score)
296
- intelligenceIndex: 13.2,
297
- normalizedScore: 19,
298
-
313
+ "nvidia-nemotron-nano-12b-v2-vl-reasoning": {
299
314
  // AA specific benchmarks
300
- codingIndex: 15.8,
301
- mathIndex: 13.3,
315
+ codingIndex: 11.7,
316
+ mathIndex: 75,
302
317
 
303
318
  // Academic benchmarks
304
- mmluPro: 0.579,
305
- gpqa: 0.399,
306
- hle: 0.046,
319
+ mmluPro: 0.759,
320
+ gpqa: 0.572,
321
+ hle: 0.053,
307
322
 
308
323
  // Capabilities
309
324
  contextWindow: 8192,
@@ -311,21 +326,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
311
326
  supportsVision: false,
312
327
 
313
328
  // Metadata
314
- lastUpdated: "2026-04-06",
329
+ lastUpdated: "2026-06-01",
330
+ originalModel: "NVIDIA Nemotron Nano 12B v2 VL (Reasoning)",
315
331
  },
316
- "llama-3.3-nemotron-super-49b-v1-reasoning": {
317
- // AA Intelligence Index (composite score)
318
- intelligenceIndex: 18.5,
319
- normalizedScore: 26,
320
-
332
+ "nvidia-nemotron-3-nano-4b": {
321
333
  // AA specific benchmarks
322
- codingIndex: 9.4,
323
- mathIndex: 54.7,
334
+ codingIndex: 10,
335
+ mathIndex: undefined,
324
336
 
325
337
  // Academic benchmarks
326
- mmluPro: 0.785,
327
- gpqa: 0.643,
328
- hle: 0.065,
338
+ mmluPro: undefined,
339
+ gpqa: 0.513,
340
+ hle: 0.048,
329
341
 
330
342
  // Capabilities
331
343
  contextWindow: 8192,
@@ -333,13 +345,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
333
345
  supportsVision: false,
334
346
 
335
347
  // Metadata
336
- lastUpdated: "2026-04-06",
348
+ lastUpdated: "2026-06-01",
349
+ originalModel: "NVIDIA Nemotron 3 Nano 4B",
337
350
  },
338
351
  "nvidia-nemotron-3-nano-30b-a3b-reasoning": {
339
- // AA Intelligence Index (composite score)
340
- intelligenceIndex: 24.3,
341
- normalizedScore: 35,
342
-
343
352
  // AA specific benchmarks
344
353
  codingIndex: 19,
345
354
  mathIndex: 91,
@@ -355,21 +364,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
355
364
  supportsVision: false,
356
365
 
357
366
  // Metadata
358
- lastUpdated: "2026-04-06",
367
+ lastUpdated: "2026-06-01",
368
+ originalModel: "NVIDIA Nemotron 3 Nano 30B A3B (Reasoning)",
359
369
  },
360
- "llama-3.3-nemotron-super-49b-v1-non-reasoning": {
361
- // AA Intelligence Index (composite score)
362
- intelligenceIndex: 14.3,
363
- normalizedScore: 20,
364
-
370
+ "nvidia-nemotron-nano-9b-v2-reasoning": {
365
371
  // AA specific benchmarks
366
- codingIndex: 7.6,
367
- mathIndex: 7.7,
372
+ codingIndex: 8.3,
373
+ mathIndex: 69.7,
368
374
 
369
375
  // Academic benchmarks
370
- mmluPro: 0.698,
371
- gpqa: 0.517,
372
- hle: 0.035,
376
+ mmluPro: 0.742,
377
+ gpqa: 0.57,
378
+ hle: 0.046,
373
379
 
374
380
  // Capabilities
375
381
  contextWindow: 8192,
@@ -377,21 +383,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
377
383
  supportsVision: false,
378
384
 
379
385
  // Metadata
380
- lastUpdated: "2026-04-06",
386
+ lastUpdated: "2026-06-01",
387
+ originalModel: "NVIDIA Nemotron Nano 9B V2 (Reasoning)",
381
388
  },
382
- "nvidia-nemotron-3-nano-4b": {
383
- // AA Intelligence Index (composite score)
384
- intelligenceIndex: 14.7,
385
- normalizedScore: 21,
386
-
389
+ "llama-nemotron-super-49b-v1.5-reasoning": {
387
390
  // AA specific benchmarks
388
- codingIndex: 10,
389
- mathIndex: undefined,
391
+ codingIndex: 15.1,
392
+ mathIndex: 76.7,
390
393
 
391
394
  // Academic benchmarks
392
- mmluPro: undefined,
393
- gpqa: 0.513,
394
- hle: 0.048,
395
+ mmluPro: 0.814,
396
+ gpqa: 0.748,
397
+ hle: 0.068,
395
398
 
396
399
  // Capabilities
397
400
  contextWindow: 8192,
@@ -399,21 +402,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
399
402
  supportsVision: false,
400
403
 
401
404
  // Metadata
402
- lastUpdated: "2026-04-06",
405
+ lastUpdated: "2026-06-01",
406
+ originalModel: "Llama Nemotron Super 49B v1.5 (Reasoning)",
403
407
  },
404
- "llama-nemotron-super-49b-v1.5-non-reasoning": {
405
- // AA Intelligence Index (composite score)
406
- intelligenceIndex: 14.6,
407
- normalizedScore: 21,
408
-
408
+ "llama-3.1-nemotron-ultra-253b-v1-reasoning": {
409
409
  // AA specific benchmarks
410
- codingIndex: 10.5,
411
- mathIndex: 8,
410
+ codingIndex: 13.1,
411
+ mathIndex: 63.7,
412
412
 
413
413
  // Academic benchmarks
414
- mmluPro: 0.692,
415
- gpqa: 0.481,
416
- hle: 0.043,
414
+ mmluPro: 0.825,
415
+ gpqa: 0.728,
416
+ hle: 0.081,
417
417
 
418
418
  // Capabilities
419
419
  contextWindow: 8192,
@@ -421,21 +421,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
421
421
  supportsVision: false,
422
422
 
423
423
  // Metadata
424
- lastUpdated: "2026-04-06",
424
+ lastUpdated: "2026-06-01",
425
+ originalModel: "Llama 3.1 Nemotron Ultra 253B v1 (Reasoning)",
425
426
  },
426
- "kimi-k2.5-non-reasoning": {
427
- // AA Intelligence Index (composite score)
428
- intelligenceIndex: 37.3,
429
- normalizedScore: 53,
430
-
427
+ "kimi-k2.6-non-reasoning": {
431
428
  // AA specific benchmarks
432
- codingIndex: 25.8,
429
+ codingIndex: 38.4,
433
430
  mathIndex: undefined,
434
431
 
435
432
  // Academic benchmarks
436
433
  mmluPro: undefined,
437
- gpqa: 0.789,
438
- hle: 0.123,
434
+ gpqa: 0.788,
435
+ hle: 0.182,
439
436
 
440
437
  // Capabilities
441
438
  contextWindow: 8192,
@@ -443,21 +440,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
443
440
  supportsVision: false,
444
441
 
445
442
  // Metadata
446
- lastUpdated: "2026-04-06",
443
+ lastUpdated: "2026-06-01",
444
+ originalModel: "Kimi K2.6 (Non-reasoning)",
447
445
  },
448
- "kimi-k2.5-reasoning": {
449
- // AA Intelligence Index (composite score)
450
- intelligenceIndex: 46.8,
451
- normalizedScore: 67,
452
-
446
+ "kimi-k2.6": {
453
447
  // AA specific benchmarks
454
- codingIndex: 39.5,
448
+ codingIndex: 47.1,
455
449
  mathIndex: undefined,
456
450
 
457
451
  // Academic benchmarks
458
452
  mmluPro: undefined,
459
- gpqa: 0.879,
460
- hle: 0.294,
453
+ gpqa: 0.911,
454
+ hle: 0.359,
461
455
 
462
456
  // Capabilities
463
457
  contextWindow: 8192,
@@ -465,13 +459,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
465
459
  supportsVision: false,
466
460
 
467
461
  // Metadata
468
- lastUpdated: "2026-04-06",
462
+ lastUpdated: "2026-06-01",
463
+ originalModel: "Kimi K2.6",
469
464
  },
470
465
  "kimi-linear-48b-a3b-instruct": {
471
- // AA Intelligence Index (composite score)
472
- intelligenceIndex: 14.4,
473
- normalizedScore: 21,
474
-
475
466
  // AA specific benchmarks
476
467
  codingIndex: 14.2,
477
468
  mathIndex: 36.3,
@@ -487,21 +478,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
487
478
  supportsVision: false,
488
479
 
489
480
  // Metadata
490
- lastUpdated: "2026-04-06",
481
+ lastUpdated: "2026-06-01",
482
+ originalModel: "Kimi Linear 48B A3B Instruct",
491
483
  },
492
- "step-3.5-flash": {
493
- // AA Intelligence Index (composite score)
494
- intelligenceIndex: 37.8,
495
- normalizedScore: 54,
484
+ "llama-65b": {
485
+ // AA specific benchmarks
486
+ codingIndex: undefined,
487
+ mathIndex: undefined,
488
+
489
+ // Academic benchmarks
490
+ mmluPro: undefined,
491
+ gpqa: undefined,
492
+ hle: undefined,
493
+
494
+ // Capabilities
495
+ contextWindow: 8192,
496
+ supportsReasoning: false,
497
+ supportsVision: false,
496
498
 
499
+ // Metadata
500
+ lastUpdated: "2026-06-01",
501
+ originalModel: "Llama 65B",
502
+ },
503
+ "step-3.5-flash-2603": {
497
504
  // AA specific benchmarks
498
- codingIndex: 31.6,
505
+ codingIndex: 34.6,
499
506
  mathIndex: undefined,
500
507
 
501
508
  // Academic benchmarks
502
509
  mmluPro: undefined,
503
- gpqa: 0.831,
504
- hle: 0.191,
510
+ gpqa: 0.826,
511
+ hle: 0.226,
505
512
 
506
513
  // Capabilities
507
514
  contextWindow: 8192,
@@ -509,13 +516,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
509
516
  supportsVision: false,
510
517
 
511
518
  // Metadata
512
- lastUpdated: "2026-04-06",
519
+ lastUpdated: "2026-06-01",
520
+ originalModel: "Step 3.5 Flash 2603",
513
521
  },
514
522
  "step3-vl-10b": {
515
- // AA Intelligence Index (composite score)
516
- intelligenceIndex: 15.4,
517
- normalizedScore: 22,
518
-
519
523
  // AA specific benchmarks
520
524
  codingIndex: 13.9,
521
525
  mathIndex: undefined,
@@ -531,21 +535,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
531
535
  supportsVision: false,
532
536
 
533
537
  // Metadata
534
- lastUpdated: "2026-04-06",
538
+ lastUpdated: "2026-06-01",
539
+ originalModel: "Step3 VL 10B",
535
540
  },
536
- "olmo-3.1-32b-think": {
537
- // AA Intelligence Index (composite score)
538
- intelligenceIndex: 13.9,
539
- normalizedScore: 20,
540
-
541
+ "molmo-7b-d": {
541
542
  // AA specific benchmarks
542
- codingIndex: 9.8,
543
- mathIndex: 77.3,
543
+ codingIndex: 1.2,
544
+ mathIndex: 0,
544
545
 
545
546
  // Academic benchmarks
546
- mmluPro: 0.763,
547
- gpqa: 0.591,
548
- hle: 0.06,
547
+ mmluPro: 0.371,
548
+ gpqa: 0.24,
549
+ hle: 0.051,
549
550
 
550
551
  // Capabilities
551
552
  contextWindow: 8192,
@@ -553,16 +554,32 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
553
554
  supportsVision: false,
554
555
 
555
556
  // Metadata
556
- lastUpdated: "2026-04-06",
557
+ lastUpdated: "2026-06-01",
558
+ originalModel: "Molmo 7B-D",
557
559
  },
558
- "olmo-3-7b-instruct": {
559
- // AA Intelligence Index (composite score)
560
- intelligenceIndex: 8.2,
561
- normalizedScore: 12,
562
-
560
+ "molmo2-8b": {
563
561
  // AA specific benchmarks
564
- codingIndex: 3.4,
565
- mathIndex: 41.3,
562
+ codingIndex: 4.4,
563
+ mathIndex: undefined,
564
+
565
+ // Academic benchmarks
566
+ mmluPro: undefined,
567
+ gpqa: 0.425,
568
+ hle: 0.044,
569
+
570
+ // Capabilities
571
+ contextWindow: 8192,
572
+ supportsReasoning: false,
573
+ supportsVision: false,
574
+
575
+ // Metadata
576
+ lastUpdated: "2026-06-01",
577
+ originalModel: "Molmo2-8B",
578
+ },
579
+ "olmo-3-7b-instruct": {
580
+ // AA specific benchmarks
581
+ codingIndex: 3.4,
582
+ mathIndex: 41.3,
566
583
 
567
584
  // Academic benchmarks
568
585
  mmluPro: 0.522,
@@ -575,13 +592,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
575
592
  supportsVision: false,
576
593
 
577
594
  // Metadata
578
- lastUpdated: "2026-04-06",
595
+ lastUpdated: "2026-06-01",
596
+ originalModel: "Olmo 3 7B Instruct",
579
597
  },
580
598
  "olmo-3-7b-think": {
581
- // AA Intelligence Index (composite score)
582
- intelligenceIndex: 9.4,
583
- normalizedScore: 13,
584
-
585
599
  // AA specific benchmarks
586
600
  codingIndex: 7.6,
587
601
  mathIndex: 70.7,
@@ -597,21 +611,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
597
611
  supportsVision: false,
598
612
 
599
613
  // Metadata
600
- lastUpdated: "2026-04-06",
614
+ lastUpdated: "2026-06-01",
615
+ originalModel: "Olmo 3 7B Think",
601
616
  },
602
- "molmo2-8b": {
603
- // AA Intelligence Index (composite score)
604
- intelligenceIndex: 7.3,
605
- normalizedScore: 10,
606
-
617
+ "olmo-3.1-32b-instruct": {
607
618
  // AA specific benchmarks
608
- codingIndex: 4.4,
619
+ codingIndex: 5.6,
609
620
  mathIndex: undefined,
610
621
 
611
622
  // Academic benchmarks
612
623
  mmluPro: undefined,
613
- gpqa: 0.425,
614
- hle: 0.044,
624
+ gpqa: 0.539,
625
+ hle: 0.049,
615
626
 
616
627
  // Capabilities
617
628
  contextWindow: 8192,
@@ -619,21 +630,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
619
630
  supportsVision: false,
620
631
 
621
632
  // Metadata
622
- lastUpdated: "2026-04-06",
633
+ lastUpdated: "2026-06-01",
634
+ originalModel: "Olmo 3.1 32B Instruct",
623
635
  },
624
- "molmo-7b-d": {
625
- // AA Intelligence Index (composite score)
626
- intelligenceIndex: 9.2,
627
- normalizedScore: 13,
628
-
636
+ "olmo-3.1-32b-think": {
629
637
  // AA specific benchmarks
630
- codingIndex: 1.2,
631
- mathIndex: 0,
638
+ codingIndex: 9.8,
639
+ mathIndex: 77.3,
632
640
 
633
641
  // Academic benchmarks
634
- mmluPro: 0.371,
635
- gpqa: 0.24,
636
- hle: 0.051,
642
+ mmluPro: 0.763,
643
+ gpqa: 0.591,
644
+ hle: 0.06,
637
645
 
638
646
  // Capabilities
639
647
  contextWindow: 8192,
@@ -641,21 +649,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
641
649
  supportsVision: false,
642
650
 
643
651
  // Metadata
644
- lastUpdated: "2026-04-06",
652
+ lastUpdated: "2026-06-01",
653
+ originalModel: "Olmo 3.1 32B Think",
645
654
  },
646
- "olmo-3.1-32b-instruct": {
647
- // AA Intelligence Index (composite score)
648
- intelligenceIndex: 12.2,
649
- normalizedScore: 17,
650
-
655
+ "granite-4.1-8b": {
651
656
  // AA specific benchmarks
652
- codingIndex: 5.6,
657
+ codingIndex: 7.3,
653
658
  mathIndex: undefined,
654
659
 
655
660
  // Academic benchmarks
656
661
  mmluPro: undefined,
657
- gpqa: 0.539,
658
- hle: 0.049,
662
+ gpqa: 0.433,
663
+ hle: 0.038,
659
664
 
660
665
  // Capabilities
661
666
  contextWindow: 8192,
@@ -663,21 +668,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
663
668
  supportsVision: false,
664
669
 
665
670
  // Metadata
666
- lastUpdated: "2026-04-06",
671
+ lastUpdated: "2026-06-01",
672
+ originalModel: "Granite 4.1 8B",
667
673
  },
668
- "granite-4-1b": {
669
- // AA Intelligence Index (composite score)
670
- intelligenceIndex: 7.3,
671
- normalizedScore: 10,
672
-
674
+ "granite-4.0-350m": {
673
675
  // AA specific benchmarks
674
- codingIndex: 2.9,
675
- mathIndex: 6.3,
676
+ codingIndex: 0.3,
677
+ mathIndex: 0,
676
678
 
677
679
  // Academic benchmarks
678
- mmluPro: 0.325,
679
- gpqa: 0.281,
680
- hle: 0.051,
680
+ mmluPro: 0.124,
681
+ gpqa: 0.261,
682
+ hle: 0.057,
681
683
 
682
684
  // Capabilities
683
685
  contextWindow: 8192,
@@ -685,20 +687,36 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
685
687
  supportsVision: false,
686
688
 
687
689
  // Metadata
688
- lastUpdated: "2026-04-06",
690
+ lastUpdated: "2026-06-01",
691
+ originalModel: "Granite 4.0 350M",
689
692
  },
690
- "granite-4-micro": {
691
- // AA Intelligence Index (composite score)
692
- intelligenceIndex: 7.7,
693
- normalizedScore: 11,
693
+ "granite-4.1-3b": {
694
+ // AA specific benchmarks
695
+ codingIndex: 5.5,
696
+ mathIndex: undefined,
697
+
698
+ // Academic benchmarks
699
+ mmluPro: undefined,
700
+ gpqa: 0.314,
701
+ hle: 0.034,
694
702
 
703
+ // Capabilities
704
+ contextWindow: 8192,
705
+ supportsReasoning: false,
706
+ supportsVision: false,
707
+
708
+ // Metadata
709
+ lastUpdated: "2026-06-01",
710
+ originalModel: "Granite 4.1 3B",
711
+ },
712
+ "granite-4.0-1b": {
695
713
  // AA specific benchmarks
696
- codingIndex: 5,
697
- mathIndex: 6,
714
+ codingIndex: 2.9,
715
+ mathIndex: 6.3,
698
716
 
699
717
  // Academic benchmarks
700
- mmluPro: 0.447,
701
- gpqa: 0.336,
718
+ mmluPro: 0.325,
719
+ gpqa: 0.281,
702
720
  hle: 0.051,
703
721
 
704
722
  // Capabilities
@@ -707,13 +725,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
707
725
  supportsVision: false,
708
726
 
709
727
  // Metadata
710
- lastUpdated: "2026-04-06",
728
+ lastUpdated: "2026-06-01",
729
+ originalModel: "Granite 4.0 1B",
711
730
  },
712
- "granite-4-h-350m": {
713
- // AA Intelligence Index (composite score)
714
- intelligenceIndex: 5.4,
715
- normalizedScore: 8,
716
-
731
+ "granite-4.0-h-350m": {
717
732
  // AA specific benchmarks
718
733
  codingIndex: 0.6,
719
734
  mathIndex: 1.3,
@@ -729,21 +744,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
729
744
  supportsVision: false,
730
745
 
731
746
  // Metadata
732
- lastUpdated: "2026-04-06",
747
+ lastUpdated: "2026-06-01",
748
+ originalModel: "Granite 4.0 H 350M",
733
749
  },
734
- "llama-65b": {
735
- // AA Intelligence Index (composite score)
736
- intelligenceIndex: 7.4,
737
- normalizedScore: 11,
738
-
750
+ "granite-4.1-30b": {
739
751
  // AA specific benchmarks
740
- codingIndex: undefined,
752
+ codingIndex: 10.1,
741
753
  mathIndex: undefined,
742
754
 
743
755
  // Academic benchmarks
744
756
  mmluPro: undefined,
745
- gpqa: undefined,
746
- hle: undefined,
757
+ gpqa: 0.481,
758
+ hle: 0.042,
747
759
 
748
760
  // Capabilities
749
761
  contextWindow: 8192,
@@ -751,21 +763,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
751
763
  supportsVision: false,
752
764
 
753
765
  // Metadata
754
- lastUpdated: "2026-04-06",
766
+ lastUpdated: "2026-06-01",
767
+ originalModel: "Granite 4.1 30B",
755
768
  },
756
- "granite-4-h-small": {
757
- // AA Intelligence Index (composite score)
758
- intelligenceIndex: 10.8,
759
- normalizedScore: 15,
760
-
769
+ "granite-4.0-h-1b": {
761
770
  // AA specific benchmarks
762
- codingIndex: 8.5,
763
- mathIndex: 13.7,
771
+ codingIndex: 2.7,
772
+ mathIndex: 6.3,
764
773
 
765
774
  // Academic benchmarks
766
- mmluPro: 0.624,
767
- gpqa: 0.416,
768
- hle: 0.037,
775
+ mmluPro: 0.277,
776
+ gpqa: 0.263,
777
+ hle: 0.05,
769
778
 
770
779
  // Capabilities
771
780
  contextWindow: 8192,
@@ -773,21 +782,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
773
782
  supportsVision: false,
774
783
 
775
784
  // Metadata
776
- lastUpdated: "2026-04-06",
785
+ lastUpdated: "2026-06-01",
786
+ originalModel: "Granite 4.0 H 1B",
777
787
  },
778
- "granite-4-h-1b": {
779
- // AA Intelligence Index (composite score)
780
- intelligenceIndex: 8,
781
- normalizedScore: 11,
782
-
788
+ "granite-4.0-h-small": {
783
789
  // AA specific benchmarks
784
- codingIndex: 2.7,
785
- mathIndex: 6.3,
790
+ codingIndex: 8.5,
791
+ mathIndex: 13.7,
786
792
 
787
793
  // Academic benchmarks
788
- mmluPro: 0.277,
789
- gpqa: 0.263,
790
- hle: 0.05,
794
+ mmluPro: 0.624,
795
+ gpqa: 0.416,
796
+ hle: 0.037,
791
797
 
792
798
  // Capabilities
793
799
  contextWindow: 8192,
@@ -795,21 +801,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
795
801
  supportsVision: false,
796
802
 
797
803
  // Metadata
798
- lastUpdated: "2026-04-06",
804
+ lastUpdated: "2026-06-01",
805
+ originalModel: "Granite 4.0 H Small",
799
806
  },
800
- "granite-4-350m": {
801
- // AA Intelligence Index (composite score)
802
- intelligenceIndex: 6.1,
803
- normalizedScore: 9,
804
-
807
+ "granite-4.0-micro": {
805
808
  // AA specific benchmarks
806
- codingIndex: 0.3,
807
- mathIndex: 0,
809
+ codingIndex: 5,
810
+ mathIndex: 6,
808
811
 
809
812
  // Academic benchmarks
810
- mmluPro: 0.124,
811
- gpqa: 0.261,
812
- hle: 0.057,
813
+ mmluPro: 0.447,
814
+ gpqa: 0.336,
815
+ hle: 0.051,
813
816
 
814
817
  // Capabilities
815
818
  contextWindow: 8192,
@@ -817,13 +820,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
817
820
  supportsVision: false,
818
821
 
819
822
  // Metadata
820
- lastUpdated: "2026-04-06",
823
+ lastUpdated: "2026-06-01",
824
+ originalModel: "Granite 4.0 Micro",
821
825
  },
822
826
  "mercury-2": {
823
- // AA Intelligence Index (composite score)
824
- intelligenceIndex: 32.8,
825
- normalizedScore: 47,
826
-
827
827
  // AA specific benchmarks
828
828
  codingIndex: 30.6,
829
829
  mathIndex: undefined,
@@ -839,13 +839,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
839
839
  supportsVision: false,
840
840
 
841
841
  // Metadata
842
- lastUpdated: "2026-04-06",
842
+ lastUpdated: "2026-06-01",
843
+ originalModel: "Mercury 2",
843
844
  },
844
845
  "reka-flash-3": {
845
- // AA Intelligence Index (composite score)
846
- intelligenceIndex: 9.5,
847
- normalizedScore: 14,
848
-
849
846
  // AA specific benchmarks
850
847
  codingIndex: 8.9,
851
848
  mathIndex: 33.7,
@@ -861,21 +858,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
861
858
  supportsVision: false,
862
859
 
863
860
  // Metadata
864
- lastUpdated: "2026-04-06",
861
+ lastUpdated: "2026-06-01",
862
+ originalModel: "Reka Flash 3",
865
863
  },
866
- "hermes-4---llama-3.1-70b-non-reasoning": {
867
- // AA Intelligence Index (composite score)
868
- intelligenceIndex: 12.6,
869
- normalizedScore: 18,
870
-
864
+ "deephermes-3---llama-3.1-8b-preview-non-reasoning": {
871
865
  // AA specific benchmarks
872
- codingIndex: 9.2,
873
- mathIndex: 11.3,
866
+ codingIndex: undefined,
867
+ mathIndex: undefined,
874
868
 
875
869
  // Academic benchmarks
876
- mmluPro: 0.664,
877
- gpqa: 0.491,
878
- hle: 0.036,
870
+ mmluPro: 0.365,
871
+ gpqa: 0.27,
872
+ hle: 0.043,
879
873
 
880
874
  // Capabilities
881
875
  contextWindow: 8192,
@@ -883,13 +877,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
883
877
  supportsVision: false,
884
878
 
885
879
  // Metadata
886
- lastUpdated: "2026-04-06",
880
+ lastUpdated: "2026-06-01",
881
+ originalModel: "DeepHermes 3 - Llama-3.1 8B Preview (Non-reasoning)",
887
882
  },
888
883
  "hermes-4---llama-3.1-405b-reasoning": {
889
- // AA Intelligence Index (composite score)
890
- intelligenceIndex: 18.6,
891
- normalizedScore: 27,
892
-
893
884
  // AA specific benchmarks
894
885
  codingIndex: 16,
895
886
  mathIndex: 69.7,
@@ -905,13 +896,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
905
896
  supportsVision: false,
906
897
 
907
898
  // Metadata
908
- lastUpdated: "2026-04-06",
899
+ lastUpdated: "2026-06-01",
900
+ originalModel: "Hermes 4 - Llama-3.1 405B (Reasoning)",
909
901
  },
910
- "deephermes-3---mistral-24b-preview-non-reasoning": {
911
- // AA Intelligence Index (composite score)
912
- intelligenceIndex: 10.9,
913
- normalizedScore: 16,
902
+ "hermes-4---llama-3.1-405b-non-reasoning": {
903
+ // AA specific benchmarks
904
+ codingIndex: 18.1,
905
+ mathIndex: 15.3,
906
+
907
+ // Academic benchmarks
908
+ mmluPro: 0.729,
909
+ gpqa: 0.536,
910
+ hle: 0.042,
911
+
912
+ // Capabilities
913
+ contextWindow: 8192,
914
+ supportsReasoning: false,
915
+ supportsVision: false,
914
916
 
917
+ // Metadata
918
+ lastUpdated: "2026-06-01",
919
+ originalModel: "Hermes 4 - Llama-3.1 405B (Non-reasoning)",
920
+ },
921
+ "deephermes-3---mistral-24b-preview-non-reasoning": {
915
922
  // AA specific benchmarks
916
923
  codingIndex: undefined,
917
924
  mathIndex: undefined,
@@ -927,13 +934,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
927
934
  supportsVision: false,
928
935
 
929
936
  // Metadata
930
- lastUpdated: "2026-04-06",
937
+ lastUpdated: "2026-06-01",
938
+ originalModel: "DeepHermes 3 - Mistral 24B Preview (Non-reasoning)",
931
939
  },
932
940
  "hermes-4---llama-3.1-70b-reasoning": {
933
- // AA Intelligence Index (composite score)
934
- intelligenceIndex: 16,
935
- normalizedScore: 23,
936
-
937
941
  // AA specific benchmarks
938
942
  codingIndex: 14.4,
939
943
  mathIndex: 68.7,
@@ -949,21 +953,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
949
953
  supportsVision: false,
950
954
 
951
955
  // Metadata
952
- lastUpdated: "2026-04-06",
956
+ lastUpdated: "2026-06-01",
957
+ originalModel: "Hermes 4 - Llama-3.1 70B (Reasoning)",
953
958
  },
954
- "hermes-4---llama-3.1-405b-non-reasoning": {
955
- // AA Intelligence Index (composite score)
956
- intelligenceIndex: 17.6,
957
- normalizedScore: 25,
959
+ "hermes-4---llama-3.1-70b-non-reasoning": {
960
+ // AA specific benchmarks
961
+ codingIndex: 9.2,
962
+ mathIndex: 11.3,
958
963
 
964
+ // Academic benchmarks
965
+ mmluPro: 0.664,
966
+ gpqa: 0.491,
967
+ hle: 0.036,
968
+
969
+ // Capabilities
970
+ contextWindow: 8192,
971
+ supportsReasoning: false,
972
+ supportsVision: false,
973
+
974
+ // Metadata
975
+ lastUpdated: "2026-06-01",
976
+ originalModel: "Hermes 4 - Llama-3.1 70B (Non-reasoning)",
977
+ },
978
+ "exaone-4.0-1.2b-reasoning": {
959
979
  // AA specific benchmarks
960
- codingIndex: 18.1,
961
- mathIndex: 15.3,
980
+ codingIndex: 3.1,
981
+ mathIndex: 50.3,
962
982
 
963
983
  // Academic benchmarks
964
- mmluPro: 0.729,
965
- gpqa: 0.536,
966
- hle: 0.042,
984
+ mmluPro: 0.588,
985
+ gpqa: 0.515,
986
+ hle: 0.058,
967
987
 
968
988
  // Capabilities
969
989
  contextWindow: 8192,
@@ -971,21 +991,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
971
991
  supportsVision: false,
972
992
 
973
993
  // Metadata
974
- lastUpdated: "2026-04-06",
994
+ lastUpdated: "2026-06-01",
995
+ originalModel: "Exaone 4.0 1.2B (Reasoning)",
975
996
  },
976
- "deephermes-3---llama-3.1-8b-preview-non-reasoning": {
977
- // AA Intelligence Index (composite score)
978
- intelligenceIndex: 7.6,
979
- normalizedScore: 11,
997
+ "exaone-4.0-32b-reasoning": {
998
+ // AA specific benchmarks
999
+ codingIndex: 14,
1000
+ mathIndex: 80,
1001
+
1002
+ // Academic benchmarks
1003
+ mmluPro: 0.818,
1004
+ gpqa: 0.739,
1005
+ hle: 0.105,
1006
+
1007
+ // Capabilities
1008
+ contextWindow: 8192,
1009
+ supportsReasoning: false,
1010
+ supportsVision: false,
980
1011
 
1012
+ // Metadata
1013
+ lastUpdated: "2026-06-01",
1014
+ originalModel: "EXAONE 4.0 32B (Reasoning)",
1015
+ },
1016
+ "k-exaone-reasoning": {
981
1017
  // AA specific benchmarks
982
- codingIndex: undefined,
983
- mathIndex: undefined,
1018
+ codingIndex: 27,
1019
+ mathIndex: 90.3,
984
1020
 
985
1021
  // Academic benchmarks
986
- mmluPro: 0.365,
987
- gpqa: 0.27,
988
- hle: 0.043,
1022
+ mmluPro: 0.838,
1023
+ gpqa: 0.783,
1024
+ hle: 0.131,
989
1025
 
990
1026
  // Capabilities
991
1027
  contextWindow: 8192,
@@ -993,13 +1029,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
993
1029
  supportsVision: false,
994
1030
 
995
1031
  // Metadata
996
- lastUpdated: "2026-04-06",
1032
+ lastUpdated: "2026-06-01",
1033
+ originalModel: "K-EXAONE (Reasoning)",
997
1034
  },
998
1035
  "k-exaone-non-reasoning": {
999
- // AA Intelligence Index (composite score)
1000
- intelligenceIndex: 23.4,
1001
- normalizedScore: 33,
1002
-
1003
1036
  // AA specific benchmarks
1004
1037
  codingIndex: 13.5,
1005
1038
  mathIndex: 44,
@@ -1015,21 +1048,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1015
1048
  supportsVision: false,
1016
1049
 
1017
1050
  // Metadata
1018
- lastUpdated: "2026-04-06",
1051
+ lastUpdated: "2026-06-01",
1052
+ originalModel: "K-EXAONE (Non-reasoning)",
1019
1053
  },
1020
- "exaone-4-32b-non-reasoning": {
1021
- // AA Intelligence Index (composite score)
1022
- intelligenceIndex: 11.7,
1023
- normalizedScore: 17,
1024
-
1054
+ "exaone-4.5-33b": {
1025
1055
  // AA specific benchmarks
1026
- codingIndex: 9.4,
1027
- mathIndex: 39.3,
1056
+ codingIndex: 23,
1057
+ mathIndex: undefined,
1028
1058
 
1029
1059
  // Academic benchmarks
1030
- mmluPro: 0.768,
1031
- gpqa: 0.628,
1032
- hle: 0.049,
1060
+ mmluPro: undefined,
1061
+ gpqa: 0.794,
1062
+ hle: 0.116,
1033
1063
 
1034
1064
  // Capabilities
1035
1065
  contextWindow: 8192,
@@ -1037,21 +1067,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1037
1067
  supportsVision: false,
1038
1068
 
1039
1069
  // Metadata
1040
- lastUpdated: "2026-04-06",
1070
+ lastUpdated: "2026-06-01",
1071
+ originalModel: "EXAONE 4.5 33B",
1041
1072
  },
1042
- "k-exaone-reasoning": {
1043
- // AA Intelligence Index (composite score)
1044
- intelligenceIndex: 32.1,
1045
- normalizedScore: 46,
1046
-
1073
+ "exaone-4.0-32b-non-reasoning": {
1047
1074
  // AA specific benchmarks
1048
- codingIndex: 27,
1049
- mathIndex: 90.3,
1075
+ codingIndex: 9.4,
1076
+ mathIndex: 39.3,
1050
1077
 
1051
1078
  // Academic benchmarks
1052
- mmluPro: 0.838,
1053
- gpqa: 0.783,
1054
- hle: 0.131,
1079
+ mmluPro: 0.768,
1080
+ gpqa: 0.628,
1081
+ hle: 0.049,
1055
1082
 
1056
1083
  // Capabilities
1057
1084
  contextWindow: 8192,
@@ -1059,13 +1086,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1059
1086
  supportsVision: false,
1060
1087
 
1061
1088
  // Metadata
1062
- lastUpdated: "2026-04-06",
1089
+ lastUpdated: "2026-06-01",
1090
+ originalModel: "EXAONE 4.0 32B (Non-reasoning)",
1063
1091
  },
1064
- "exaone-4-1.2b-non-reasoning": {
1065
- // AA Intelligence Index (composite score)
1066
- intelligenceIndex: 8.1,
1067
- normalizedScore: 12,
1068
-
1092
+ "exaone-4.0-1.2b-non-reasoning": {
1069
1093
  // AA specific benchmarks
1070
1094
  codingIndex: 2.5,
1071
1095
  mathIndex: 24,
@@ -1081,21 +1105,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1081
1105
  supportsVision: false,
1082
1106
 
1083
1107
  // Metadata
1084
- lastUpdated: "2026-04-06",
1108
+ lastUpdated: "2026-06-01",
1109
+ originalModel: "Exaone 4.0 1.2B (Non-reasoning)",
1085
1110
  },
1086
- "exaone-4-32b-reasoning": {
1087
- // AA Intelligence Index (composite score)
1088
- intelligenceIndex: 16.7,
1089
- normalizedScore: 24,
1090
-
1111
+ "mimo-v2-flash-non-reasoning": {
1091
1112
  // AA specific benchmarks
1092
- codingIndex: 14,
1093
- mathIndex: 80,
1113
+ codingIndex: 25.8,
1114
+ mathIndex: 67.7,
1094
1115
 
1095
1116
  // Academic benchmarks
1096
- mmluPro: 0.818,
1097
- gpqa: 0.739,
1098
- hle: 0.105,
1117
+ mmluPro: 0.744,
1118
+ gpqa: 0.656,
1119
+ hle: 0.08,
1099
1120
 
1100
1121
  // Capabilities
1101
1122
  contextWindow: 8192,
@@ -1103,21 +1124,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1103
1124
  supportsVision: false,
1104
1125
 
1105
1126
  // Metadata
1106
- lastUpdated: "2026-04-06",
1127
+ lastUpdated: "2026-06-01",
1128
+ originalModel: "MiMo-V2-Flash (Non-reasoning)",
1107
1129
  },
1108
- "exaone-4-1.2b-reasoning": {
1109
- // AA Intelligence Index (composite score)
1110
- intelligenceIndex: 8.3,
1111
- normalizedScore: 12,
1112
-
1130
+ "mimo-v2.5-pro-non-reasoning": {
1113
1131
  // AA specific benchmarks
1114
- codingIndex: 3.1,
1115
- mathIndex: 50.3,
1132
+ codingIndex: 36.8,
1133
+ mathIndex: undefined,
1116
1134
 
1117
1135
  // Academic benchmarks
1118
- mmluPro: 0.588,
1119
- gpqa: 0.515,
1120
- hle: 0.058,
1136
+ mmluPro: undefined,
1137
+ gpqa: 0.762,
1138
+ hle: 0.133,
1121
1139
 
1122
1140
  // Capabilities
1123
1141
  contextWindow: 8192,
@@ -1125,21 +1143,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1125
1143
  supportsVision: false,
1126
1144
 
1127
1145
  // Metadata
1128
- lastUpdated: "2026-04-06",
1146
+ lastUpdated: "2026-06-01",
1147
+ originalModel: "MiMo-V2.5-Pro (Non-reasoning)",
1129
1148
  },
1130
- "mimo-v2-pro": {
1131
- // AA Intelligence Index (composite score)
1132
- intelligenceIndex: 49.2,
1133
- normalizedScore: 70,
1134
-
1149
+ "mimo-v2.5": {
1135
1150
  // AA specific benchmarks
1136
- codingIndex: 41.4,
1151
+ codingIndex: 42.1,
1137
1152
  mathIndex: undefined,
1138
1153
 
1139
1154
  // Academic benchmarks
1140
1155
  mmluPro: undefined,
1141
- gpqa: 0.87,
1142
- hle: 0.283,
1156
+ gpqa: 0.849,
1157
+ hle: 0.252,
1143
1158
 
1144
1159
  // Capabilities
1145
1160
  contextWindow: 8192,
@@ -1147,13 +1162,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1147
1162
  supportsVision: false,
1148
1163
 
1149
1164
  // Metadata
1150
- lastUpdated: "2026-04-06",
1165
+ lastUpdated: "2026-06-01",
1166
+ originalModel: "MiMo-V2.5",
1151
1167
  },
1152
- "mimo-v2-flash-feb-2026": {
1153
- // AA Intelligence Index (composite score)
1154
- intelligenceIndex: 41.5,
1155
- normalizedScore: 59,
1156
-
1168
+ "mimo-v2.5-pro": {
1169
+ // AA specific benchmarks
1170
+ codingIndex: 45.5,
1171
+ mathIndex: undefined,
1172
+
1173
+ // Academic benchmarks
1174
+ mmluPro: undefined,
1175
+ gpqa: 0.866,
1176
+ hle: 0.338,
1177
+
1178
+ // Capabilities
1179
+ contextWindow: 8192,
1180
+ supportsReasoning: false,
1181
+ supportsVision: false,
1182
+
1183
+ // Metadata
1184
+ lastUpdated: "2026-06-01",
1185
+ originalModel: "MiMo-V2.5-Pro",
1186
+ },
1187
+ "mimo-v2-flash-feb-2026": {
1157
1188
  // AA specific benchmarks
1158
1189
  codingIndex: 33.5,
1159
1190
  mathIndex: undefined,
@@ -1169,21 +1200,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1169
1200
  supportsVision: false,
1170
1201
 
1171
1202
  // Metadata
1172
- lastUpdated: "2026-04-06",
1203
+ lastUpdated: "2026-06-01",
1204
+ originalModel: "MiMo-V2-Flash (Feb 2026)",
1173
1205
  },
1174
- "mimo-v2-flash-non-reasoning": {
1175
- // AA Intelligence Index (composite score)
1176
- intelligenceIndex: 30.4,
1177
- normalizedScore: 43,
1178
-
1206
+ "mimo-v2-omni-0327": {
1179
1207
  // AA specific benchmarks
1180
- codingIndex: 25.8,
1181
- mathIndex: 67.7,
1208
+ codingIndex: 36.9,
1209
+ mathIndex: undefined,
1182
1210
 
1183
1211
  // Academic benchmarks
1184
- mmluPro: 0.744,
1185
- gpqa: 0.656,
1186
- hle: 0.08,
1212
+ mmluPro: undefined,
1213
+ gpqa: 0.855,
1214
+ hle: 0.204,
1187
1215
 
1188
1216
  // Capabilities
1189
1217
  contextWindow: 8192,
@@ -1191,13 +1219,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1191
1219
  supportsVision: false,
1192
1220
 
1193
1221
  // Metadata
1194
- lastUpdated: "2026-04-06",
1222
+ lastUpdated: "2026-06-01",
1223
+ originalModel: "MiMo-V2-Omni-0327",
1195
1224
  },
1196
1225
  "mimo-v2-omni": {
1197
- // AA Intelligence Index (composite score)
1198
- intelligenceIndex: 43.4,
1199
- normalizedScore: 62,
1200
-
1201
1226
  // AA specific benchmarks
1202
1227
  codingIndex: 35.5,
1203
1228
  mathIndex: undefined,
@@ -1213,13 +1238,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1213
1238
  supportsVision: false,
1214
1239
 
1215
1240
  // Metadata
1216
- lastUpdated: "2026-04-06",
1241
+ lastUpdated: "2026-06-01",
1242
+ originalModel: "MiMo-V2-Omni",
1217
1243
  },
1218
1244
  "ernie-4.5-300b-a47b": {
1219
- // AA Intelligence Index (composite score)
1220
- intelligenceIndex: 15,
1221
- normalizedScore: 21,
1222
-
1223
1245
  // AA specific benchmarks
1224
1246
  codingIndex: 14.5,
1225
1247
  mathIndex: 41.3,
@@ -1235,13 +1257,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1235
1257
  supportsVision: false,
1236
1258
 
1237
1259
  // Metadata
1238
- lastUpdated: "2026-04-06",
1260
+ lastUpdated: "2026-06-01",
1261
+ originalModel: "ERNIE 4.5 300B A47B",
1239
1262
  },
1240
- "ernie-5-thinking-preview": {
1241
- // AA Intelligence Index (composite score)
1242
- intelligenceIndex: 29.1,
1243
- normalizedScore: 42,
1244
-
1263
+ "ernie-5.0-thinking-preview": {
1245
1264
  // AA specific benchmarks
1246
1265
  codingIndex: 29.2,
1247
1266
  mathIndex: 85,
@@ -1257,13 +1276,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1257
1276
  supportsVision: false,
1258
1277
 
1259
1278
  // Metadata
1260
- lastUpdated: "2026-04-06",
1279
+ lastUpdated: "2026-06-01",
1280
+ originalModel: "ERNIE 5.0 Thinking Preview",
1261
1281
  },
1262
1282
  "sarvam-30b-high": {
1263
- // AA Intelligence Index (composite score)
1264
- intelligenceIndex: 12.3,
1265
- normalizedScore: 18,
1266
-
1267
1283
  // AA specific benchmarks
1268
1284
  codingIndex: 7.9,
1269
1285
  mathIndex: undefined,
@@ -1279,13 +1295,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1279
1295
  supportsVision: false,
1280
1296
 
1281
1297
  // Metadata
1282
- lastUpdated: "2026-04-06",
1298
+ lastUpdated: "2026-06-01",
1299
+ originalModel: "Sarvam 30B (high)",
1283
1300
  },
1284
1301
  "sarvam-105b-high": {
1285
- // AA Intelligence Index (composite score)
1286
- intelligenceIndex: 18.2,
1287
- normalizedScore: 26,
1288
-
1289
1302
  // AA specific benchmarks
1290
1303
  codingIndex: 9.8,
1291
1304
  mathIndex: undefined,
@@ -1301,21 +1314,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1301
1314
  supportsVision: false,
1302
1315
 
1303
1316
  // Metadata
1304
- lastUpdated: "2026-04-06",
1317
+ lastUpdated: "2026-06-01",
1318
+ originalModel: "Sarvam 105B (high)",
1305
1319
  },
1306
- "kat-coder-pro-v1": {
1307
- // AA Intelligence Index (composite score)
1308
- intelligenceIndex: 36,
1309
- normalizedScore: 51,
1320
+ "qwen-chat-14b": {
1321
+ // AA specific benchmarks
1322
+ codingIndex: undefined,
1323
+ mathIndex: undefined,
1324
+
1325
+ // Academic benchmarks
1326
+ mmluPro: undefined,
1327
+ gpqa: undefined,
1328
+ hle: undefined,
1310
1329
 
1330
+ // Capabilities
1331
+ contextWindow: 8192,
1332
+ supportsReasoning: false,
1333
+ supportsVision: false,
1334
+
1335
+ // Metadata
1336
+ lastUpdated: "2026-06-01",
1337
+ originalModel: "Qwen Chat 14B",
1338
+ },
1339
+ "hy3-preview-reasoning": {
1311
1340
  // AA specific benchmarks
1312
- codingIndex: 18.3,
1313
- mathIndex: 94.7,
1341
+ codingIndex: 36.5,
1342
+ mathIndex: undefined,
1314
1343
 
1315
1344
  // Academic benchmarks
1316
- mmluPro: 0.813,
1317
- gpqa: 0.764,
1318
- hle: 0.334,
1345
+ mmluPro: undefined,
1346
+ gpqa: 0.867,
1347
+ hle: 0.255,
1319
1348
 
1320
1349
  // Capabilities
1321
1350
  contextWindow: 8192,
@@ -1323,13 +1352,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1323
1352
  supportsVision: false,
1324
1353
 
1325
1354
  // Metadata
1326
- lastUpdated: "2026-04-06",
1355
+ lastUpdated: "2026-06-01",
1356
+ originalModel: "Hy3-preview (Reasoning)",
1327
1357
  },
1328
- "kat-coder-pro-v2": {
1329
- // AA Intelligence Index (composite score)
1330
- intelligenceIndex: 43.8,
1331
- normalizedScore: 63,
1358
+ "hy3-preview-non-reasoning": {
1359
+ // AA specific benchmarks
1360
+ codingIndex: 34.3,
1361
+ mathIndex: undefined,
1362
+
1363
+ // Academic benchmarks
1364
+ mmluPro: undefined,
1365
+ gpqa: 0.732,
1366
+ hle: 0.063,
1367
+
1368
+ // Capabilities
1369
+ contextWindow: 8192,
1370
+ supportsReasoning: false,
1371
+ supportsVision: false,
1332
1372
 
1373
+ // Metadata
1374
+ lastUpdated: "2026-06-01",
1375
+ originalModel: "Hy3-preview (Non-reasoning)",
1376
+ },
1377
+ "kat-coder-pro-v2": {
1333
1378
  // AA specific benchmarks
1334
1379
  codingIndex: 45.6,
1335
1380
  mathIndex: undefined,
@@ -1345,13 +1390,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1345
1390
  supportsVision: false,
1346
1391
 
1347
1392
  // Metadata
1348
- lastUpdated: "2026-04-06",
1393
+ lastUpdated: "2026-06-01",
1394
+ originalModel: "KAT Coder Pro V2",
1349
1395
  },
1350
- "intellect-3": {
1351
- // AA Intelligence Index (composite score)
1352
- intelligenceIndex: 22.2,
1353
- normalizedScore: 32,
1396
+ "kat-coder-pro-v1": {
1397
+ // AA specific benchmarks
1398
+ codingIndex: 18.3,
1399
+ mathIndex: 94.7,
1354
1400
 
1401
+ // Academic benchmarks
1402
+ mmluPro: 0.813,
1403
+ gpqa: 0.764,
1404
+ hle: 0.334,
1405
+
1406
+ // Capabilities
1407
+ contextWindow: 8192,
1408
+ supportsReasoning: false,
1409
+ supportsVision: false,
1410
+
1411
+ // Metadata
1412
+ lastUpdated: "2026-06-01",
1413
+ originalModel: "KAT-Coder-Pro V1",
1414
+ },
1415
+ "intellect-3": {
1355
1416
  // AA specific benchmarks
1356
1417
  codingIndex: 19.1,
1357
1418
  mathIndex: 88,
@@ -1367,13 +1428,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1367
1428
  supportsVision: false,
1368
1429
 
1369
1430
  // Metadata
1370
- lastUpdated: "2026-04-06",
1431
+ lastUpdated: "2026-06-01",
1432
+ originalModel: "INTELLECT-3",
1371
1433
  },
1372
1434
  "motif-2-12.7b-reasoning": {
1373
- // AA Intelligence Index (composite score)
1374
- intelligenceIndex: 19.1,
1375
- normalizedScore: 27,
1376
-
1377
1435
  // AA specific benchmarks
1378
1436
  codingIndex: 11.9,
1379
1437
  mathIndex: 80.3,
@@ -1389,21 +1447,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1389
1447
  supportsVision: false,
1390
1448
 
1391
1449
  // Metadata
1392
- lastUpdated: "2026-04-06",
1450
+ lastUpdated: "2026-06-01",
1451
+ originalModel: "Motif-2-12.7B-Reasoning",
1393
1452
  },
1394
- "k2-v2-low": {
1395
- // AA Intelligence Index (composite score)
1396
- intelligenceIndex: 14.4,
1397
- normalizedScore: 21,
1398
-
1453
+ "k2-think-v2": {
1399
1454
  // AA specific benchmarks
1400
- codingIndex: 10.5,
1401
- mathIndex: 35.3,
1455
+ codingIndex: 15.5,
1456
+ mathIndex: undefined,
1402
1457
 
1403
1458
  // Academic benchmarks
1404
- mmluPro: 0.713,
1405
- gpqa: 0.541,
1406
- hle: 0.039,
1459
+ mmluPro: undefined,
1460
+ gpqa: 0.713,
1461
+ hle: 0.095,
1407
1462
 
1408
1463
  // Capabilities
1409
1464
  contextWindow: 8192,
@@ -1411,21 +1466,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1411
1466
  supportsVision: false,
1412
1467
 
1413
1468
  // Metadata
1414
- lastUpdated: "2026-04-06",
1469
+ lastUpdated: "2026-06-01",
1470
+ originalModel: "K2 Think V2",
1415
1471
  },
1416
- "k2-v2-medium": {
1417
- // AA Intelligence Index (composite score)
1418
- intelligenceIndex: 18.7,
1419
- normalizedScore: 27,
1420
-
1472
+ "k2-v2-high": {
1421
1473
  // AA specific benchmarks
1422
- codingIndex: 14,
1423
- mathIndex: 64.7,
1474
+ codingIndex: 16.1,
1475
+ mathIndex: 78.3,
1424
1476
 
1425
1477
  // Academic benchmarks
1426
- mmluPro: 0.761,
1427
- gpqa: 0.598,
1428
- hle: 0.044,
1478
+ mmluPro: 0.786,
1479
+ gpqa: 0.681,
1480
+ hle: 0.098,
1429
1481
 
1430
1482
  // Capabilities
1431
1483
  contextWindow: 8192,
@@ -1433,21 +1485,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1433
1485
  supportsVision: false,
1434
1486
 
1435
1487
  // Metadata
1436
- lastUpdated: "2026-04-06",
1488
+ lastUpdated: "2026-06-01",
1489
+ originalModel: "K2-V2 (high)",
1437
1490
  },
1438
- "k2-v2-high": {
1439
- // AA Intelligence Index (composite score)
1440
- intelligenceIndex: 20.6,
1441
- normalizedScore: 29,
1442
-
1491
+ "k2-v2-low": {
1443
1492
  // AA specific benchmarks
1444
- codingIndex: 16.1,
1445
- mathIndex: 78.3,
1493
+ codingIndex: 10.5,
1494
+ mathIndex: 35.3,
1446
1495
 
1447
1496
  // Academic benchmarks
1448
- mmluPro: 0.786,
1449
- gpqa: 0.681,
1450
- hle: 0.098,
1497
+ mmluPro: 0.713,
1498
+ gpqa: 0.541,
1499
+ hle: 0.039,
1451
1500
 
1452
1501
  // Capabilities
1453
1502
  contextWindow: 8192,
@@ -1455,21 +1504,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1455
1504
  supportsVision: false,
1456
1505
 
1457
1506
  // Metadata
1458
- lastUpdated: "2026-04-06",
1507
+ lastUpdated: "2026-06-01",
1508
+ originalModel: "K2-V2 (low)",
1459
1509
  },
1460
- "k2-think-v2": {
1461
- // AA Intelligence Index (composite score)
1462
- intelligenceIndex: 24.1,
1463
- normalizedScore: 34,
1464
-
1510
+ "k2-v2-medium": {
1465
1511
  // AA specific benchmarks
1466
- codingIndex: 15.5,
1467
- mathIndex: undefined,
1512
+ codingIndex: 14,
1513
+ mathIndex: 64.7,
1468
1514
 
1469
1515
  // Academic benchmarks
1470
- mmluPro: undefined,
1471
- gpqa: 0.713,
1472
- hle: 0.095,
1516
+ mmluPro: 0.761,
1517
+ gpqa: 0.598,
1518
+ hle: 0.044,
1473
1519
 
1474
1520
  // Capabilities
1475
1521
  contextWindow: 8192,
@@ -1477,13 +1523,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1477
1523
  supportsVision: false,
1478
1524
 
1479
1525
  // Metadata
1480
- lastUpdated: "2026-04-06",
1526
+ lastUpdated: "2026-06-01",
1527
+ originalModel: "K2-V2 (medium)",
1481
1528
  },
1482
1529
  "mi-dm-k-2.5-pro": {
1483
- // AA Intelligence Index (composite score)
1484
- intelligenceIndex: 23.1,
1485
- normalizedScore: 33,
1486
-
1487
1530
  // AA specific benchmarks
1488
1531
  codingIndex: 12.6,
1489
1532
  mathIndex: 76.7,
@@ -1499,13 +1542,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1499
1542
  supportsVision: false,
1500
1543
 
1501
1544
  // Metadata
1502
- lastUpdated: "2026-04-06",
1545
+ lastUpdated: "2026-06-01",
1546
+ originalModel: "Mi:dm K 2.5 Pro",
1503
1547
  },
1504
1548
  "hyperclova-x-seed-think-32b": {
1505
- // AA Intelligence Index (composite score)
1506
- intelligenceIndex: 23.7,
1507
- normalizedScore: 34,
1508
-
1509
1549
  // AA specific benchmarks
1510
1550
  codingIndex: 17.5,
1511
1551
  mathIndex: 59,
@@ -1521,13 +1561,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1521
1561
  supportsVision: false,
1522
1562
 
1523
1563
  // Metadata
1524
- lastUpdated: "2026-04-06",
1564
+ lastUpdated: "2026-06-01",
1565
+ originalModel: "HyperCLOVA X SEED Think (32B)",
1525
1566
  },
1526
1567
  "longcat-flash-lite": {
1527
- // AA Intelligence Index (composite score)
1528
- intelligenceIndex: 23.9,
1529
- normalizedScore: 34,
1530
-
1531
1568
  // AA specific benchmarks
1532
1569
  codingIndex: 16.5,
1533
1570
  mathIndex: undefined,
@@ -1543,13 +1580,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1543
1580
  supportsVision: false,
1544
1581
 
1545
1582
  // Metadata
1546
- lastUpdated: "2026-04-06",
1583
+ lastUpdated: "2026-06-01",
1584
+ originalModel: "LongCat Flash Lite",
1547
1585
  },
1548
1586
  "tri-21b-think": {
1549
- // AA Intelligence Index (composite score)
1550
- intelligenceIndex: 18.6,
1551
- normalizedScore: 27,
1552
-
1553
1587
  // AA specific benchmarks
1554
1588
  codingIndex: 6.3,
1555
1589
  mathIndex: undefined,
@@ -1565,13 +1599,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1565
1599
  supportsVision: false,
1566
1600
 
1567
1601
  // Metadata
1568
- lastUpdated: "2026-04-06",
1602
+ lastUpdated: "2026-06-01",
1603
+ originalModel: "Tri-21B-Think",
1569
1604
  },
1570
1605
  "tri-21b-think-preview": {
1571
- // AA Intelligence Index (composite score)
1572
- intelligenceIndex: 20,
1573
- normalizedScore: 29,
1574
-
1575
1606
  // AA specific benchmarks
1576
1607
  codingIndex: 7.4,
1577
1608
  mathIndex: undefined,
@@ -1587,13 +1618,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1587
1618
  supportsVision: false,
1588
1619
 
1589
1620
  // Metadata
1590
- lastUpdated: "2026-04-06",
1621
+ lastUpdated: "2026-06-01",
1622
+ originalModel: "Tri-21B-think Preview",
1591
1623
  },
1592
1624
  "nanbeige4.1-3b": {
1593
- // AA Intelligence Index (composite score)
1594
- intelligenceIndex: 16.1,
1595
- normalizedScore: 23,
1596
-
1597
1625
  // AA specific benchmarks
1598
1626
  codingIndex: 8.9,
1599
1627
  mathIndex: undefined,
@@ -1609,35 +1637,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1609
1637
  supportsVision: false,
1610
1638
 
1611
1639
  // Metadata
1612
- lastUpdated: "2026-04-06",
1613
- },
1614
- "apertus-70b-instruct": {
1615
- // AA Intelligence Index (composite score)
1616
- intelligenceIndex: 7.7,
1617
- normalizedScore: 11,
1618
-
1619
- // AA specific benchmarks
1620
- codingIndex: 1.9,
1621
- mathIndex: undefined,
1622
-
1623
- // Academic benchmarks
1624
- mmluPro: undefined,
1625
- gpqa: 0.272,
1626
- hle: 0.055,
1627
-
1628
- // Capabilities
1629
- contextWindow: 8192,
1630
- supportsReasoning: false,
1631
- supportsVision: false,
1632
-
1633
- // Metadata
1634
- lastUpdated: "2026-04-06",
1640
+ lastUpdated: "2026-06-01",
1641
+ originalModel: "Nanbeige4.1-3B",
1635
1642
  },
1636
1643
  "apertus-8b-instruct": {
1637
- // AA Intelligence Index (composite score)
1638
- intelligenceIndex: 5.9,
1639
- normalizedScore: 8,
1640
-
1641
1644
  // AA specific benchmarks
1642
1645
  codingIndex: 1.4,
1643
1646
  mathIndex: undefined,
@@ -1653,131 +1656,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1653
1656
  supportsVision: false,
1654
1657
 
1655
1658
  // Metadata
1656
- lastUpdated: "2026-04-06",
1657
- },
1658
- "qwen-chat-14b": {
1659
- // AA Intelligence Index (composite score)
1660
- intelligenceIndex: 7.4,
1661
- normalizedScore: 11,
1662
-
1663
- // AA specific benchmarks
1664
- codingIndex: undefined,
1665
- mathIndex: undefined,
1666
-
1667
- // Academic benchmarks
1668
- mmluPro: undefined,
1669
- gpqa: undefined,
1670
- hle: undefined,
1671
-
1672
- // Capabilities
1673
- contextWindow: 8192,
1674
- supportsReasoning: false,
1675
- supportsVision: false,
1676
-
1677
- // Metadata
1678
- lastUpdated: "2026-04-06",
1659
+ lastUpdated: "2026-06-01",
1660
+ originalModel: "Apertus 8B Instruct",
1679
1661
  },
1680
- "glm-4.6v-reasoning": {
1681
- // AA Intelligence Index (composite score)
1682
- intelligenceIndex: 23.4,
1683
- normalizedScore: 33,
1684
-
1685
- // AA specific benchmarks
1686
- codingIndex: 19.7,
1687
- mathIndex: 85.3,
1688
-
1689
- // Academic benchmarks
1690
- mmluPro: 0.799,
1691
- gpqa: 0.719,
1692
- hle: 0.089,
1693
-
1694
- // Capabilities
1695
- contextWindow: 8192,
1696
- supportsReasoning: false,
1697
- supportsVision: false,
1698
-
1699
- // Metadata
1700
- lastUpdated: "2026-04-06",
1701
- },
1702
- "glm-5-turbo": {
1703
- // AA Intelligence Index (composite score)
1704
- intelligenceIndex: 46.8,
1705
- normalizedScore: 67,
1706
-
1707
- // AA specific benchmarks
1708
- codingIndex: 36.8,
1709
- mathIndex: undefined,
1710
-
1711
- // Academic benchmarks
1712
- mmluPro: undefined,
1713
- gpqa: 0.847,
1714
- hle: 0.254,
1715
-
1716
- // Capabilities
1717
- contextWindow: 8192,
1718
- supportsReasoning: false,
1719
- supportsVision: false,
1720
-
1721
- // Metadata
1722
- lastUpdated: "2026-04-06",
1723
- },
1724
- "glm-4.6v-non-reasoning": {
1725
- // AA Intelligence Index (composite score)
1726
- intelligenceIndex: 17.1,
1727
- normalizedScore: 24,
1728
-
1729
- // AA specific benchmarks
1730
- codingIndex: 11.1,
1731
- mathIndex: 26.3,
1732
-
1733
- // Academic benchmarks
1734
- mmluPro: 0.752,
1735
- gpqa: 0.566,
1736
- hle: 0.037,
1737
-
1738
- // Capabilities
1739
- contextWindow: 8192,
1740
- supportsReasoning: false,
1741
- supportsVision: false,
1742
-
1743
- // Metadata
1744
- lastUpdated: "2026-04-06",
1745
- },
1746
- "glm-5-non-reasoning": {
1747
- // AA Intelligence Index (composite score)
1748
- intelligenceIndex: 40.6,
1749
- normalizedScore: 58,
1750
-
1751
- // AA specific benchmarks
1752
- codingIndex: 39,
1753
- mathIndex: undefined,
1754
-
1755
- // Academic benchmarks
1756
- mmluPro: undefined,
1757
- gpqa: 0.666,
1758
- hle: 0.072,
1759
-
1760
- // Capabilities
1761
- contextWindow: 8192,
1762
- supportsReasoning: false,
1763
- supportsVision: false,
1764
-
1765
- // Metadata
1766
- lastUpdated: "2026-04-06",
1767
- },
1768
- "glm-5-reasoning": {
1769
- // AA Intelligence Index (composite score)
1770
- intelligenceIndex: 49.8,
1771
- normalizedScore: 71,
1772
-
1662
+ "apertus-70b-instruct": {
1773
1663
  // AA specific benchmarks
1774
- codingIndex: 44.2,
1664
+ codingIndex: 1.9,
1775
1665
  mathIndex: undefined,
1776
1666
 
1777
1667
  // Academic benchmarks
1778
1668
  mmluPro: undefined,
1779
- gpqa: 0.82,
1780
- hle: 0.272,
1669
+ gpqa: 0.272,
1670
+ hle: 0.055,
1781
1671
 
1782
1672
  // Capabilities
1783
1673
  contextWindow: 8192,
@@ -1785,21 +1675,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1785
1675
  supportsVision: false,
1786
1676
 
1787
1677
  // Metadata
1788
- lastUpdated: "2026-04-06",
1678
+ lastUpdated: "2026-06-01",
1679
+ originalModel: "Apertus 70B Instruct",
1789
1680
  },
1790
- "glm-5v-turbo-reasoning": {
1791
- // AA Intelligence Index (composite score)
1792
- intelligenceIndex: 42.9,
1793
- normalizedScore: 61,
1794
-
1681
+ "minicpm5-1b-non-reasoning": {
1795
1682
  // AA specific benchmarks
1796
- codingIndex: 36.2,
1683
+ codingIndex: 0.5,
1797
1684
  mathIndex: undefined,
1798
1685
 
1799
1686
  // Academic benchmarks
1800
1687
  mmluPro: undefined,
1801
- gpqa: 0.809,
1802
- hle: 0.158,
1688
+ gpqa: 0.269,
1689
+ hle: 0.046,
1803
1690
 
1804
1691
  // Capabilities
1805
1692
  contextWindow: 8192,
@@ -1807,175 +1694,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1807
1694
  supportsVision: false,
1808
1695
 
1809
1696
  // Metadata
1810
- lastUpdated: "2026-04-06",
1697
+ lastUpdated: "2026-06-01",
1698
+ originalModel: "MiniCPM5-1B (Non-reasoning)",
1811
1699
  },
1812
- "tiny-aya-global": {
1813
- // AA Intelligence Index (composite score)
1814
- intelligenceIndex: 4.7,
1815
- normalizedScore: 7,
1816
-
1700
+ "minicpm-v-4.6-1.3b": {
1817
1701
  // AA specific benchmarks
1818
- codingIndex: 1.2,
1702
+ codingIndex: 0.7,
1819
1703
  mathIndex: undefined,
1820
1704
 
1821
1705
  // Academic benchmarks
1822
1706
  mmluPro: undefined,
1823
1707
  gpqa: 0.305,
1824
- hle: 0.052,
1825
-
1826
- // Capabilities
1827
- contextWindow: 8192,
1828
- supportsReasoning: false,
1829
- supportsVision: false,
1830
-
1831
- // Metadata
1832
- lastUpdated: "2026-04-06",
1833
- },
1834
- "command-a": {
1835
- // AA Intelligence Index (composite score)
1836
- intelligenceIndex: 13.5,
1837
- normalizedScore: 19,
1838
-
1839
- // AA specific benchmarks
1840
- codingIndex: 9.9,
1841
- mathIndex: 13,
1842
-
1843
- // Academic benchmarks
1844
- mmluPro: 0.712,
1845
- gpqa: 0.527,
1846
- hle: 0.046,
1847
-
1848
- // Capabilities
1849
- contextWindow: 8192,
1850
- supportsReasoning: false,
1851
- supportsVision: false,
1852
-
1853
- // Metadata
1854
- lastUpdated: "2026-04-06",
1855
- },
1856
- "apriel-v1.6-15b-thinker": {
1857
- // AA Intelligence Index (composite score)
1858
- intelligenceIndex: 27.6,
1859
- normalizedScore: 39,
1860
-
1861
- // AA specific benchmarks
1862
- codingIndex: 22,
1863
- mathIndex: 88,
1864
-
1865
- // Academic benchmarks
1866
- mmluPro: 0.79,
1867
- gpqa: 0.733,
1868
- hle: 0.098,
1869
-
1870
- // Capabilities
1871
- contextWindow: 8192,
1872
- supportsReasoning: false,
1873
- supportsVision: false,
1874
-
1875
- // Metadata
1876
- lastUpdated: "2026-04-06",
1877
- },
1878
- "jamba-reasoning-3b": {
1879
- // AA Intelligence Index (composite score)
1880
- intelligenceIndex: 9.6,
1881
- normalizedScore: 14,
1882
-
1883
- // AA specific benchmarks
1884
- codingIndex: 2.5,
1885
- mathIndex: 10.7,
1886
-
1887
- // Academic benchmarks
1888
- mmluPro: 0.577,
1889
- gpqa: 0.333,
1890
- hle: 0.046,
1891
-
1892
- // Capabilities
1893
- contextWindow: 8192,
1894
- supportsReasoning: false,
1895
- supportsVision: false,
1896
-
1897
- // Metadata
1898
- lastUpdated: "2026-04-06",
1899
- },
1900
- "jamba-1.7-large": {
1901
- // AA Intelligence Index (composite score)
1902
- intelligenceIndex: 10.9,
1903
- normalizedScore: 16,
1904
-
1905
- // AA specific benchmarks
1906
- codingIndex: 7.8,
1907
- mathIndex: 2.3,
1908
-
1909
- // Academic benchmarks
1910
- mmluPro: 0.577,
1911
- gpqa: 0.39,
1912
- hle: 0.038,
1913
-
1914
- // Capabilities
1915
- contextWindow: 8192,
1916
- supportsReasoning: false,
1917
- supportsVision: false,
1918
-
1919
- // Metadata
1920
- lastUpdated: "2026-04-06",
1921
- },
1922
- "jamba-1.7-mini": {
1923
- // AA Intelligence Index (composite score)
1924
- intelligenceIndex: 8.1,
1925
- normalizedScore: 12,
1926
-
1927
- // AA specific benchmarks
1928
- codingIndex: 3.1,
1929
- mathIndex: 0.3,
1930
-
1931
- // Academic benchmarks
1932
- mmluPro: 0.388,
1933
- gpqa: 0.322,
1934
- hle: 0.045,
1935
-
1936
- // Capabilities
1937
- contextWindow: 8192,
1938
- supportsReasoning: false,
1939
- supportsVision: false,
1940
-
1941
- // Metadata
1942
- lastUpdated: "2026-04-06",
1943
- },
1944
- "qwen3-next-80b-a3b-reasoning": {
1945
- // AA Intelligence Index (composite score)
1946
- intelligenceIndex: 26.7,
1947
- normalizedScore: 38,
1948
-
1949
- // AA specific benchmarks
1950
- codingIndex: 19.5,
1951
- mathIndex: 84.3,
1952
-
1953
- // Academic benchmarks
1954
- mmluPro: 0.824,
1955
- gpqa: 0.759,
1956
- hle: 0.117,
1957
-
1958
- // Capabilities
1959
- contextWindow: 8192,
1960
- supportsReasoning: false,
1961
- supportsVision: false,
1962
-
1963
- // Metadata
1964
- lastUpdated: "2026-04-06",
1965
- },
1966
- "qwen3-coder-480b-a35b-instruct": {
1967
- // AA Intelligence Index (composite score)
1968
- intelligenceIndex: 24.8,
1969
- normalizedScore: 35,
1970
-
1971
- // AA specific benchmarks
1972
- codingIndex: 24.6,
1973
- mathIndex: 39.3,
1974
-
1975
- // Academic benchmarks
1976
- mmluPro: 0.788,
1977
- gpqa: 0.618,
1978
- hle: 0.044,
1708
+ hle: 0.049,
1979
1709
 
1980
1710
  // Capabilities
1981
1711
  contextWindow: 8192,
@@ -1983,6 +1713,7 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
1983
1713
  supportsVision: false,
1984
1714
 
1985
1715
  // Metadata
1986
- lastUpdated: "2026-04-06",
1716
+ lastUpdated: "2026-06-01",
1717
+ originalModel: "MiniCPM-V 4.6 1.3B",
1987
1718
  },
1988
1719
  };