pi-free 2.0.13 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,50 @@
1
1
  // Auto-generated benchmark data chunk 0
2
- // Models: gpt-oss-120b-high .. solar-open-100b-reasoning (90 entries)
2
+ // Models: gpt-oss-20b-low .. lfm2-8b-a1b (90 entries)
3
+ // Last updated: 2026-06-01
3
4
  // DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
4
5
 
5
6
  import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
6
7
 
7
8
  export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
8
- "gpt-oss-120b-high": {
9
- // AA Intelligence Index (composite score)
10
- intelligenceIndex: 33.3,
11
- normalizedScore: 48,
9
+ "gpt-oss-20b-low": {
10
+ // AA specific benchmarks
11
+ codingIndex: 14.4,
12
+ mathIndex: 62.3,
13
+
14
+ // Academic benchmarks
15
+ mmluPro: 0.718,
16
+ gpqa: 0.611,
17
+ hle: 0.051,
18
+
19
+ // Capabilities
20
+ contextWindow: 8192,
21
+ supportsReasoning: false,
22
+ supportsVision: false,
23
+
24
+ // Metadata
25
+ lastUpdated: "2026-06-01",
26
+ originalModel: "gpt-oss-20B (low)",
27
+ },
28
+ "gpt-oss-20b-high": {
29
+ // AA specific benchmarks
30
+ codingIndex: 18.5,
31
+ mathIndex: 89.3,
32
+
33
+ // Academic benchmarks
34
+ mmluPro: 0.748,
35
+ gpqa: 0.688,
36
+ hle: 0.098,
37
+
38
+ // Capabilities
39
+ contextWindow: 8192,
40
+ supportsReasoning: false,
41
+ supportsVision: false,
12
42
 
43
+ // Metadata
44
+ lastUpdated: "2026-06-01",
45
+ originalModel: "gpt-oss-20B (high)",
46
+ },
47
+ "gpt-oss-120b-high": {
13
48
  // AA specific benchmarks
14
49
  codingIndex: 28.6,
15
50
  mathIndex: 93.4,
@@ -25,13 +60,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
25
60
  supportsVision: false,
26
61
 
27
62
  // Metadata
28
- lastUpdated: "2026-04-06",
63
+ lastUpdated: "2026-06-01",
64
+ originalModel: "gpt-oss-120b (high)",
29
65
  },
30
66
  "gpt-5.4-mini-xhigh": {
31
- // AA Intelligence Index (composite score)
32
- intelligenceIndex: 48.1,
33
- normalizedScore: 69,
34
-
35
67
  // AA specific benchmarks
36
68
  codingIndex: 51.5,
37
69
  mathIndex: undefined,
@@ -47,13 +79,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
47
79
  supportsVision: false,
48
80
 
49
81
  // Metadata
50
- lastUpdated: "2026-04-06",
82
+ lastUpdated: "2026-06-01",
83
+ originalModel: "GPT-5.4 mini (xhigh)",
51
84
  },
52
85
  "gpt-5.4-nano-xhigh": {
53
- // AA Intelligence Index (composite score)
54
- intelligenceIndex: 44.4,
55
- normalizedScore: 63,
56
-
57
86
  // AA specific benchmarks
58
87
  codingIndex: 43.9,
59
88
  mathIndex: undefined,
@@ -69,13 +98,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
69
98
  supportsVision: false,
70
99
 
71
100
  // Metadata
72
- lastUpdated: "2026-04-06",
101
+ lastUpdated: "2026-06-01",
102
+ originalModel: "GPT-5.4 nano (xhigh)",
73
103
  },
74
104
  "gpt-oss-120b-low": {
75
- // AA Intelligence Index (composite score)
76
- intelligenceIndex: 24.5,
77
- normalizedScore: 35,
78
-
79
105
  // AA specific benchmarks
80
106
  codingIndex: 15.5,
81
107
  mathIndex: 66.7,
@@ -91,21 +117,37 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
91
117
  supportsVision: false,
92
118
 
93
119
  // Metadata
94
- lastUpdated: "2026-04-06",
120
+ lastUpdated: "2026-06-01",
121
+ originalModel: "gpt-oss-120b (low)",
95
122
  },
96
- "gpt-5.4-nano-non-reasoning": {
97
- // AA Intelligence Index (composite score)
98
- intelligenceIndex: 24.4,
99
- normalizedScore: 35,
123
+ "o3": {
124
+ // AA specific benchmarks
125
+ codingIndex: 38.4,
126
+ mathIndex: 88.3,
127
+
128
+ // Academic benchmarks
129
+ mmluPro: 0.853,
130
+ gpqa: 0.827,
131
+ hle: 0.2,
132
+
133
+ // Capabilities
134
+ contextWindow: 8192,
135
+ supportsReasoning: false,
136
+ supportsVision: false,
100
137
 
138
+ // Metadata
139
+ lastUpdated: "2026-06-01",
140
+ originalModel: "o3",
141
+ },
142
+ "grok-1": {
101
143
  // AA specific benchmarks
102
- codingIndex: 27.9,
144
+ codingIndex: undefined,
103
145
  mathIndex: undefined,
104
146
 
105
147
  // Academic benchmarks
106
148
  mmluPro: undefined,
107
- gpqa: 0.558,
108
- hle: 0.042,
149
+ gpqa: undefined,
150
+ hle: undefined,
109
151
 
110
152
  // Capabilities
111
153
  contextWindow: 8192,
@@ -113,21 +155,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
113
155
  supportsVision: false,
114
156
 
115
157
  // Metadata
116
- lastUpdated: "2026-04-06",
158
+ lastUpdated: "2026-06-01",
159
+ originalModel: "Grok-1",
117
160
  },
118
- "gpt-5.4-nano-medium": {
119
- // AA Intelligence Index (composite score)
120
- intelligenceIndex: 38.1,
121
- normalizedScore: 54,
122
-
161
+ "gpt-5.4-mini-medium": {
123
162
  // AA specific benchmarks
124
- codingIndex: 35,
163
+ codingIndex: 37.5,
125
164
  mathIndex: undefined,
126
165
 
127
166
  // Academic benchmarks
128
167
  mmluPro: undefined,
129
- gpqa: 0.761,
130
- hle: 0.147,
168
+ gpqa: 0.823,
169
+ hle: 0.171,
131
170
 
132
171
  // Capabilities
133
172
  contextWindow: 8192,
@@ -135,13 +174,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
135
174
  supportsVision: false,
136
175
 
137
176
  // Metadata
138
- lastUpdated: "2026-04-06",
177
+ lastUpdated: "2026-06-01",
178
+ originalModel: "GPT-5.4 mini (medium)",
139
179
  },
140
180
  "gpt-5.4-mini-non-reasoning": {
141
- // AA Intelligence Index (composite score)
142
- intelligenceIndex: 23.3,
143
- normalizedScore: 33,
144
-
145
181
  // AA specific benchmarks
146
182
  codingIndex: 25.3,
147
183
  mathIndex: undefined,
@@ -157,21 +193,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
157
193
  supportsVision: false,
158
194
 
159
195
  // Metadata
160
- lastUpdated: "2026-04-06",
196
+ lastUpdated: "2026-06-01",
197
+ originalModel: "GPT-5.4 mini (Non-Reasoning)",
161
198
  },
162
- "gpt-5.4-non-reasoning": {
163
- // AA Intelligence Index (composite score)
164
- intelligenceIndex: 35.4,
165
- normalizedScore: 51,
166
-
199
+ "gpt-5.5-instant-may-2026": {
167
200
  // AA specific benchmarks
168
- codingIndex: 41,
201
+ codingIndex: 45.1,
169
202
  mathIndex: undefined,
170
203
 
171
204
  // Academic benchmarks
172
205
  mmluPro: undefined,
173
- gpqa: 0.748,
174
- hle: 0.106,
206
+ gpqa: 0.846,
207
+ hle: 0.203,
175
208
 
176
209
  // Capabilities
177
210
  contextWindow: 8192,
@@ -179,21 +212,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
179
212
  supportsVision: false,
180
213
 
181
214
  // Metadata
182
- lastUpdated: "2026-04-06",
215
+ lastUpdated: "2026-06-01",
216
+ originalModel: "GPT-5.5 Instant (May 2026)",
183
217
  },
184
- "gpt-5.4-mini-medium": {
185
- // AA Intelligence Index (composite score)
186
- intelligenceIndex: 37.7,
187
- normalizedScore: 54,
188
-
218
+ "gpt-5.5-low": {
189
219
  // AA specific benchmarks
190
- codingIndex: 37.5,
220
+ codingIndex: 52.1,
191
221
  mathIndex: undefined,
192
222
 
193
223
  // Academic benchmarks
194
224
  mmluPro: undefined,
195
- gpqa: 0.823,
196
- hle: 0.171,
225
+ gpqa: 0.91,
226
+ hle: 0.31,
197
227
 
198
228
  // Capabilities
199
229
  contextWindow: 8192,
@@ -201,21 +231,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
201
231
  supportsVision: false,
202
232
 
203
233
  // Metadata
204
- lastUpdated: "2026-04-06",
234
+ lastUpdated: "2026-06-01",
235
+ originalModel: "GPT-5.5 (low)",
205
236
  },
206
- "gpt-oss-20b-high": {
207
- // AA Intelligence Index (composite score)
208
- intelligenceIndex: 24.5,
209
- normalizedScore: 35,
210
-
237
+ "gpt-5.4-nano-non-reasoning": {
211
238
  // AA specific benchmarks
212
- codingIndex: 18.5,
213
- mathIndex: 89.3,
239
+ codingIndex: 27.9,
240
+ mathIndex: undefined,
214
241
 
215
242
  // Academic benchmarks
216
- mmluPro: 0.748,
217
- gpqa: 0.688,
218
- hle: 0.098,
243
+ mmluPro: undefined,
244
+ gpqa: 0.558,
245
+ hle: 0.042,
219
246
 
220
247
  // Capabilities
221
248
  contextWindow: 8192,
@@ -223,21 +250,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
223
250
  supportsVision: false,
224
251
 
225
252
  // Metadata
226
- lastUpdated: "2026-04-06",
253
+ lastUpdated: "2026-06-01",
254
+ originalModel: "GPT-5.4 nano (Non-Reasoning)",
227
255
  },
228
- "gpt-5.4-xhigh": {
229
- // AA Intelligence Index (composite score)
230
- intelligenceIndex: 57.2,
231
- normalizedScore: 82,
232
-
256
+ "gpt-5.5-high": {
233
257
  // AA specific benchmarks
234
- codingIndex: 57.3,
258
+ codingIndex: 58.5,
235
259
  mathIndex: undefined,
236
260
 
237
261
  // Academic benchmarks
238
262
  mmluPro: undefined,
239
- gpqa: 0.92,
240
- hle: 0.416,
263
+ gpqa: 0.932,
264
+ hle: 0.43,
241
265
 
242
266
  // Capabilities
243
267
  contextWindow: 8192,
@@ -245,21 +269,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
245
269
  supportsVision: false,
246
270
 
247
271
  // Metadata
248
- lastUpdated: "2026-04-06",
272
+ lastUpdated: "2026-06-01",
273
+ originalModel: "GPT-5.5 (high)",
249
274
  },
250
- "grok-1": {
251
- // AA Intelligence Index (composite score)
252
- intelligenceIndex: 11.7,
253
- normalizedScore: 17,
254
-
275
+ "gpt-5.4-nano-medium": {
255
276
  // AA specific benchmarks
256
- codingIndex: undefined,
277
+ codingIndex: 35,
257
278
  mathIndex: undefined,
258
279
 
259
280
  // Academic benchmarks
260
281
  mmluPro: undefined,
261
- gpqa: undefined,
262
- hle: undefined,
282
+ gpqa: 0.761,
283
+ hle: 0.147,
263
284
 
264
285
  // Capabilities
265
286
  contextWindow: 8192,
@@ -267,21 +288,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
267
288
  supportsVision: false,
268
289
 
269
290
  // Metadata
270
- lastUpdated: "2026-04-06",
291
+ lastUpdated: "2026-06-01",
292
+ originalModel: "GPT-5.4 nano (medium)",
271
293
  },
272
- "gpt-oss-20b-low": {
273
- // AA Intelligence Index (composite score)
274
- intelligenceIndex: 20.8,
275
- normalizedScore: 30,
276
-
294
+ "gpt-5.5-non-reasoning": {
277
295
  // AA specific benchmarks
278
- codingIndex: 14.4,
279
- mathIndex: 62.3,
296
+ codingIndex: 48.6,
297
+ mathIndex: undefined,
280
298
 
281
299
  // Academic benchmarks
282
- mmluPro: 0.718,
283
- gpqa: 0.611,
284
- hle: 0.051,
300
+ mmluPro: undefined,
301
+ gpqa: 0.768,
302
+ hle: 0.126,
285
303
 
286
304
  // Capabilities
287
305
  contextWindow: 8192,
@@ -289,21 +307,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
289
307
  supportsVision: false,
290
308
 
291
309
  // Metadata
292
- lastUpdated: "2026-04-06",
310
+ lastUpdated: "2026-06-01",
311
+ originalModel: "GPT-5.5 (Non-reasoning)",
293
312
  },
294
- o3: {
295
- // AA Intelligence Index (composite score)
296
- intelligenceIndex: 38.4,
297
- normalizedScore: 55,
298
-
313
+ "gpt-5.5-medium": {
299
314
  // AA specific benchmarks
300
- codingIndex: 38.4,
301
- mathIndex: 88.3,
315
+ codingIndex: 56.2,
316
+ mathIndex: undefined,
302
317
 
303
318
  // Academic benchmarks
304
- mmluPro: 0.853,
305
- gpqa: 0.827,
306
- hle: 0.2,
319
+ mmluPro: undefined,
320
+ gpqa: 0.926,
321
+ hle: 0.406,
307
322
 
308
323
  // Capabilities
309
324
  contextWindow: 8192,
@@ -311,13 +326,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
311
326
  supportsVision: false,
312
327
 
313
328
  // Metadata
314
- lastUpdated: "2026-04-06",
329
+ lastUpdated: "2026-06-01",
330
+ originalModel: "GPT-5.5 (medium)",
315
331
  },
316
332
  "gpt-5.3-codex-xhigh": {
317
- // AA Intelligence Index (composite score)
318
- intelligenceIndex: 54,
319
- normalizedScore: 77,
320
-
321
333
  // AA specific benchmarks
322
334
  codingIndex: 53.1,
323
335
  mathIndex: undefined,
@@ -333,13 +345,29 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
333
345
  supportsVision: false,
334
346
 
335
347
  // Metadata
336
- lastUpdated: "2026-04-06",
348
+ lastUpdated: "2026-06-01",
349
+ originalModel: "GPT-5.3 Codex (xhigh)",
337
350
  },
338
- "llama-3.3-instruct-70b": {
339
- // AA Intelligence Index (composite score)
340
- intelligenceIndex: 14.5,
341
- normalizedScore: 21,
351
+ "gpt-5.5-xhigh": {
352
+ // AA specific benchmarks
353
+ codingIndex: 59.1,
354
+ mathIndex: undefined,
355
+
356
+ // Academic benchmarks
357
+ mmluPro: undefined,
358
+ gpqa: 0.935,
359
+ hle: 0.443,
342
360
 
361
+ // Capabilities
362
+ contextWindow: 8192,
363
+ supportsReasoning: false,
364
+ supportsVision: false,
365
+
366
+ // Metadata
367
+ lastUpdated: "2026-06-01",
368
+ originalModel: "GPT-5.5 (xhigh)",
369
+ },
370
+ "llama-3.3-instruct-70b": {
343
371
  // AA specific benchmarks
344
372
  codingIndex: 10.7,
345
373
  mathIndex: 7.7,
@@ -355,13 +383,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
355
383
  supportsVision: false,
356
384
 
357
385
  // Metadata
358
- lastUpdated: "2026-04-06",
386
+ lastUpdated: "2026-06-01",
387
+ originalModel: "Llama 3.3 Instruct 70B",
359
388
  },
360
389
  "llama-3.1-instruct-405b": {
361
- // AA Intelligence Index (composite score)
362
- intelligenceIndex: 17.4,
363
- normalizedScore: 25,
364
-
365
390
  // AA specific benchmarks
366
391
  codingIndex: 14.5,
367
392
  mathIndex: 3,
@@ -377,13 +402,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
377
402
  supportsVision: false,
378
403
 
379
404
  // Metadata
380
- lastUpdated: "2026-04-06",
405
+ lastUpdated: "2026-06-01",
406
+ originalModel: "Llama 3.1 Instruct 405B",
381
407
  },
382
408
  "llama-3.2-instruct-90b-vision": {
383
- // AA Intelligence Index (composite score)
384
- intelligenceIndex: 11.9,
385
- normalizedScore: 17,
386
-
387
409
  // AA specific benchmarks
388
410
  codingIndex: undefined,
389
411
  mathIndex: undefined,
@@ -399,15 +421,12 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
399
421
  supportsVision: false,
400
422
 
401
423
  // Metadata
402
- lastUpdated: "2026-04-06",
424
+ lastUpdated: "2026-06-01",
425
+ originalModel: "Llama 3.2 Instruct 90B (Vision)",
403
426
  },
404
427
  "llama-3.2-instruct-11b-vision": {
405
- // AA Intelligence Index (composite score)
406
- intelligenceIndex: 8.7,
407
- normalizedScore: 12,
408
-
409
428
  // AA specific benchmarks
410
- codingIndex: 4.3,
429
+ codingIndex: 4.2,
411
430
  mathIndex: 1.7,
412
431
 
413
432
  // Academic benchmarks
@@ -421,13 +440,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
421
440
  supportsVision: false,
422
441
 
423
442
  // Metadata
424
- lastUpdated: "2026-04-06",
443
+ lastUpdated: "2026-06-01",
444
+ originalModel: "Llama 3.2 Instruct 11B (Vision)",
425
445
  },
426
446
  "llama-4-maverick": {
427
- // AA Intelligence Index (composite score)
428
- intelligenceIndex: 18.4,
429
- normalizedScore: 26,
430
-
431
447
  // AA specific benchmarks
432
448
  codingIndex: 15.6,
433
449
  mathIndex: 19.3,
@@ -443,13 +459,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
443
459
  supportsVision: false,
444
460
 
445
461
  // Metadata
446
- lastUpdated: "2026-04-06",
462
+ lastUpdated: "2026-06-01",
463
+ originalModel: "Llama 4 Maverick",
447
464
  },
448
465
  "llama-4-scout": {
449
- // AA Intelligence Index (composite score)
450
- intelligenceIndex: 13.5,
451
- normalizedScore: 19,
452
-
453
466
  // AA specific benchmarks
454
467
  codingIndex: 6.7,
455
468
  mathIndex: 14,
@@ -465,21 +478,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
465
478
  supportsVision: false,
466
479
 
467
480
  // Metadata
468
- lastUpdated: "2026-04-06",
481
+ lastUpdated: "2026-06-01",
482
+ originalModel: "Llama 4 Scout",
469
483
  },
470
- "gemma-3-12b-instruct": {
471
- // AA Intelligence Index (composite score)
472
- intelligenceIndex: 8.8,
473
- normalizedScore: 13,
474
-
484
+ "muse-spark": {
475
485
  // AA specific benchmarks
476
- codingIndex: 6.3,
477
- mathIndex: 18.3,
486
+ codingIndex: 47.5,
487
+ mathIndex: undefined,
478
488
 
479
489
  // Academic benchmarks
480
- mmluPro: 0.595,
481
- gpqa: 0.349,
482
- hle: 0.048,
490
+ mmluPro: undefined,
491
+ gpqa: 0.884,
492
+ hle: 0.399,
483
493
 
484
494
  // Capabilities
485
495
  contextWindow: 8192,
@@ -487,21 +497,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
487
497
  supportsVision: false,
488
498
 
489
499
  // Metadata
490
- lastUpdated: "2026-04-06",
500
+ lastUpdated: "2026-06-01",
501
+ originalModel: "Muse Spark",
491
502
  },
492
- "gemini-3-flash-preview-non-reasoning": {
493
- // AA Intelligence Index (composite score)
494
- intelligenceIndex: 35,
495
- normalizedScore: 50,
496
-
503
+ "gemini-3.1-pro-preview": {
497
504
  // AA specific benchmarks
498
- codingIndex: 37.8,
499
- mathIndex: 55.7,
505
+ codingIndex: 55.5,
506
+ mathIndex: undefined,
500
507
 
501
508
  // Academic benchmarks
502
- mmluPro: 0.882,
503
- gpqa: 0.812,
504
- hle: 0.141,
509
+ mmluPro: undefined,
510
+ gpqa: 0.941,
511
+ hle: 0.447,
505
512
 
506
513
  // Capabilities
507
514
  contextWindow: 8192,
@@ -509,21 +516,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
509
516
  supportsVision: false,
510
517
 
511
518
  // Metadata
512
- lastUpdated: "2026-04-06",
519
+ lastUpdated: "2026-06-01",
520
+ originalModel: "Gemini 3.1 Pro Preview",
513
521
  },
514
- "gemma-3-27b-instruct": {
515
- // AA Intelligence Index (composite score)
516
- intelligenceIndex: 10.3,
517
- normalizedScore: 15,
518
-
522
+ "gemma-4-26b-a4b-non-reasoning": {
519
523
  // AA specific benchmarks
520
- codingIndex: 9.6,
521
- mathIndex: 20.7,
524
+ codingIndex: 29.1,
525
+ mathIndex: undefined,
522
526
 
523
527
  // Academic benchmarks
524
- mmluPro: 0.669,
525
- gpqa: 0.428,
526
- hle: 0.047,
528
+ mmluPro: undefined,
529
+ gpqa: 0.714,
530
+ hle: 0.107,
527
531
 
528
532
  // Capabilities
529
533
  contextWindow: 8192,
@@ -531,21 +535,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
531
535
  supportsVision: false,
532
536
 
533
537
  // Metadata
534
- lastUpdated: "2026-04-06",
538
+ lastUpdated: "2026-06-01",
539
+ originalModel: "Gemma 4 26B A4B (Non-reasoning)",
535
540
  },
536
- "gemma-4-31b-reasoning": {
537
- // AA Intelligence Index (composite score)
538
- intelligenceIndex: 39.2,
539
- normalizedScore: 56,
540
-
541
+ "gemini-3.5-flash-medium": {
541
542
  // AA specific benchmarks
542
- codingIndex: 38.7,
543
+ codingIndex: 43.9,
543
544
  mathIndex: undefined,
544
545
 
545
546
  // Academic benchmarks
546
547
  mmluPro: undefined,
547
- gpqa: 0.857,
548
- hle: 0.227,
548
+ gpqa: 0.921,
549
+ hle: 0.399,
549
550
 
550
551
  // Capabilities
551
552
  contextWindow: 8192,
@@ -553,21 +554,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
553
554
  supportsVision: false,
554
555
 
555
556
  // Metadata
556
- lastUpdated: "2026-04-06",
557
+ lastUpdated: "2026-06-01",
558
+ originalModel: "Gemini 3.5 Flash (medium)",
557
559
  },
558
- "gemma-3-4b-instruct": {
559
- // AA Intelligence Index (composite score)
560
- intelligenceIndex: 6.3,
561
- normalizedScore: 9,
562
-
560
+ "gemma-4-e2b-non-reasoning": {
563
561
  // AA specific benchmarks
564
- codingIndex: 2.9,
565
- mathIndex: 12.7,
562
+ codingIndex: 8.3,
563
+ mathIndex: undefined,
566
564
 
567
565
  // Academic benchmarks
568
- mmluPro: 0.417,
569
- gpqa: 0.291,
570
- hle: 0.052,
566
+ mmluPro: undefined,
567
+ gpqa: 0.405,
568
+ hle: 0.045,
571
569
 
572
570
  // Capabilities
573
571
  contextWindow: 8192,
@@ -575,21 +573,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
575
573
  supportsVision: false,
576
574
 
577
575
  // Metadata
578
- lastUpdated: "2026-04-06",
576
+ lastUpdated: "2026-06-01",
577
+ originalModel: "Gemma 4 E2B (Non-reasoning)",
579
578
  },
580
- "gemma-4-e4b": {
581
- // AA Intelligence Index (composite score)
582
- intelligenceIndex: 18.8,
583
- normalizedScore: 27,
584
-
579
+ "gemma-4-31b-reasoning": {
585
580
  // AA specific benchmarks
586
- codingIndex: 13.7,
581
+ codingIndex: 38.7,
587
582
  mathIndex: undefined,
588
583
 
589
584
  // Academic benchmarks
590
585
  mmluPro: undefined,
591
- gpqa: 0.576,
592
- hle: 0.037,
586
+ gpqa: 0.857,
587
+ hle: 0.227,
593
588
 
594
589
  // Capabilities
595
590
  contextWindow: 8192,
@@ -597,21 +592,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
597
592
  supportsVision: false,
598
593
 
599
594
  // Metadata
600
- lastUpdated: "2026-04-06",
595
+ lastUpdated: "2026-06-01",
596
+ originalModel: "Gemma 4 31B (Reasoning)",
601
597
  },
602
- "gemini-3-pro-preview-low": {
603
- // AA Intelligence Index (composite score)
604
- intelligenceIndex: 41.3,
605
- normalizedScore: 59,
606
-
598
+ "gemma-4-31b-non-reasoning": {
607
599
  // AA specific benchmarks
608
- codingIndex: 39.4,
609
- mathIndex: 86.7,
600
+ codingIndex: 33.9,
601
+ mathIndex: undefined,
610
602
 
611
603
  // Academic benchmarks
612
- mmluPro: 0.895,
613
- gpqa: 0.887,
614
- hle: 0.276,
604
+ mmluPro: undefined,
605
+ gpqa: 0.763,
606
+ hle: 0.115,
615
607
 
616
608
  // Capabilities
617
609
  contextWindow: 8192,
@@ -619,21 +611,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
619
611
  supportsVision: false,
620
612
 
621
613
  // Metadata
622
- lastUpdated: "2026-04-06",
614
+ lastUpdated: "2026-06-01",
615
+ originalModel: "Gemma 4 31B (Non-reasoning)",
623
616
  },
624
- "gemma-3-1b-instruct": {
625
- // AA Intelligence Index (composite score)
626
- intelligenceIndex: 5.5,
627
- normalizedScore: 8,
628
-
617
+ "gemma-4-26b-a4b-reasoning": {
629
618
  // AA specific benchmarks
630
- codingIndex: 0.2,
631
- mathIndex: 3.3,
619
+ codingIndex: 22.4,
620
+ mathIndex: undefined,
632
621
 
633
622
  // Academic benchmarks
634
- mmluPro: 0.135,
635
- gpqa: 0.237,
636
- hle: 0.052,
623
+ mmluPro: undefined,
624
+ gpqa: 0.792,
625
+ hle: 0.183,
637
626
 
638
627
  // Capabilities
639
628
  contextWindow: 8192,
@@ -641,21 +630,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
641
630
  supportsVision: false,
642
631
 
643
632
  // Metadata
644
- lastUpdated: "2026-04-06",
633
+ lastUpdated: "2026-06-01",
634
+ originalModel: "Gemma 4 26B A4B (Reasoning)",
645
635
  },
646
- "gemma-4-26b-a4b-reasoning": {
647
- // AA Intelligence Index (composite score)
648
- intelligenceIndex: 31.2,
649
- normalizedScore: 45,
650
-
636
+ "gemini-3.1-flash-lite": {
651
637
  // AA specific benchmarks
652
- codingIndex: 22.4,
638
+ codingIndex: 30.1,
653
639
  mathIndex: undefined,
654
640
 
655
641
  // Academic benchmarks
656
642
  mmluPro: undefined,
657
- gpqa: 0.792,
658
- hle: 0.183,
643
+ gpqa: 0.822,
644
+ hle: 0.162,
659
645
 
660
646
  // Capabilities
661
647
  contextWindow: 8192,
@@ -663,21 +649,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
663
649
  supportsVision: false,
664
650
 
665
651
  // Metadata
666
- lastUpdated: "2026-04-06",
652
+ lastUpdated: "2026-06-01",
653
+ originalModel: "Gemini 3.1 Flash-Lite",
667
654
  },
668
- "gemma-4-e2b": {
669
- // AA Intelligence Index (composite score)
670
- intelligenceIndex: 15.2,
671
- normalizedScore: 22,
672
-
655
+ "gemma-4-e4b-non-reasoning": {
673
656
  // AA specific benchmarks
674
- codingIndex: 9,
657
+ codingIndex: 6.4,
675
658
  mathIndex: undefined,
676
659
 
677
660
  // Academic benchmarks
678
661
  mmluPro: undefined,
679
- gpqa: 0.433,
680
- hle: 0.048,
662
+ gpqa: 0.549,
663
+ hle: 0.047,
681
664
 
682
665
  // Capabilities
683
666
  contextWindow: 8192,
@@ -685,21 +668,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
685
668
  supportsVision: false,
686
669
 
687
670
  // Metadata
688
- lastUpdated: "2026-04-06",
671
+ lastUpdated: "2026-06-01",
672
+ originalModel: "Gemma 4 E4B (Non-reasoning)",
689
673
  },
690
- "gemini-3.1-pro-preview": {
691
- // AA Intelligence Index (composite score)
692
- intelligenceIndex: 57.2,
693
- normalizedScore: 82,
694
-
674
+ "gemma-3-270m": {
695
675
  // AA specific benchmarks
696
- codingIndex: 55.5,
697
- mathIndex: undefined,
676
+ codingIndex: 0,
677
+ mathIndex: 2.3,
698
678
 
699
679
  // Academic benchmarks
700
- mmluPro: undefined,
701
- gpqa: 0.941,
702
- hle: 0.447,
680
+ mmluPro: 0.055,
681
+ gpqa: 0.224,
682
+ hle: 0.042,
703
683
 
704
684
  // Capabilities
705
685
  contextWindow: 8192,
@@ -707,21 +687,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
707
687
  supportsVision: false,
708
688
 
709
689
  // Metadata
710
- lastUpdated: "2026-04-06",
690
+ lastUpdated: "2026-06-01",
691
+ originalModel: "Gemma 3 270M",
711
692
  },
712
- "gemini-3-flash-preview-reasoning": {
713
- // AA Intelligence Index (composite score)
714
- intelligenceIndex: 46.4,
715
- normalizedScore: 66,
716
-
693
+ "gemma-4-e4b-reasoning": {
717
694
  // AA specific benchmarks
718
- codingIndex: 42.6,
719
- mathIndex: 97,
695
+ codingIndex: 13.7,
696
+ mathIndex: undefined,
720
697
 
721
698
  // Academic benchmarks
722
- mmluPro: 0.89,
723
- gpqa: 0.898,
724
- hle: 0.347,
699
+ mmluPro: undefined,
700
+ gpqa: 0.576,
701
+ hle: 0.037,
725
702
 
726
703
  // Capabilities
727
704
  contextWindow: 8192,
@@ -729,21 +706,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
729
706
  supportsVision: false,
730
707
 
731
708
  // Metadata
732
- lastUpdated: "2026-04-06",
709
+ lastUpdated: "2026-06-01",
710
+ originalModel: "Gemma 4 E4B (Reasoning)",
733
711
  },
734
- "gemma-3n-e4b-instruct": {
735
- // AA Intelligence Index (composite score)
736
- intelligenceIndex: 6.4,
737
- normalizedScore: 9,
738
-
712
+ "gemini-3.5-flash-high": {
739
713
  // AA specific benchmarks
740
- codingIndex: 4.2,
741
- mathIndex: 14.3,
714
+ codingIndex: 45,
715
+ mathIndex: undefined,
742
716
 
743
717
  // Academic benchmarks
744
- mmluPro: 0.488,
745
- gpqa: 0.296,
746
- hle: 0.044,
718
+ mmluPro: undefined,
719
+ gpqa: 0.922,
720
+ hle: 0.41,
747
721
 
748
722
  // Capabilities
749
723
  contextWindow: 8192,
@@ -751,21 +725,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
751
725
  supportsVision: false,
752
726
 
753
727
  // Metadata
754
- lastUpdated: "2026-04-06",
728
+ lastUpdated: "2026-06-01",
729
+ originalModel: "Gemini 3.5 Flash (high)",
755
730
  },
756
- "gemini-2.5-flash-lite-preview-sep-25-non-reasoning": {
757
- // AA Intelligence Index (composite score)
758
- intelligenceIndex: 19.4,
759
- normalizedScore: 28,
760
-
731
+ "gemini-3.5-flash-minimal": {
761
732
  // AA specific benchmarks
762
- codingIndex: 14.5,
763
- mathIndex: 46.7,
733
+ codingIndex: 47.1,
734
+ mathIndex: undefined,
764
735
 
765
736
  // Academic benchmarks
766
- mmluPro: 0.796,
767
- gpqa: 0.651,
768
- hle: 0.046,
737
+ mmluPro: undefined,
738
+ gpqa: 0.828,
739
+ hle: 0.231,
769
740
 
770
741
  // Capabilities
771
742
  contextWindow: 8192,
@@ -773,21 +744,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
773
744
  supportsVision: false,
774
745
 
775
746
  // Metadata
776
- lastUpdated: "2026-04-06",
747
+ lastUpdated: "2026-06-01",
748
+ originalModel: "Gemini 3.5 Flash (minimal)",
777
749
  },
778
- "gemma-3n-e2b-instruct": {
779
- // AA Intelligence Index (composite score)
780
- intelligenceIndex: 4.8,
781
- normalizedScore: 7,
782
-
750
+ "gemma-4-e2b-reasoning": {
783
751
  // AA specific benchmarks
784
- codingIndex: 2.2,
785
- mathIndex: 10.3,
752
+ codingIndex: 9,
753
+ mathIndex: undefined,
786
754
 
787
755
  // Academic benchmarks
788
- mmluPro: 0.378,
789
- gpqa: 0.229,
790
- hle: 0.04,
756
+ mmluPro: undefined,
757
+ gpqa: 0.433,
758
+ hle: 0.048,
791
759
 
792
760
  // Capabilities
793
761
  contextWindow: 8192,
@@ -795,15 +763,12 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
795
763
  supportsVision: false,
796
764
 
797
765
  // Metadata
798
- lastUpdated: "2026-04-06",
766
+ lastUpdated: "2026-06-01",
767
+ originalModel: "Gemma 4 E2B (Reasoning)",
799
768
  },
800
769
  "gemini-2.5-pro": {
801
- // AA Intelligence Index (composite score)
802
- intelligenceIndex: 34.6,
803
- normalizedScore: 49,
804
-
805
770
  // AA specific benchmarks
806
- codingIndex: 31.9,
771
+ codingIndex: 32,
807
772
  mathIndex: 87.7,
808
773
 
809
774
  // Academic benchmarks
@@ -817,21 +782,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
817
782
  supportsVision: false,
818
783
 
819
784
  // Metadata
820
- lastUpdated: "2026-04-06",
785
+ lastUpdated: "2026-06-01",
786
+ originalModel: "Gemini 2.5 Pro",
821
787
  },
822
- "gemma-3-270m": {
823
- // AA Intelligence Index (composite score)
824
- intelligenceIndex: 7.7,
825
- normalizedScore: 11,
826
-
788
+ "claude-sonnet-4.6-non-reasoning-high-effort": {
827
789
  // AA specific benchmarks
828
- codingIndex: 0,
829
- mathIndex: 2.3,
790
+ codingIndex: 46.4,
791
+ mathIndex: undefined,
830
792
 
831
793
  // Academic benchmarks
832
- mmluPro: 0.055,
833
- gpqa: 0.224,
834
- hle: 0.042,
794
+ mmluPro: undefined,
795
+ gpqa: 0.799,
796
+ hle: 0.132,
835
797
 
836
798
  // Capabilities
837
799
  contextWindow: 8192,
@@ -839,43 +801,37 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
839
801
  supportsVision: false,
840
802
 
841
803
  // Metadata
842
- lastUpdated: "2026-04-06",
804
+ lastUpdated: "2026-06-01",
805
+ originalModel: "Claude Sonnet 4.6 (Non-reasoning, High Effort)",
843
806
  },
844
- "gemini-2.5-flash-lite-preview-sep-25-reasoning": {
845
- // AA Intelligence Index (composite score)
846
- intelligenceIndex: 21.6,
847
- normalizedScore: 31,
848
-
807
+ "claude-opus-4.8-adaptive-reasoning-max-effort": {
849
808
  // AA specific benchmarks
850
- codingIndex: 18.1,
851
- mathIndex: 68.7,
809
+ codingIndex: 56.7,
810
+ mathIndex: undefined,
852
811
 
853
812
  // Academic benchmarks
854
- mmluPro: 0.808,
855
- gpqa: 0.709,
856
- hle: 0.066,
857
-
813
+ mmluPro: undefined,
814
+ gpqa: 0.92,
815
+ hle: 0.457,
816
+
858
817
  // Capabilities
859
818
  contextWindow: 8192,
860
819
  supportsReasoning: false,
861
820
  supportsVision: false,
862
821
 
863
822
  // Metadata
864
- lastUpdated: "2026-04-06",
823
+ lastUpdated: "2026-06-01",
824
+ originalModel: "Claude Opus 4.8 (Adaptive Reasoning, Max Effort)",
865
825
  },
866
- "gemini-3.1-flash-lite-preview": {
867
- // AA Intelligence Index (composite score)
868
- intelligenceIndex: 33.5,
869
- normalizedScore: 48,
870
-
826
+ "claude-4.5-haiku-reasoning": {
871
827
  // AA specific benchmarks
872
- codingIndex: 30.1,
873
- mathIndex: undefined,
828
+ codingIndex: 32.6,
829
+ mathIndex: 83.7,
874
830
 
875
831
  // Academic benchmarks
876
- mmluPro: undefined,
877
- gpqa: 0.822,
878
- hle: 0.162,
832
+ mmluPro: 0.76,
833
+ gpqa: 0.672,
834
+ hle: 0.097,
879
835
 
880
836
  // Capabilities
881
837
  contextWindow: 8192,
@@ -883,21 +839,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
883
839
  supportsVision: false,
884
840
 
885
841
  // Metadata
886
- lastUpdated: "2026-04-06",
842
+ lastUpdated: "2026-06-01",
843
+ originalModel: "Claude 4.5 Haiku (Reasoning)",
887
844
  },
888
- "claude-4.5-haiku-non-reasoning": {
889
- // AA Intelligence Index (composite score)
890
- intelligenceIndex: 31.1,
891
- normalizedScore: 44,
892
-
845
+ "claude-opus-4.7-adaptive-reasoning-max-effort": {
893
846
  // AA specific benchmarks
894
- codingIndex: 29.6,
895
- mathIndex: 39,
847
+ codingIndex: 52.5,
848
+ mathIndex: undefined,
896
849
 
897
850
  // Academic benchmarks
898
- mmluPro: 0.8,
899
- gpqa: 0.646,
900
- hle: 0.043,
851
+ mmluPro: undefined,
852
+ gpqa: 0.914,
853
+ hle: 0.396,
901
854
 
902
855
  // Capabilities
903
856
  contextWindow: 8192,
@@ -905,21 +858,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
905
858
  supportsVision: false,
906
859
 
907
860
  // Metadata
908
- lastUpdated: "2026-04-06",
861
+ lastUpdated: "2026-06-01",
862
+ originalModel: "Claude Opus 4.7 (Adaptive Reasoning, Max Effort)",
909
863
  },
910
- "claude-4.5-haiku-reasoning": {
911
- // AA Intelligence Index (composite score)
912
- intelligenceIndex: 37.1,
913
- normalizedScore: 53,
914
-
864
+ "claude-opus-4.7-non-reasoning-high-effort": {
915
865
  // AA specific benchmarks
916
- codingIndex: 32.6,
917
- mathIndex: 83.7,
866
+ codingIndex: 53.1,
867
+ mathIndex: undefined,
918
868
 
919
869
  // Academic benchmarks
920
- mmluPro: 0.76,
921
- gpqa: 0.672,
922
- hle: 0.097,
870
+ mmluPro: undefined,
871
+ gpqa: 0.885,
872
+ hle: 0.312,
923
873
 
924
874
  // Capabilities
925
875
  contextWindow: 8192,
@@ -927,21 +877,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
927
877
  supportsVision: false,
928
878
 
929
879
  // Metadata
930
- lastUpdated: "2026-04-06",
880
+ lastUpdated: "2026-06-01",
881
+ originalModel: "Claude Opus 4.7 (Non-reasoning, High Effort)",
931
882
  },
932
- "claude-opus-4.6-non-reasoning-high-effort": {
933
- // AA Intelligence Index (composite score)
934
- intelligenceIndex: 46.5,
935
- normalizedScore: 66,
936
-
883
+ "claude-sonnet-4.6-non-reasoning-low-effort": {
937
884
  // AA specific benchmarks
938
- codingIndex: 47.6,
885
+ codingIndex: 43,
939
886
  mathIndex: undefined,
940
887
 
941
888
  // Academic benchmarks
942
889
  mmluPro: undefined,
943
- gpqa: 0.84,
944
- hle: 0.186,
890
+ gpqa: 0.797,
891
+ hle: 0.108,
945
892
 
946
893
  // Capabilities
947
894
  contextWindow: 8192,
@@ -949,21 +896,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
949
896
  supportsVision: false,
950
897
 
951
898
  // Metadata
952
- lastUpdated: "2026-04-06",
899
+ lastUpdated: "2026-06-01",
900
+ originalModel: "Claude Sonnet 4.6 (Non-reasoning, Low Effort)",
953
901
  },
954
- "claude-opus-4.6-adaptive-reasoning-max-effort": {
955
- // AA Intelligence Index (composite score)
956
- intelligenceIndex: 53,
957
- normalizedScore: 76,
958
-
902
+ "claude-4.5-haiku-non-reasoning": {
959
903
  // AA specific benchmarks
960
- codingIndex: 48.1,
961
- mathIndex: undefined,
904
+ codingIndex: 29.6,
905
+ mathIndex: 39,
962
906
 
963
907
  // Academic benchmarks
964
- mmluPro: undefined,
965
- gpqa: 0.896,
966
- hle: 0.367,
908
+ mmluPro: 0.8,
909
+ gpqa: 0.646,
910
+ hle: 0.043,
967
911
 
968
912
  // Capabilities
969
913
  contextWindow: 8192,
@@ -971,21 +915,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
971
915
  supportsVision: false,
972
916
 
973
917
  // Metadata
974
- lastUpdated: "2026-04-06",
918
+ lastUpdated: "2026-06-01",
919
+ originalModel: "Claude 4.5 Haiku (Non-reasoning)",
975
920
  },
976
- "claude-sonnet-4.6-non-reasoning-high-effort": {
977
- // AA Intelligence Index (composite score)
978
- intelligenceIndex: 44.4,
979
- normalizedScore: 63,
980
-
921
+ "claude-sonnet-4.6-adaptive-reasoning-max-effort": {
981
922
  // AA specific benchmarks
982
- codingIndex: 46.4,
923
+ codingIndex: 50.9,
983
924
  mathIndex: undefined,
984
925
 
985
926
  // Academic benchmarks
986
927
  mmluPro: undefined,
987
- gpqa: 0.799,
988
- hle: 0.132,
928
+ gpqa: 0.875,
929
+ hle: 0.3,
989
930
 
990
931
  // Capabilities
991
932
  contextWindow: 8192,
@@ -993,21 +934,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
993
934
  supportsVision: false,
994
935
 
995
936
  // Metadata
996
- lastUpdated: "2026-04-06",
937
+ lastUpdated: "2026-06-01",
938
+ originalModel: "Claude Sonnet 4.6 (Adaptive Reasoning, Max Effort)",
997
939
  },
998
- "claude-sonnet-4.6-adaptive-reasoning-max-effort": {
999
- // AA Intelligence Index (composite score)
1000
- intelligenceIndex: 51.7,
1001
- normalizedScore: 74,
1002
-
940
+ "magistral-small-1.2": {
1003
941
  // AA specific benchmarks
1004
- codingIndex: 50.9,
1005
- mathIndex: undefined,
942
+ codingIndex: 14.8,
943
+ mathIndex: 80.3,
1006
944
 
1007
945
  // Academic benchmarks
1008
- mmluPro: undefined,
1009
- gpqa: 0.875,
1010
- hle: 0.3,
946
+ mmluPro: 0.768,
947
+ gpqa: 0.663,
948
+ hle: 0.061,
1011
949
 
1012
950
  // Capabilities
1013
951
  contextWindow: 8192,
@@ -1015,21 +953,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1015
953
  supportsVision: false,
1016
954
 
1017
955
  // Metadata
1018
- lastUpdated: "2026-04-06",
956
+ lastUpdated: "2026-06-01",
957
+ originalModel: "Magistral Small 1.2",
1019
958
  },
1020
- "claude-sonnet-4.6-non-reasoning-low-effort": {
1021
- // AA Intelligence Index (composite score)
1022
- intelligenceIndex: 42.6,
1023
- normalizedScore: 61,
1024
-
959
+ "mistral-medium-3.5": {
1025
960
  // AA specific benchmarks
1026
- codingIndex: 43,
961
+ codingIndex: 35.4,
1027
962
  mathIndex: undefined,
1028
963
 
1029
964
  // Academic benchmarks
1030
965
  mmluPro: undefined,
1031
- gpqa: 0.797,
1032
- hle: 0.108,
966
+ gpqa: 0.748,
967
+ hle: 0.128,
1033
968
 
1034
969
  // Capabilities
1035
970
  contextWindow: 8192,
@@ -1037,21 +972,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1037
972
  supportsVision: false,
1038
973
 
1039
974
  // Metadata
1040
- lastUpdated: "2026-04-06",
975
+ lastUpdated: "2026-06-01",
976
+ originalModel: "Mistral Medium 3.5",
1041
977
  },
1042
- "mistral-large-3": {
1043
- // AA Intelligence Index (composite score)
1044
- intelligenceIndex: 22.8,
1045
- normalizedScore: 33,
1046
-
978
+ "devstral-small-2": {
1047
979
  // AA specific benchmarks
1048
- codingIndex: 22.7,
1049
- mathIndex: 38,
980
+ codingIndex: 20.7,
981
+ mathIndex: 34.3,
1050
982
 
1051
983
  // Academic benchmarks
1052
- mmluPro: 0.807,
1053
- gpqa: 0.68,
1054
- hle: 0.041,
984
+ mmluPro: 0.678,
985
+ gpqa: 0.532,
986
+ hle: 0.034,
1055
987
 
1056
988
  // Capabilities
1057
989
  contextWindow: 8192,
@@ -1059,21 +991,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1059
991
  supportsVision: false,
1060
992
 
1061
993
  // Metadata
1062
- lastUpdated: "2026-04-06",
994
+ lastUpdated: "2026-06-01",
995
+ originalModel: "Devstral Small 2",
1063
996
  },
1064
- "devstral-2": {
1065
- // AA Intelligence Index (composite score)
1066
- intelligenceIndex: 22,
1067
- normalizedScore: 31,
1068
-
997
+ "ministral-3-8b": {
1069
998
  // AA specific benchmarks
1070
- codingIndex: 23.7,
1071
- mathIndex: 36.7,
999
+ codingIndex: 10,
1000
+ mathIndex: 31.7,
1072
1001
 
1073
1002
  // Academic benchmarks
1074
- mmluPro: 0.762,
1075
- gpqa: 0.594,
1076
- hle: 0.036,
1003
+ mmluPro: 0.642,
1004
+ gpqa: 0.471,
1005
+ hle: 0.043,
1077
1006
 
1078
1007
  // Capabilities
1079
1008
  contextWindow: 8192,
@@ -1081,21 +1010,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1081
1010
  supportsVision: false,
1082
1011
 
1083
1012
  // Metadata
1084
- lastUpdated: "2026-04-06",
1013
+ lastUpdated: "2026-06-01",
1014
+ originalModel: "Ministral 3 8B",
1085
1015
  },
1086
- "mistral-small-4-reasoning": {
1087
- // AA Intelligence Index (composite score)
1088
- intelligenceIndex: 27.2,
1089
- normalizedScore: 39,
1090
-
1016
+ "magistral-medium-1.2": {
1091
1017
  // AA specific benchmarks
1092
- codingIndex: 24.3,
1093
- mathIndex: undefined,
1018
+ codingIndex: 21.7,
1019
+ mathIndex: 82,
1094
1020
 
1095
1021
  // Academic benchmarks
1096
- mmluPro: undefined,
1097
- gpqa: 0.769,
1098
- hle: 0.095,
1022
+ mmluPro: 0.815,
1023
+ gpqa: 0.739,
1024
+ hle: 0.096,
1099
1025
 
1100
1026
  // Capabilities
1101
1027
  contextWindow: 8192,
@@ -1103,21 +1029,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1103
1029
  supportsVision: false,
1104
1030
 
1105
1031
  // Metadata
1106
- lastUpdated: "2026-04-06",
1032
+ lastUpdated: "2026-06-01",
1033
+ originalModel: "Magistral Medium 1.2",
1107
1034
  },
1108
- "ministral-3-8b": {
1109
- // AA Intelligence Index (composite score)
1110
- intelligenceIndex: 14.8,
1111
- normalizedScore: 21,
1112
-
1035
+ "mistral-large-3": {
1113
1036
  // AA specific benchmarks
1114
- codingIndex: 10,
1115
- mathIndex: 31.7,
1037
+ codingIndex: 22.7,
1038
+ mathIndex: 38,
1116
1039
 
1117
1040
  // Academic benchmarks
1118
- mmluPro: 0.642,
1119
- gpqa: 0.471,
1120
- hle: 0.043,
1041
+ mmluPro: 0.807,
1042
+ gpqa: 0.68,
1043
+ hle: 0.041,
1121
1044
 
1122
1045
  // Capabilities
1123
1046
  contextWindow: 8192,
@@ -1125,13 +1048,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1125
1048
  supportsVision: false,
1126
1049
 
1127
1050
  // Metadata
1128
- lastUpdated: "2026-04-06",
1051
+ lastUpdated: "2026-06-01",
1052
+ originalModel: "Mistral Large 3",
1129
1053
  },
1130
1054
  "ministral-3-14b": {
1131
- // AA Intelligence Index (composite score)
1132
- intelligenceIndex: 16,
1133
- normalizedScore: 23,
1134
-
1135
1055
  // AA specific benchmarks
1136
1056
  codingIndex: 10.9,
1137
1057
  mathIndex: 30,
@@ -1147,21 +1067,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1147
1067
  supportsVision: false,
1148
1068
 
1149
1069
  // Metadata
1150
- lastUpdated: "2026-04-06",
1070
+ lastUpdated: "2026-06-01",
1071
+ originalModel: "Ministral 3 14B",
1151
1072
  },
1152
- "magistral-medium-1.2": {
1153
- // AA Intelligence Index (composite score)
1154
- intelligenceIndex: 27.1,
1155
- normalizedScore: 39,
1156
-
1073
+ "mistral-small-4-reasoning": {
1157
1074
  // AA specific benchmarks
1158
- codingIndex: 21.7,
1159
- mathIndex: 82,
1075
+ codingIndex: 24.3,
1076
+ mathIndex: undefined,
1160
1077
 
1161
1078
  // Academic benchmarks
1162
- mmluPro: 0.815,
1163
- gpqa: 0.739,
1164
- hle: 0.096,
1079
+ mmluPro: undefined,
1080
+ gpqa: 0.769,
1081
+ hle: 0.095,
1165
1082
 
1166
1083
  // Capabilities
1167
1084
  contextWindow: 8192,
@@ -1169,13 +1086,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1169
1086
  supportsVision: false,
1170
1087
 
1171
1088
  // Metadata
1172
- lastUpdated: "2026-04-06",
1089
+ lastUpdated: "2026-06-01",
1090
+ originalModel: "Mistral Small 4 (Reasoning)",
1173
1091
  },
1174
1092
  "mistral-small-4-non-reasoning": {
1175
- // AA Intelligence Index (composite score)
1176
- intelligenceIndex: 18.6,
1177
- normalizedScore: 27,
1178
-
1179
1093
  // AA specific benchmarks
1180
1094
  codingIndex: 16.4,
1181
1095
  mathIndex: undefined,
@@ -1191,21 +1105,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1191
1105
  supportsVision: false,
1192
1106
 
1193
1107
  // Metadata
1194
- lastUpdated: "2026-04-06",
1108
+ lastUpdated: "2026-06-01",
1109
+ originalModel: "Mistral Small 4 (Non-reasoning)",
1195
1110
  },
1196
- "magistral-small-1.2": {
1197
- // AA Intelligence Index (composite score)
1198
- intelligenceIndex: 18.2,
1199
- normalizedScore: 26,
1200
-
1111
+ "devstral-2": {
1201
1112
  // AA specific benchmarks
1202
- codingIndex: 14.8,
1203
- mathIndex: 80.3,
1113
+ codingIndex: 23.7,
1114
+ mathIndex: 36.7,
1204
1115
 
1205
1116
  // Academic benchmarks
1206
- mmluPro: 0.768,
1207
- gpqa: 0.663,
1208
- hle: 0.061,
1117
+ mmluPro: 0.762,
1118
+ gpqa: 0.594,
1119
+ hle: 0.036,
1209
1120
 
1210
1121
  // Capabilities
1211
1122
  contextWindow: 8192,
@@ -1213,13 +1124,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1213
1124
  supportsVision: false,
1214
1125
 
1215
1126
  // Metadata
1216
- lastUpdated: "2026-04-06",
1127
+ lastUpdated: "2026-06-01",
1128
+ originalModel: "Devstral 2",
1217
1129
  },
1218
1130
  "ministral-3-3b": {
1219
- // AA Intelligence Index (composite score)
1220
- intelligenceIndex: 11.2,
1221
- normalizedScore: 16,
1222
-
1223
1131
  // AA specific benchmarks
1224
1132
  codingIndex: 4.8,
1225
1133
  mathIndex: 22,
@@ -1235,65 +1143,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1235
1143
  supportsVision: false,
1236
1144
 
1237
1145
  // Metadata
1238
- lastUpdated: "2026-04-06",
1239
- },
1240
- "mistral-medium-3.1": {
1241
- // AA Intelligence Index (composite score)
1242
- intelligenceIndex: 21.3,
1243
- normalizedScore: 30,
1244
-
1245
- // AA specific benchmarks
1246
- codingIndex: 18.3,
1247
- mathIndex: 38.3,
1248
-
1249
- // Academic benchmarks
1250
- mmluPro: 0.683,
1251
- gpqa: 0.588,
1252
- hle: 0.044,
1253
-
1254
- // Capabilities
1255
- contextWindow: 8192,
1256
- supportsReasoning: false,
1257
- supportsVision: false,
1258
-
1259
- // Metadata
1260
- lastUpdated: "2026-04-06",
1146
+ lastUpdated: "2026-06-01",
1147
+ originalModel: "Ministral 3 3B",
1261
1148
  },
1262
- "devstral-small-2": {
1263
- // AA Intelligence Index (composite score)
1264
- intelligenceIndex: 19.5,
1265
- normalizedScore: 28,
1266
-
1149
+ "deepseek-v4-pro-non-reasoning": {
1267
1150
  // AA specific benchmarks
1268
- codingIndex: 20.7,
1269
- mathIndex: 34.3,
1270
-
1271
- // Academic benchmarks
1272
- mmluPro: 0.678,
1273
- gpqa: 0.532,
1274
- hle: 0.034,
1275
-
1276
- // Capabilities
1277
- contextWindow: 8192,
1278
- supportsReasoning: false,
1279
- supportsVision: false,
1280
-
1281
- // Metadata
1282
- lastUpdated: "2026-04-06",
1283
- },
1284
- "deepseek-r1-distill-llama-70b": {
1285
- // AA Intelligence Index (composite score)
1286
- intelligenceIndex: 16,
1287
- normalizedScore: 23,
1288
-
1289
- // AA specific benchmarks
1290
- codingIndex: 11.4,
1291
- mathIndex: 53.7,
1151
+ codingIndex: 38.4,
1152
+ mathIndex: undefined,
1292
1153
 
1293
1154
  // Academic benchmarks
1294
- mmluPro: 0.795,
1295
- gpqa: 0.402,
1296
- hle: 0.061,
1155
+ mmluPro: undefined,
1156
+ gpqa: 0.717,
1157
+ hle: 0.077,
1297
1158
 
1298
1159
  // Capabilities
1299
1160
  contextWindow: 8192,
@@ -1301,21 +1162,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1301
1162
  supportsVision: false,
1302
1163
 
1303
1164
  // Metadata
1304
- lastUpdated: "2026-04-06",
1165
+ lastUpdated: "2026-06-01",
1166
+ originalModel: "DeepSeek V4 Pro (Non-reasoning)",
1305
1167
  },
1306
- "deepseek-r1-0528-may-25": {
1307
- // AA Intelligence Index (composite score)
1308
- intelligenceIndex: 27.1,
1309
- normalizedScore: 39,
1310
-
1168
+ "deepseek-v4-pro-reasoning-high-effort": {
1311
1169
  // AA specific benchmarks
1312
- codingIndex: 24,
1313
- mathIndex: 76,
1170
+ codingIndex: 43.2,
1171
+ mathIndex: undefined,
1314
1172
 
1315
1173
  // Academic benchmarks
1316
- mmluPro: 0.849,
1317
- gpqa: 0.813,
1318
- hle: 0.149,
1174
+ mmluPro: undefined,
1175
+ gpqa: 0.905,
1176
+ hle: 0.335,
1319
1177
 
1320
1178
  // Capabilities
1321
1179
  contextWindow: 8192,
@@ -1323,21 +1181,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1323
1181
  supportsVision: false,
1324
1182
 
1325
1183
  // Metadata
1326
- lastUpdated: "2026-04-06",
1184
+ lastUpdated: "2026-06-01",
1185
+ originalModel: "DeepSeek V4 Pro (Reasoning, High Effort)",
1327
1186
  },
1328
- "deepseek-v3.2-non-reasoning": {
1329
- // AA Intelligence Index (composite score)
1330
- intelligenceIndex: 32.1,
1331
- normalizedScore: 46,
1332
-
1187
+ "deepseek-v4-pro-reasoning-max-effort": {
1333
1188
  // AA specific benchmarks
1334
- codingIndex: 34.6,
1335
- mathIndex: 59,
1189
+ codingIndex: 47.5,
1190
+ mathIndex: undefined,
1336
1191
 
1337
1192
  // Academic benchmarks
1338
- mmluPro: 0.837,
1339
- gpqa: 0.751,
1340
- hle: 0.105,
1193
+ mmluPro: undefined,
1194
+ gpqa: 0.888,
1195
+ hle: 0.359,
1341
1196
 
1342
1197
  // Capabilities
1343
1198
  contextWindow: 8192,
@@ -1345,21 +1200,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1345
1200
  supportsVision: false,
1346
1201
 
1347
1202
  // Metadata
1348
- lastUpdated: "2026-04-06",
1203
+ lastUpdated: "2026-06-01",
1204
+ originalModel: "DeepSeek V4 Pro (Reasoning, Max Effort)",
1349
1205
  },
1350
- "deepseek-v3.2-speciale": {
1351
- // AA Intelligence Index (composite score)
1352
- intelligenceIndex: 29.4,
1353
- normalizedScore: 42,
1354
-
1206
+ "deepseek-v4-flash-reasoning-max-effort": {
1355
1207
  // AA specific benchmarks
1356
- codingIndex: 37.9,
1357
- mathIndex: 96.7,
1208
+ codingIndex: 38.7,
1209
+ mathIndex: undefined,
1358
1210
 
1359
1211
  // Academic benchmarks
1360
- mmluPro: 0.863,
1361
- gpqa: 0.871,
1362
- hle: 0.261,
1212
+ mmluPro: undefined,
1213
+ gpqa: 0.894,
1214
+ hle: 0.321,
1363
1215
 
1364
1216
  // Capabilities
1365
1217
  contextWindow: 8192,
@@ -1367,21 +1219,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1367
1219
  supportsVision: false,
1368
1220
 
1369
1221
  // Metadata
1370
- lastUpdated: "2026-04-06",
1222
+ lastUpdated: "2026-06-01",
1223
+ originalModel: "DeepSeek V4 Flash (Reasoning, Max Effort)",
1371
1224
  },
1372
- "deepseek-r1-0528-qwen3-8b": {
1373
- // AA Intelligence Index (composite score)
1374
- intelligenceIndex: 16.4,
1375
- normalizedScore: 23,
1376
-
1225
+ "deepseek-v4-flash-reasoning-high-effort": {
1377
1226
  // AA specific benchmarks
1378
- codingIndex: 7.8,
1379
- mathIndex: 63.7,
1227
+ codingIndex: 39.8,
1228
+ mathIndex: undefined,
1380
1229
 
1381
1230
  // Academic benchmarks
1382
- mmluPro: 0.739,
1383
- gpqa: 0.612,
1384
- hle: 0.056,
1231
+ mmluPro: undefined,
1232
+ gpqa: 0.867,
1233
+ hle: 0.278,
1385
1234
 
1386
1235
  // Capabilities
1387
1236
  contextWindow: 8192,
@@ -1389,21 +1238,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1389
1238
  supportsVision: false,
1390
1239
 
1391
1240
  // Metadata
1392
- lastUpdated: "2026-04-06",
1241
+ lastUpdated: "2026-06-01",
1242
+ originalModel: "DeepSeek V4 Flash (Reasoning, High Effort)",
1393
1243
  },
1394
- "deepseek-v3.2-reasoning": {
1395
- // AA Intelligence Index (composite score)
1396
- intelligenceIndex: 41.7,
1397
- normalizedScore: 60,
1398
-
1244
+ "deepseek-v4-flash-non-reasoning": {
1399
1245
  // AA specific benchmarks
1400
- codingIndex: 36.7,
1401
- mathIndex: 92,
1246
+ codingIndex: 35.2,
1247
+ mathIndex: undefined,
1402
1248
 
1403
1249
  // Academic benchmarks
1404
- mmluPro: 0.862,
1405
- gpqa: 0.84,
1406
- hle: 0.222,
1250
+ mmluPro: undefined,
1251
+ gpqa: 0.716,
1252
+ hle: 0.07,
1407
1253
 
1408
1254
  // Capabilities
1409
1255
  contextWindow: 8192,
@@ -1411,13 +1257,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1411
1257
  supportsVision: false,
1412
1258
 
1413
1259
  // Metadata
1414
- lastUpdated: "2026-04-06",
1260
+ lastUpdated: "2026-06-01",
1261
+ originalModel: "DeepSeek V4 Flash (Non-reasoning)",
1415
1262
  },
1416
1263
  "r1-1776": {
1417
- // AA Intelligence Index (composite score)
1418
- intelligenceIndex: 12,
1419
- normalizedScore: 17,
1420
-
1421
1264
  // AA specific benchmarks
1422
1265
  codingIndex: undefined,
1423
1266
  mathIndex: undefined,
@@ -1433,13 +1276,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1433
1276
  supportsVision: false,
1434
1277
 
1435
1278
  // Metadata
1436
- lastUpdated: "2026-04-06",
1279
+ lastUpdated: "2026-06-01",
1280
+ originalModel: "R1 1776",
1437
1281
  },
1438
1282
  "falcon-h1r-7b": {
1439
- // AA Intelligence Index (composite score)
1440
- intelligenceIndex: 15.8,
1441
- normalizedScore: 23,
1442
-
1443
1283
  // AA specific benchmarks
1444
1284
  codingIndex: 9.8,
1445
1285
  mathIndex: 80,
@@ -1455,21 +1295,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1455
1295
  supportsVision: false,
1456
1296
 
1457
1297
  // Metadata
1458
- lastUpdated: "2026-04-06",
1298
+ lastUpdated: "2026-06-01",
1299
+ originalModel: "Falcon-H1R-7B",
1459
1300
  },
1460
- "grok-4.20-beta-0309-reasoning": {
1461
- // AA Intelligence Index (composite score)
1462
- intelligenceIndex: 48.5,
1463
- normalizedScore: 69,
1464
-
1301
+ "grok-4.3-medium": {
1465
1302
  // AA specific benchmarks
1466
- codingIndex: 42.2,
1303
+ codingIndex: 35.1,
1467
1304
  mathIndex: undefined,
1468
1305
 
1469
1306
  // Academic benchmarks
1470
1307
  mmluPro: undefined,
1471
- gpqa: 0.885,
1472
- hle: 0.3,
1308
+ gpqa: 0.89,
1309
+ hle: 0.281,
1473
1310
 
1474
1311
  // Capabilities
1475
1312
  contextWindow: 8192,
@@ -1477,21 +1314,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1477
1314
  supportsVision: false,
1478
1315
 
1479
1316
  // Metadata
1480
- lastUpdated: "2026-04-06",
1317
+ lastUpdated: "2026-06-01",
1318
+ originalModel: "Grok 4.3 (medium)",
1481
1319
  },
1482
- "grok-4.20-beta-0309-non-reasoning": {
1483
- // AA Intelligence Index (composite score)
1484
- intelligenceIndex: 29.7,
1485
- normalizedScore: 42,
1486
-
1320
+ "grok-4.3-low": {
1487
1321
  // AA specific benchmarks
1488
- codingIndex: 25.4,
1322
+ codingIndex: 31.6,
1489
1323
  mathIndex: undefined,
1490
1324
 
1491
1325
  // Academic benchmarks
1492
1326
  mmluPro: undefined,
1493
- gpqa: 0.785,
1494
- hle: 0.225,
1327
+ gpqa: 0.843,
1328
+ hle: 0.173,
1495
1329
 
1496
1330
  // Capabilities
1497
1331
  contextWindow: 8192,
@@ -1499,21 +1333,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1499
1333
  supportsVision: false,
1500
1334
 
1501
1335
  // Metadata
1502
- lastUpdated: "2026-04-06",
1336
+ lastUpdated: "2026-06-01",
1337
+ originalModel: "Grok 4.3 (low)",
1503
1338
  },
1504
- "grok-code-fast-1": {
1505
- // AA Intelligence Index (composite score)
1506
- intelligenceIndex: 28.7,
1507
- normalizedScore: 41,
1508
-
1339
+ "grok-4.3-high": {
1509
1340
  // AA specific benchmarks
1510
- codingIndex: 23.7,
1511
- mathIndex: 43.3,
1341
+ codingIndex: 41,
1342
+ mathIndex: undefined,
1512
1343
 
1513
1344
  // Academic benchmarks
1514
- mmluPro: 0.793,
1515
- gpqa: 0.727,
1516
- hle: 0.075,
1345
+ mmluPro: undefined,
1346
+ gpqa: 0.901,
1347
+ hle: 0.35,
1517
1348
 
1518
1349
  // Capabilities
1519
1350
  contextWindow: 8192,
@@ -1521,21 +1352,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1521
1352
  supportsVision: false,
1522
1353
 
1523
1354
  // Metadata
1524
- lastUpdated: "2026-04-06",
1355
+ lastUpdated: "2026-06-01",
1356
+ originalModel: "Grok 4.3 (high)",
1525
1357
  },
1526
- "grok-3-mini-reasoning-high": {
1527
- // AA Intelligence Index (composite score)
1528
- intelligenceIndex: 32.1,
1529
- normalizedScore: 46,
1530
-
1358
+ "grok-4.3-non-reasoning": {
1531
1359
  // AA specific benchmarks
1532
- codingIndex: 25.2,
1533
- mathIndex: 84.7,
1360
+ codingIndex: 25.1,
1361
+ mathIndex: undefined,
1534
1362
 
1535
1363
  // Academic benchmarks
1536
- mmluPro: 0.828,
1537
- gpqa: 0.791,
1538
- hle: 0.111,
1364
+ mmluPro: undefined,
1365
+ gpqa: 0.658,
1366
+ hle: 0.065,
1539
1367
 
1540
1368
  // Capabilities
1541
1369
  contextWindow: 8192,
@@ -1543,13 +1371,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1543
1371
  supportsVision: false,
1544
1372
 
1545
1373
  // Metadata
1546
- lastUpdated: "2026-04-06",
1374
+ lastUpdated: "2026-06-01",
1375
+ originalModel: "Grok 4.3 (Non-reasoning)",
1547
1376
  },
1548
1377
  "nova-micro": {
1549
- // AA Intelligence Index (composite score)
1550
- intelligenceIndex: 10.3,
1551
- normalizedScore: 15,
1552
-
1553
1378
  // AA specific benchmarks
1554
1379
  codingIndex: 4.1,
1555
1380
  mathIndex: 6,
@@ -1565,21 +1390,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1565
1390
  supportsVision: false,
1566
1391
 
1567
1392
  // Metadata
1568
- lastUpdated: "2026-04-06",
1393
+ lastUpdated: "2026-06-01",
1394
+ originalModel: "Nova Micro",
1569
1395
  },
1570
- "nova-premier": {
1571
- // AA Intelligence Index (composite score)
1572
- intelligenceIndex: 19,
1573
- normalizedScore: 27,
1574
-
1396
+ "nova-2.0-omni-low": {
1575
1397
  // AA specific benchmarks
1576
- codingIndex: 13.8,
1577
- mathIndex: 17.3,
1398
+ codingIndex: 13.9,
1399
+ mathIndex: 56,
1578
1400
 
1579
1401
  // Academic benchmarks
1580
- mmluPro: 0.733,
1581
- gpqa: 0.569,
1582
- hle: 0.047,
1402
+ mmluPro: 0.798,
1403
+ gpqa: 0.699,
1404
+ hle: 0.04,
1583
1405
 
1584
1406
  // Capabilities
1585
1407
  contextWindow: 8192,
@@ -1587,21 +1409,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1587
1409
  supportsVision: false,
1588
1410
 
1589
1411
  // Metadata
1590
- lastUpdated: "2026-04-06",
1412
+ lastUpdated: "2026-06-01",
1413
+ originalModel: "Nova 2.0 Omni (low)",
1591
1414
  },
1592
- "nova-2-lite-low": {
1593
- // AA Intelligence Index (composite score)
1594
- intelligenceIndex: 24.6,
1595
- normalizedScore: 35,
1596
-
1415
+ "nova-2.0-pro-preview-medium": {
1597
1416
  // AA specific benchmarks
1598
- codingIndex: 13.6,
1599
- mathIndex: 46.7,
1417
+ codingIndex: 30.4,
1418
+ mathIndex: 89,
1600
1419
 
1601
1420
  // Academic benchmarks
1602
- mmluPro: 0.788,
1603
- gpqa: 0.698,
1604
- hle: 0.042,
1421
+ mmluPro: 0.83,
1422
+ gpqa: 0.785,
1423
+ hle: 0.089,
1605
1424
 
1606
1425
  // Capabilities
1607
1426
  contextWindow: 8192,
@@ -1609,13 +1428,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1609
1428
  supportsVision: false,
1610
1429
 
1611
1430
  // Metadata
1612
- lastUpdated: "2026-04-06",
1431
+ lastUpdated: "2026-06-01",
1432
+ originalModel: "Nova 2.0 Pro Preview (medium)",
1613
1433
  },
1614
- "nova-2-lite-medium": {
1615
- // AA Intelligence Index (composite score)
1616
- intelligenceIndex: 29.7,
1617
- normalizedScore: 42,
1618
-
1434
+ "nova-2.0-lite-medium": {
1619
1435
  // AA specific benchmarks
1620
1436
  codingIndex: 23.9,
1621
1437
  mathIndex: 88.7,
@@ -1631,21 +1447,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1631
1447
  supportsVision: false,
1632
1448
 
1633
1449
  // Metadata
1634
- lastUpdated: "2026-04-06",
1450
+ lastUpdated: "2026-06-01",
1451
+ originalModel: "Nova 2.0 Lite (medium)",
1635
1452
  },
1636
- "nova-2-pro-preview-non-reasoning": {
1637
- // AA Intelligence Index (composite score)
1638
- intelligenceIndex: 23.1,
1639
- normalizedScore: 33,
1640
-
1453
+ "nova-2.0-lite-high": {
1641
1454
  // AA specific benchmarks
1642
- codingIndex: 20.5,
1643
- mathIndex: 30.7,
1455
+ codingIndex: 23.4,
1456
+ mathIndex: 94.3,
1644
1457
 
1645
1458
  // Academic benchmarks
1646
- mmluPro: 0.772,
1647
- gpqa: 0.636,
1648
- hle: 0.04,
1459
+ mmluPro: 0.818,
1460
+ gpqa: 0.811,
1461
+ hle: 0.109,
1649
1462
 
1650
1463
  // Capabilities
1651
1464
  contextWindow: 8192,
@@ -1653,21 +1466,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1653
1466
  supportsVision: false,
1654
1467
 
1655
1468
  // Metadata
1656
- lastUpdated: "2026-04-06",
1469
+ lastUpdated: "2026-06-01",
1470
+ originalModel: "Nova 2.0 Lite (high)",
1657
1471
  },
1658
- "nova-2-pro-preview-low": {
1659
- // AA Intelligence Index (composite score)
1660
- intelligenceIndex: 31.9,
1661
- normalizedScore: 46,
1662
-
1472
+ "nova-2.0-pro-preview-non-reasoning": {
1663
1473
  // AA specific benchmarks
1664
- codingIndex: 24.5,
1665
- mathIndex: 63.3,
1474
+ codingIndex: 20.5,
1475
+ mathIndex: 30.7,
1666
1476
 
1667
1477
  // Academic benchmarks
1668
- mmluPro: 0.822,
1669
- gpqa: 0.751,
1670
- hle: 0.052,
1478
+ mmluPro: 0.772,
1479
+ gpqa: 0.636,
1480
+ hle: 0.04,
1671
1481
 
1672
1482
  // Capabilities
1673
1483
  contextWindow: 8192,
@@ -1675,21 +1485,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1675
1485
  supportsVision: false,
1676
1486
 
1677
1487
  // Metadata
1678
- lastUpdated: "2026-04-06",
1488
+ lastUpdated: "2026-06-01",
1489
+ originalModel: "Nova 2.0 Pro Preview (Non-reasoning)",
1679
1490
  },
1680
- "nova-2-omni-non-reasoning": {
1681
- // AA Intelligence Index (composite score)
1682
- intelligenceIndex: 16.6,
1683
- normalizedScore: 24,
1684
-
1491
+ "nova-2.0-lite-low": {
1685
1492
  // AA specific benchmarks
1686
- codingIndex: 13.8,
1687
- mathIndex: 37,
1493
+ codingIndex: 13.6,
1494
+ mathIndex: 46.7,
1688
1495
 
1689
1496
  // Academic benchmarks
1690
- mmluPro: 0.719,
1691
- gpqa: 0.555,
1692
- hle: 0.039,
1497
+ mmluPro: 0.788,
1498
+ gpqa: 0.698,
1499
+ hle: 0.042,
1693
1500
 
1694
1501
  // Capabilities
1695
1502
  contextWindow: 8192,
@@ -1697,13 +1504,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1697
1504
  supportsVision: false,
1698
1505
 
1699
1506
  // Metadata
1700
- lastUpdated: "2026-04-06",
1507
+ lastUpdated: "2026-06-01",
1508
+ originalModel: "Nova 2.0 Lite (low)",
1701
1509
  },
1702
- "nova-2-lite-non-reasoning": {
1703
- // AA Intelligence Index (composite score)
1704
- intelligenceIndex: 18,
1705
- normalizedScore: 26,
1706
-
1510
+ "nova-2.0-lite-non-reasoning": {
1707
1511
  // AA specific benchmarks
1708
1512
  codingIndex: 12.5,
1709
1513
  mathIndex: 33.7,
@@ -1719,21 +1523,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1719
1523
  supportsVision: false,
1720
1524
 
1721
1525
  // Metadata
1722
- lastUpdated: "2026-04-06",
1526
+ lastUpdated: "2026-06-01",
1527
+ originalModel: "Nova 2.0 Lite (Non-reasoning)",
1723
1528
  },
1724
- "nova-2-omni-medium": {
1725
- // AA Intelligence Index (composite score)
1726
- intelligenceIndex: 28,
1727
- normalizedScore: 40,
1728
-
1529
+ "nova-premier": {
1729
1530
  // AA specific benchmarks
1730
- codingIndex: 15.1,
1731
- mathIndex: 89.7,
1531
+ codingIndex: 13.8,
1532
+ mathIndex: 17.3,
1732
1533
 
1733
1534
  // Academic benchmarks
1734
- mmluPro: 0.809,
1735
- gpqa: 0.76,
1736
- hle: 0.068,
1535
+ mmluPro: 0.733,
1536
+ gpqa: 0.569,
1537
+ hle: 0.047,
1737
1538
 
1738
1539
  // Capabilities
1739
1540
  contextWindow: 8192,
@@ -1741,21 +1542,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1741
1542
  supportsVision: false,
1742
1543
 
1743
1544
  // Metadata
1744
- lastUpdated: "2026-04-06",
1545
+ lastUpdated: "2026-06-01",
1546
+ originalModel: "Nova Premier",
1745
1547
  },
1746
- "nova-2-pro-preview-medium": {
1747
- // AA Intelligence Index (composite score)
1748
- intelligenceIndex: 35.7,
1749
- normalizedScore: 51,
1750
-
1548
+ "nova-2.0-omni-non-reasoning": {
1751
1549
  // AA specific benchmarks
1752
- codingIndex: 30.4,
1753
- mathIndex: 89,
1550
+ codingIndex: 13.8,
1551
+ mathIndex: 37,
1754
1552
 
1755
1553
  // Academic benchmarks
1756
- mmluPro: 0.83,
1757
- gpqa: 0.785,
1758
- hle: 0.089,
1554
+ mmluPro: 0.719,
1555
+ gpqa: 0.555,
1556
+ hle: 0.039,
1759
1557
 
1760
1558
  // Capabilities
1761
1559
  contextWindow: 8192,
@@ -1763,21 +1561,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1763
1561
  supportsVision: false,
1764
1562
 
1765
1563
  // Metadata
1766
- lastUpdated: "2026-04-06",
1564
+ lastUpdated: "2026-06-01",
1565
+ originalModel: "Nova 2.0 Omni (Non-reasoning)",
1767
1566
  },
1768
- "nova-2-omni-low": {
1769
- // AA Intelligence Index (composite score)
1770
- intelligenceIndex: 23.2,
1771
- normalizedScore: 33,
1772
-
1567
+ "nova-2.0-omni-medium": {
1773
1568
  // AA specific benchmarks
1774
- codingIndex: 13.9,
1775
- mathIndex: 56,
1569
+ codingIndex: 15.1,
1570
+ mathIndex: 89.7,
1776
1571
 
1777
1572
  // Academic benchmarks
1778
- mmluPro: 0.798,
1779
- gpqa: 0.699,
1780
- hle: 0.04,
1573
+ mmluPro: 0.809,
1574
+ gpqa: 0.76,
1575
+ hle: 0.068,
1781
1576
 
1782
1577
  // Capabilities
1783
1578
  contextWindow: 8192,
@@ -1785,21 +1580,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1785
1580
  supportsVision: false,
1786
1581
 
1787
1582
  // Metadata
1788
- lastUpdated: "2026-04-06",
1583
+ lastUpdated: "2026-06-01",
1584
+ originalModel: "Nova 2.0 Omni (medium)",
1789
1585
  },
1790
- "phi-4": {
1791
- // AA Intelligence Index (composite score)
1792
- intelligenceIndex: 10.4,
1793
- normalizedScore: 15,
1794
-
1586
+ "nova-2.0-pro-preview-low": {
1795
1587
  // AA specific benchmarks
1796
- codingIndex: 11.2,
1797
- mathIndex: 18,
1588
+ codingIndex: 24.5,
1589
+ mathIndex: 63.3,
1798
1590
 
1799
1591
  // Academic benchmarks
1800
- mmluPro: 0.714,
1801
- gpqa: 0.575,
1802
- hle: 0.041,
1592
+ mmluPro: 0.822,
1593
+ gpqa: 0.751,
1594
+ hle: 0.052,
1803
1595
 
1804
1596
  // Capabilities
1805
1597
  contextWindow: 8192,
@@ -1807,21 +1599,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1807
1599
  supportsVision: false,
1808
1600
 
1809
1601
  // Metadata
1810
- lastUpdated: "2026-04-06",
1602
+ lastUpdated: "2026-06-01",
1603
+ originalModel: "Nova 2.0 Pro Preview (low)",
1811
1604
  },
1812
- "phi-4-multimodal-instruct": {
1813
- // AA Intelligence Index (composite score)
1814
- intelligenceIndex: 10,
1815
- normalizedScore: 14,
1816
-
1605
+ "phi-4": {
1817
1606
  // AA specific benchmarks
1818
- codingIndex: undefined,
1819
- mathIndex: undefined,
1607
+ codingIndex: 11.2,
1608
+ mathIndex: 18,
1820
1609
 
1821
1610
  // Academic benchmarks
1822
- mmluPro: 0.485,
1823
- gpqa: 0.315,
1824
- hle: 0.044,
1611
+ mmluPro: 0.714,
1612
+ gpqa: 0.575,
1613
+ hle: 0.041,
1825
1614
 
1826
1615
  // Capabilities
1827
1616
  contextWindow: 8192,
@@ -1829,13 +1618,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1829
1618
  supportsVision: false,
1830
1619
 
1831
1620
  // Metadata
1832
- lastUpdated: "2026-04-06",
1621
+ lastUpdated: "2026-06-01",
1622
+ originalModel: "Phi-4",
1833
1623
  },
1834
1624
  "phi-4-mini-instruct": {
1835
- // AA Intelligence Index (composite score)
1836
- intelligenceIndex: 8.4,
1837
- normalizedScore: 12,
1838
-
1839
1625
  // AA specific benchmarks
1840
1626
  codingIndex: 3.6,
1841
1627
  mathIndex: 6.7,
@@ -1851,43 +1637,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1851
1637
  supportsVision: false,
1852
1638
 
1853
1639
  // Metadata
1854
- lastUpdated: "2026-04-06",
1640
+ lastUpdated: "2026-06-01",
1641
+ originalModel: "Phi-4 Mini Instruct",
1855
1642
  },
1856
- "lfm2.5-1.2b-thinking": {
1857
- // AA Intelligence Index (composite score)
1858
- intelligenceIndex: 8.1,
1859
- normalizedScore: 12,
1860
-
1643
+ "phi-4-multimodal-instruct": {
1861
1644
  // AA specific benchmarks
1862
- codingIndex: 1.4,
1645
+ codingIndex: undefined,
1863
1646
  mathIndex: undefined,
1864
1647
 
1865
1648
  // Academic benchmarks
1866
- mmluPro: undefined,
1867
- gpqa: 0.339,
1868
- hle: 0.061,
1869
-
1870
- // Capabilities
1871
- contextWindow: 8192,
1872
- supportsReasoning: false,
1873
- supportsVision: false,
1874
-
1875
- // Metadata
1876
- lastUpdated: "2026-04-06",
1877
- },
1878
- "lfm2-8b-a1b": {
1879
- // AA Intelligence Index (composite score)
1880
- intelligenceIndex: 7,
1881
- normalizedScore: 10,
1882
-
1883
- // AA specific benchmarks
1884
- codingIndex: 2.3,
1885
- mathIndex: 25.3,
1886
-
1887
- // Academic benchmarks
1888
- mmluPro: 0.505,
1889
- gpqa: 0.344,
1890
- hle: 0.049,
1649
+ mmluPro: 0.485,
1650
+ gpqa: 0.315,
1651
+ hle: 0.044,
1891
1652
 
1892
1653
  // Capabilities
1893
1654
  contextWindow: 8192,
@@ -1895,21 +1656,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1895
1656
  supportsVision: false,
1896
1657
 
1897
1658
  // Metadata
1898
- lastUpdated: "2026-04-06",
1659
+ lastUpdated: "2026-06-01",
1660
+ originalModel: "Phi-4 Multimodal Instruct",
1899
1661
  },
1900
- "lfm2.5-1.2b-instruct": {
1901
- // AA Intelligence Index (composite score)
1902
- intelligenceIndex: 8,
1903
- normalizedScore: 11,
1904
-
1662
+ "lfm2.5-vl-1.6b": {
1905
1663
  // AA specific benchmarks
1906
- codingIndex: 0.8,
1664
+ codingIndex: 1,
1907
1665
  mathIndex: undefined,
1908
1666
 
1909
1667
  // Academic benchmarks
1910
1668
  mmluPro: undefined,
1911
- gpqa: 0.326,
1912
- hle: 0.068,
1669
+ gpqa: 0.289,
1670
+ hle: 0.051,
1913
1671
 
1914
1672
  // Capabilities
1915
1673
  contextWindow: 8192,
@@ -1917,13 +1675,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1917
1675
  supportsVision: false,
1918
1676
 
1919
1677
  // Metadata
1920
- lastUpdated: "2026-04-06",
1678
+ lastUpdated: "2026-06-01",
1679
+ originalModel: "LFM2.5-VL-1.6B",
1921
1680
  },
1922
1681
  "lfm2-24b-a2b": {
1923
- // AA Intelligence Index (composite score)
1924
- intelligenceIndex: 10.5,
1925
- normalizedScore: 15,
1926
-
1927
1682
  // AA specific benchmarks
1928
1683
  codingIndex: 3.6,
1929
1684
  mathIndex: undefined,
@@ -1939,65 +1694,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
1939
1694
  supportsVision: false,
1940
1695
 
1941
1696
  // Metadata
1942
- lastUpdated: "2026-04-06",
1943
- },
1944
- "lfm2-2.6b": {
1945
- // AA Intelligence Index (composite score)
1946
- intelligenceIndex: 8,
1947
- normalizedScore: 11,
1948
-
1949
- // AA specific benchmarks
1950
- codingIndex: 1.4,
1951
- mathIndex: 8.3,
1952
-
1953
- // Academic benchmarks
1954
- mmluPro: 0.298,
1955
- gpqa: 0.306,
1956
- hle: 0.052,
1957
-
1958
- // Capabilities
1959
- contextWindow: 8192,
1960
- supportsReasoning: false,
1961
- supportsVision: false,
1962
-
1963
- // Metadata
1964
- lastUpdated: "2026-04-06",
1697
+ lastUpdated: "2026-06-01",
1698
+ originalModel: "LFM2 24B A2B",
1965
1699
  },
1966
- "lfm2.5-vl-1.6b": {
1967
- // AA Intelligence Index (composite score)
1968
- intelligenceIndex: 6.2,
1969
- normalizedScore: 9,
1970
-
1971
- // AA specific benchmarks
1972
- codingIndex: 1,
1973
- mathIndex: undefined,
1974
-
1975
- // Academic benchmarks
1976
- mmluPro: undefined,
1977
- gpqa: 0.289,
1978
- hle: 0.051,
1979
-
1980
- // Capabilities
1981
- contextWindow: 8192,
1982
- supportsReasoning: false,
1983
- supportsVision: false,
1984
-
1985
- // Metadata
1986
- lastUpdated: "2026-04-06",
1987
- },
1988
- "solar-open-100b-reasoning": {
1989
- // AA Intelligence Index (composite score)
1990
- intelligenceIndex: 21.7,
1991
- normalizedScore: 31,
1992
-
1700
+ "lfm2-8b-a1b": {
1993
1701
  // AA specific benchmarks
1994
- codingIndex: 10.5,
1995
- mathIndex: undefined,
1702
+ codingIndex: 2.3,
1703
+ mathIndex: 25.3,
1996
1704
 
1997
1705
  // Academic benchmarks
1998
- mmluPro: undefined,
1999
- gpqa: 0.657,
2000
- hle: 0.092,
1706
+ mmluPro: 0.505,
1707
+ gpqa: 0.344,
1708
+ hle: 0.049,
2001
1709
 
2002
1710
  // Capabilities
2003
1711
  contextWindow: 8192,
@@ -2005,6 +1713,7 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
2005
1713
  supportsVision: false,
2006
1714
 
2007
1715
  // Metadata
2008
- lastUpdated: "2026-04-06",
1716
+ lastUpdated: "2026-06-01",
1717
+ originalModel: "LFM2 8B A1B",
2009
1718
  },
2010
1719
  };