pi-free 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,20 @@
1
1
  // Auto-generated benchmark data chunk 3
2
- // Models: gemini-2.5-pro-preview-may-25 .. phi-3-mini-instruct-3.8b (90 entries)
2
+ // Models: llama-3.1-instruct-70b .. magistral-medium-1 (90 entries)
3
+ // Last updated: 2026-06-01
3
4
  // DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
4
5
 
5
6
  import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
6
7
 
7
8
  export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
8
- "gemini-2.5-pro-preview-may-25": {
9
- // AA Intelligence Index (composite score)
10
- intelligenceIndex: 29.5,
11
- normalizedScore: 42,
12
-
9
+ "llama-3.1-instruct-70b": {
13
10
  // AA specific benchmarks
14
- codingIndex: undefined,
15
- mathIndex: undefined,
11
+ codingIndex: 10.9,
12
+ mathIndex: 4,
16
13
 
17
14
  // Academic benchmarks
18
- mmluPro: 0.837,
19
- gpqa: 0.822,
20
- hle: 0.154,
15
+ mmluPro: 0.676,
16
+ gpqa: 0.409,
17
+ hle: 0.046,
21
18
 
22
19
  // Capabilities
23
20
  contextWindow: 8192,
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
25
22
  supportsVision: false,
26
23
 
27
24
  // Metadata
28
- lastUpdated: "2026-04-06",
25
+ lastUpdated: "2026-06-01",
26
+ originalModel: "Llama 3.1 Instruct 70B",
29
27
  },
30
- "gemini-2.5-flash-preview-non-reasoning": {
31
- // AA Intelligence Index (composite score)
32
- intelligenceIndex: 17.8,
33
- normalizedScore: 25,
34
-
28
+ "llama-3.1-instruct-8b": {
35
29
  // AA specific benchmarks
36
- codingIndex: undefined,
37
- mathIndex: undefined,
30
+ codingIndex: 4.9,
31
+ mathIndex: 4.3,
38
32
 
39
33
  // Academic benchmarks
40
- mmluPro: 0.783,
41
- gpqa: 0.594,
42
- hle: 0.05,
34
+ mmluPro: 0.476,
35
+ gpqa: 0.259,
36
+ hle: 0.051,
43
37
 
44
38
  // Capabilities
45
39
  contextWindow: 8192,
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
47
41
  supportsVision: false,
48
42
 
49
43
  // Metadata
50
- lastUpdated: "2026-04-06",
44
+ lastUpdated: "2026-06-01",
45
+ originalModel: "Llama 3.1 Instruct 8B",
51
46
  },
52
- "gemini-1.5-pro-may-24": {
53
- // AA Intelligence Index (composite score)
54
- intelligenceIndex: 12,
55
- normalizedScore: 17,
56
-
47
+ "llama-3.2-instruct-3b": {
57
48
  // AA specific benchmarks
58
- codingIndex: 19.8,
59
- mathIndex: undefined,
49
+ codingIndex: undefined,
50
+ mathIndex: 3.3,
60
51
 
61
52
  // Academic benchmarks
62
- mmluPro: 0.657,
63
- gpqa: 0.371,
64
- hle: 0.039,
53
+ mmluPro: 0.347,
54
+ gpqa: 0.255,
55
+ hle: 0.052,
65
56
 
66
57
  // Capabilities
67
58
  contextWindow: 8192,
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
69
60
  supportsVision: false,
70
61
 
71
62
  // Metadata
72
- lastUpdated: "2026-04-06",
63
+ lastUpdated: "2026-06-01",
64
+ originalModel: "Llama 3.2 Instruct 3B",
73
65
  },
74
- "gemini-2.5-flash-non-reasoning": {
75
- // AA Intelligence Index (composite score)
76
- intelligenceIndex: 20.6,
77
- normalizedScore: 29,
78
-
66
+ "llama-3-instruct-70b": {
79
67
  // AA specific benchmarks
80
- codingIndex: 17.8,
81
- mathIndex: 60.3,
68
+ codingIndex: 6.8,
69
+ mathIndex: undefined,
82
70
 
83
71
  // Academic benchmarks
84
- mmluPro: 0.809,
85
- gpqa: 0.683,
86
- hle: 0.051,
72
+ mmluPro: 0.574,
73
+ gpqa: 0.379,
74
+ hle: 0.044,
87
75
 
88
76
  // Capabilities
89
77
  contextWindow: 8192,
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
91
79
  supportsVision: false,
92
80
 
93
81
  // Metadata
94
- lastUpdated: "2026-04-06",
82
+ lastUpdated: "2026-06-01",
83
+ originalModel: "Llama 3 Instruct 70B",
95
84
  },
96
- "gemini-2.5-flash-reasoning": {
97
- // AA Intelligence Index (composite score)
98
- intelligenceIndex: 27,
99
- normalizedScore: 39,
100
-
85
+ "llama-3-instruct-8b": {
101
86
  // AA specific benchmarks
102
- codingIndex: 22.2,
103
- mathIndex: 73.3,
87
+ codingIndex: 4,
88
+ mathIndex: undefined,
104
89
 
105
90
  // Academic benchmarks
106
- mmluPro: 0.832,
107
- gpqa: 0.79,
108
- hle: 0.111,
91
+ mmluPro: 0.405,
92
+ gpqa: 0.296,
93
+ hle: 0.051,
109
94
 
110
95
  // Capabilities
111
96
  contextWindow: 8192,
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
113
98
  supportsVision: false,
114
99
 
115
100
  // Metadata
116
- lastUpdated: "2026-04-06",
101
+ lastUpdated: "2026-06-01",
102
+ originalModel: "Llama 3 Instruct 8B",
117
103
  },
118
- "gemini-2.5-flash-preview-sep-25-non-reasoning": {
119
- // AA Intelligence Index (composite score)
120
- intelligenceIndex: 25.7,
121
- normalizedScore: 37,
122
-
104
+ "llama-3.2-instruct-1b": {
123
105
  // AA specific benchmarks
124
- codingIndex: 22.1,
125
- mathIndex: 56.7,
106
+ codingIndex: 0.6,
107
+ mathIndex: 0,
126
108
 
127
109
  // Academic benchmarks
128
- mmluPro: 0.836,
129
- gpqa: 0.766,
130
- hle: 0.078,
110
+ mmluPro: 0.2,
111
+ gpqa: 0.196,
112
+ hle: 0.053,
131
113
 
132
114
  // Capabilities
133
115
  contextWindow: 8192,
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
135
117
  supportsVision: false,
136
118
 
137
119
  // Metadata
138
- lastUpdated: "2026-04-06",
120
+ lastUpdated: "2026-06-01",
121
+ originalModel: "Llama 3.2 Instruct 1B",
139
122
  },
140
- "gemma-3n-e4b-instruct-preview-may-25": {
141
- // AA Intelligence Index (composite score)
142
- intelligenceIndex: 10.1,
143
- normalizedScore: 14,
144
-
123
+ "llama-2-chat-70b": {
145
124
  // AA specific benchmarks
146
125
  codingIndex: undefined,
147
126
  mathIndex: undefined,
148
127
 
149
128
  // Academic benchmarks
150
- mmluPro: 0.483,
151
- gpqa: 0.278,
152
- hle: 0.049,
129
+ mmluPro: 0.406,
130
+ gpqa: 0.327,
131
+ hle: 0.05,
153
132
 
154
133
  // Capabilities
155
134
  contextWindow: 8192,
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
157
136
  supportsVision: false,
158
137
 
159
138
  // Metadata
160
- lastUpdated: "2026-04-06",
139
+ lastUpdated: "2026-06-01",
140
+ originalModel: "Llama 2 Chat 70B",
161
141
  },
162
- "gemini-1.5-flash-may-24": {
163
- // AA Intelligence Index (composite score)
164
- intelligenceIndex: 10.5,
165
- normalizedScore: 15,
166
-
142
+ "llama-2-chat-7b": {
167
143
  // AA specific benchmarks
168
144
  codingIndex: undefined,
169
145
  mathIndex: undefined,
170
146
 
171
147
  // Academic benchmarks
172
- mmluPro: 0.574,
173
- gpqa: 0.324,
174
- hle: 0.042,
148
+ mmluPro: 0.164,
149
+ gpqa: 0.227,
150
+ hle: 0.058,
175
151
 
176
152
  // Capabilities
177
153
  contextWindow: 8192,
@@ -179,21 +155,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
179
155
  supportsVision: false,
180
156
 
181
157
  // Metadata
182
- lastUpdated: "2026-04-06",
158
+ lastUpdated: "2026-06-01",
159
+ originalModel: "Llama 2 Chat 7B",
183
160
  },
184
- "gemini-2.5-flash-lite-reasoning": {
185
- // AA Intelligence Index (composite score)
186
- intelligenceIndex: 17.6,
187
- normalizedScore: 25,
188
-
161
+ "llama-2-chat-13b": {
189
162
  // AA specific benchmarks
190
- codingIndex: 9.5,
191
- mathIndex: 53.3,
163
+ codingIndex: undefined,
164
+ mathIndex: undefined,
192
165
 
193
166
  // Academic benchmarks
194
- mmluPro: 0.759,
195
- gpqa: 0.625,
196
- hle: 0.064,
167
+ mmluPro: 0.406,
168
+ gpqa: 0.321,
169
+ hle: 0.047,
197
170
 
198
171
  // Capabilities
199
172
  contextWindow: 8192,
@@ -201,21 +174,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
201
174
  supportsVision: false,
202
175
 
203
176
  // Metadata
204
- lastUpdated: "2026-04-06",
177
+ lastUpdated: "2026-06-01",
178
+ originalModel: "Llama 2 Chat 13B",
205
179
  },
206
- "gemini-2-flash-lite-feb-25": {
207
- // AA Intelligence Index (composite score)
208
- intelligenceIndex: 14.7,
209
- normalizedScore: 21,
210
-
180
+ "gemini-2.0-pro-experimental-feb-25": {
211
181
  // AA specific benchmarks
212
- codingIndex: undefined,
182
+ codingIndex: 25.5,
213
183
  mathIndex: undefined,
214
184
 
215
185
  // Academic benchmarks
216
- mmluPro: 0.724,
217
- gpqa: 0.535,
218
- hle: 0.036,
186
+ mmluPro: 0.805,
187
+ gpqa: 0.622,
188
+ hle: 0.068,
219
189
 
220
190
  // Capabilities
221
191
  contextWindow: 8192,
@@ -223,21 +193,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
223
193
  supportsVision: false,
224
194
 
225
195
  // Metadata
226
- lastUpdated: "2026-04-06",
196
+ lastUpdated: "2026-06-01",
197
+ originalModel: "Gemini 2.0 Pro Experimental (Feb '25)",
227
198
  },
228
- "gemini-2.5-flash-preview-reasoning": {
229
- // AA Intelligence Index (composite score)
230
- intelligenceIndex: 24.3,
231
- normalizedScore: 35,
232
-
199
+ "gemini-2.0-flash-experimental": {
233
200
  // AA specific benchmarks
234
201
  codingIndex: undefined,
235
202
  mathIndex: undefined,
236
203
 
237
204
  // Academic benchmarks
238
- mmluPro: 0.8,
239
- gpqa: 0.698,
240
- hle: 0.116,
205
+ mmluPro: 0.782,
206
+ gpqa: 0.636,
207
+ hle: 0.047,
241
208
 
242
209
  // Capabilities
243
210
  contextWindow: 8192,
@@ -245,21 +212,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
245
212
  supportsVision: false,
246
213
 
247
214
  // Metadata
248
- lastUpdated: "2026-04-06",
215
+ lastUpdated: "2026-06-01",
216
+ originalModel: "Gemini 2.0 Flash (experimental)",
249
217
  },
250
- "gemini-2.5-pro-preview-mar-25": {
251
- // AA Intelligence Index (composite score)
252
- intelligenceIndex: 30.3,
253
- normalizedScore: 43,
254
-
218
+ "gemini-1.5-pro-sep-24": {
255
219
  // AA specific benchmarks
256
- codingIndex: 46.7,
220
+ codingIndex: 23.6,
257
221
  mathIndex: undefined,
258
222
 
259
223
  // Academic benchmarks
260
- mmluPro: 0.858,
261
- gpqa: 0.836,
262
- hle: 0.171,
224
+ mmluPro: 0.75,
225
+ gpqa: 0.589,
226
+ hle: 0.049,
263
227
 
264
228
  // Capabilities
265
229
  contextWindow: 8192,
@@ -267,21 +231,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
267
231
  supportsVision: false,
268
232
 
269
233
  // Metadata
270
- lastUpdated: "2026-04-06",
234
+ lastUpdated: "2026-06-01",
235
+ originalModel: "Gemini 1.5 Pro (Sep '24)",
271
236
  },
272
- "gemini-1-ultra": {
273
- // AA Intelligence Index (composite score)
274
- intelligenceIndex: 10.1,
275
- normalizedScore: 14,
276
-
237
+ "gemini-2.0-flash-lite-preview": {
277
238
  // AA specific benchmarks
278
- codingIndex: 17.6,
239
+ codingIndex: undefined,
279
240
  mathIndex: undefined,
280
241
 
281
242
  // Academic benchmarks
282
243
  mmluPro: undefined,
283
- gpqa: undefined,
284
- hle: undefined,
244
+ gpqa: 0.542,
245
+ hle: 0.044,
285
246
 
286
247
  // Capabilities
287
248
  contextWindow: 8192,
@@ -289,21 +250,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
289
250
  supportsVision: false,
290
251
 
291
252
  // Metadata
292
- lastUpdated: "2026-04-06",
253
+ lastUpdated: "2026-06-01",
254
+ originalModel: "Gemini 2.0 Flash-Lite (Preview)",
293
255
  },
294
- "gemini-2.5-flash-preview-sep-25-reasoning": {
295
- // AA Intelligence Index (composite score)
296
- intelligenceIndex: 31.1,
297
- normalizedScore: 44,
298
-
256
+ "gemini-2.0-flash-feb-25": {
299
257
  // AA specific benchmarks
300
- codingIndex: 24.6,
301
- mathIndex: 78.3,
258
+ codingIndex: 13.6,
259
+ mathIndex: 21.7,
302
260
 
303
261
  // Academic benchmarks
304
- mmluPro: 0.842,
305
- gpqa: 0.793,
306
- hle: 0.127,
262
+ mmluPro: 0.779,
263
+ gpqa: 0.623,
264
+ hle: 0.053,
307
265
 
308
266
  // Capabilities
309
267
  contextWindow: 8192,
@@ -311,21 +269,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
311
269
  supportsVision: false,
312
270
 
313
271
  // Metadata
314
- lastUpdated: "2026-04-06",
272
+ lastUpdated: "2026-06-01",
273
+ originalModel: "Gemini 2.0 Flash (Feb '25)",
315
274
  },
316
- "claude-3.5-sonnet-oct-24": {
317
- // AA Intelligence Index (composite score)
318
- intelligenceIndex: 15.9,
319
- normalizedScore: 23,
320
-
275
+ "gemini-1.5-flash-sep-24": {
321
276
  // AA specific benchmarks
322
- codingIndex: 30.2,
277
+ codingIndex: undefined,
323
278
  mathIndex: undefined,
324
279
 
325
280
  // Academic benchmarks
326
- mmluPro: 0.772,
327
- gpqa: 0.599,
328
- hle: 0.039,
281
+ mmluPro: 0.68,
282
+ gpqa: 0.463,
283
+ hle: 0.035,
329
284
 
330
285
  // Capabilities
331
286
  contextWindow: 8192,
@@ -333,21 +288,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
333
288
  supportsVision: false,
334
289
 
335
290
  // Metadata
336
- lastUpdated: "2026-04-06",
291
+ lastUpdated: "2026-06-01",
292
+ originalModel: "Gemini 1.5 Flash (Sep '24)",
337
293
  },
338
- "claude-3.5-sonnet-june-24": {
339
- // AA Intelligence Index (composite score)
340
- intelligenceIndex: 14.2,
341
- normalizedScore: 20,
342
-
294
+ "gemini-1.5-flash-8b": {
343
295
  // AA specific benchmarks
344
- codingIndex: 26,
296
+ codingIndex: undefined,
345
297
  mathIndex: undefined,
346
298
 
347
299
  // Academic benchmarks
348
- mmluPro: 0.751,
349
- gpqa: 0.56,
350
- hle: 0.037,
300
+ mmluPro: 0.569,
301
+ gpqa: 0.359,
302
+ hle: 0.045,
351
303
 
352
304
  // Capabilities
353
305
  contextWindow: 8192,
@@ -355,21 +307,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
355
307
  supportsVision: false,
356
308
 
357
309
  // Metadata
358
- lastUpdated: "2026-04-06",
310
+ lastUpdated: "2026-06-01",
311
+ originalModel: "Gemini 1.5 Flash-8B",
359
312
  },
360
- "claude-3-opus": {
361
- // AA Intelligence Index (composite score)
362
- intelligenceIndex: 18,
363
- normalizedScore: 26,
364
-
313
+ "gemma-3-1b-instruct": {
365
314
  // AA specific benchmarks
366
- codingIndex: 19.5,
367
- mathIndex: undefined,
315
+ codingIndex: 0.2,
316
+ mathIndex: 3.3,
368
317
 
369
318
  // Academic benchmarks
370
- mmluPro: 0.696,
371
- gpqa: 0.489,
372
- hle: 0.031,
319
+ mmluPro: 0.135,
320
+ gpqa: 0.237,
321
+ hle: 0.052,
373
322
 
374
323
  // Capabilities
375
324
  contextWindow: 8192,
@@ -377,21 +326,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
377
326
  supportsVision: false,
378
327
 
379
328
  // Metadata
380
- lastUpdated: "2026-04-06",
329
+ lastUpdated: "2026-06-01",
330
+ originalModel: "Gemma 3 1B Instruct",
381
331
  },
382
- "claude-3.5-haiku": {
383
- // AA Intelligence Index (composite score)
384
- intelligenceIndex: 18.7,
385
- normalizedScore: 27,
386
-
332
+ "gemini-2.5-flash-lite-non-reasoning": {
387
333
  // AA specific benchmarks
388
- codingIndex: 10.7,
389
- mathIndex: undefined,
334
+ codingIndex: 7.4,
335
+ mathIndex: 35.3,
390
336
 
391
337
  // Academic benchmarks
392
- mmluPro: 0.634,
393
- gpqa: 0.408,
394
- hle: 0.035,
338
+ mmluPro: 0.724,
339
+ gpqa: 0.474,
340
+ hle: 0.037,
395
341
 
396
342
  // Capabilities
397
343
  contextWindow: 8192,
@@ -399,21 +345,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
399
345
  supportsVision: false,
400
346
 
401
347
  // Metadata
402
- lastUpdated: "2026-04-06",
348
+ lastUpdated: "2026-06-01",
349
+ originalModel: "Gemini 2.5 Flash-Lite (Non-reasoning)",
403
350
  },
404
- "claude-3-sonnet": {
405
- // AA Intelligence Index (composite score)
406
- intelligenceIndex: 10.3,
407
- normalizedScore: 15,
408
-
351
+ "gemini-3-pro-preview-high": {
409
352
  // AA specific benchmarks
410
- codingIndex: undefined,
411
- mathIndex: undefined,
353
+ codingIndex: 46.5,
354
+ mathIndex: 95.7,
412
355
 
413
356
  // Academic benchmarks
414
- mmluPro: 0.579,
415
- gpqa: 0.4,
416
- hle: 0.038,
357
+ mmluPro: 0.898,
358
+ gpqa: 0.908,
359
+ hle: 0.372,
417
360
 
418
361
  // Capabilities
419
362
  contextWindow: 8192,
@@ -421,21 +364,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
421
364
  supportsVision: false,
422
365
 
423
366
  // Metadata
424
- lastUpdated: "2026-04-06",
367
+ lastUpdated: "2026-06-01",
368
+ originalModel: "Gemini 3 Pro Preview (high)",
425
369
  },
426
- "claude-3-haiku": {
427
- // AA Intelligence Index (composite score)
428
- intelligenceIndex: 12.3,
429
- normalizedScore: 18,
430
-
370
+ "gemma-3n-e2b-instruct": {
431
371
  // AA specific benchmarks
432
- codingIndex: 6.7,
433
- mathIndex: undefined,
372
+ codingIndex: 2.2,
373
+ mathIndex: 10.3,
434
374
 
435
375
  // Academic benchmarks
436
- mmluPro: undefined,
437
- gpqa: 0.374,
438
- hle: 0.039,
376
+ mmluPro: 0.378,
377
+ gpqa: 0.229,
378
+ hle: 0.04,
439
379
 
440
380
  // Capabilities
441
381
  contextWindow: 8192,
@@ -443,21 +383,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
443
383
  supportsVision: false,
444
384
 
445
385
  // Metadata
446
- lastUpdated: "2026-04-06",
386
+ lastUpdated: "2026-06-01",
387
+ originalModel: "Gemma 3n E2B Instruct",
447
388
  },
448
- "claude-instant": {
449
- // AA Intelligence Index (composite score)
450
- intelligenceIndex: 7.4,
451
- normalizedScore: 11,
452
-
389
+ "gemma-3-12b-instruct": {
453
390
  // AA specific benchmarks
454
- codingIndex: 7.8,
455
- mathIndex: undefined,
391
+ codingIndex: 6.3,
392
+ mathIndex: 18.3,
456
393
 
457
394
  // Academic benchmarks
458
- mmluPro: 0.434,
459
- gpqa: 0.33,
460
- hle: 0.038,
395
+ mmluPro: 0.595,
396
+ gpqa: 0.349,
397
+ hle: 0.048,
461
398
 
462
399
  // Capabilities
463
400
  contextWindow: 8192,
@@ -465,21 +402,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
465
402
  supportsVision: false,
466
403
 
467
404
  // Metadata
468
- lastUpdated: "2026-04-06",
405
+ lastUpdated: "2026-06-01",
406
+ originalModel: "Gemma 3 12B Instruct",
469
407
  },
470
- "claude-3.7-sonnet-non-reasoning": {
471
- // AA Intelligence Index (composite score)
472
- intelligenceIndex: 30.8,
473
- normalizedScore: 44,
474
-
408
+ "gemma-3-27b-instruct": {
475
409
  // AA specific benchmarks
476
- codingIndex: 26.7,
477
- mathIndex: 21,
410
+ codingIndex: 9.6,
411
+ mathIndex: 20.7,
478
412
 
479
413
  // Academic benchmarks
480
- mmluPro: 0.803,
481
- gpqa: 0.656,
482
- hle: 0.048,
414
+ mmluPro: 0.669,
415
+ gpqa: 0.428,
416
+ hle: 0.047,
483
417
 
484
418
  // Capabilities
485
419
  contextWindow: 8192,
@@ -487,21 +421,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
487
421
  supportsVision: false,
488
422
 
489
423
  // Metadata
490
- lastUpdated: "2026-04-06",
424
+ lastUpdated: "2026-06-01",
425
+ originalModel: "Gemma 3 27B Instruct",
491
426
  },
492
- "claude-2.1": {
493
- // AA Intelligence Index (composite score)
494
- intelligenceIndex: 9.3,
495
- normalizedScore: 13,
496
-
427
+ "gemini-2.5-flash-preview-sep-25-reasoning": {
497
428
  // AA specific benchmarks
498
- codingIndex: 14,
499
- mathIndex: undefined,
429
+ codingIndex: 24.6,
430
+ mathIndex: 78.3,
500
431
 
501
432
  // Academic benchmarks
502
- mmluPro: 0.495,
503
- gpqa: 0.319,
504
- hle: 0.042,
433
+ mmluPro: 0.842,
434
+ gpqa: 0.793,
435
+ hle: 0.127,
505
436
 
506
437
  // Capabilities
507
438
  contextWindow: 8192,
@@ -509,21 +440,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
509
440
  supportsVision: false,
510
441
 
511
442
  // Metadata
512
- lastUpdated: "2026-04-06",
443
+ lastUpdated: "2026-06-01",
444
+ originalModel: "Gemini 2.5 Flash Preview (Sep '25) (Reasoning)",
513
445
  },
514
- "claude-3.7-sonnet-reasoning": {
515
- // AA Intelligence Index (composite score)
516
- intelligenceIndex: 34.7,
517
- normalizedScore: 50,
518
-
446
+ "gemini-1.5-pro-may-24": {
519
447
  // AA specific benchmarks
520
- codingIndex: 27.6,
521
- mathIndex: 56.3,
448
+ codingIndex: 19.8,
449
+ mathIndex: undefined,
522
450
 
523
451
  // Academic benchmarks
524
- mmluPro: 0.837,
525
- gpqa: 0.772,
526
- hle: 0.103,
452
+ mmluPro: 0.657,
453
+ gpqa: 0.371,
454
+ hle: 0.039,
527
455
 
528
456
  // Capabilities
529
457
  contextWindow: 8192,
@@ -531,21 +459,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
531
459
  supportsVision: false,
532
460
 
533
461
  // Metadata
534
- lastUpdated: "2026-04-06",
462
+ lastUpdated: "2026-06-01",
463
+ originalModel: "Gemini 1.5 Pro (May '24)",
535
464
  },
536
- "claude-4.1-opus-non-reasoning": {
537
- // AA Intelligence Index (composite score)
538
- intelligenceIndex: 36,
539
- normalizedScore: 51,
540
-
465
+ "gemma-3-4b-instruct": {
541
466
  // AA specific benchmarks
542
- codingIndex: undefined,
543
- mathIndex: undefined,
467
+ codingIndex: 2.9,
468
+ mathIndex: 12.7,
544
469
 
545
470
  // Academic benchmarks
546
- mmluPro: undefined,
547
- gpqa: undefined,
548
- hle: undefined,
471
+ mmluPro: 0.417,
472
+ gpqa: 0.291,
473
+ hle: 0.052,
549
474
 
550
475
  // Capabilities
551
476
  contextWindow: 8192,
@@ -553,21 +478,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
553
478
  supportsVision: false,
554
479
 
555
480
  // Metadata
556
- lastUpdated: "2026-04-06",
481
+ lastUpdated: "2026-06-01",
482
+ originalModel: "Gemma 3 4B Instruct",
557
483
  },
558
- "claude-4.1-opus-reasoning": {
559
- // AA Intelligence Index (composite score)
560
- intelligenceIndex: 42,
561
- normalizedScore: 60,
562
-
484
+ "gemini-2.5-pro-preview-may-25": {
563
485
  // AA specific benchmarks
564
- codingIndex: 36.5,
565
- mathIndex: 80.3,
486
+ codingIndex: undefined,
487
+ mathIndex: undefined,
566
488
 
567
489
  // Academic benchmarks
568
- mmluPro: 0.88,
569
- gpqa: 0.809,
570
- hle: 0.119,
490
+ mmluPro: 0.837,
491
+ gpqa: 0.822,
492
+ hle: 0.154,
571
493
 
572
494
  // Capabilities
573
495
  contextWindow: 8192,
@@ -575,21 +497,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
575
497
  supportsVision: false,
576
498
 
577
499
  // Metadata
578
- lastUpdated: "2026-04-06",
500
+ lastUpdated: "2026-06-01",
501
+ originalModel: "Gemini 2.5 Pro Preview (May' 25)",
579
502
  },
580
- "claude-4-sonnet-non-reasoning": {
581
- // AA Intelligence Index (composite score)
582
- intelligenceIndex: 33,
583
- normalizedScore: 47,
584
-
503
+ "gemini-2.0-flash-thinking-experimental-jan-25": {
585
504
  // AA specific benchmarks
586
- codingIndex: 30.6,
587
- mathIndex: 38,
505
+ codingIndex: 24.1,
506
+ mathIndex: undefined,
588
507
 
589
508
  // Academic benchmarks
590
- mmluPro: 0.837,
591
- gpqa: 0.683,
592
- hle: 0.04,
509
+ mmluPro: 0.798,
510
+ gpqa: 0.701,
511
+ hle: 0.071,
593
512
 
594
513
  // Capabilities
595
514
  contextWindow: 8192,
@@ -597,21 +516,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
597
516
  supportsVision: false,
598
517
 
599
518
  // Metadata
600
- lastUpdated: "2026-04-06",
519
+ lastUpdated: "2026-06-01",
520
+ originalModel: "Gemini 2.0 Flash Thinking Experimental (Jan '25)",
601
521
  },
602
- "claude-4-opus-non-reasoning": {
603
- // AA Intelligence Index (composite score)
604
- intelligenceIndex: 33,
605
- normalizedScore: 47,
606
-
522
+ "gemma-3n-e4b-instruct-preview-may-25": {
607
523
  // AA specific benchmarks
608
524
  codingIndex: undefined,
609
- mathIndex: 36.3,
525
+ mathIndex: undefined,
610
526
 
611
527
  // Academic benchmarks
612
- mmluPro: 0.86,
613
- gpqa: 0.701,
614
- hle: 0.059,
528
+ mmluPro: 0.483,
529
+ gpqa: 0.278,
530
+ hle: 0.049,
615
531
 
616
532
  // Capabilities
617
533
  contextWindow: 8192,
@@ -619,21 +535,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
619
535
  supportsVision: false,
620
536
 
621
537
  // Metadata
622
- lastUpdated: "2026-04-06",
538
+ lastUpdated: "2026-06-01",
539
+ originalModel: "Gemma 3n E4B Instruct Preview (May '25)",
623
540
  },
624
- "claude-4-sonnet-reasoning": {
625
- // AA Intelligence Index (composite score)
626
- intelligenceIndex: 38.7,
627
- normalizedScore: 55,
628
-
541
+ "gemini-1.0-ultra": {
629
542
  // AA specific benchmarks
630
- codingIndex: 34.1,
631
- mathIndex: 74.3,
543
+ codingIndex: 17.6,
544
+ mathIndex: undefined,
632
545
 
633
546
  // Academic benchmarks
634
- mmluPro: 0.842,
635
- gpqa: 0.777,
636
- hle: 0.096,
547
+ mmluPro: undefined,
548
+ gpqa: undefined,
549
+ hle: undefined,
637
550
 
638
551
  // Capabilities
639
552
  contextWindow: 8192,
@@ -641,43 +554,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
641
554
  supportsVision: false,
642
555
 
643
556
  // Metadata
644
- lastUpdated: "2026-04-06",
557
+ lastUpdated: "2026-06-01",
558
+ originalModel: "Gemini 1.0 Ultra",
645
559
  },
646
- "claude-opus-4.5-non-reasoning": {
647
- // AA Intelligence Index (composite score)
648
- intelligenceIndex: 43.1,
649
- normalizedScore: 62,
650
-
560
+ "gemma-3n-e4b-instruct": {
651
561
  // AA specific benchmarks
652
- codingIndex: 42.9,
653
- mathIndex: 62.7,
654
-
655
- // Academic benchmarks
656
- mmluPro: 0.889,
657
- gpqa: 0.81,
658
- hle: 0.129,
659
-
660
- // Capabilities
661
- contextWindow: 8192,
662
- supportsReasoning: false,
663
- supportsVision: false,
664
-
665
- // Metadata
666
- lastUpdated: "2026-04-06",
667
- },
668
- "claude-opus-4.5-reasoning": {
669
- // AA Intelligence Index (composite score)
670
- intelligenceIndex: 49.7,
671
- normalizedScore: 71,
672
-
673
- // AA specific benchmarks
674
- codingIndex: 47.8,
675
- mathIndex: 91.3,
562
+ codingIndex: 4.2,
563
+ mathIndex: 14.3,
676
564
 
677
565
  // Academic benchmarks
678
- mmluPro: 0.895,
679
- gpqa: 0.866,
680
- hle: 0.284,
566
+ mmluPro: 0.488,
567
+ gpqa: 0.296,
568
+ hle: 0.044,
681
569
 
682
570
  // Capabilities
683
571
  contextWindow: 8192,
@@ -685,21 +573,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
685
573
  supportsVision: false,
686
574
 
687
575
  // Metadata
688
- lastUpdated: "2026-04-06",
576
+ lastUpdated: "2026-06-01",
577
+ originalModel: "Gemma 3n E4B Instruct",
689
578
  },
690
- "claude-4-opus-reasoning": {
691
- // AA Intelligence Index (composite score)
692
- intelligenceIndex: 39,
693
- normalizedScore: 56,
694
-
579
+ "gemini-2.5-flash-lite-reasoning": {
695
580
  // AA specific benchmarks
696
- codingIndex: 34,
697
- mathIndex: 73.3,
581
+ codingIndex: 9.5,
582
+ mathIndex: 53.3,
698
583
 
699
584
  // Academic benchmarks
700
- mmluPro: 0.873,
701
- gpqa: 0.796,
702
- hle: 0.117,
585
+ mmluPro: 0.759,
586
+ gpqa: 0.625,
587
+ hle: 0.064,
703
588
 
704
589
  // Capabilities
705
590
  contextWindow: 8192,
@@ -707,21 +592,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
707
592
  supportsVision: false,
708
593
 
709
594
  // Metadata
710
- lastUpdated: "2026-04-06",
595
+ lastUpdated: "2026-06-01",
596
+ originalModel: "Gemini 2.5 Flash-Lite (Reasoning)",
711
597
  },
712
- "claude-4.5-sonnet-non-reasoning": {
713
- // AA Intelligence Index (composite score)
714
- intelligenceIndex: 37.1,
715
- normalizedScore: 53,
716
-
598
+ "gemini-2.0-flash-thinking-experimental-dec-24": {
717
599
  // AA specific benchmarks
718
- codingIndex: 33.5,
719
- mathIndex: 37,
600
+ codingIndex: undefined,
601
+ mathIndex: undefined,
720
602
 
721
603
  // Academic benchmarks
722
- mmluPro: 0.86,
723
- gpqa: 0.727,
724
- hle: 0.071,
604
+ mmluPro: undefined,
605
+ gpqa: undefined,
606
+ hle: undefined,
725
607
 
726
608
  // Capabilities
727
609
  contextWindow: 8192,
@@ -729,21 +611,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
729
611
  supportsVision: false,
730
612
 
731
613
  // Metadata
732
- lastUpdated: "2026-04-06",
614
+ lastUpdated: "2026-06-01",
615
+ originalModel: "Gemini 2.0 Flash Thinking Experimental (Dec '24)",
733
616
  },
734
- "claude-4.5-sonnet-reasoning": {
735
- // AA Intelligence Index (composite score)
736
- intelligenceIndex: 43,
737
- normalizedScore: 61,
738
-
617
+ "gemini-2.5-flash-reasoning": {
739
618
  // AA specific benchmarks
740
- codingIndex: 38.6,
741
- mathIndex: 88,
619
+ codingIndex: 22.2,
620
+ mathIndex: 73.3,
742
621
 
743
622
  // Academic benchmarks
744
- mmluPro: 0.875,
745
- gpqa: 0.834,
746
- hle: 0.173,
623
+ mmluPro: 0.832,
624
+ gpqa: 0.79,
625
+ hle: 0.111,
747
626
 
748
627
  // Capabilities
749
628
  contextWindow: 8192,
@@ -751,21 +630,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
751
630
  supportsVision: false,
752
631
 
753
632
  // Metadata
754
- lastUpdated: "2026-04-06",
633
+ lastUpdated: "2026-06-01",
634
+ originalModel: "Gemini 2.5 Flash (Reasoning)",
755
635
  },
756
- "claude-2": {
757
- // AA Intelligence Index (composite score)
758
- intelligenceIndex: 9.1,
759
- normalizedScore: 13,
760
-
636
+ "gemini-1.5-flash-may-24": {
761
637
  // AA specific benchmarks
762
- codingIndex: 12.9,
638
+ codingIndex: undefined,
763
639
  mathIndex: undefined,
764
640
 
765
641
  // Academic benchmarks
766
- mmluPro: 0.486,
767
- gpqa: 0.344,
768
- hle: undefined,
642
+ mmluPro: 0.574,
643
+ gpqa: 0.324,
644
+ hle: 0.042,
769
645
 
770
646
  // Capabilities
771
647
  contextWindow: 8192,
@@ -773,21 +649,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
773
649
  supportsVision: false,
774
650
 
775
651
  // Metadata
776
- lastUpdated: "2026-04-06",
652
+ lastUpdated: "2026-06-01",
653
+ originalModel: "Gemini 1.5 Flash (May '24)",
777
654
  },
778
- "mistral-large-2-nov-24": {
779
- // AA Intelligence Index (composite score)
780
- intelligenceIndex: 15.1,
781
- normalizedScore: 22,
782
-
655
+ "gemini-2.5-flash-preview-reasoning": {
783
656
  // AA specific benchmarks
784
- codingIndex: 13.8,
785
- mathIndex: 14,
657
+ codingIndex: undefined,
658
+ mathIndex: undefined,
786
659
 
787
660
  // Academic benchmarks
788
- mmluPro: 0.697,
789
- gpqa: 0.486,
790
- hle: 0.04,
661
+ mmluPro: 0.8,
662
+ gpqa: 0.698,
663
+ hle: 0.116,
791
664
 
792
665
  // Capabilities
793
666
  contextWindow: 8192,
@@ -795,21 +668,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
795
668
  supportsVision: false,
796
669
 
797
670
  // Metadata
798
- lastUpdated: "2026-04-06",
671
+ lastUpdated: "2026-06-01",
672
+ originalModel: "Gemini 2.5 Flash Preview (Reasoning)",
799
673
  },
800
- "mistral-large-2-jul-24": {
801
- // AA Intelligence Index (composite score)
802
- intelligenceIndex: 13,
803
- normalizedScore: 19,
804
-
674
+ "gemini-2.5-flash-lite-preview-sep-25-reasoning": {
805
675
  // AA specific benchmarks
806
- codingIndex: undefined,
807
- mathIndex: 0,
676
+ codingIndex: 18.2,
677
+ mathIndex: 68.7,
808
678
 
809
679
  // Academic benchmarks
810
- mmluPro: 0.683,
811
- gpqa: 0.472,
812
- hle: 0.032,
680
+ mmluPro: 0.808,
681
+ gpqa: 0.709,
682
+ hle: 0.066,
813
683
 
814
684
  // Capabilities
815
685
  contextWindow: 8192,
@@ -817,21 +687,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
817
687
  supportsVision: false,
818
688
 
819
689
  // Metadata
820
- lastUpdated: "2026-04-06",
690
+ lastUpdated: "2026-06-01",
691
+ originalModel: "Gemini 2.5 Flash-Lite Preview (Sep '25) (Reasoning)",
821
692
  },
822
- "pixtral-large": {
823
- // AA Intelligence Index (composite score)
824
- intelligenceIndex: 14,
825
- normalizedScore: 20,
826
-
693
+ "gemini-2.5-flash-preview-sep-25-non-reasoning": {
827
694
  // AA specific benchmarks
828
- codingIndex: undefined,
829
- mathIndex: 2.3,
695
+ codingIndex: 22.1,
696
+ mathIndex: 56.7,
830
697
 
831
698
  // Academic benchmarks
832
- mmluPro: 0.701,
833
- gpqa: 0.505,
834
- hle: 0.036,
699
+ mmluPro: 0.836,
700
+ gpqa: 0.766,
701
+ hle: 0.078,
835
702
 
836
703
  // Capabilities
837
704
  contextWindow: 8192,
@@ -839,21 +706,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
839
706
  supportsVision: false,
840
707
 
841
708
  // Metadata
842
- lastUpdated: "2026-04-06",
709
+ lastUpdated: "2026-06-01",
710
+ originalModel: "Gemini 2.5 Flash Preview (Sep '25) (Non-reasoning)",
843
711
  },
844
- "mistral-small-3": {
845
- // AA Intelligence Index (composite score)
846
- intelligenceIndex: 12.7,
847
- normalizedScore: 18,
848
-
712
+ "gemini-2.5-flash-preview-non-reasoning": {
849
713
  // AA specific benchmarks
850
714
  codingIndex: undefined,
851
- mathIndex: 4.3,
715
+ mathIndex: undefined,
852
716
 
853
717
  // Academic benchmarks
854
- mmluPro: 0.652,
855
- gpqa: 0.462,
856
- hle: 0.041,
718
+ mmluPro: 0.783,
719
+ gpqa: 0.594,
720
+ hle: 0.05,
857
721
 
858
722
  // Capabilities
859
723
  contextWindow: 8192,
@@ -861,21 +725,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
861
725
  supportsVision: false,
862
726
 
863
727
  // Metadata
864
- lastUpdated: "2026-04-06",
728
+ lastUpdated: "2026-06-01",
729
+ originalModel: "Gemini 2.5 Flash Preview (Non-reasoning)",
865
730
  },
866
- "mistral-small-sep-24": {
867
- // AA Intelligence Index (composite score)
868
- intelligenceIndex: 10.2,
869
- normalizedScore: 15,
870
-
731
+ "gemini-2.5-pro-preview-mar-25": {
871
732
  // AA specific benchmarks
872
- codingIndex: undefined,
733
+ codingIndex: 46.7,
873
734
  mathIndex: undefined,
874
735
 
875
736
  // Academic benchmarks
876
- mmluPro: 0.529,
877
- gpqa: 0.381,
878
- hle: 0.043,
737
+ mmluPro: 0.858,
738
+ gpqa: 0.836,
739
+ hle: 0.171,
879
740
 
880
741
  // Capabilities
881
742
  contextWindow: 8192,
@@ -883,21 +744,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
883
744
  supportsVision: false,
884
745
 
885
746
  // Metadata
886
- lastUpdated: "2026-04-06",
747
+ lastUpdated: "2026-06-01",
748
+ originalModel: "Gemini 2.5 Pro Preview (Mar' 25)",
887
749
  },
888
- "mixtral-8x22b-instruct": {
889
- // AA Intelligence Index (composite score)
890
- intelligenceIndex: 9.8,
891
- normalizedScore: 14,
892
-
750
+ "gemini-1.0-pro": {
893
751
  // AA specific benchmarks
894
752
  codingIndex: undefined,
895
753
  mathIndex: undefined,
896
754
 
897
755
  // Academic benchmarks
898
- mmluPro: 0.537,
899
- gpqa: 0.332,
900
- hle: 0.041,
756
+ mmluPro: 0.431,
757
+ gpqa: 0.277,
758
+ hle: 0.046,
901
759
 
902
760
  // Capabilities
903
761
  contextWindow: 8192,
@@ -905,21 +763,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
905
763
  supportsVision: false,
906
764
 
907
765
  // Metadata
908
- lastUpdated: "2026-04-06",
766
+ lastUpdated: "2026-06-01",
767
+ originalModel: "Gemini 1.0 Pro",
909
768
  },
910
- "mistral-small-feb-24": {
911
- // AA Intelligence Index (composite score)
912
- intelligenceIndex: 9,
913
- normalizedScore: 13,
914
-
769
+ "gemini-2.5-flash-non-reasoning": {
915
770
  // AA specific benchmarks
916
- codingIndex: undefined,
917
- mathIndex: undefined,
771
+ codingIndex: 17.8,
772
+ mathIndex: 60.3,
918
773
 
919
774
  // Academic benchmarks
920
- mmluPro: 0.419,
921
- gpqa: 0.302,
922
- hle: 0.044,
775
+ mmluPro: 0.809,
776
+ gpqa: 0.683,
777
+ hle: 0.051,
923
778
 
924
779
  // Capabilities
925
780
  contextWindow: 8192,
@@ -927,21 +782,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
927
782
  supportsVision: false,
928
783
 
929
784
  // Metadata
930
- lastUpdated: "2026-04-06",
785
+ lastUpdated: "2026-06-01",
786
+ originalModel: "Gemini 2.5 Flash (Non-reasoning)",
931
787
  },
932
- "mistral-large-feb-24": {
933
- // AA Intelligence Index (composite score)
934
- intelligenceIndex: 9.9,
935
- normalizedScore: 14,
936
-
788
+ "gemini-2.5-flash-lite-preview-sep-25-non-reasoning": {
937
789
  // AA specific benchmarks
938
- codingIndex: undefined,
939
- mathIndex: undefined,
790
+ codingIndex: 14.5,
791
+ mathIndex: 46.7,
940
792
 
941
793
  // Academic benchmarks
942
- mmluPro: 0.515,
943
- gpqa: 0.351,
944
- hle: 0.034,
794
+ mmluPro: 0.796,
795
+ gpqa: 0.651,
796
+ hle: 0.046,
945
797
 
946
798
  // Capabilities
947
799
  contextWindow: 8192,
@@ -949,21 +801,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
949
801
  supportsVision: false,
950
802
 
951
803
  // Metadata
952
- lastUpdated: "2026-04-06",
804
+ lastUpdated: "2026-06-01",
805
+ originalModel: "Gemini 2.5 Flash-Lite Preview (Sep '25) (Non-reasoning)",
953
806
  },
954
- "mixtral-8x7b-instruct": {
955
- // AA Intelligence Index (composite score)
956
- intelligenceIndex: 7.7,
957
- normalizedScore: 11,
958
-
807
+ "gemini-3-flash-preview-non-reasoning": {
959
808
  // AA specific benchmarks
960
- codingIndex: undefined,
961
- mathIndex: undefined,
809
+ codingIndex: 37.8,
810
+ mathIndex: 55.7,
962
811
 
963
812
  // Academic benchmarks
964
- mmluPro: 0.387,
965
- gpqa: 0.292,
966
- hle: 0.045,
813
+ mmluPro: 0.882,
814
+ gpqa: 0.812,
815
+ hle: 0.141,
967
816
 
968
817
  // Capabilities
969
818
  contextWindow: 8192,
@@ -971,21 +820,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
971
820
  supportsVision: false,
972
821
 
973
822
  // Metadata
974
- lastUpdated: "2026-04-06",
823
+ lastUpdated: "2026-06-01",
824
+ originalModel: "Gemini 3 Flash Preview (Non-reasoning)",
975
825
  },
976
- "mistral-7b-instruct": {
977
- // AA Intelligence Index (composite score)
978
- intelligenceIndex: 7.4,
979
- normalizedScore: 11,
980
-
826
+ "gemini-2.0-flash-lite-feb-25": {
981
827
  // AA specific benchmarks
982
828
  codingIndex: undefined,
983
829
  mathIndex: undefined,
984
830
 
985
831
  // Academic benchmarks
986
- mmluPro: 0.245,
987
- gpqa: 0.177,
988
- hle: 0.043,
832
+ mmluPro: 0.724,
833
+ gpqa: 0.535,
834
+ hle: 0.036,
989
835
 
990
836
  // Capabilities
991
837
  contextWindow: 8192,
@@ -993,21 +839,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
993
839
  supportsVision: false,
994
840
 
995
841
  // Metadata
996
- lastUpdated: "2026-04-06",
842
+ lastUpdated: "2026-06-01",
843
+ originalModel: "Gemini 2.0 Flash-Lite (Feb '25)",
997
844
  },
998
- "mistral-small-3.1": {
999
- // AA Intelligence Index (composite score)
1000
- intelligenceIndex: 14.5,
1001
- normalizedScore: 21,
1002
-
845
+ "gemini-3-flash-preview-reasoning": {
1003
846
  // AA specific benchmarks
1004
- codingIndex: 13.9,
1005
- mathIndex: 3.7,
847
+ codingIndex: 42.6,
848
+ mathIndex: 97,
1006
849
 
1007
850
  // Academic benchmarks
1008
- mmluPro: 0.659,
1009
- gpqa: 0.454,
1010
- hle: 0.048,
851
+ mmluPro: 0.89,
852
+ gpqa: 0.898,
853
+ hle: 0.347,
1011
854
 
1012
855
  // Capabilities
1013
856
  contextWindow: 8192,
@@ -1015,21 +858,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1015
858
  supportsVision: false,
1016
859
 
1017
860
  // Metadata
1018
- lastUpdated: "2026-04-06",
861
+ lastUpdated: "2026-06-01",
862
+ originalModel: "Gemini 3 Flash Preview (Reasoning)",
1019
863
  },
1020
- "mistral-medium-3": {
1021
- // AA Intelligence Index (composite score)
1022
- intelligenceIndex: 18.8,
1023
- normalizedScore: 27,
1024
-
864
+ "gemini-3-pro-preview-low": {
1025
865
  // AA specific benchmarks
1026
- codingIndex: 13.6,
1027
- mathIndex: 30.3,
866
+ codingIndex: 39.4,
867
+ mathIndex: 86.7,
1028
868
 
1029
869
  // Academic benchmarks
1030
- mmluPro: 0.76,
1031
- gpqa: 0.578,
1032
- hle: 0.043,
870
+ mmluPro: 0.895,
871
+ gpqa: 0.887,
872
+ hle: 0.276,
1033
873
 
1034
874
  // Capabilities
1035
875
  contextWindow: 8192,
@@ -1037,21 +877,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1037
877
  supportsVision: false,
1038
878
 
1039
879
  // Metadata
1040
- lastUpdated: "2026-04-06",
880
+ lastUpdated: "2026-06-01",
881
+ originalModel: "Gemini 3 Pro Preview (low)",
1041
882
  },
1042
- "mistral-saba": {
1043
- // AA Intelligence Index (composite score)
1044
- intelligenceIndex: 12.1,
1045
- normalizedScore: 17,
1046
-
883
+ "palm-2": {
1047
884
  // AA specific benchmarks
1048
- codingIndex: undefined,
885
+ codingIndex: 4.6,
1049
886
  mathIndex: undefined,
1050
887
 
1051
888
  // Academic benchmarks
1052
- mmluPro: 0.611,
1053
- gpqa: 0.424,
1054
- hle: 0.041,
889
+ mmluPro: undefined,
890
+ gpqa: undefined,
891
+ hle: undefined,
1055
892
 
1056
893
  // Capabilities
1057
894
  contextWindow: 8192,
@@ -1059,21 +896,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1059
896
  supportsVision: false,
1060
897
 
1061
898
  // Metadata
1062
- lastUpdated: "2026-04-06",
899
+ lastUpdated: "2026-06-01",
900
+ originalModel: "PALM-2",
1063
901
  },
1064
- "mistral-small-3.2": {
1065
- // AA Intelligence Index (composite score)
1066
- intelligenceIndex: 15.1,
1067
- normalizedScore: 22,
1068
-
902
+ "claude-3.5-sonnet-oct-24": {
1069
903
  // AA specific benchmarks
1070
- codingIndex: 13.3,
1071
- mathIndex: 27,
904
+ codingIndex: 30.2,
905
+ mathIndex: undefined,
1072
906
 
1073
907
  // Academic benchmarks
1074
- mmluPro: 0.681,
1075
- gpqa: 0.505,
1076
- hle: 0.043,
908
+ mmluPro: 0.772,
909
+ gpqa: 0.599,
910
+ hle: 0.039,
1077
911
 
1078
912
  // Capabilities
1079
913
  contextWindow: 8192,
@@ -1081,21 +915,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1081
915
  supportsVision: false,
1082
916
 
1083
917
  // Metadata
1084
- lastUpdated: "2026-04-06",
918
+ lastUpdated: "2026-06-01",
919
+ originalModel: "Claude 3.5 Sonnet (Oct '24)",
1085
920
  },
1086
- "magistral-medium-1": {
1087
- // AA Intelligence Index (composite score)
1088
- intelligenceIndex: 18.8,
1089
- normalizedScore: 27,
1090
-
921
+ "claude-3.5-sonnet-june-24": {
1091
922
  // AA specific benchmarks
1092
- codingIndex: 16,
1093
- mathIndex: 40.3,
923
+ codingIndex: 26,
924
+ mathIndex: undefined,
1094
925
 
1095
926
  // Academic benchmarks
1096
- mmluPro: 0.753,
1097
- gpqa: 0.679,
1098
- hle: 0.095,
927
+ mmluPro: 0.751,
928
+ gpqa: 0.56,
929
+ hle: 0.037,
1099
930
 
1100
931
  // Capabilities
1101
932
  contextWindow: 8192,
@@ -1103,21 +934,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1103
934
  supportsVision: false,
1104
935
 
1105
936
  // Metadata
1106
- lastUpdated: "2026-04-06",
937
+ lastUpdated: "2026-06-01",
938
+ originalModel: "Claude 3.5 Sonnet (June '24)",
1107
939
  },
1108
- "devstral-medium": {
1109
- // AA Intelligence Index (composite score)
1110
- intelligenceIndex: 18.7,
1111
- normalizedScore: 27,
1112
-
940
+ "claude-3-opus": {
1113
941
  // AA specific benchmarks
1114
- codingIndex: 15.9,
1115
- mathIndex: 4.7,
942
+ codingIndex: 19.5,
943
+ mathIndex: undefined,
1116
944
 
1117
945
  // Academic benchmarks
1118
- mmluPro: 0.708,
1119
- gpqa: 0.492,
1120
- hle: 0.038,
946
+ mmluPro: 0.696,
947
+ gpqa: 0.489,
948
+ hle: 0.031,
1121
949
 
1122
950
  // Capabilities
1123
951
  contextWindow: 8192,
@@ -1125,21 +953,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1125
953
  supportsVision: false,
1126
954
 
1127
955
  // Metadata
1128
- lastUpdated: "2026-04-06",
956
+ lastUpdated: "2026-06-01",
957
+ originalModel: "Claude 3 Opus",
1129
958
  },
1130
- "magistral-small-1": {
1131
- // AA Intelligence Index (composite score)
1132
- intelligenceIndex: 16.8,
1133
- normalizedScore: 24,
1134
-
959
+ "claude-3.5-haiku": {
1135
960
  // AA specific benchmarks
1136
- codingIndex: 11.1,
1137
- mathIndex: 41.3,
961
+ codingIndex: 10.7,
962
+ mathIndex: undefined,
1138
963
 
1139
964
  // Academic benchmarks
1140
- mmluPro: 0.746,
1141
- gpqa: 0.641,
1142
- hle: 0.072,
965
+ mmluPro: 0.634,
966
+ gpqa: 0.408,
967
+ hle: 0.035,
1143
968
 
1144
969
  // Capabilities
1145
970
  contextWindow: 8192,
@@ -1147,21 +972,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1147
972
  supportsVision: false,
1148
973
 
1149
974
  // Metadata
1150
- lastUpdated: "2026-04-06",
975
+ lastUpdated: "2026-06-01",
976
+ originalModel: "Claude 3.5 Haiku",
1151
977
  },
1152
- "mistral-medium": {
1153
- // AA Intelligence Index (composite score)
1154
- intelligenceIndex: 9,
1155
- normalizedScore: 13,
1156
-
978
+ "claude-3-sonnet": {
1157
979
  // AA specific benchmarks
1158
980
  codingIndex: undefined,
1159
981
  mathIndex: undefined,
1160
982
 
1161
983
  // Academic benchmarks
1162
- mmluPro: 0.491,
1163
- gpqa: 0.349,
1164
- hle: 0.034,
984
+ mmluPro: 0.579,
985
+ gpqa: 0.4,
986
+ hle: 0.038,
1165
987
 
1166
988
  // Capabilities
1167
989
  contextWindow: 8192,
@@ -1169,21 +991,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1169
991
  supportsVision: false,
1170
992
 
1171
993
  // Metadata
1172
- lastUpdated: "2026-04-06",
994
+ lastUpdated: "2026-06-01",
995
+ originalModel: "Claude 3 Sonnet",
1173
996
  },
1174
- "devstral-small-jul-25": {
1175
- // AA Intelligence Index (composite score)
1176
- intelligenceIndex: 15.2,
1177
- normalizedScore: 22,
1178
-
997
+ "claude-3-haiku": {
1179
998
  // AA specific benchmarks
1180
- codingIndex: 12.1,
1181
- mathIndex: 29.3,
999
+ codingIndex: 6.7,
1000
+ mathIndex: undefined,
1182
1001
 
1183
1002
  // Academic benchmarks
1184
- mmluPro: 0.622,
1185
- gpqa: 0.414,
1186
- hle: 0.037,
1003
+ mmluPro: undefined,
1004
+ gpqa: 0.374,
1005
+ hle: 0.039,
1187
1006
 
1188
1007
  // Capabilities
1189
1008
  contextWindow: 8192,
@@ -1191,21 +1010,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1191
1010
  supportsVision: false,
1192
1011
 
1193
1012
  // Metadata
1194
- lastUpdated: "2026-04-06",
1013
+ lastUpdated: "2026-06-01",
1014
+ originalModel: "Claude 3 Haiku",
1195
1015
  },
1196
- "devstral-small-may-25": {
1197
- // AA Intelligence Index (composite score)
1198
- intelligenceIndex: 18,
1199
- normalizedScore: 26,
1200
-
1016
+ "claude-instant": {
1201
1017
  // AA specific benchmarks
1202
- codingIndex: 12.2,
1018
+ codingIndex: 7.8,
1203
1019
  mathIndex: undefined,
1204
1020
 
1205
1021
  // Academic benchmarks
1206
- mmluPro: 0.632,
1207
- gpqa: 0.434,
1208
- hle: 0.04,
1022
+ mmluPro: 0.434,
1023
+ gpqa: 0.33,
1024
+ hle: 0.038,
1209
1025
 
1210
1026
  // Capabilities
1211
1027
  contextWindow: 8192,
@@ -1213,21 +1029,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1213
1029
  supportsVision: false,
1214
1030
 
1215
1031
  // Metadata
1216
- lastUpdated: "2026-04-06",
1032
+ lastUpdated: "2026-06-01",
1033
+ originalModel: "Claude Instant",
1217
1034
  },
1218
- "deepseek-r1-distill-qwen-32b": {
1219
- // AA Intelligence Index (composite score)
1220
- intelligenceIndex: 17.2,
1221
- normalizedScore: 25,
1222
-
1035
+ "claude-4.5-sonnet-reasoning": {
1223
1036
  // AA specific benchmarks
1224
- codingIndex: undefined,
1225
- mathIndex: 63,
1037
+ codingIndex: 38.6,
1038
+ mathIndex: 88,
1226
1039
 
1227
1040
  // Academic benchmarks
1228
- mmluPro: 0.739,
1229
- gpqa: 0.615,
1230
- hle: 0.055,
1041
+ mmluPro: 0.875,
1042
+ gpqa: 0.834,
1043
+ hle: 0.173,
1231
1044
 
1232
1045
  // Capabilities
1233
1046
  contextWindow: 8192,
@@ -1235,21 +1048,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1235
1048
  supportsVision: false,
1236
1049
 
1237
1050
  // Metadata
1238
- lastUpdated: "2026-04-06",
1051
+ lastUpdated: "2026-06-01",
1052
+ originalModel: "Claude 4.5 Sonnet (Reasoning)",
1239
1053
  },
1240
- "deepseek-v3-dec-24": {
1241
- // AA Intelligence Index (composite score)
1242
- intelligenceIndex: 16.5,
1243
- normalizedScore: 24,
1244
-
1054
+ "claude-4-sonnet-non-reasoning": {
1245
1055
  // AA specific benchmarks
1246
- codingIndex: 16.4,
1247
- mathIndex: 26,
1056
+ codingIndex: 30.6,
1057
+ mathIndex: 38,
1248
1058
 
1249
1059
  // Academic benchmarks
1250
- mmluPro: 0.752,
1251
- gpqa: 0.557,
1252
- hle: 0.036,
1060
+ mmluPro: 0.837,
1061
+ gpqa: 0.683,
1062
+ hle: 0.04,
1253
1063
 
1254
1064
  // Capabilities
1255
1065
  contextWindow: 8192,
@@ -1257,21 +1067,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1257
1067
  supportsVision: false,
1258
1068
 
1259
1069
  // Metadata
1260
- lastUpdated: "2026-04-06",
1070
+ lastUpdated: "2026-06-01",
1071
+ originalModel: "Claude 4 Sonnet (Non-reasoning)",
1261
1072
  },
1262
- "deepseek-r1-distill-qwen-14b": {
1263
- // AA Intelligence Index (composite score)
1264
- intelligenceIndex: 15.8,
1265
- normalizedScore: 23,
1266
-
1073
+ "claude-3.7-sonnet-non-reasoning": {
1267
1074
  // AA specific benchmarks
1268
- codingIndex: undefined,
1269
- mathIndex: 55.7,
1075
+ codingIndex: 26.7,
1076
+ mathIndex: 21,
1270
1077
 
1271
1078
  // Academic benchmarks
1272
- mmluPro: 0.74,
1273
- gpqa: 0.484,
1274
- hle: 0.044,
1079
+ mmluPro: 0.803,
1080
+ gpqa: 0.656,
1081
+ hle: 0.048,
1275
1082
 
1276
1083
  // Capabilities
1277
1084
  contextWindow: 8192,
@@ -1279,20 +1086,17 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1279
1086
  supportsVision: false,
1280
1087
 
1281
1088
  // Metadata
1282
- lastUpdated: "2026-04-06",
1089
+ lastUpdated: "2026-06-01",
1090
+ originalModel: "Claude 3.7 Sonnet (Non-reasoning)",
1283
1091
  },
1284
- "deepseek-v2.5-dec-24": {
1285
- // AA Intelligence Index (composite score)
1286
- intelligenceIndex: 12.5,
1287
- normalizedScore: 18,
1288
-
1092
+ "claude-2.0": {
1289
1093
  // AA specific benchmarks
1290
- codingIndex: undefined,
1094
+ codingIndex: 12.9,
1291
1095
  mathIndex: undefined,
1292
1096
 
1293
1097
  // Academic benchmarks
1294
- mmluPro: undefined,
1295
- gpqa: undefined,
1098
+ mmluPro: 0.486,
1099
+ gpqa: 0.344,
1296
1100
  hle: undefined,
1297
1101
 
1298
1102
  // Capabilities
@@ -1301,13 +1105,10 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1301
1105
  supportsVision: false,
1302
1106
 
1303
1107
  // Metadata
1304
- lastUpdated: "2026-04-06",
1108
+ lastUpdated: "2026-06-01",
1109
+ originalModel: "Claude 2.0",
1305
1110
  },
1306
- "deepseek-coder-v2": {
1307
- // AA Intelligence Index (composite score)
1308
- intelligenceIndex: 10.6,
1309
- normalizedScore: 15,
1310
-
1111
+ "claude-4.1-opus-non-reasoning": {
1311
1112
  // AA specific benchmarks
1312
1113
  codingIndex: undefined,
1313
1114
  mathIndex: undefined,
@@ -1323,21 +1124,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1323
1124
  supportsVision: false,
1324
1125
 
1325
1126
  // Metadata
1326
- lastUpdated: "2026-04-06",
1127
+ lastUpdated: "2026-06-01",
1128
+ originalModel: "Claude 4.1 Opus (Non-reasoning)",
1327
1129
  },
1328
- "deepseek-r1-distill-llama-8b": {
1329
- // AA Intelligence Index (composite score)
1330
- intelligenceIndex: 12.1,
1331
- normalizedScore: 17,
1332
-
1130
+ "claude-4.1-opus-reasoning": {
1333
1131
  // AA specific benchmarks
1334
- codingIndex: undefined,
1335
- mathIndex: 41.3,
1132
+ codingIndex: 36.5,
1133
+ mathIndex: 80.3,
1336
1134
 
1337
1135
  // Academic benchmarks
1338
- mmluPro: 0.543,
1339
- gpqa: 0.302,
1340
- hle: 0.042,
1136
+ mmluPro: 0.88,
1137
+ gpqa: 0.809,
1138
+ hle: 0.119,
1341
1139
 
1342
1140
  // Capabilities
1343
1141
  contextWindow: 8192,
@@ -1345,21 +1143,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1345
1143
  supportsVision: false,
1346
1144
 
1347
1145
  // Metadata
1348
- lastUpdated: "2026-04-06",
1146
+ lastUpdated: "2026-06-01",
1147
+ originalModel: "Claude 4.1 Opus (Reasoning)",
1349
1148
  },
1350
- "deepseek-llm-67b-chat-v1": {
1351
- // AA Intelligence Index (composite score)
1352
- intelligenceIndex: 8.4,
1353
- normalizedScore: 12,
1354
-
1149
+ "claude-4.5-sonnet-non-reasoning": {
1355
1150
  // AA specific benchmarks
1356
- codingIndex: undefined,
1357
- mathIndex: undefined,
1151
+ codingIndex: 33.5,
1152
+ mathIndex: 37,
1358
1153
 
1359
1154
  // Academic benchmarks
1360
- mmluPro: undefined,
1361
- gpqa: undefined,
1362
- hle: undefined,
1155
+ mmluPro: 0.86,
1156
+ gpqa: 0.727,
1157
+ hle: 0.071,
1363
1158
 
1364
1159
  // Capabilities
1365
1160
  contextWindow: 8192,
@@ -1367,21 +1162,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1367
1162
  supportsVision: false,
1368
1163
 
1369
1164
  // Metadata
1370
- lastUpdated: "2026-04-06",
1165
+ lastUpdated: "2026-06-01",
1166
+ originalModel: "Claude 4.5 Sonnet (Non-reasoning)",
1371
1167
  },
1372
- "deepseek-r1-distill-qwen-1.5b": {
1373
- // AA Intelligence Index (composite score)
1374
- intelligenceIndex: 9.1,
1375
- normalizedScore: 13,
1376
-
1168
+ "claude-opus-4.6-adaptive-reasoning-max-effort": {
1377
1169
  // AA specific benchmarks
1378
- codingIndex: undefined,
1379
- mathIndex: 22,
1170
+ codingIndex: 48.1,
1171
+ mathIndex: undefined,
1380
1172
 
1381
1173
  // Academic benchmarks
1382
- mmluPro: 0.269,
1383
- gpqa: 0.098,
1384
- hle: 0.033,
1174
+ mmluPro: undefined,
1175
+ gpqa: 0.896,
1176
+ hle: 0.367,
1385
1177
 
1386
1178
  // Capabilities
1387
1179
  contextWindow: 8192,
@@ -1389,21 +1181,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1389
1181
  supportsVision: false,
1390
1182
 
1391
1183
  // Metadata
1392
- lastUpdated: "2026-04-06",
1184
+ lastUpdated: "2026-06-01",
1185
+ originalModel: "Claude Opus 4.6 (Adaptive Reasoning, Max Effort)",
1393
1186
  },
1394
- "deepseek-v3.1-terminus-non-reasoning": {
1395
- // AA Intelligence Index (composite score)
1396
- intelligenceIndex: 28.5,
1397
- normalizedScore: 41,
1398
-
1187
+ "claude-opus-4.5-reasoning": {
1399
1188
  // AA specific benchmarks
1400
- codingIndex: 31.9,
1401
- mathIndex: 53.7,
1189
+ codingIndex: 47.8,
1190
+ mathIndex: 91.3,
1402
1191
 
1403
1192
  // Academic benchmarks
1404
- mmluPro: 0.836,
1405
- gpqa: 0.751,
1406
- hle: 0.084,
1193
+ mmluPro: 0.895,
1194
+ gpqa: 0.866,
1195
+ hle: 0.284,
1407
1196
 
1408
1197
  // Capabilities
1409
1198
  contextWindow: 8192,
@@ -1411,21 +1200,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1411
1200
  supportsVision: false,
1412
1201
 
1413
1202
  // Metadata
1414
- lastUpdated: "2026-04-06",
1203
+ lastUpdated: "2026-06-01",
1204
+ originalModel: "Claude Opus 4.5 (Reasoning)",
1415
1205
  },
1416
- "deepseek-v3.2-exp-reasoning": {
1417
- // AA Intelligence Index (composite score)
1418
- intelligenceIndex: 32.9,
1419
- normalizedScore: 47,
1420
-
1206
+ "claude-3.7-sonnet-reasoning": {
1421
1207
  // AA specific benchmarks
1422
- codingIndex: 33.3,
1423
- mathIndex: 87.7,
1208
+ codingIndex: 27.6,
1209
+ mathIndex: 56.3,
1424
1210
 
1425
1211
  // Academic benchmarks
1426
- mmluPro: 0.85,
1427
- gpqa: 0.797,
1428
- hle: 0.138,
1212
+ mmluPro: 0.837,
1213
+ gpqa: 0.772,
1214
+ hle: 0.103,
1429
1215
 
1430
1216
  // Capabilities
1431
1217
  contextWindow: 8192,
@@ -1433,21 +1219,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1433
1219
  supportsVision: false,
1434
1220
 
1435
1221
  // Metadata
1436
- lastUpdated: "2026-04-06",
1222
+ lastUpdated: "2026-06-01",
1223
+ originalModel: "Claude 3.7 Sonnet (Reasoning)",
1437
1224
  },
1438
- "deepseek-v3.1-reasoning": {
1439
- // AA Intelligence Index (composite score)
1440
- intelligenceIndex: 27.7,
1441
- normalizedScore: 40,
1442
-
1225
+ "claude-opus-4.6-non-reasoning-high-effort": {
1443
1226
  // AA specific benchmarks
1444
- codingIndex: 29.7,
1445
- mathIndex: 89.7,
1227
+ codingIndex: 47.6,
1228
+ mathIndex: undefined,
1446
1229
 
1447
1230
  // Academic benchmarks
1448
- mmluPro: 0.851,
1449
- gpqa: 0.779,
1450
- hle: 0.13,
1231
+ mmluPro: undefined,
1232
+ gpqa: 0.84,
1233
+ hle: 0.186,
1451
1234
 
1452
1235
  // Capabilities
1453
1236
  contextWindow: 8192,
@@ -1455,21 +1238,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1455
1238
  supportsVision: false,
1456
1239
 
1457
1240
  // Metadata
1458
- lastUpdated: "2026-04-06",
1241
+ lastUpdated: "2026-06-01",
1242
+ originalModel: "Claude Opus 4.6 (Non-reasoning, High Effort)",
1459
1243
  },
1460
- "deepseek-v3.2-exp-non-reasoning": {
1461
- // AA Intelligence Index (composite score)
1462
- intelligenceIndex: 28.4,
1463
- normalizedScore: 41,
1464
-
1244
+ "claude-4-opus-non-reasoning": {
1465
1245
  // AA specific benchmarks
1466
- codingIndex: 30,
1467
- mathIndex: 57.7,
1246
+ codingIndex: undefined,
1247
+ mathIndex: 36.3,
1468
1248
 
1469
1249
  // Academic benchmarks
1470
- mmluPro: 0.836,
1471
- gpqa: 0.738,
1472
- hle: 0.086,
1250
+ mmluPro: 0.86,
1251
+ gpqa: 0.701,
1252
+ hle: 0.059,
1473
1253
 
1474
1254
  // Capabilities
1475
1255
  contextWindow: 8192,
@@ -1477,21 +1257,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1477
1257
  supportsVision: false,
1478
1258
 
1479
1259
  // Metadata
1480
- lastUpdated: "2026-04-06",
1260
+ lastUpdated: "2026-06-01",
1261
+ originalModel: "Claude 4 Opus (Non-reasoning)",
1481
1262
  },
1482
- "deepseek-v3.1-terminus-reasoning": {
1483
- // AA Intelligence Index (composite score)
1484
- intelligenceIndex: 33.9,
1485
- normalizedScore: 48,
1486
-
1263
+ "claude-opus-4.5-non-reasoning": {
1487
1264
  // AA specific benchmarks
1488
- codingIndex: 33.7,
1489
- mathIndex: 89.7,
1265
+ codingIndex: 42.9,
1266
+ mathIndex: 62.7,
1490
1267
 
1491
1268
  // Academic benchmarks
1492
- mmluPro: 0.851,
1493
- gpqa: 0.792,
1494
- hle: 0.152,
1269
+ mmluPro: 0.889,
1270
+ gpqa: 0.81,
1271
+ hle: 0.129,
1495
1272
 
1496
1273
  // Capabilities
1497
1274
  contextWindow: 8192,
@@ -1499,21 +1276,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1499
1276
  supportsVision: false,
1500
1277
 
1501
1278
  // Metadata
1502
- lastUpdated: "2026-04-06",
1279
+ lastUpdated: "2026-06-01",
1280
+ originalModel: "Claude Opus 4.5 (Non-reasoning)",
1503
1281
  },
1504
- "deepseek-v3-0324": {
1505
- // AA Intelligence Index (composite score)
1506
- intelligenceIndex: 22.3,
1507
- normalizedScore: 32,
1508
-
1282
+ "claude-4-sonnet-reasoning": {
1509
1283
  // AA specific benchmarks
1510
- codingIndex: 22,
1511
- mathIndex: 41,
1284
+ codingIndex: 34.1,
1285
+ mathIndex: 74.3,
1512
1286
 
1513
1287
  // Academic benchmarks
1514
- mmluPro: 0.819,
1515
- gpqa: 0.655,
1516
- hle: 0.052,
1288
+ mmluPro: 0.842,
1289
+ gpqa: 0.777,
1290
+ hle: 0.096,
1517
1291
 
1518
1292
  // Capabilities
1519
1293
  contextWindow: 8192,
@@ -1521,21 +1295,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1521
1295
  supportsVision: false,
1522
1296
 
1523
1297
  // Metadata
1524
- lastUpdated: "2026-04-06",
1298
+ lastUpdated: "2026-06-01",
1299
+ originalModel: "Claude 4 Sonnet (Reasoning)",
1525
1300
  },
1526
- "deepseek-r1-jan-25": {
1527
- // AA Intelligence Index (composite score)
1528
- intelligenceIndex: 18.8,
1529
- normalizedScore: 27,
1530
-
1301
+ "claude-4-opus-reasoning": {
1531
1302
  // AA specific benchmarks
1532
- codingIndex: 15.9,
1533
- mathIndex: 68,
1303
+ codingIndex: 34,
1304
+ mathIndex: 73.3,
1534
1305
 
1535
1306
  // Academic benchmarks
1536
- mmluPro: 0.844,
1537
- gpqa: 0.708,
1538
- hle: 0.093,
1307
+ mmluPro: 0.873,
1308
+ gpqa: 0.796,
1309
+ hle: 0.117,
1539
1310
 
1540
1311
  // Capabilities
1541
1312
  contextWindow: 8192,
@@ -1543,21 +1314,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1543
1314
  supportsVision: false,
1544
1315
 
1545
1316
  // Metadata
1546
- lastUpdated: "2026-04-06",
1317
+ lastUpdated: "2026-06-01",
1318
+ originalModel: "Claude 4 Opus (Reasoning)",
1547
1319
  },
1548
- "deepseek-v3.1-non-reasoning": {
1549
- // AA Intelligence Index (composite score)
1550
- intelligenceIndex: 28.1,
1551
- normalizedScore: 40,
1552
-
1320
+ "claude-2.1": {
1553
1321
  // AA specific benchmarks
1554
- codingIndex: 28.4,
1555
- mathIndex: 49.7,
1322
+ codingIndex: 14,
1323
+ mathIndex: undefined,
1556
1324
 
1557
1325
  // Academic benchmarks
1558
- mmluPro: 0.833,
1559
- gpqa: 0.735,
1560
- hle: 0.063,
1326
+ mmluPro: 0.495,
1327
+ gpqa: 0.319,
1328
+ hle: 0.042,
1561
1329
 
1562
1330
  // Capabilities
1563
1331
  contextWindow: 8192,
@@ -1565,21 +1333,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1565
1333
  supportsVision: false,
1566
1334
 
1567
1335
  // Metadata
1568
- lastUpdated: "2026-04-06",
1336
+ lastUpdated: "2026-06-01",
1337
+ originalModel: "Claude 2.1",
1569
1338
  },
1570
- "deepseek-v2.5": {
1571
- // AA Intelligence Index (composite score)
1572
- intelligenceIndex: 12.3,
1573
- normalizedScore: 18,
1574
-
1339
+ "mistral-large-2-nov-24": {
1575
1340
  // AA specific benchmarks
1576
- codingIndex: undefined,
1577
- mathIndex: undefined,
1341
+ codingIndex: 13.8,
1342
+ mathIndex: 14,
1578
1343
 
1579
1344
  // Academic benchmarks
1580
- mmluPro: undefined,
1581
- gpqa: undefined,
1582
- hle: undefined,
1345
+ mmluPro: 0.697,
1346
+ gpqa: 0.486,
1347
+ hle: 0.04,
1583
1348
 
1584
1349
  // Capabilities
1585
1350
  contextWindow: 8192,
@@ -1587,21 +1352,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1587
1352
  supportsVision: false,
1588
1353
 
1589
1354
  // Metadata
1590
- lastUpdated: "2026-04-06",
1355
+ lastUpdated: "2026-06-01",
1356
+ originalModel: "Mistral Large 2 (Nov '24)",
1591
1357
  },
1592
- "deepseek-v2-chat": {
1593
- // AA Intelligence Index (composite score)
1594
- intelligenceIndex: 9.1,
1595
- normalizedScore: 13,
1596
-
1358
+ "mistral-large-2-jul-24": {
1597
1359
  // AA specific benchmarks
1598
1360
  codingIndex: undefined,
1599
- mathIndex: undefined,
1361
+ mathIndex: 0,
1600
1362
 
1601
1363
  // Academic benchmarks
1602
- mmluPro: undefined,
1603
- gpqa: undefined,
1604
- hle: undefined,
1364
+ mmluPro: 0.683,
1365
+ gpqa: 0.472,
1366
+ hle: 0.032,
1605
1367
 
1606
1368
  // Capabilities
1607
1369
  contextWindow: 8192,
@@ -1609,21 +1371,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1609
1371
  supportsVision: false,
1610
1372
 
1611
1373
  // Metadata
1612
- lastUpdated: "2026-04-06",
1374
+ lastUpdated: "2026-06-01",
1375
+ originalModel: "Mistral Large 2 (Jul '24)",
1613
1376
  },
1614
- "deepseek-coder-v2-lite-instruct": {
1615
- // AA Intelligence Index (composite score)
1616
- intelligenceIndex: 8.5,
1617
- normalizedScore: 12,
1618
-
1377
+ "pixtral-large": {
1619
1378
  // AA specific benchmarks
1620
1379
  codingIndex: undefined,
1621
- mathIndex: undefined,
1380
+ mathIndex: 2.3,
1622
1381
 
1623
1382
  // Academic benchmarks
1624
- mmluPro: 0.429,
1625
- gpqa: 0.319,
1626
- hle: 0.053,
1383
+ mmluPro: 0.701,
1384
+ gpqa: 0.505,
1385
+ hle: 0.036,
1627
1386
 
1628
1387
  // Capabilities
1629
1388
  contextWindow: 8192,
@@ -1631,21 +1390,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1631
1390
  supportsVision: false,
1632
1391
 
1633
1392
  // Metadata
1634
- lastUpdated: "2026-04-06",
1393
+ lastUpdated: "2026-06-01",
1394
+ originalModel: "Pixtral Large",
1635
1395
  },
1636
- sonar: {
1637
- // AA Intelligence Index (composite score)
1638
- intelligenceIndex: 15.5,
1639
- normalizedScore: 22,
1640
-
1396
+ "mistral-small-3": {
1641
1397
  // AA specific benchmarks
1642
1398
  codingIndex: undefined,
1643
- mathIndex: undefined,
1399
+ mathIndex: 4.3,
1644
1400
 
1645
1401
  // Academic benchmarks
1646
- mmluPro: 0.689,
1647
- gpqa: 0.471,
1648
- hle: 0.073,
1402
+ mmluPro: 0.652,
1403
+ gpqa: 0.462,
1404
+ hle: 0.041,
1649
1405
 
1650
1406
  // Capabilities
1651
1407
  contextWindow: 8192,
@@ -1653,21 +1409,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1653
1409
  supportsVision: false,
1654
1410
 
1655
1411
  // Metadata
1656
- lastUpdated: "2026-04-06",
1412
+ lastUpdated: "2026-06-01",
1413
+ originalModel: "Mistral Small 3",
1657
1414
  },
1658
- "sonar-reasoning-pro": {
1659
- // AA Intelligence Index (composite score)
1660
- intelligenceIndex: 24.6,
1661
- normalizedScore: 35,
1662
-
1415
+ "mistral-small-sep-24": {
1663
1416
  // AA specific benchmarks
1664
1417
  codingIndex: undefined,
1665
1418
  mathIndex: undefined,
1666
1419
 
1667
1420
  // Academic benchmarks
1668
- mmluPro: undefined,
1669
- gpqa: undefined,
1670
- hle: undefined,
1421
+ mmluPro: 0.529,
1422
+ gpqa: 0.381,
1423
+ hle: 0.043,
1671
1424
 
1672
1425
  // Capabilities
1673
1426
  contextWindow: 8192,
@@ -1675,21 +1428,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1675
1428
  supportsVision: false,
1676
1429
 
1677
1430
  // Metadata
1678
- lastUpdated: "2026-04-06",
1431
+ lastUpdated: "2026-06-01",
1432
+ originalModel: "Mistral Small (Sep '24)",
1679
1433
  },
1680
- "sonar-pro": {
1681
- // AA Intelligence Index (composite score)
1682
- intelligenceIndex: 15.2,
1683
- normalizedScore: 22,
1684
-
1434
+ "mixtral-8x22b-instruct": {
1685
1435
  // AA specific benchmarks
1686
1436
  codingIndex: undefined,
1687
1437
  mathIndex: undefined,
1688
1438
 
1689
1439
  // Academic benchmarks
1690
- mmluPro: 0.755,
1691
- gpqa: 0.578,
1692
- hle: 0.079,
1440
+ mmluPro: 0.537,
1441
+ gpqa: 0.332,
1442
+ hle: 0.041,
1693
1443
 
1694
1444
  // Capabilities
1695
1445
  contextWindow: 8192,
@@ -1697,21 +1447,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1697
1447
  supportsVision: false,
1698
1448
 
1699
1449
  // Metadata
1700
- lastUpdated: "2026-04-06",
1450
+ lastUpdated: "2026-06-01",
1451
+ originalModel: "Mixtral 8x22B Instruct",
1701
1452
  },
1702
- "sonar-reasoning": {
1703
- // AA Intelligence Index (composite score)
1704
- intelligenceIndex: 17.9,
1705
- normalizedScore: 26,
1706
-
1453
+ "mistral-small-feb-24": {
1707
1454
  // AA specific benchmarks
1708
1455
  codingIndex: undefined,
1709
1456
  mathIndex: undefined,
1710
1457
 
1711
1458
  // Academic benchmarks
1712
- mmluPro: undefined,
1713
- gpqa: 0.623,
1714
- hle: undefined,
1459
+ mmluPro: 0.419,
1460
+ gpqa: 0.302,
1461
+ hle: 0.044,
1715
1462
 
1716
1463
  // Capabilities
1717
1464
  contextWindow: 8192,
@@ -1719,21 +1466,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1719
1466
  supportsVision: false,
1720
1467
 
1721
1468
  // Metadata
1722
- lastUpdated: "2026-04-06",
1469
+ lastUpdated: "2026-06-01",
1470
+ originalModel: "Mistral Small (Feb '24)",
1723
1471
  },
1724
- "grok-beta": {
1725
- // AA Intelligence Index (composite score)
1726
- intelligenceIndex: 13.3,
1727
- normalizedScore: 19,
1728
-
1472
+ "mistral-large-feb-24": {
1729
1473
  // AA specific benchmarks
1730
1474
  codingIndex: undefined,
1731
1475
  mathIndex: undefined,
1732
1476
 
1733
1477
  // Academic benchmarks
1734
- mmluPro: 0.703,
1735
- gpqa: 0.471,
1736
- hle: 0.047,
1478
+ mmluPro: 0.515,
1479
+ gpqa: 0.351,
1480
+ hle: 0.034,
1737
1481
 
1738
1482
  // Capabilities
1739
1483
  contextWindow: 8192,
@@ -1741,21 +1485,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1741
1485
  supportsVision: false,
1742
1486
 
1743
1487
  // Metadata
1744
- lastUpdated: "2026-04-06",
1488
+ lastUpdated: "2026-06-01",
1489
+ originalModel: "Mistral Large (Feb '24)",
1745
1490
  },
1746
- "grok-4-fast-reasoning": {
1747
- // AA Intelligence Index (composite score)
1748
- intelligenceIndex: 35.1,
1749
- normalizedScore: 50,
1750
-
1491
+ "mixtral-8x7b-instruct": {
1751
1492
  // AA specific benchmarks
1752
- codingIndex: 27.4,
1753
- mathIndex: 89.7,
1493
+ codingIndex: undefined,
1494
+ mathIndex: undefined,
1754
1495
 
1755
1496
  // Academic benchmarks
1756
- mmluPro: 0.85,
1757
- gpqa: 0.847,
1758
- hle: 0.17,
1497
+ mmluPro: 0.387,
1498
+ gpqa: 0.292,
1499
+ hle: 0.045,
1759
1500
 
1760
1501
  // Capabilities
1761
1502
  contextWindow: 8192,
@@ -1763,21 +1504,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1763
1504
  supportsVision: false,
1764
1505
 
1765
1506
  // Metadata
1766
- lastUpdated: "2026-04-06",
1507
+ lastUpdated: "2026-06-01",
1508
+ originalModel: "Mixtral 8x7B Instruct",
1767
1509
  },
1768
- "grok-3-reasoning-beta": {
1769
- // AA Intelligence Index (composite score)
1770
- intelligenceIndex: 21.6,
1771
- normalizedScore: 31,
1772
-
1510
+ "mistral-7b-instruct": {
1773
1511
  // AA specific benchmarks
1774
1512
  codingIndex: undefined,
1775
1513
  mathIndex: undefined,
1776
1514
 
1777
1515
  // Academic benchmarks
1778
- mmluPro: undefined,
1779
- gpqa: undefined,
1780
- hle: undefined,
1516
+ mmluPro: 0.245,
1517
+ gpqa: 0.177,
1518
+ hle: 0.043,
1781
1519
 
1782
1520
  // Capabilities
1783
1521
  contextWindow: 8192,
@@ -1785,21 +1523,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1785
1523
  supportsVision: false,
1786
1524
 
1787
1525
  // Metadata
1788
- lastUpdated: "2026-04-06",
1526
+ lastUpdated: "2026-06-01",
1527
+ originalModel: "Mistral 7B Instruct",
1789
1528
  },
1790
- "grok-3": {
1791
- // AA Intelligence Index (composite score)
1792
- intelligenceIndex: 25.2,
1793
- normalizedScore: 36,
1794
-
1529
+ "mistral-saba": {
1795
1530
  // AA specific benchmarks
1796
- codingIndex: 19.8,
1797
- mathIndex: 58,
1531
+ codingIndex: undefined,
1532
+ mathIndex: undefined,
1798
1533
 
1799
1534
  // Academic benchmarks
1800
- mmluPro: 0.799,
1801
- gpqa: 0.693,
1802
- hle: 0.051,
1535
+ mmluPro: 0.611,
1536
+ gpqa: 0.424,
1537
+ hle: 0.041,
1803
1538
 
1804
1539
  // Capabilities
1805
1540
  contextWindow: 8192,
@@ -1807,21 +1542,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1807
1542
  supportsVision: false,
1808
1543
 
1809
1544
  // Metadata
1810
- lastUpdated: "2026-04-06",
1545
+ lastUpdated: "2026-06-01",
1546
+ originalModel: "Mistral Saba",
1811
1547
  },
1812
- "grok-4": {
1813
- // AA Intelligence Index (composite score)
1814
- intelligenceIndex: 41.5,
1815
- normalizedScore: 59,
1816
-
1548
+ "mistral-small-3.2": {
1817
1549
  // AA specific benchmarks
1818
- codingIndex: 40.5,
1819
- mathIndex: 92.7,
1550
+ codingIndex: 13.3,
1551
+ mathIndex: 27,
1820
1552
 
1821
1553
  // Academic benchmarks
1822
- mmluPro: 0.866,
1823
- gpqa: 0.877,
1824
- hle: 0.239,
1554
+ mmluPro: 0.681,
1555
+ gpqa: 0.505,
1556
+ hle: 0.043,
1825
1557
 
1826
1558
  // Capabilities
1827
1559
  contextWindow: 8192,
@@ -1829,21 +1561,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1829
1561
  supportsVision: false,
1830
1562
 
1831
1563
  // Metadata
1832
- lastUpdated: "2026-04-06",
1564
+ lastUpdated: "2026-06-01",
1565
+ originalModel: "Mistral Small 3.2",
1833
1566
  },
1834
- "grok-4.1-fast-non-reasoning": {
1835
- // AA Intelligence Index (composite score)
1836
- intelligenceIndex: 23.6,
1837
- normalizedScore: 34,
1838
-
1567
+ "mistral-small-3.1": {
1839
1568
  // AA specific benchmarks
1840
- codingIndex: 19.5,
1841
- mathIndex: 34.3,
1569
+ codingIndex: 13.9,
1570
+ mathIndex: 3.7,
1842
1571
 
1843
1572
  // Academic benchmarks
1844
- mmluPro: 0.743,
1845
- gpqa: 0.637,
1846
- hle: 0.05,
1573
+ mmluPro: 0.659,
1574
+ gpqa: 0.454,
1575
+ hle: 0.048,
1847
1576
 
1848
1577
  // Capabilities
1849
1578
  contextWindow: 8192,
@@ -1851,21 +1580,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1851
1580
  supportsVision: false,
1852
1581
 
1853
1582
  // Metadata
1854
- lastUpdated: "2026-04-06",
1583
+ lastUpdated: "2026-06-01",
1584
+ originalModel: "Mistral Small 3.1",
1855
1585
  },
1856
- "grok-4.1-fast-reasoning": {
1857
- // AA Intelligence Index (composite score)
1858
- intelligenceIndex: 38.6,
1859
- normalizedScore: 55,
1860
-
1586
+ "mistral-medium-3": {
1861
1587
  // AA specific benchmarks
1862
- codingIndex: 30.9,
1863
- mathIndex: 89.3,
1588
+ codingIndex: 13.6,
1589
+ mathIndex: 30.3,
1864
1590
 
1865
1591
  // Academic benchmarks
1866
- mmluPro: 0.854,
1867
- gpqa: 0.853,
1868
- hle: 0.176,
1592
+ mmluPro: 0.76,
1593
+ gpqa: 0.578,
1594
+ hle: 0.043,
1869
1595
 
1870
1596
  // Capabilities
1871
1597
  contextWindow: 8192,
@@ -1873,21 +1599,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1873
1599
  supportsVision: false,
1874
1600
 
1875
1601
  // Metadata
1876
- lastUpdated: "2026-04-06",
1602
+ lastUpdated: "2026-06-01",
1603
+ originalModel: "Mistral Medium 3",
1877
1604
  },
1878
- "grok-2-dec-24": {
1879
- // AA Intelligence Index (composite score)
1880
- intelligenceIndex: 13.9,
1881
- normalizedScore: 20,
1882
-
1605
+ "magistral-small-1": {
1883
1606
  // AA specific benchmarks
1884
- codingIndex: undefined,
1885
- mathIndex: undefined,
1607
+ codingIndex: 11.1,
1608
+ mathIndex: 41.3,
1886
1609
 
1887
1610
  // Academic benchmarks
1888
- mmluPro: 0.709,
1889
- gpqa: 0.51,
1890
- hle: 0.038,
1611
+ mmluPro: 0.746,
1612
+ gpqa: 0.641,
1613
+ hle: 0.072,
1891
1614
 
1892
1615
  // Capabilities
1893
1616
  contextWindow: 8192,
@@ -1895,21 +1618,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1895
1618
  supportsVision: false,
1896
1619
 
1897
1620
  // Metadata
1898
- lastUpdated: "2026-04-06",
1621
+ lastUpdated: "2026-06-01",
1622
+ originalModel: "Magistral Small 1",
1899
1623
  },
1900
- "grok-4-fast-non-reasoning": {
1901
- // AA Intelligence Index (composite score)
1902
- intelligenceIndex: 23.1,
1903
- normalizedScore: 33,
1904
-
1624
+ "devstral-small-may-25": {
1905
1625
  // AA specific benchmarks
1906
- codingIndex: 19,
1907
- mathIndex: 41.3,
1626
+ codingIndex: 12.2,
1627
+ mathIndex: undefined,
1908
1628
 
1909
1629
  // Academic benchmarks
1910
- mmluPro: 0.73,
1911
- gpqa: 0.606,
1912
- hle: 0.05,
1630
+ mmluPro: 0.632,
1631
+ gpqa: 0.434,
1632
+ hle: 0.04,
1913
1633
 
1914
1634
  // Capabilities
1915
1635
  contextWindow: 8192,
@@ -1917,21 +1637,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1917
1637
  supportsVision: false,
1918
1638
 
1919
1639
  // Metadata
1920
- lastUpdated: "2026-04-06",
1640
+ lastUpdated: "2026-06-01",
1641
+ originalModel: "Devstral Small (May '25)",
1921
1642
  },
1922
- "openchat-3.5-1210": {
1923
- // AA Intelligence Index (composite score)
1924
- intelligenceIndex: 8.3,
1925
- normalizedScore: 12,
1926
-
1643
+ "mistral-medium": {
1927
1644
  // AA specific benchmarks
1928
1645
  codingIndex: undefined,
1929
1646
  mathIndex: undefined,
1930
1647
 
1931
1648
  // Academic benchmarks
1932
- mmluPro: 0.31,
1933
- gpqa: 0.23,
1934
- hle: 0.048,
1649
+ mmluPro: 0.491,
1650
+ gpqa: 0.349,
1651
+ hle: 0.034,
1935
1652
 
1936
1653
  // Capabilities
1937
1654
  contextWindow: 8192,
@@ -1939,21 +1656,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1939
1656
  supportsVision: false,
1940
1657
 
1941
1658
  // Metadata
1942
- lastUpdated: "2026-04-06",
1659
+ lastUpdated: "2026-06-01",
1660
+ originalModel: "Mistral Medium",
1943
1661
  },
1944
- "nova-pro": {
1945
- // AA Intelligence Index (composite score)
1946
- intelligenceIndex: 13.5,
1947
- normalizedScore: 19,
1948
-
1662
+ "devstral-small-jul-25": {
1949
1663
  // AA specific benchmarks
1950
- codingIndex: 11,
1951
- mathIndex: 7,
1664
+ codingIndex: 12.1,
1665
+ mathIndex: 29.3,
1952
1666
 
1953
1667
  // Academic benchmarks
1954
- mmluPro: 0.691,
1955
- gpqa: 0.499,
1956
- hle: 0.034,
1668
+ mmluPro: 0.622,
1669
+ gpqa: 0.414,
1670
+ hle: 0.037,
1957
1671
 
1958
1672
  // Capabilities
1959
1673
  contextWindow: 8192,
@@ -1961,21 +1675,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1961
1675
  supportsVision: false,
1962
1676
 
1963
1677
  // Metadata
1964
- lastUpdated: "2026-04-06",
1678
+ lastUpdated: "2026-06-01",
1679
+ originalModel: "Devstral Small (Jul '25)",
1965
1680
  },
1966
- "nova-lite": {
1967
- // AA Intelligence Index (composite score)
1968
- intelligenceIndex: 12.7,
1969
- normalizedScore: 18,
1970
-
1681
+ "devstral-medium": {
1971
1682
  // AA specific benchmarks
1972
- codingIndex: 5.1,
1973
- mathIndex: 7,
1683
+ codingIndex: 15.9,
1684
+ mathIndex: 4.7,
1974
1685
 
1975
1686
  // Academic benchmarks
1976
- mmluPro: 0.59,
1977
- gpqa: 0.433,
1978
- hle: 0.046,
1687
+ mmluPro: 0.708,
1688
+ gpqa: 0.492,
1689
+ hle: 0.038,
1979
1690
 
1980
1691
  // Capabilities
1981
1692
  contextWindow: 8192,
@@ -1983,21 +1694,18 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
1983
1694
  supportsVision: false,
1984
1695
 
1985
1696
  // Metadata
1986
- lastUpdated: "2026-04-06",
1697
+ lastUpdated: "2026-06-01",
1698
+ originalModel: "Devstral Medium",
1987
1699
  },
1988
- "phi-3-mini-instruct-3.8b": {
1989
- // AA Intelligence Index (composite score)
1990
- intelligenceIndex: 10.1,
1991
- normalizedScore: 14,
1992
-
1700
+ "magistral-medium-1": {
1993
1701
  // AA specific benchmarks
1994
- codingIndex: 3,
1995
- mathIndex: 0.3,
1702
+ codingIndex: 16,
1703
+ mathIndex: 40.3,
1996
1704
 
1997
1705
  // Academic benchmarks
1998
- mmluPro: 0.435,
1999
- gpqa: 0.319,
2000
- hle: 0.044,
1706
+ mmluPro: 0.753,
1707
+ gpqa: 0.679,
1708
+ hle: 0.095,
2001
1709
 
2002
1710
  // Capabilities
2003
1711
  contextWindow: 8192,
@@ -2005,6 +1713,7 @@ export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
2005
1713
  supportsVision: false,
2006
1714
 
2007
1715
  // Metadata
2008
- lastUpdated: "2026-04-06",
1716
+ lastUpdated: "2026-06-01",
1717
+ originalModel: "Magistral Medium 1",
2009
1718
  },
2010
1719
  };