pi-free 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +9 -5
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/provider-compat.ts +33 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +200 -68
- package/providers/cline/cline.ts +3 -3
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +54 -16
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +391 -0
|
@@ -1,15 +1,50 @@
|
|
|
1
1
|
// Auto-generated benchmark data chunk 0
|
|
2
|
-
// Models: gpt-oss-
|
|
2
|
+
// Models: gpt-oss-20b-low .. lfm2-8b-a1b (90 entries)
|
|
3
|
+
// Last updated: 2026-06-01
|
|
3
4
|
// DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
|
|
4
5
|
|
|
5
6
|
import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
|
|
6
7
|
|
|
7
8
|
export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
8
|
-
"gpt-oss-
|
|
9
|
-
// AA
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
"gpt-oss-20b-low": {
|
|
10
|
+
// AA specific benchmarks
|
|
11
|
+
codingIndex: 14.4,
|
|
12
|
+
mathIndex: 62.3,
|
|
13
|
+
|
|
14
|
+
// Academic benchmarks
|
|
15
|
+
mmluPro: 0.718,
|
|
16
|
+
gpqa: 0.611,
|
|
17
|
+
hle: 0.051,
|
|
18
|
+
|
|
19
|
+
// Capabilities
|
|
20
|
+
contextWindow: 8192,
|
|
21
|
+
supportsReasoning: false,
|
|
22
|
+
supportsVision: false,
|
|
23
|
+
|
|
24
|
+
// Metadata
|
|
25
|
+
lastUpdated: "2026-06-01",
|
|
26
|
+
originalModel: "gpt-oss-20B (low)",
|
|
27
|
+
},
|
|
28
|
+
"gpt-oss-20b-high": {
|
|
29
|
+
// AA specific benchmarks
|
|
30
|
+
codingIndex: 18.5,
|
|
31
|
+
mathIndex: 89.3,
|
|
32
|
+
|
|
33
|
+
// Academic benchmarks
|
|
34
|
+
mmluPro: 0.748,
|
|
35
|
+
gpqa: 0.688,
|
|
36
|
+
hle: 0.098,
|
|
37
|
+
|
|
38
|
+
// Capabilities
|
|
39
|
+
contextWindow: 8192,
|
|
40
|
+
supportsReasoning: false,
|
|
41
|
+
supportsVision: false,
|
|
12
42
|
|
|
43
|
+
// Metadata
|
|
44
|
+
lastUpdated: "2026-06-01",
|
|
45
|
+
originalModel: "gpt-oss-20B (high)",
|
|
46
|
+
},
|
|
47
|
+
"gpt-oss-120b-high": {
|
|
13
48
|
// AA specific benchmarks
|
|
14
49
|
codingIndex: 28.6,
|
|
15
50
|
mathIndex: 93.4,
|
|
@@ -25,13 +60,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
25
60
|
supportsVision: false,
|
|
26
61
|
|
|
27
62
|
// Metadata
|
|
28
|
-
lastUpdated: "2026-
|
|
63
|
+
lastUpdated: "2026-06-01",
|
|
64
|
+
originalModel: "gpt-oss-120b (high)",
|
|
29
65
|
},
|
|
30
66
|
"gpt-5.4-mini-xhigh": {
|
|
31
|
-
// AA Intelligence Index (composite score)
|
|
32
|
-
intelligenceIndex: 48.1,
|
|
33
|
-
normalizedScore: 69,
|
|
34
|
-
|
|
35
67
|
// AA specific benchmarks
|
|
36
68
|
codingIndex: 51.5,
|
|
37
69
|
mathIndex: undefined,
|
|
@@ -47,13 +79,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
47
79
|
supportsVision: false,
|
|
48
80
|
|
|
49
81
|
// Metadata
|
|
50
|
-
lastUpdated: "2026-
|
|
82
|
+
lastUpdated: "2026-06-01",
|
|
83
|
+
originalModel: "GPT-5.4 mini (xhigh)",
|
|
51
84
|
},
|
|
52
85
|
"gpt-5.4-nano-xhigh": {
|
|
53
|
-
// AA Intelligence Index (composite score)
|
|
54
|
-
intelligenceIndex: 44.4,
|
|
55
|
-
normalizedScore: 63,
|
|
56
|
-
|
|
57
86
|
// AA specific benchmarks
|
|
58
87
|
codingIndex: 43.9,
|
|
59
88
|
mathIndex: undefined,
|
|
@@ -69,13 +98,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
69
98
|
supportsVision: false,
|
|
70
99
|
|
|
71
100
|
// Metadata
|
|
72
|
-
lastUpdated: "2026-
|
|
101
|
+
lastUpdated: "2026-06-01",
|
|
102
|
+
originalModel: "GPT-5.4 nano (xhigh)",
|
|
73
103
|
},
|
|
74
104
|
"gpt-oss-120b-low": {
|
|
75
|
-
// AA Intelligence Index (composite score)
|
|
76
|
-
intelligenceIndex: 24.5,
|
|
77
|
-
normalizedScore: 35,
|
|
78
|
-
|
|
79
105
|
// AA specific benchmarks
|
|
80
106
|
codingIndex: 15.5,
|
|
81
107
|
mathIndex: 66.7,
|
|
@@ -91,21 +117,37 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
91
117
|
supportsVision: false,
|
|
92
118
|
|
|
93
119
|
// Metadata
|
|
94
|
-
lastUpdated: "2026-
|
|
120
|
+
lastUpdated: "2026-06-01",
|
|
121
|
+
originalModel: "gpt-oss-120b (low)",
|
|
95
122
|
},
|
|
96
|
-
"
|
|
97
|
-
// AA
|
|
98
|
-
|
|
99
|
-
|
|
123
|
+
"o3": {
|
|
124
|
+
// AA specific benchmarks
|
|
125
|
+
codingIndex: 38.4,
|
|
126
|
+
mathIndex: 88.3,
|
|
127
|
+
|
|
128
|
+
// Academic benchmarks
|
|
129
|
+
mmluPro: 0.853,
|
|
130
|
+
gpqa: 0.827,
|
|
131
|
+
hle: 0.2,
|
|
132
|
+
|
|
133
|
+
// Capabilities
|
|
134
|
+
contextWindow: 8192,
|
|
135
|
+
supportsReasoning: false,
|
|
136
|
+
supportsVision: false,
|
|
100
137
|
|
|
138
|
+
// Metadata
|
|
139
|
+
lastUpdated: "2026-06-01",
|
|
140
|
+
originalModel: "o3",
|
|
141
|
+
},
|
|
142
|
+
"grok-1": {
|
|
101
143
|
// AA specific benchmarks
|
|
102
|
-
codingIndex:
|
|
144
|
+
codingIndex: undefined,
|
|
103
145
|
mathIndex: undefined,
|
|
104
146
|
|
|
105
147
|
// Academic benchmarks
|
|
106
148
|
mmluPro: undefined,
|
|
107
|
-
gpqa:
|
|
108
|
-
hle:
|
|
149
|
+
gpqa: undefined,
|
|
150
|
+
hle: undefined,
|
|
109
151
|
|
|
110
152
|
// Capabilities
|
|
111
153
|
contextWindow: 8192,
|
|
@@ -113,21 +155,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
113
155
|
supportsVision: false,
|
|
114
156
|
|
|
115
157
|
// Metadata
|
|
116
|
-
lastUpdated: "2026-
|
|
158
|
+
lastUpdated: "2026-06-01",
|
|
159
|
+
originalModel: "Grok-1",
|
|
117
160
|
},
|
|
118
|
-
"gpt-5.4-
|
|
119
|
-
// AA Intelligence Index (composite score)
|
|
120
|
-
intelligenceIndex: 38.1,
|
|
121
|
-
normalizedScore: 54,
|
|
122
|
-
|
|
161
|
+
"gpt-5.4-mini-medium": {
|
|
123
162
|
// AA specific benchmarks
|
|
124
|
-
codingIndex:
|
|
163
|
+
codingIndex: 37.5,
|
|
125
164
|
mathIndex: undefined,
|
|
126
165
|
|
|
127
166
|
// Academic benchmarks
|
|
128
167
|
mmluPro: undefined,
|
|
129
|
-
gpqa: 0.
|
|
130
|
-
hle: 0.
|
|
168
|
+
gpqa: 0.823,
|
|
169
|
+
hle: 0.171,
|
|
131
170
|
|
|
132
171
|
// Capabilities
|
|
133
172
|
contextWindow: 8192,
|
|
@@ -135,13 +174,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
135
174
|
supportsVision: false,
|
|
136
175
|
|
|
137
176
|
// Metadata
|
|
138
|
-
lastUpdated: "2026-
|
|
177
|
+
lastUpdated: "2026-06-01",
|
|
178
|
+
originalModel: "GPT-5.4 mini (medium)",
|
|
139
179
|
},
|
|
140
180
|
"gpt-5.4-mini-non-reasoning": {
|
|
141
|
-
// AA Intelligence Index (composite score)
|
|
142
|
-
intelligenceIndex: 23.3,
|
|
143
|
-
normalizedScore: 33,
|
|
144
|
-
|
|
145
181
|
// AA specific benchmarks
|
|
146
182
|
codingIndex: 25.3,
|
|
147
183
|
mathIndex: undefined,
|
|
@@ -157,21 +193,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
157
193
|
supportsVision: false,
|
|
158
194
|
|
|
159
195
|
// Metadata
|
|
160
|
-
lastUpdated: "2026-
|
|
196
|
+
lastUpdated: "2026-06-01",
|
|
197
|
+
originalModel: "GPT-5.4 mini (Non-Reasoning)",
|
|
161
198
|
},
|
|
162
|
-
"gpt-5.
|
|
163
|
-
// AA Intelligence Index (composite score)
|
|
164
|
-
intelligenceIndex: 35.4,
|
|
165
|
-
normalizedScore: 51,
|
|
166
|
-
|
|
199
|
+
"gpt-5.5-instant-may-2026": {
|
|
167
200
|
// AA specific benchmarks
|
|
168
|
-
codingIndex:
|
|
201
|
+
codingIndex: 45.1,
|
|
169
202
|
mathIndex: undefined,
|
|
170
203
|
|
|
171
204
|
// Academic benchmarks
|
|
172
205
|
mmluPro: undefined,
|
|
173
|
-
gpqa: 0.
|
|
174
|
-
hle: 0.
|
|
206
|
+
gpqa: 0.846,
|
|
207
|
+
hle: 0.203,
|
|
175
208
|
|
|
176
209
|
// Capabilities
|
|
177
210
|
contextWindow: 8192,
|
|
@@ -179,21 +212,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
179
212
|
supportsVision: false,
|
|
180
213
|
|
|
181
214
|
// Metadata
|
|
182
|
-
lastUpdated: "2026-
|
|
215
|
+
lastUpdated: "2026-06-01",
|
|
216
|
+
originalModel: "GPT-5.5 Instant (May 2026)",
|
|
183
217
|
},
|
|
184
|
-
"gpt-5.
|
|
185
|
-
// AA Intelligence Index (composite score)
|
|
186
|
-
intelligenceIndex: 37.7,
|
|
187
|
-
normalizedScore: 54,
|
|
188
|
-
|
|
218
|
+
"gpt-5.5-low": {
|
|
189
219
|
// AA specific benchmarks
|
|
190
|
-
codingIndex:
|
|
220
|
+
codingIndex: 52.1,
|
|
191
221
|
mathIndex: undefined,
|
|
192
222
|
|
|
193
223
|
// Academic benchmarks
|
|
194
224
|
mmluPro: undefined,
|
|
195
|
-
gpqa: 0.
|
|
196
|
-
hle: 0.
|
|
225
|
+
gpqa: 0.91,
|
|
226
|
+
hle: 0.31,
|
|
197
227
|
|
|
198
228
|
// Capabilities
|
|
199
229
|
contextWindow: 8192,
|
|
@@ -201,21 +231,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
201
231
|
supportsVision: false,
|
|
202
232
|
|
|
203
233
|
// Metadata
|
|
204
|
-
lastUpdated: "2026-
|
|
234
|
+
lastUpdated: "2026-06-01",
|
|
235
|
+
originalModel: "GPT-5.5 (low)",
|
|
205
236
|
},
|
|
206
|
-
"gpt-
|
|
207
|
-
// AA Intelligence Index (composite score)
|
|
208
|
-
intelligenceIndex: 24.5,
|
|
209
|
-
normalizedScore: 35,
|
|
210
|
-
|
|
237
|
+
"gpt-5.4-nano-non-reasoning": {
|
|
211
238
|
// AA specific benchmarks
|
|
212
|
-
codingIndex:
|
|
213
|
-
mathIndex:
|
|
239
|
+
codingIndex: 27.9,
|
|
240
|
+
mathIndex: undefined,
|
|
214
241
|
|
|
215
242
|
// Academic benchmarks
|
|
216
|
-
mmluPro:
|
|
217
|
-
gpqa: 0.
|
|
218
|
-
hle: 0.
|
|
243
|
+
mmluPro: undefined,
|
|
244
|
+
gpqa: 0.558,
|
|
245
|
+
hle: 0.042,
|
|
219
246
|
|
|
220
247
|
// Capabilities
|
|
221
248
|
contextWindow: 8192,
|
|
@@ -223,21 +250,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
223
250
|
supportsVision: false,
|
|
224
251
|
|
|
225
252
|
// Metadata
|
|
226
|
-
lastUpdated: "2026-
|
|
253
|
+
lastUpdated: "2026-06-01",
|
|
254
|
+
originalModel: "GPT-5.4 nano (Non-Reasoning)",
|
|
227
255
|
},
|
|
228
|
-
"gpt-5.
|
|
229
|
-
// AA Intelligence Index (composite score)
|
|
230
|
-
intelligenceIndex: 57.2,
|
|
231
|
-
normalizedScore: 82,
|
|
232
|
-
|
|
256
|
+
"gpt-5.5-high": {
|
|
233
257
|
// AA specific benchmarks
|
|
234
|
-
codingIndex:
|
|
258
|
+
codingIndex: 58.5,
|
|
235
259
|
mathIndex: undefined,
|
|
236
260
|
|
|
237
261
|
// Academic benchmarks
|
|
238
262
|
mmluPro: undefined,
|
|
239
|
-
gpqa: 0.
|
|
240
|
-
hle: 0.
|
|
263
|
+
gpqa: 0.932,
|
|
264
|
+
hle: 0.43,
|
|
241
265
|
|
|
242
266
|
// Capabilities
|
|
243
267
|
contextWindow: 8192,
|
|
@@ -245,21 +269,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
245
269
|
supportsVision: false,
|
|
246
270
|
|
|
247
271
|
// Metadata
|
|
248
|
-
lastUpdated: "2026-
|
|
272
|
+
lastUpdated: "2026-06-01",
|
|
273
|
+
originalModel: "GPT-5.5 (high)",
|
|
249
274
|
},
|
|
250
|
-
"
|
|
251
|
-
// AA Intelligence Index (composite score)
|
|
252
|
-
intelligenceIndex: 11.7,
|
|
253
|
-
normalizedScore: 17,
|
|
254
|
-
|
|
275
|
+
"gpt-5.4-nano-medium": {
|
|
255
276
|
// AA specific benchmarks
|
|
256
|
-
codingIndex:
|
|
277
|
+
codingIndex: 35,
|
|
257
278
|
mathIndex: undefined,
|
|
258
279
|
|
|
259
280
|
// Academic benchmarks
|
|
260
281
|
mmluPro: undefined,
|
|
261
|
-
gpqa:
|
|
262
|
-
hle:
|
|
282
|
+
gpqa: 0.761,
|
|
283
|
+
hle: 0.147,
|
|
263
284
|
|
|
264
285
|
// Capabilities
|
|
265
286
|
contextWindow: 8192,
|
|
@@ -267,21 +288,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
267
288
|
supportsVision: false,
|
|
268
289
|
|
|
269
290
|
// Metadata
|
|
270
|
-
lastUpdated: "2026-
|
|
291
|
+
lastUpdated: "2026-06-01",
|
|
292
|
+
originalModel: "GPT-5.4 nano (medium)",
|
|
271
293
|
},
|
|
272
|
-
"gpt-
|
|
273
|
-
// AA Intelligence Index (composite score)
|
|
274
|
-
intelligenceIndex: 20.8,
|
|
275
|
-
normalizedScore: 30,
|
|
276
|
-
|
|
294
|
+
"gpt-5.5-non-reasoning": {
|
|
277
295
|
// AA specific benchmarks
|
|
278
|
-
codingIndex:
|
|
279
|
-
mathIndex:
|
|
296
|
+
codingIndex: 48.6,
|
|
297
|
+
mathIndex: undefined,
|
|
280
298
|
|
|
281
299
|
// Academic benchmarks
|
|
282
|
-
mmluPro:
|
|
283
|
-
gpqa: 0.
|
|
284
|
-
hle: 0.
|
|
300
|
+
mmluPro: undefined,
|
|
301
|
+
gpqa: 0.768,
|
|
302
|
+
hle: 0.126,
|
|
285
303
|
|
|
286
304
|
// Capabilities
|
|
287
305
|
contextWindow: 8192,
|
|
@@ -289,21 +307,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
289
307
|
supportsVision: false,
|
|
290
308
|
|
|
291
309
|
// Metadata
|
|
292
|
-
lastUpdated: "2026-
|
|
310
|
+
lastUpdated: "2026-06-01",
|
|
311
|
+
originalModel: "GPT-5.5 (Non-reasoning)",
|
|
293
312
|
},
|
|
294
|
-
|
|
295
|
-
// AA Intelligence Index (composite score)
|
|
296
|
-
intelligenceIndex: 38.4,
|
|
297
|
-
normalizedScore: 55,
|
|
298
|
-
|
|
313
|
+
"gpt-5.5-medium": {
|
|
299
314
|
// AA specific benchmarks
|
|
300
|
-
codingIndex:
|
|
301
|
-
mathIndex:
|
|
315
|
+
codingIndex: 56.2,
|
|
316
|
+
mathIndex: undefined,
|
|
302
317
|
|
|
303
318
|
// Academic benchmarks
|
|
304
|
-
mmluPro:
|
|
305
|
-
gpqa: 0.
|
|
306
|
-
hle: 0.
|
|
319
|
+
mmluPro: undefined,
|
|
320
|
+
gpqa: 0.926,
|
|
321
|
+
hle: 0.406,
|
|
307
322
|
|
|
308
323
|
// Capabilities
|
|
309
324
|
contextWindow: 8192,
|
|
@@ -311,13 +326,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
311
326
|
supportsVision: false,
|
|
312
327
|
|
|
313
328
|
// Metadata
|
|
314
|
-
lastUpdated: "2026-
|
|
329
|
+
lastUpdated: "2026-06-01",
|
|
330
|
+
originalModel: "GPT-5.5 (medium)",
|
|
315
331
|
},
|
|
316
332
|
"gpt-5.3-codex-xhigh": {
|
|
317
|
-
// AA Intelligence Index (composite score)
|
|
318
|
-
intelligenceIndex: 54,
|
|
319
|
-
normalizedScore: 77,
|
|
320
|
-
|
|
321
333
|
// AA specific benchmarks
|
|
322
334
|
codingIndex: 53.1,
|
|
323
335
|
mathIndex: undefined,
|
|
@@ -333,13 +345,29 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
333
345
|
supportsVision: false,
|
|
334
346
|
|
|
335
347
|
// Metadata
|
|
336
|
-
lastUpdated: "2026-
|
|
348
|
+
lastUpdated: "2026-06-01",
|
|
349
|
+
originalModel: "GPT-5.3 Codex (xhigh)",
|
|
337
350
|
},
|
|
338
|
-
"
|
|
339
|
-
// AA
|
|
340
|
-
|
|
341
|
-
|
|
351
|
+
"gpt-5.5-xhigh": {
|
|
352
|
+
// AA specific benchmarks
|
|
353
|
+
codingIndex: 59.1,
|
|
354
|
+
mathIndex: undefined,
|
|
355
|
+
|
|
356
|
+
// Academic benchmarks
|
|
357
|
+
mmluPro: undefined,
|
|
358
|
+
gpqa: 0.935,
|
|
359
|
+
hle: 0.443,
|
|
342
360
|
|
|
361
|
+
// Capabilities
|
|
362
|
+
contextWindow: 8192,
|
|
363
|
+
supportsReasoning: false,
|
|
364
|
+
supportsVision: false,
|
|
365
|
+
|
|
366
|
+
// Metadata
|
|
367
|
+
lastUpdated: "2026-06-01",
|
|
368
|
+
originalModel: "GPT-5.5 (xhigh)",
|
|
369
|
+
},
|
|
370
|
+
"llama-3.3-instruct-70b": {
|
|
343
371
|
// AA specific benchmarks
|
|
344
372
|
codingIndex: 10.7,
|
|
345
373
|
mathIndex: 7.7,
|
|
@@ -355,13 +383,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
355
383
|
supportsVision: false,
|
|
356
384
|
|
|
357
385
|
// Metadata
|
|
358
|
-
lastUpdated: "2026-
|
|
386
|
+
lastUpdated: "2026-06-01",
|
|
387
|
+
originalModel: "Llama 3.3 Instruct 70B",
|
|
359
388
|
},
|
|
360
389
|
"llama-3.1-instruct-405b": {
|
|
361
|
-
// AA Intelligence Index (composite score)
|
|
362
|
-
intelligenceIndex: 17.4,
|
|
363
|
-
normalizedScore: 25,
|
|
364
|
-
|
|
365
390
|
// AA specific benchmarks
|
|
366
391
|
codingIndex: 14.5,
|
|
367
392
|
mathIndex: 3,
|
|
@@ -377,13 +402,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
377
402
|
supportsVision: false,
|
|
378
403
|
|
|
379
404
|
// Metadata
|
|
380
|
-
lastUpdated: "2026-
|
|
405
|
+
lastUpdated: "2026-06-01",
|
|
406
|
+
originalModel: "Llama 3.1 Instruct 405B",
|
|
381
407
|
},
|
|
382
408
|
"llama-3.2-instruct-90b-vision": {
|
|
383
|
-
// AA Intelligence Index (composite score)
|
|
384
|
-
intelligenceIndex: 11.9,
|
|
385
|
-
normalizedScore: 17,
|
|
386
|
-
|
|
387
409
|
// AA specific benchmarks
|
|
388
410
|
codingIndex: undefined,
|
|
389
411
|
mathIndex: undefined,
|
|
@@ -399,15 +421,12 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
399
421
|
supportsVision: false,
|
|
400
422
|
|
|
401
423
|
// Metadata
|
|
402
|
-
lastUpdated: "2026-
|
|
424
|
+
lastUpdated: "2026-06-01",
|
|
425
|
+
originalModel: "Llama 3.2 Instruct 90B (Vision)",
|
|
403
426
|
},
|
|
404
427
|
"llama-3.2-instruct-11b-vision": {
|
|
405
|
-
// AA Intelligence Index (composite score)
|
|
406
|
-
intelligenceIndex: 8.7,
|
|
407
|
-
normalizedScore: 12,
|
|
408
|
-
|
|
409
428
|
// AA specific benchmarks
|
|
410
|
-
codingIndex: 4.
|
|
429
|
+
codingIndex: 4.2,
|
|
411
430
|
mathIndex: 1.7,
|
|
412
431
|
|
|
413
432
|
// Academic benchmarks
|
|
@@ -421,13 +440,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
421
440
|
supportsVision: false,
|
|
422
441
|
|
|
423
442
|
// Metadata
|
|
424
|
-
lastUpdated: "2026-
|
|
443
|
+
lastUpdated: "2026-06-01",
|
|
444
|
+
originalModel: "Llama 3.2 Instruct 11B (Vision)",
|
|
425
445
|
},
|
|
426
446
|
"llama-4-maverick": {
|
|
427
|
-
// AA Intelligence Index (composite score)
|
|
428
|
-
intelligenceIndex: 18.4,
|
|
429
|
-
normalizedScore: 26,
|
|
430
|
-
|
|
431
447
|
// AA specific benchmarks
|
|
432
448
|
codingIndex: 15.6,
|
|
433
449
|
mathIndex: 19.3,
|
|
@@ -443,13 +459,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
443
459
|
supportsVision: false,
|
|
444
460
|
|
|
445
461
|
// Metadata
|
|
446
|
-
lastUpdated: "2026-
|
|
462
|
+
lastUpdated: "2026-06-01",
|
|
463
|
+
originalModel: "Llama 4 Maverick",
|
|
447
464
|
},
|
|
448
465
|
"llama-4-scout": {
|
|
449
|
-
// AA Intelligence Index (composite score)
|
|
450
|
-
intelligenceIndex: 13.5,
|
|
451
|
-
normalizedScore: 19,
|
|
452
|
-
|
|
453
466
|
// AA specific benchmarks
|
|
454
467
|
codingIndex: 6.7,
|
|
455
468
|
mathIndex: 14,
|
|
@@ -465,21 +478,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
465
478
|
supportsVision: false,
|
|
466
479
|
|
|
467
480
|
// Metadata
|
|
468
|
-
lastUpdated: "2026-
|
|
481
|
+
lastUpdated: "2026-06-01",
|
|
482
|
+
originalModel: "Llama 4 Scout",
|
|
469
483
|
},
|
|
470
|
-
"
|
|
471
|
-
// AA Intelligence Index (composite score)
|
|
472
|
-
intelligenceIndex: 8.8,
|
|
473
|
-
normalizedScore: 13,
|
|
474
|
-
|
|
484
|
+
"muse-spark": {
|
|
475
485
|
// AA specific benchmarks
|
|
476
|
-
codingIndex:
|
|
477
|
-
mathIndex:
|
|
486
|
+
codingIndex: 47.5,
|
|
487
|
+
mathIndex: undefined,
|
|
478
488
|
|
|
479
489
|
// Academic benchmarks
|
|
480
|
-
mmluPro:
|
|
481
|
-
gpqa: 0.
|
|
482
|
-
hle: 0.
|
|
490
|
+
mmluPro: undefined,
|
|
491
|
+
gpqa: 0.884,
|
|
492
|
+
hle: 0.399,
|
|
483
493
|
|
|
484
494
|
// Capabilities
|
|
485
495
|
contextWindow: 8192,
|
|
@@ -487,21 +497,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
487
497
|
supportsVision: false,
|
|
488
498
|
|
|
489
499
|
// Metadata
|
|
490
|
-
lastUpdated: "2026-
|
|
500
|
+
lastUpdated: "2026-06-01",
|
|
501
|
+
originalModel: "Muse Spark",
|
|
491
502
|
},
|
|
492
|
-
"gemini-3-
|
|
493
|
-
// AA Intelligence Index (composite score)
|
|
494
|
-
intelligenceIndex: 35,
|
|
495
|
-
normalizedScore: 50,
|
|
496
|
-
|
|
503
|
+
"gemini-3.1-pro-preview": {
|
|
497
504
|
// AA specific benchmarks
|
|
498
|
-
codingIndex:
|
|
499
|
-
mathIndex:
|
|
505
|
+
codingIndex: 55.5,
|
|
506
|
+
mathIndex: undefined,
|
|
500
507
|
|
|
501
508
|
// Academic benchmarks
|
|
502
|
-
mmluPro:
|
|
503
|
-
gpqa: 0.
|
|
504
|
-
hle: 0.
|
|
509
|
+
mmluPro: undefined,
|
|
510
|
+
gpqa: 0.941,
|
|
511
|
+
hle: 0.447,
|
|
505
512
|
|
|
506
513
|
// Capabilities
|
|
507
514
|
contextWindow: 8192,
|
|
@@ -509,21 +516,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
509
516
|
supportsVision: false,
|
|
510
517
|
|
|
511
518
|
// Metadata
|
|
512
|
-
lastUpdated: "2026-
|
|
519
|
+
lastUpdated: "2026-06-01",
|
|
520
|
+
originalModel: "Gemini 3.1 Pro Preview",
|
|
513
521
|
},
|
|
514
|
-
"gemma-
|
|
515
|
-
// AA Intelligence Index (composite score)
|
|
516
|
-
intelligenceIndex: 10.3,
|
|
517
|
-
normalizedScore: 15,
|
|
518
|
-
|
|
522
|
+
"gemma-4-26b-a4b-non-reasoning": {
|
|
519
523
|
// AA specific benchmarks
|
|
520
|
-
codingIndex:
|
|
521
|
-
mathIndex:
|
|
524
|
+
codingIndex: 29.1,
|
|
525
|
+
mathIndex: undefined,
|
|
522
526
|
|
|
523
527
|
// Academic benchmarks
|
|
524
|
-
mmluPro:
|
|
525
|
-
gpqa: 0.
|
|
526
|
-
hle: 0.
|
|
528
|
+
mmluPro: undefined,
|
|
529
|
+
gpqa: 0.714,
|
|
530
|
+
hle: 0.107,
|
|
527
531
|
|
|
528
532
|
// Capabilities
|
|
529
533
|
contextWindow: 8192,
|
|
@@ -531,21 +535,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
531
535
|
supportsVision: false,
|
|
532
536
|
|
|
533
537
|
// Metadata
|
|
534
|
-
lastUpdated: "2026-
|
|
538
|
+
lastUpdated: "2026-06-01",
|
|
539
|
+
originalModel: "Gemma 4 26B A4B (Non-reasoning)",
|
|
535
540
|
},
|
|
536
|
-
"
|
|
537
|
-
// AA Intelligence Index (composite score)
|
|
538
|
-
intelligenceIndex: 39.2,
|
|
539
|
-
normalizedScore: 56,
|
|
540
|
-
|
|
541
|
+
"gemini-3.5-flash-medium": {
|
|
541
542
|
// AA specific benchmarks
|
|
542
|
-
codingIndex:
|
|
543
|
+
codingIndex: 43.9,
|
|
543
544
|
mathIndex: undefined,
|
|
544
545
|
|
|
545
546
|
// Academic benchmarks
|
|
546
547
|
mmluPro: undefined,
|
|
547
|
-
gpqa: 0.
|
|
548
|
-
hle: 0.
|
|
548
|
+
gpqa: 0.921,
|
|
549
|
+
hle: 0.399,
|
|
549
550
|
|
|
550
551
|
// Capabilities
|
|
551
552
|
contextWindow: 8192,
|
|
@@ -553,21 +554,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
553
554
|
supportsVision: false,
|
|
554
555
|
|
|
555
556
|
// Metadata
|
|
556
|
-
lastUpdated: "2026-
|
|
557
|
+
lastUpdated: "2026-06-01",
|
|
558
|
+
originalModel: "Gemini 3.5 Flash (medium)",
|
|
557
559
|
},
|
|
558
|
-
"gemma-
|
|
559
|
-
// AA Intelligence Index (composite score)
|
|
560
|
-
intelligenceIndex: 6.3,
|
|
561
|
-
normalizedScore: 9,
|
|
562
|
-
|
|
560
|
+
"gemma-4-e2b-non-reasoning": {
|
|
563
561
|
// AA specific benchmarks
|
|
564
|
-
codingIndex:
|
|
565
|
-
mathIndex:
|
|
562
|
+
codingIndex: 8.3,
|
|
563
|
+
mathIndex: undefined,
|
|
566
564
|
|
|
567
565
|
// Academic benchmarks
|
|
568
|
-
mmluPro:
|
|
569
|
-
gpqa: 0.
|
|
570
|
-
hle: 0.
|
|
566
|
+
mmluPro: undefined,
|
|
567
|
+
gpqa: 0.405,
|
|
568
|
+
hle: 0.045,
|
|
571
569
|
|
|
572
570
|
// Capabilities
|
|
573
571
|
contextWindow: 8192,
|
|
@@ -575,21 +573,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
575
573
|
supportsVision: false,
|
|
576
574
|
|
|
577
575
|
// Metadata
|
|
578
|
-
lastUpdated: "2026-
|
|
576
|
+
lastUpdated: "2026-06-01",
|
|
577
|
+
originalModel: "Gemma 4 E2B (Non-reasoning)",
|
|
579
578
|
},
|
|
580
|
-
"gemma-4-
|
|
581
|
-
// AA Intelligence Index (composite score)
|
|
582
|
-
intelligenceIndex: 18.8,
|
|
583
|
-
normalizedScore: 27,
|
|
584
|
-
|
|
579
|
+
"gemma-4-31b-reasoning": {
|
|
585
580
|
// AA specific benchmarks
|
|
586
|
-
codingIndex:
|
|
581
|
+
codingIndex: 38.7,
|
|
587
582
|
mathIndex: undefined,
|
|
588
583
|
|
|
589
584
|
// Academic benchmarks
|
|
590
585
|
mmluPro: undefined,
|
|
591
|
-
gpqa: 0.
|
|
592
|
-
hle: 0.
|
|
586
|
+
gpqa: 0.857,
|
|
587
|
+
hle: 0.227,
|
|
593
588
|
|
|
594
589
|
// Capabilities
|
|
595
590
|
contextWindow: 8192,
|
|
@@ -597,21 +592,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
597
592
|
supportsVision: false,
|
|
598
593
|
|
|
599
594
|
// Metadata
|
|
600
|
-
lastUpdated: "2026-
|
|
595
|
+
lastUpdated: "2026-06-01",
|
|
596
|
+
originalModel: "Gemma 4 31B (Reasoning)",
|
|
601
597
|
},
|
|
602
|
-
"
|
|
603
|
-
// AA Intelligence Index (composite score)
|
|
604
|
-
intelligenceIndex: 41.3,
|
|
605
|
-
normalizedScore: 59,
|
|
606
|
-
|
|
598
|
+
"gemma-4-31b-non-reasoning": {
|
|
607
599
|
// AA specific benchmarks
|
|
608
|
-
codingIndex:
|
|
609
|
-
mathIndex:
|
|
600
|
+
codingIndex: 33.9,
|
|
601
|
+
mathIndex: undefined,
|
|
610
602
|
|
|
611
603
|
// Academic benchmarks
|
|
612
|
-
mmluPro:
|
|
613
|
-
gpqa: 0.
|
|
614
|
-
hle: 0.
|
|
604
|
+
mmluPro: undefined,
|
|
605
|
+
gpqa: 0.763,
|
|
606
|
+
hle: 0.115,
|
|
615
607
|
|
|
616
608
|
// Capabilities
|
|
617
609
|
contextWindow: 8192,
|
|
@@ -619,21 +611,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
619
611
|
supportsVision: false,
|
|
620
612
|
|
|
621
613
|
// Metadata
|
|
622
|
-
lastUpdated: "2026-
|
|
614
|
+
lastUpdated: "2026-06-01",
|
|
615
|
+
originalModel: "Gemma 4 31B (Non-reasoning)",
|
|
623
616
|
},
|
|
624
|
-
"gemma-
|
|
625
|
-
// AA Intelligence Index (composite score)
|
|
626
|
-
intelligenceIndex: 5.5,
|
|
627
|
-
normalizedScore: 8,
|
|
628
|
-
|
|
617
|
+
"gemma-4-26b-a4b-reasoning": {
|
|
629
618
|
// AA specific benchmarks
|
|
630
|
-
codingIndex:
|
|
631
|
-
mathIndex:
|
|
619
|
+
codingIndex: 22.4,
|
|
620
|
+
mathIndex: undefined,
|
|
632
621
|
|
|
633
622
|
// Academic benchmarks
|
|
634
|
-
mmluPro:
|
|
635
|
-
gpqa: 0.
|
|
636
|
-
hle: 0.
|
|
623
|
+
mmluPro: undefined,
|
|
624
|
+
gpqa: 0.792,
|
|
625
|
+
hle: 0.183,
|
|
637
626
|
|
|
638
627
|
// Capabilities
|
|
639
628
|
contextWindow: 8192,
|
|
@@ -641,21 +630,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
641
630
|
supportsVision: false,
|
|
642
631
|
|
|
643
632
|
// Metadata
|
|
644
|
-
lastUpdated: "2026-
|
|
633
|
+
lastUpdated: "2026-06-01",
|
|
634
|
+
originalModel: "Gemma 4 26B A4B (Reasoning)",
|
|
645
635
|
},
|
|
646
|
-
"
|
|
647
|
-
// AA Intelligence Index (composite score)
|
|
648
|
-
intelligenceIndex: 31.2,
|
|
649
|
-
normalizedScore: 45,
|
|
650
|
-
|
|
636
|
+
"gemini-3.1-flash-lite": {
|
|
651
637
|
// AA specific benchmarks
|
|
652
|
-
codingIndex:
|
|
638
|
+
codingIndex: 30.1,
|
|
653
639
|
mathIndex: undefined,
|
|
654
640
|
|
|
655
641
|
// Academic benchmarks
|
|
656
642
|
mmluPro: undefined,
|
|
657
|
-
gpqa: 0.
|
|
658
|
-
hle: 0.
|
|
643
|
+
gpqa: 0.822,
|
|
644
|
+
hle: 0.162,
|
|
659
645
|
|
|
660
646
|
// Capabilities
|
|
661
647
|
contextWindow: 8192,
|
|
@@ -663,21 +649,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
663
649
|
supportsVision: false,
|
|
664
650
|
|
|
665
651
|
// Metadata
|
|
666
|
-
lastUpdated: "2026-
|
|
652
|
+
lastUpdated: "2026-06-01",
|
|
653
|
+
originalModel: "Gemini 3.1 Flash-Lite",
|
|
667
654
|
},
|
|
668
|
-
"gemma-4-
|
|
669
|
-
// AA Intelligence Index (composite score)
|
|
670
|
-
intelligenceIndex: 15.2,
|
|
671
|
-
normalizedScore: 22,
|
|
672
|
-
|
|
655
|
+
"gemma-4-e4b-non-reasoning": {
|
|
673
656
|
// AA specific benchmarks
|
|
674
|
-
codingIndex:
|
|
657
|
+
codingIndex: 6.4,
|
|
675
658
|
mathIndex: undefined,
|
|
676
659
|
|
|
677
660
|
// Academic benchmarks
|
|
678
661
|
mmluPro: undefined,
|
|
679
|
-
gpqa: 0.
|
|
680
|
-
hle: 0.
|
|
662
|
+
gpqa: 0.549,
|
|
663
|
+
hle: 0.047,
|
|
681
664
|
|
|
682
665
|
// Capabilities
|
|
683
666
|
contextWindow: 8192,
|
|
@@ -685,21 +668,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
685
668
|
supportsVision: false,
|
|
686
669
|
|
|
687
670
|
// Metadata
|
|
688
|
-
lastUpdated: "2026-
|
|
671
|
+
lastUpdated: "2026-06-01",
|
|
672
|
+
originalModel: "Gemma 4 E4B (Non-reasoning)",
|
|
689
673
|
},
|
|
690
|
-
"
|
|
691
|
-
// AA Intelligence Index (composite score)
|
|
692
|
-
intelligenceIndex: 57.2,
|
|
693
|
-
normalizedScore: 82,
|
|
694
|
-
|
|
674
|
+
"gemma-3-270m": {
|
|
695
675
|
// AA specific benchmarks
|
|
696
|
-
codingIndex:
|
|
697
|
-
mathIndex:
|
|
676
|
+
codingIndex: 0,
|
|
677
|
+
mathIndex: 2.3,
|
|
698
678
|
|
|
699
679
|
// Academic benchmarks
|
|
700
|
-
mmluPro:
|
|
701
|
-
gpqa: 0.
|
|
702
|
-
hle: 0.
|
|
680
|
+
mmluPro: 0.055,
|
|
681
|
+
gpqa: 0.224,
|
|
682
|
+
hle: 0.042,
|
|
703
683
|
|
|
704
684
|
// Capabilities
|
|
705
685
|
contextWindow: 8192,
|
|
@@ -707,21 +687,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
707
687
|
supportsVision: false,
|
|
708
688
|
|
|
709
689
|
// Metadata
|
|
710
|
-
lastUpdated: "2026-
|
|
690
|
+
lastUpdated: "2026-06-01",
|
|
691
|
+
originalModel: "Gemma 3 270M",
|
|
711
692
|
},
|
|
712
|
-
"
|
|
713
|
-
// AA Intelligence Index (composite score)
|
|
714
|
-
intelligenceIndex: 46.4,
|
|
715
|
-
normalizedScore: 66,
|
|
716
|
-
|
|
693
|
+
"gemma-4-e4b-reasoning": {
|
|
717
694
|
// AA specific benchmarks
|
|
718
|
-
codingIndex:
|
|
719
|
-
mathIndex:
|
|
695
|
+
codingIndex: 13.7,
|
|
696
|
+
mathIndex: undefined,
|
|
720
697
|
|
|
721
698
|
// Academic benchmarks
|
|
722
|
-
mmluPro:
|
|
723
|
-
gpqa: 0.
|
|
724
|
-
hle: 0.
|
|
699
|
+
mmluPro: undefined,
|
|
700
|
+
gpqa: 0.576,
|
|
701
|
+
hle: 0.037,
|
|
725
702
|
|
|
726
703
|
// Capabilities
|
|
727
704
|
contextWindow: 8192,
|
|
@@ -729,21 +706,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
729
706
|
supportsVision: false,
|
|
730
707
|
|
|
731
708
|
// Metadata
|
|
732
|
-
lastUpdated: "2026-
|
|
709
|
+
lastUpdated: "2026-06-01",
|
|
710
|
+
originalModel: "Gemma 4 E4B (Reasoning)",
|
|
733
711
|
},
|
|
734
|
-
"
|
|
735
|
-
// AA Intelligence Index (composite score)
|
|
736
|
-
intelligenceIndex: 6.4,
|
|
737
|
-
normalizedScore: 9,
|
|
738
|
-
|
|
712
|
+
"gemini-3.5-flash-high": {
|
|
739
713
|
// AA specific benchmarks
|
|
740
|
-
codingIndex:
|
|
741
|
-
mathIndex:
|
|
714
|
+
codingIndex: 45,
|
|
715
|
+
mathIndex: undefined,
|
|
742
716
|
|
|
743
717
|
// Academic benchmarks
|
|
744
|
-
mmluPro:
|
|
745
|
-
gpqa: 0.
|
|
746
|
-
hle: 0.
|
|
718
|
+
mmluPro: undefined,
|
|
719
|
+
gpqa: 0.922,
|
|
720
|
+
hle: 0.41,
|
|
747
721
|
|
|
748
722
|
// Capabilities
|
|
749
723
|
contextWindow: 8192,
|
|
@@ -751,21 +725,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
751
725
|
supportsVision: false,
|
|
752
726
|
|
|
753
727
|
// Metadata
|
|
754
|
-
lastUpdated: "2026-
|
|
728
|
+
lastUpdated: "2026-06-01",
|
|
729
|
+
originalModel: "Gemini 3.5 Flash (high)",
|
|
755
730
|
},
|
|
756
|
-
"gemini-
|
|
757
|
-
// AA Intelligence Index (composite score)
|
|
758
|
-
intelligenceIndex: 19.4,
|
|
759
|
-
normalizedScore: 28,
|
|
760
|
-
|
|
731
|
+
"gemini-3.5-flash-minimal": {
|
|
761
732
|
// AA specific benchmarks
|
|
762
|
-
codingIndex:
|
|
763
|
-
mathIndex:
|
|
733
|
+
codingIndex: 47.1,
|
|
734
|
+
mathIndex: undefined,
|
|
764
735
|
|
|
765
736
|
// Academic benchmarks
|
|
766
|
-
mmluPro:
|
|
767
|
-
gpqa: 0.
|
|
768
|
-
hle: 0.
|
|
737
|
+
mmluPro: undefined,
|
|
738
|
+
gpqa: 0.828,
|
|
739
|
+
hle: 0.231,
|
|
769
740
|
|
|
770
741
|
// Capabilities
|
|
771
742
|
contextWindow: 8192,
|
|
@@ -773,21 +744,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
773
744
|
supportsVision: false,
|
|
774
745
|
|
|
775
746
|
// Metadata
|
|
776
|
-
lastUpdated: "2026-
|
|
747
|
+
lastUpdated: "2026-06-01",
|
|
748
|
+
originalModel: "Gemini 3.5 Flash (minimal)",
|
|
777
749
|
},
|
|
778
|
-
"gemma-
|
|
779
|
-
// AA Intelligence Index (composite score)
|
|
780
|
-
intelligenceIndex: 4.8,
|
|
781
|
-
normalizedScore: 7,
|
|
782
|
-
|
|
750
|
+
"gemma-4-e2b-reasoning": {
|
|
783
751
|
// AA specific benchmarks
|
|
784
|
-
codingIndex:
|
|
785
|
-
mathIndex:
|
|
752
|
+
codingIndex: 9,
|
|
753
|
+
mathIndex: undefined,
|
|
786
754
|
|
|
787
755
|
// Academic benchmarks
|
|
788
|
-
mmluPro:
|
|
789
|
-
gpqa: 0.
|
|
790
|
-
hle: 0.
|
|
756
|
+
mmluPro: undefined,
|
|
757
|
+
gpqa: 0.433,
|
|
758
|
+
hle: 0.048,
|
|
791
759
|
|
|
792
760
|
// Capabilities
|
|
793
761
|
contextWindow: 8192,
|
|
@@ -795,15 +763,12 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
795
763
|
supportsVision: false,
|
|
796
764
|
|
|
797
765
|
// Metadata
|
|
798
|
-
lastUpdated: "2026-
|
|
766
|
+
lastUpdated: "2026-06-01",
|
|
767
|
+
originalModel: "Gemma 4 E2B (Reasoning)",
|
|
799
768
|
},
|
|
800
769
|
"gemini-2.5-pro": {
|
|
801
|
-
// AA Intelligence Index (composite score)
|
|
802
|
-
intelligenceIndex: 34.6,
|
|
803
|
-
normalizedScore: 49,
|
|
804
|
-
|
|
805
770
|
// AA specific benchmarks
|
|
806
|
-
codingIndex:
|
|
771
|
+
codingIndex: 32,
|
|
807
772
|
mathIndex: 87.7,
|
|
808
773
|
|
|
809
774
|
// Academic benchmarks
|
|
@@ -817,21 +782,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
817
782
|
supportsVision: false,
|
|
818
783
|
|
|
819
784
|
// Metadata
|
|
820
|
-
lastUpdated: "2026-
|
|
785
|
+
lastUpdated: "2026-06-01",
|
|
786
|
+
originalModel: "Gemini 2.5 Pro",
|
|
821
787
|
},
|
|
822
|
-
"
|
|
823
|
-
// AA Intelligence Index (composite score)
|
|
824
|
-
intelligenceIndex: 7.7,
|
|
825
|
-
normalizedScore: 11,
|
|
826
|
-
|
|
788
|
+
"claude-sonnet-4.6-non-reasoning-high-effort": {
|
|
827
789
|
// AA specific benchmarks
|
|
828
|
-
codingIndex:
|
|
829
|
-
mathIndex:
|
|
790
|
+
codingIndex: 46.4,
|
|
791
|
+
mathIndex: undefined,
|
|
830
792
|
|
|
831
793
|
// Academic benchmarks
|
|
832
|
-
mmluPro:
|
|
833
|
-
gpqa: 0.
|
|
834
|
-
hle: 0.
|
|
794
|
+
mmluPro: undefined,
|
|
795
|
+
gpqa: 0.799,
|
|
796
|
+
hle: 0.132,
|
|
835
797
|
|
|
836
798
|
// Capabilities
|
|
837
799
|
contextWindow: 8192,
|
|
@@ -839,43 +801,37 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
839
801
|
supportsVision: false,
|
|
840
802
|
|
|
841
803
|
// Metadata
|
|
842
|
-
lastUpdated: "2026-
|
|
804
|
+
lastUpdated: "2026-06-01",
|
|
805
|
+
originalModel: "Claude Sonnet 4.6 (Non-reasoning, High Effort)",
|
|
843
806
|
},
|
|
844
|
-
"
|
|
845
|
-
// AA Intelligence Index (composite score)
|
|
846
|
-
intelligenceIndex: 21.6,
|
|
847
|
-
normalizedScore: 31,
|
|
848
|
-
|
|
807
|
+
"claude-opus-4.8-adaptive-reasoning-max-effort": {
|
|
849
808
|
// AA specific benchmarks
|
|
850
|
-
codingIndex:
|
|
851
|
-
mathIndex:
|
|
809
|
+
codingIndex: 56.7,
|
|
810
|
+
mathIndex: undefined,
|
|
852
811
|
|
|
853
812
|
// Academic benchmarks
|
|
854
|
-
mmluPro:
|
|
855
|
-
gpqa: 0.
|
|
856
|
-
hle: 0.
|
|
857
|
-
|
|
813
|
+
mmluPro: undefined,
|
|
814
|
+
gpqa: 0.92,
|
|
815
|
+
hle: 0.457,
|
|
816
|
+
|
|
858
817
|
// Capabilities
|
|
859
818
|
contextWindow: 8192,
|
|
860
819
|
supportsReasoning: false,
|
|
861
820
|
supportsVision: false,
|
|
862
821
|
|
|
863
822
|
// Metadata
|
|
864
|
-
lastUpdated: "2026-
|
|
823
|
+
lastUpdated: "2026-06-01",
|
|
824
|
+
originalModel: "Claude Opus 4.8 (Adaptive Reasoning, Max Effort)",
|
|
865
825
|
},
|
|
866
|
-
"
|
|
867
|
-
// AA Intelligence Index (composite score)
|
|
868
|
-
intelligenceIndex: 33.5,
|
|
869
|
-
normalizedScore: 48,
|
|
870
|
-
|
|
826
|
+
"claude-4.5-haiku-reasoning": {
|
|
871
827
|
// AA specific benchmarks
|
|
872
|
-
codingIndex:
|
|
873
|
-
mathIndex:
|
|
828
|
+
codingIndex: 32.6,
|
|
829
|
+
mathIndex: 83.7,
|
|
874
830
|
|
|
875
831
|
// Academic benchmarks
|
|
876
|
-
mmluPro:
|
|
877
|
-
gpqa: 0.
|
|
878
|
-
hle: 0.
|
|
832
|
+
mmluPro: 0.76,
|
|
833
|
+
gpqa: 0.672,
|
|
834
|
+
hle: 0.097,
|
|
879
835
|
|
|
880
836
|
// Capabilities
|
|
881
837
|
contextWindow: 8192,
|
|
@@ -883,21 +839,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
883
839
|
supportsVision: false,
|
|
884
840
|
|
|
885
841
|
// Metadata
|
|
886
|
-
lastUpdated: "2026-
|
|
842
|
+
lastUpdated: "2026-06-01",
|
|
843
|
+
originalModel: "Claude 4.5 Haiku (Reasoning)",
|
|
887
844
|
},
|
|
888
|
-
"claude-4.
|
|
889
|
-
// AA Intelligence Index (composite score)
|
|
890
|
-
intelligenceIndex: 31.1,
|
|
891
|
-
normalizedScore: 44,
|
|
892
|
-
|
|
845
|
+
"claude-opus-4.7-adaptive-reasoning-max-effort": {
|
|
893
846
|
// AA specific benchmarks
|
|
894
|
-
codingIndex:
|
|
895
|
-
mathIndex:
|
|
847
|
+
codingIndex: 52.5,
|
|
848
|
+
mathIndex: undefined,
|
|
896
849
|
|
|
897
850
|
// Academic benchmarks
|
|
898
|
-
mmluPro:
|
|
899
|
-
gpqa: 0.
|
|
900
|
-
hle: 0.
|
|
851
|
+
mmluPro: undefined,
|
|
852
|
+
gpqa: 0.914,
|
|
853
|
+
hle: 0.396,
|
|
901
854
|
|
|
902
855
|
// Capabilities
|
|
903
856
|
contextWindow: 8192,
|
|
@@ -905,21 +858,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
905
858
|
supportsVision: false,
|
|
906
859
|
|
|
907
860
|
// Metadata
|
|
908
|
-
lastUpdated: "2026-
|
|
861
|
+
lastUpdated: "2026-06-01",
|
|
862
|
+
originalModel: "Claude Opus 4.7 (Adaptive Reasoning, Max Effort)",
|
|
909
863
|
},
|
|
910
|
-
"claude-4.
|
|
911
|
-
// AA Intelligence Index (composite score)
|
|
912
|
-
intelligenceIndex: 37.1,
|
|
913
|
-
normalizedScore: 53,
|
|
914
|
-
|
|
864
|
+
"claude-opus-4.7-non-reasoning-high-effort": {
|
|
915
865
|
// AA specific benchmarks
|
|
916
|
-
codingIndex:
|
|
917
|
-
mathIndex:
|
|
866
|
+
codingIndex: 53.1,
|
|
867
|
+
mathIndex: undefined,
|
|
918
868
|
|
|
919
869
|
// Academic benchmarks
|
|
920
|
-
mmluPro:
|
|
921
|
-
gpqa: 0.
|
|
922
|
-
hle: 0.
|
|
870
|
+
mmluPro: undefined,
|
|
871
|
+
gpqa: 0.885,
|
|
872
|
+
hle: 0.312,
|
|
923
873
|
|
|
924
874
|
// Capabilities
|
|
925
875
|
contextWindow: 8192,
|
|
@@ -927,21 +877,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
927
877
|
supportsVision: false,
|
|
928
878
|
|
|
929
879
|
// Metadata
|
|
930
|
-
lastUpdated: "2026-
|
|
880
|
+
lastUpdated: "2026-06-01",
|
|
881
|
+
originalModel: "Claude Opus 4.7 (Non-reasoning, High Effort)",
|
|
931
882
|
},
|
|
932
|
-
"claude-
|
|
933
|
-
// AA Intelligence Index (composite score)
|
|
934
|
-
intelligenceIndex: 46.5,
|
|
935
|
-
normalizedScore: 66,
|
|
936
|
-
|
|
883
|
+
"claude-sonnet-4.6-non-reasoning-low-effort": {
|
|
937
884
|
// AA specific benchmarks
|
|
938
|
-
codingIndex:
|
|
885
|
+
codingIndex: 43,
|
|
939
886
|
mathIndex: undefined,
|
|
940
887
|
|
|
941
888
|
// Academic benchmarks
|
|
942
889
|
mmluPro: undefined,
|
|
943
|
-
gpqa: 0.
|
|
944
|
-
hle: 0.
|
|
890
|
+
gpqa: 0.797,
|
|
891
|
+
hle: 0.108,
|
|
945
892
|
|
|
946
893
|
// Capabilities
|
|
947
894
|
contextWindow: 8192,
|
|
@@ -949,21 +896,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
949
896
|
supportsVision: false,
|
|
950
897
|
|
|
951
898
|
// Metadata
|
|
952
|
-
lastUpdated: "2026-
|
|
899
|
+
lastUpdated: "2026-06-01",
|
|
900
|
+
originalModel: "Claude Sonnet 4.6 (Non-reasoning, Low Effort)",
|
|
953
901
|
},
|
|
954
|
-
"claude-
|
|
955
|
-
// AA Intelligence Index (composite score)
|
|
956
|
-
intelligenceIndex: 53,
|
|
957
|
-
normalizedScore: 76,
|
|
958
|
-
|
|
902
|
+
"claude-4.5-haiku-non-reasoning": {
|
|
959
903
|
// AA specific benchmarks
|
|
960
|
-
codingIndex:
|
|
961
|
-
mathIndex:
|
|
904
|
+
codingIndex: 29.6,
|
|
905
|
+
mathIndex: 39,
|
|
962
906
|
|
|
963
907
|
// Academic benchmarks
|
|
964
|
-
mmluPro:
|
|
965
|
-
gpqa: 0.
|
|
966
|
-
hle: 0.
|
|
908
|
+
mmluPro: 0.8,
|
|
909
|
+
gpqa: 0.646,
|
|
910
|
+
hle: 0.043,
|
|
967
911
|
|
|
968
912
|
// Capabilities
|
|
969
913
|
contextWindow: 8192,
|
|
@@ -971,21 +915,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
971
915
|
supportsVision: false,
|
|
972
916
|
|
|
973
917
|
// Metadata
|
|
974
|
-
lastUpdated: "2026-
|
|
918
|
+
lastUpdated: "2026-06-01",
|
|
919
|
+
originalModel: "Claude 4.5 Haiku (Non-reasoning)",
|
|
975
920
|
},
|
|
976
|
-
"claude-sonnet-4.6-
|
|
977
|
-
// AA Intelligence Index (composite score)
|
|
978
|
-
intelligenceIndex: 44.4,
|
|
979
|
-
normalizedScore: 63,
|
|
980
|
-
|
|
921
|
+
"claude-sonnet-4.6-adaptive-reasoning-max-effort": {
|
|
981
922
|
// AA specific benchmarks
|
|
982
|
-
codingIndex:
|
|
923
|
+
codingIndex: 50.9,
|
|
983
924
|
mathIndex: undefined,
|
|
984
925
|
|
|
985
926
|
// Academic benchmarks
|
|
986
927
|
mmluPro: undefined,
|
|
987
|
-
gpqa: 0.
|
|
988
|
-
hle: 0.
|
|
928
|
+
gpqa: 0.875,
|
|
929
|
+
hle: 0.3,
|
|
989
930
|
|
|
990
931
|
// Capabilities
|
|
991
932
|
contextWindow: 8192,
|
|
@@ -993,21 +934,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
993
934
|
supportsVision: false,
|
|
994
935
|
|
|
995
936
|
// Metadata
|
|
996
|
-
lastUpdated: "2026-
|
|
937
|
+
lastUpdated: "2026-06-01",
|
|
938
|
+
originalModel: "Claude Sonnet 4.6 (Adaptive Reasoning, Max Effort)",
|
|
997
939
|
},
|
|
998
|
-
"
|
|
999
|
-
// AA Intelligence Index (composite score)
|
|
1000
|
-
intelligenceIndex: 51.7,
|
|
1001
|
-
normalizedScore: 74,
|
|
1002
|
-
|
|
940
|
+
"magistral-small-1.2": {
|
|
1003
941
|
// AA specific benchmarks
|
|
1004
|
-
codingIndex:
|
|
1005
|
-
mathIndex:
|
|
942
|
+
codingIndex: 14.8,
|
|
943
|
+
mathIndex: 80.3,
|
|
1006
944
|
|
|
1007
945
|
// Academic benchmarks
|
|
1008
|
-
mmluPro:
|
|
1009
|
-
gpqa: 0.
|
|
1010
|
-
hle: 0.
|
|
946
|
+
mmluPro: 0.768,
|
|
947
|
+
gpqa: 0.663,
|
|
948
|
+
hle: 0.061,
|
|
1011
949
|
|
|
1012
950
|
// Capabilities
|
|
1013
951
|
contextWindow: 8192,
|
|
@@ -1015,21 +953,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1015
953
|
supportsVision: false,
|
|
1016
954
|
|
|
1017
955
|
// Metadata
|
|
1018
|
-
lastUpdated: "2026-
|
|
956
|
+
lastUpdated: "2026-06-01",
|
|
957
|
+
originalModel: "Magistral Small 1.2",
|
|
1019
958
|
},
|
|
1020
|
-
"
|
|
1021
|
-
// AA Intelligence Index (composite score)
|
|
1022
|
-
intelligenceIndex: 42.6,
|
|
1023
|
-
normalizedScore: 61,
|
|
1024
|
-
|
|
959
|
+
"mistral-medium-3.5": {
|
|
1025
960
|
// AA specific benchmarks
|
|
1026
|
-
codingIndex:
|
|
961
|
+
codingIndex: 35.4,
|
|
1027
962
|
mathIndex: undefined,
|
|
1028
963
|
|
|
1029
964
|
// Academic benchmarks
|
|
1030
965
|
mmluPro: undefined,
|
|
1031
|
-
gpqa: 0.
|
|
1032
|
-
hle: 0.
|
|
966
|
+
gpqa: 0.748,
|
|
967
|
+
hle: 0.128,
|
|
1033
968
|
|
|
1034
969
|
// Capabilities
|
|
1035
970
|
contextWindow: 8192,
|
|
@@ -1037,21 +972,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1037
972
|
supportsVision: false,
|
|
1038
973
|
|
|
1039
974
|
// Metadata
|
|
1040
|
-
lastUpdated: "2026-
|
|
975
|
+
lastUpdated: "2026-06-01",
|
|
976
|
+
originalModel: "Mistral Medium 3.5",
|
|
1041
977
|
},
|
|
1042
|
-
"
|
|
1043
|
-
// AA Intelligence Index (composite score)
|
|
1044
|
-
intelligenceIndex: 22.8,
|
|
1045
|
-
normalizedScore: 33,
|
|
1046
|
-
|
|
978
|
+
"devstral-small-2": {
|
|
1047
979
|
// AA specific benchmarks
|
|
1048
|
-
codingIndex:
|
|
1049
|
-
mathIndex:
|
|
980
|
+
codingIndex: 20.7,
|
|
981
|
+
mathIndex: 34.3,
|
|
1050
982
|
|
|
1051
983
|
// Academic benchmarks
|
|
1052
|
-
mmluPro: 0.
|
|
1053
|
-
gpqa: 0.
|
|
1054
|
-
hle: 0.
|
|
984
|
+
mmluPro: 0.678,
|
|
985
|
+
gpqa: 0.532,
|
|
986
|
+
hle: 0.034,
|
|
1055
987
|
|
|
1056
988
|
// Capabilities
|
|
1057
989
|
contextWindow: 8192,
|
|
@@ -1059,21 +991,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1059
991
|
supportsVision: false,
|
|
1060
992
|
|
|
1061
993
|
// Metadata
|
|
1062
|
-
lastUpdated: "2026-
|
|
994
|
+
lastUpdated: "2026-06-01",
|
|
995
|
+
originalModel: "Devstral Small 2",
|
|
1063
996
|
},
|
|
1064
|
-
"
|
|
1065
|
-
// AA Intelligence Index (composite score)
|
|
1066
|
-
intelligenceIndex: 22,
|
|
1067
|
-
normalizedScore: 31,
|
|
1068
|
-
|
|
997
|
+
"ministral-3-8b": {
|
|
1069
998
|
// AA specific benchmarks
|
|
1070
|
-
codingIndex:
|
|
1071
|
-
mathIndex:
|
|
999
|
+
codingIndex: 10,
|
|
1000
|
+
mathIndex: 31.7,
|
|
1072
1001
|
|
|
1073
1002
|
// Academic benchmarks
|
|
1074
|
-
mmluPro: 0.
|
|
1075
|
-
gpqa: 0.
|
|
1076
|
-
hle: 0.
|
|
1003
|
+
mmluPro: 0.642,
|
|
1004
|
+
gpqa: 0.471,
|
|
1005
|
+
hle: 0.043,
|
|
1077
1006
|
|
|
1078
1007
|
// Capabilities
|
|
1079
1008
|
contextWindow: 8192,
|
|
@@ -1081,21 +1010,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1081
1010
|
supportsVision: false,
|
|
1082
1011
|
|
|
1083
1012
|
// Metadata
|
|
1084
|
-
lastUpdated: "2026-
|
|
1013
|
+
lastUpdated: "2026-06-01",
|
|
1014
|
+
originalModel: "Ministral 3 8B",
|
|
1085
1015
|
},
|
|
1086
|
-
"
|
|
1087
|
-
// AA Intelligence Index (composite score)
|
|
1088
|
-
intelligenceIndex: 27.2,
|
|
1089
|
-
normalizedScore: 39,
|
|
1090
|
-
|
|
1016
|
+
"magistral-medium-1.2": {
|
|
1091
1017
|
// AA specific benchmarks
|
|
1092
|
-
codingIndex:
|
|
1093
|
-
mathIndex:
|
|
1018
|
+
codingIndex: 21.7,
|
|
1019
|
+
mathIndex: 82,
|
|
1094
1020
|
|
|
1095
1021
|
// Academic benchmarks
|
|
1096
|
-
mmluPro:
|
|
1097
|
-
gpqa: 0.
|
|
1098
|
-
hle: 0.
|
|
1022
|
+
mmluPro: 0.815,
|
|
1023
|
+
gpqa: 0.739,
|
|
1024
|
+
hle: 0.096,
|
|
1099
1025
|
|
|
1100
1026
|
// Capabilities
|
|
1101
1027
|
contextWindow: 8192,
|
|
@@ -1103,21 +1029,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1103
1029
|
supportsVision: false,
|
|
1104
1030
|
|
|
1105
1031
|
// Metadata
|
|
1106
|
-
lastUpdated: "2026-
|
|
1032
|
+
lastUpdated: "2026-06-01",
|
|
1033
|
+
originalModel: "Magistral Medium 1.2",
|
|
1107
1034
|
},
|
|
1108
|
-
"
|
|
1109
|
-
// AA Intelligence Index (composite score)
|
|
1110
|
-
intelligenceIndex: 14.8,
|
|
1111
|
-
normalizedScore: 21,
|
|
1112
|
-
|
|
1035
|
+
"mistral-large-3": {
|
|
1113
1036
|
// AA specific benchmarks
|
|
1114
|
-
codingIndex:
|
|
1115
|
-
mathIndex:
|
|
1037
|
+
codingIndex: 22.7,
|
|
1038
|
+
mathIndex: 38,
|
|
1116
1039
|
|
|
1117
1040
|
// Academic benchmarks
|
|
1118
|
-
mmluPro: 0.
|
|
1119
|
-
gpqa: 0.
|
|
1120
|
-
hle: 0.
|
|
1041
|
+
mmluPro: 0.807,
|
|
1042
|
+
gpqa: 0.68,
|
|
1043
|
+
hle: 0.041,
|
|
1121
1044
|
|
|
1122
1045
|
// Capabilities
|
|
1123
1046
|
contextWindow: 8192,
|
|
@@ -1125,13 +1048,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1125
1048
|
supportsVision: false,
|
|
1126
1049
|
|
|
1127
1050
|
// Metadata
|
|
1128
|
-
lastUpdated: "2026-
|
|
1051
|
+
lastUpdated: "2026-06-01",
|
|
1052
|
+
originalModel: "Mistral Large 3",
|
|
1129
1053
|
},
|
|
1130
1054
|
"ministral-3-14b": {
|
|
1131
|
-
// AA Intelligence Index (composite score)
|
|
1132
|
-
intelligenceIndex: 16,
|
|
1133
|
-
normalizedScore: 23,
|
|
1134
|
-
|
|
1135
1055
|
// AA specific benchmarks
|
|
1136
1056
|
codingIndex: 10.9,
|
|
1137
1057
|
mathIndex: 30,
|
|
@@ -1147,21 +1067,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1147
1067
|
supportsVision: false,
|
|
1148
1068
|
|
|
1149
1069
|
// Metadata
|
|
1150
|
-
lastUpdated: "2026-
|
|
1070
|
+
lastUpdated: "2026-06-01",
|
|
1071
|
+
originalModel: "Ministral 3 14B",
|
|
1151
1072
|
},
|
|
1152
|
-
"
|
|
1153
|
-
// AA Intelligence Index (composite score)
|
|
1154
|
-
intelligenceIndex: 27.1,
|
|
1155
|
-
normalizedScore: 39,
|
|
1156
|
-
|
|
1073
|
+
"mistral-small-4-reasoning": {
|
|
1157
1074
|
// AA specific benchmarks
|
|
1158
|
-
codingIndex:
|
|
1159
|
-
mathIndex:
|
|
1075
|
+
codingIndex: 24.3,
|
|
1076
|
+
mathIndex: undefined,
|
|
1160
1077
|
|
|
1161
1078
|
// Academic benchmarks
|
|
1162
|
-
mmluPro:
|
|
1163
|
-
gpqa: 0.
|
|
1164
|
-
hle: 0.
|
|
1079
|
+
mmluPro: undefined,
|
|
1080
|
+
gpqa: 0.769,
|
|
1081
|
+
hle: 0.095,
|
|
1165
1082
|
|
|
1166
1083
|
// Capabilities
|
|
1167
1084
|
contextWindow: 8192,
|
|
@@ -1169,13 +1086,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1169
1086
|
supportsVision: false,
|
|
1170
1087
|
|
|
1171
1088
|
// Metadata
|
|
1172
|
-
lastUpdated: "2026-
|
|
1089
|
+
lastUpdated: "2026-06-01",
|
|
1090
|
+
originalModel: "Mistral Small 4 (Reasoning)",
|
|
1173
1091
|
},
|
|
1174
1092
|
"mistral-small-4-non-reasoning": {
|
|
1175
|
-
// AA Intelligence Index (composite score)
|
|
1176
|
-
intelligenceIndex: 18.6,
|
|
1177
|
-
normalizedScore: 27,
|
|
1178
|
-
|
|
1179
1093
|
// AA specific benchmarks
|
|
1180
1094
|
codingIndex: 16.4,
|
|
1181
1095
|
mathIndex: undefined,
|
|
@@ -1191,21 +1105,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1191
1105
|
supportsVision: false,
|
|
1192
1106
|
|
|
1193
1107
|
// Metadata
|
|
1194
|
-
lastUpdated: "2026-
|
|
1108
|
+
lastUpdated: "2026-06-01",
|
|
1109
|
+
originalModel: "Mistral Small 4 (Non-reasoning)",
|
|
1195
1110
|
},
|
|
1196
|
-
"
|
|
1197
|
-
// AA Intelligence Index (composite score)
|
|
1198
|
-
intelligenceIndex: 18.2,
|
|
1199
|
-
normalizedScore: 26,
|
|
1200
|
-
|
|
1111
|
+
"devstral-2": {
|
|
1201
1112
|
// AA specific benchmarks
|
|
1202
|
-
codingIndex:
|
|
1203
|
-
mathIndex:
|
|
1113
|
+
codingIndex: 23.7,
|
|
1114
|
+
mathIndex: 36.7,
|
|
1204
1115
|
|
|
1205
1116
|
// Academic benchmarks
|
|
1206
|
-
mmluPro: 0.
|
|
1207
|
-
gpqa: 0.
|
|
1208
|
-
hle: 0.
|
|
1117
|
+
mmluPro: 0.762,
|
|
1118
|
+
gpqa: 0.594,
|
|
1119
|
+
hle: 0.036,
|
|
1209
1120
|
|
|
1210
1121
|
// Capabilities
|
|
1211
1122
|
contextWindow: 8192,
|
|
@@ -1213,13 +1124,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1213
1124
|
supportsVision: false,
|
|
1214
1125
|
|
|
1215
1126
|
// Metadata
|
|
1216
|
-
lastUpdated: "2026-
|
|
1127
|
+
lastUpdated: "2026-06-01",
|
|
1128
|
+
originalModel: "Devstral 2",
|
|
1217
1129
|
},
|
|
1218
1130
|
"ministral-3-3b": {
|
|
1219
|
-
// AA Intelligence Index (composite score)
|
|
1220
|
-
intelligenceIndex: 11.2,
|
|
1221
|
-
normalizedScore: 16,
|
|
1222
|
-
|
|
1223
1131
|
// AA specific benchmarks
|
|
1224
1132
|
codingIndex: 4.8,
|
|
1225
1133
|
mathIndex: 22,
|
|
@@ -1235,65 +1143,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1235
1143
|
supportsVision: false,
|
|
1236
1144
|
|
|
1237
1145
|
// Metadata
|
|
1238
|
-
lastUpdated: "2026-
|
|
1239
|
-
|
|
1240
|
-
"mistral-medium-3.1": {
|
|
1241
|
-
// AA Intelligence Index (composite score)
|
|
1242
|
-
intelligenceIndex: 21.3,
|
|
1243
|
-
normalizedScore: 30,
|
|
1244
|
-
|
|
1245
|
-
// AA specific benchmarks
|
|
1246
|
-
codingIndex: 18.3,
|
|
1247
|
-
mathIndex: 38.3,
|
|
1248
|
-
|
|
1249
|
-
// Academic benchmarks
|
|
1250
|
-
mmluPro: 0.683,
|
|
1251
|
-
gpqa: 0.588,
|
|
1252
|
-
hle: 0.044,
|
|
1253
|
-
|
|
1254
|
-
// Capabilities
|
|
1255
|
-
contextWindow: 8192,
|
|
1256
|
-
supportsReasoning: false,
|
|
1257
|
-
supportsVision: false,
|
|
1258
|
-
|
|
1259
|
-
// Metadata
|
|
1260
|
-
lastUpdated: "2026-04-06",
|
|
1146
|
+
lastUpdated: "2026-06-01",
|
|
1147
|
+
originalModel: "Ministral 3 3B",
|
|
1261
1148
|
},
|
|
1262
|
-
"
|
|
1263
|
-
// AA Intelligence Index (composite score)
|
|
1264
|
-
intelligenceIndex: 19.5,
|
|
1265
|
-
normalizedScore: 28,
|
|
1266
|
-
|
|
1149
|
+
"deepseek-v4-pro-non-reasoning": {
|
|
1267
1150
|
// AA specific benchmarks
|
|
1268
|
-
codingIndex:
|
|
1269
|
-
mathIndex:
|
|
1270
|
-
|
|
1271
|
-
// Academic benchmarks
|
|
1272
|
-
mmluPro: 0.678,
|
|
1273
|
-
gpqa: 0.532,
|
|
1274
|
-
hle: 0.034,
|
|
1275
|
-
|
|
1276
|
-
// Capabilities
|
|
1277
|
-
contextWindow: 8192,
|
|
1278
|
-
supportsReasoning: false,
|
|
1279
|
-
supportsVision: false,
|
|
1280
|
-
|
|
1281
|
-
// Metadata
|
|
1282
|
-
lastUpdated: "2026-04-06",
|
|
1283
|
-
},
|
|
1284
|
-
"deepseek-r1-distill-llama-70b": {
|
|
1285
|
-
// AA Intelligence Index (composite score)
|
|
1286
|
-
intelligenceIndex: 16,
|
|
1287
|
-
normalizedScore: 23,
|
|
1288
|
-
|
|
1289
|
-
// AA specific benchmarks
|
|
1290
|
-
codingIndex: 11.4,
|
|
1291
|
-
mathIndex: 53.7,
|
|
1151
|
+
codingIndex: 38.4,
|
|
1152
|
+
mathIndex: undefined,
|
|
1292
1153
|
|
|
1293
1154
|
// Academic benchmarks
|
|
1294
|
-
mmluPro:
|
|
1295
|
-
gpqa: 0.
|
|
1296
|
-
hle: 0.
|
|
1155
|
+
mmluPro: undefined,
|
|
1156
|
+
gpqa: 0.717,
|
|
1157
|
+
hle: 0.077,
|
|
1297
1158
|
|
|
1298
1159
|
// Capabilities
|
|
1299
1160
|
contextWindow: 8192,
|
|
@@ -1301,21 +1162,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1301
1162
|
supportsVision: false,
|
|
1302
1163
|
|
|
1303
1164
|
// Metadata
|
|
1304
|
-
lastUpdated: "2026-
|
|
1165
|
+
lastUpdated: "2026-06-01",
|
|
1166
|
+
originalModel: "DeepSeek V4 Pro (Non-reasoning)",
|
|
1305
1167
|
},
|
|
1306
|
-
"deepseek-
|
|
1307
|
-
// AA Intelligence Index (composite score)
|
|
1308
|
-
intelligenceIndex: 27.1,
|
|
1309
|
-
normalizedScore: 39,
|
|
1310
|
-
|
|
1168
|
+
"deepseek-v4-pro-reasoning-high-effort": {
|
|
1311
1169
|
// AA specific benchmarks
|
|
1312
|
-
codingIndex:
|
|
1313
|
-
mathIndex:
|
|
1170
|
+
codingIndex: 43.2,
|
|
1171
|
+
mathIndex: undefined,
|
|
1314
1172
|
|
|
1315
1173
|
// Academic benchmarks
|
|
1316
|
-
mmluPro:
|
|
1317
|
-
gpqa: 0.
|
|
1318
|
-
hle: 0.
|
|
1174
|
+
mmluPro: undefined,
|
|
1175
|
+
gpqa: 0.905,
|
|
1176
|
+
hle: 0.335,
|
|
1319
1177
|
|
|
1320
1178
|
// Capabilities
|
|
1321
1179
|
contextWindow: 8192,
|
|
@@ -1323,21 +1181,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1323
1181
|
supportsVision: false,
|
|
1324
1182
|
|
|
1325
1183
|
// Metadata
|
|
1326
|
-
lastUpdated: "2026-
|
|
1184
|
+
lastUpdated: "2026-06-01",
|
|
1185
|
+
originalModel: "DeepSeek V4 Pro (Reasoning, High Effort)",
|
|
1327
1186
|
},
|
|
1328
|
-
"deepseek-
|
|
1329
|
-
// AA Intelligence Index (composite score)
|
|
1330
|
-
intelligenceIndex: 32.1,
|
|
1331
|
-
normalizedScore: 46,
|
|
1332
|
-
|
|
1187
|
+
"deepseek-v4-pro-reasoning-max-effort": {
|
|
1333
1188
|
// AA specific benchmarks
|
|
1334
|
-
codingIndex:
|
|
1335
|
-
mathIndex:
|
|
1189
|
+
codingIndex: 47.5,
|
|
1190
|
+
mathIndex: undefined,
|
|
1336
1191
|
|
|
1337
1192
|
// Academic benchmarks
|
|
1338
|
-
mmluPro:
|
|
1339
|
-
gpqa: 0.
|
|
1340
|
-
hle: 0.
|
|
1193
|
+
mmluPro: undefined,
|
|
1194
|
+
gpqa: 0.888,
|
|
1195
|
+
hle: 0.359,
|
|
1341
1196
|
|
|
1342
1197
|
// Capabilities
|
|
1343
1198
|
contextWindow: 8192,
|
|
@@ -1345,21 +1200,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1345
1200
|
supportsVision: false,
|
|
1346
1201
|
|
|
1347
1202
|
// Metadata
|
|
1348
|
-
lastUpdated: "2026-
|
|
1203
|
+
lastUpdated: "2026-06-01",
|
|
1204
|
+
originalModel: "DeepSeek V4 Pro (Reasoning, Max Effort)",
|
|
1349
1205
|
},
|
|
1350
|
-
"deepseek-
|
|
1351
|
-
// AA Intelligence Index (composite score)
|
|
1352
|
-
intelligenceIndex: 29.4,
|
|
1353
|
-
normalizedScore: 42,
|
|
1354
|
-
|
|
1206
|
+
"deepseek-v4-flash-reasoning-max-effort": {
|
|
1355
1207
|
// AA specific benchmarks
|
|
1356
|
-
codingIndex:
|
|
1357
|
-
mathIndex:
|
|
1208
|
+
codingIndex: 38.7,
|
|
1209
|
+
mathIndex: undefined,
|
|
1358
1210
|
|
|
1359
1211
|
// Academic benchmarks
|
|
1360
|
-
mmluPro:
|
|
1361
|
-
gpqa: 0.
|
|
1362
|
-
hle: 0.
|
|
1212
|
+
mmluPro: undefined,
|
|
1213
|
+
gpqa: 0.894,
|
|
1214
|
+
hle: 0.321,
|
|
1363
1215
|
|
|
1364
1216
|
// Capabilities
|
|
1365
1217
|
contextWindow: 8192,
|
|
@@ -1367,21 +1219,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1367
1219
|
supportsVision: false,
|
|
1368
1220
|
|
|
1369
1221
|
// Metadata
|
|
1370
|
-
lastUpdated: "2026-
|
|
1222
|
+
lastUpdated: "2026-06-01",
|
|
1223
|
+
originalModel: "DeepSeek V4 Flash (Reasoning, Max Effort)",
|
|
1371
1224
|
},
|
|
1372
|
-
"deepseek-
|
|
1373
|
-
// AA Intelligence Index (composite score)
|
|
1374
|
-
intelligenceIndex: 16.4,
|
|
1375
|
-
normalizedScore: 23,
|
|
1376
|
-
|
|
1225
|
+
"deepseek-v4-flash-reasoning-high-effort": {
|
|
1377
1226
|
// AA specific benchmarks
|
|
1378
|
-
codingIndex:
|
|
1379
|
-
mathIndex:
|
|
1227
|
+
codingIndex: 39.8,
|
|
1228
|
+
mathIndex: undefined,
|
|
1380
1229
|
|
|
1381
1230
|
// Academic benchmarks
|
|
1382
|
-
mmluPro:
|
|
1383
|
-
gpqa: 0.
|
|
1384
|
-
hle: 0.
|
|
1231
|
+
mmluPro: undefined,
|
|
1232
|
+
gpqa: 0.867,
|
|
1233
|
+
hle: 0.278,
|
|
1385
1234
|
|
|
1386
1235
|
// Capabilities
|
|
1387
1236
|
contextWindow: 8192,
|
|
@@ -1389,21 +1238,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1389
1238
|
supportsVision: false,
|
|
1390
1239
|
|
|
1391
1240
|
// Metadata
|
|
1392
|
-
lastUpdated: "2026-
|
|
1241
|
+
lastUpdated: "2026-06-01",
|
|
1242
|
+
originalModel: "DeepSeek V4 Flash (Reasoning, High Effort)",
|
|
1393
1243
|
},
|
|
1394
|
-
"deepseek-
|
|
1395
|
-
// AA Intelligence Index (composite score)
|
|
1396
|
-
intelligenceIndex: 41.7,
|
|
1397
|
-
normalizedScore: 60,
|
|
1398
|
-
|
|
1244
|
+
"deepseek-v4-flash-non-reasoning": {
|
|
1399
1245
|
// AA specific benchmarks
|
|
1400
|
-
codingIndex:
|
|
1401
|
-
mathIndex:
|
|
1246
|
+
codingIndex: 35.2,
|
|
1247
|
+
mathIndex: undefined,
|
|
1402
1248
|
|
|
1403
1249
|
// Academic benchmarks
|
|
1404
|
-
mmluPro:
|
|
1405
|
-
gpqa: 0.
|
|
1406
|
-
hle: 0.
|
|
1250
|
+
mmluPro: undefined,
|
|
1251
|
+
gpqa: 0.716,
|
|
1252
|
+
hle: 0.07,
|
|
1407
1253
|
|
|
1408
1254
|
// Capabilities
|
|
1409
1255
|
contextWindow: 8192,
|
|
@@ -1411,13 +1257,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1411
1257
|
supportsVision: false,
|
|
1412
1258
|
|
|
1413
1259
|
// Metadata
|
|
1414
|
-
lastUpdated: "2026-
|
|
1260
|
+
lastUpdated: "2026-06-01",
|
|
1261
|
+
originalModel: "DeepSeek V4 Flash (Non-reasoning)",
|
|
1415
1262
|
},
|
|
1416
1263
|
"r1-1776": {
|
|
1417
|
-
// AA Intelligence Index (composite score)
|
|
1418
|
-
intelligenceIndex: 12,
|
|
1419
|
-
normalizedScore: 17,
|
|
1420
|
-
|
|
1421
1264
|
// AA specific benchmarks
|
|
1422
1265
|
codingIndex: undefined,
|
|
1423
1266
|
mathIndex: undefined,
|
|
@@ -1433,13 +1276,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1433
1276
|
supportsVision: false,
|
|
1434
1277
|
|
|
1435
1278
|
// Metadata
|
|
1436
|
-
lastUpdated: "2026-
|
|
1279
|
+
lastUpdated: "2026-06-01",
|
|
1280
|
+
originalModel: "R1 1776",
|
|
1437
1281
|
},
|
|
1438
1282
|
"falcon-h1r-7b": {
|
|
1439
|
-
// AA Intelligence Index (composite score)
|
|
1440
|
-
intelligenceIndex: 15.8,
|
|
1441
|
-
normalizedScore: 23,
|
|
1442
|
-
|
|
1443
1283
|
// AA specific benchmarks
|
|
1444
1284
|
codingIndex: 9.8,
|
|
1445
1285
|
mathIndex: 80,
|
|
@@ -1455,21 +1295,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1455
1295
|
supportsVision: false,
|
|
1456
1296
|
|
|
1457
1297
|
// Metadata
|
|
1458
|
-
lastUpdated: "2026-
|
|
1298
|
+
lastUpdated: "2026-06-01",
|
|
1299
|
+
originalModel: "Falcon-H1R-7B",
|
|
1459
1300
|
},
|
|
1460
|
-
"grok-4.
|
|
1461
|
-
// AA Intelligence Index (composite score)
|
|
1462
|
-
intelligenceIndex: 48.5,
|
|
1463
|
-
normalizedScore: 69,
|
|
1464
|
-
|
|
1301
|
+
"grok-4.3-medium": {
|
|
1465
1302
|
// AA specific benchmarks
|
|
1466
|
-
codingIndex:
|
|
1303
|
+
codingIndex: 35.1,
|
|
1467
1304
|
mathIndex: undefined,
|
|
1468
1305
|
|
|
1469
1306
|
// Academic benchmarks
|
|
1470
1307
|
mmluPro: undefined,
|
|
1471
|
-
gpqa: 0.
|
|
1472
|
-
hle: 0.
|
|
1308
|
+
gpqa: 0.89,
|
|
1309
|
+
hle: 0.281,
|
|
1473
1310
|
|
|
1474
1311
|
// Capabilities
|
|
1475
1312
|
contextWindow: 8192,
|
|
@@ -1477,21 +1314,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1477
1314
|
supportsVision: false,
|
|
1478
1315
|
|
|
1479
1316
|
// Metadata
|
|
1480
|
-
lastUpdated: "2026-
|
|
1317
|
+
lastUpdated: "2026-06-01",
|
|
1318
|
+
originalModel: "Grok 4.3 (medium)",
|
|
1481
1319
|
},
|
|
1482
|
-
"grok-4.
|
|
1483
|
-
// AA Intelligence Index (composite score)
|
|
1484
|
-
intelligenceIndex: 29.7,
|
|
1485
|
-
normalizedScore: 42,
|
|
1486
|
-
|
|
1320
|
+
"grok-4.3-low": {
|
|
1487
1321
|
// AA specific benchmarks
|
|
1488
|
-
codingIndex:
|
|
1322
|
+
codingIndex: 31.6,
|
|
1489
1323
|
mathIndex: undefined,
|
|
1490
1324
|
|
|
1491
1325
|
// Academic benchmarks
|
|
1492
1326
|
mmluPro: undefined,
|
|
1493
|
-
gpqa: 0.
|
|
1494
|
-
hle: 0.
|
|
1327
|
+
gpqa: 0.843,
|
|
1328
|
+
hle: 0.173,
|
|
1495
1329
|
|
|
1496
1330
|
// Capabilities
|
|
1497
1331
|
contextWindow: 8192,
|
|
@@ -1499,21 +1333,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1499
1333
|
supportsVision: false,
|
|
1500
1334
|
|
|
1501
1335
|
// Metadata
|
|
1502
|
-
lastUpdated: "2026-
|
|
1336
|
+
lastUpdated: "2026-06-01",
|
|
1337
|
+
originalModel: "Grok 4.3 (low)",
|
|
1503
1338
|
},
|
|
1504
|
-
"grok-
|
|
1505
|
-
// AA Intelligence Index (composite score)
|
|
1506
|
-
intelligenceIndex: 28.7,
|
|
1507
|
-
normalizedScore: 41,
|
|
1508
|
-
|
|
1339
|
+
"grok-4.3-high": {
|
|
1509
1340
|
// AA specific benchmarks
|
|
1510
|
-
codingIndex:
|
|
1511
|
-
mathIndex:
|
|
1341
|
+
codingIndex: 41,
|
|
1342
|
+
mathIndex: undefined,
|
|
1512
1343
|
|
|
1513
1344
|
// Academic benchmarks
|
|
1514
|
-
mmluPro:
|
|
1515
|
-
gpqa: 0.
|
|
1516
|
-
hle: 0.
|
|
1345
|
+
mmluPro: undefined,
|
|
1346
|
+
gpqa: 0.901,
|
|
1347
|
+
hle: 0.35,
|
|
1517
1348
|
|
|
1518
1349
|
// Capabilities
|
|
1519
1350
|
contextWindow: 8192,
|
|
@@ -1521,21 +1352,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1521
1352
|
supportsVision: false,
|
|
1522
1353
|
|
|
1523
1354
|
// Metadata
|
|
1524
|
-
lastUpdated: "2026-
|
|
1355
|
+
lastUpdated: "2026-06-01",
|
|
1356
|
+
originalModel: "Grok 4.3 (high)",
|
|
1525
1357
|
},
|
|
1526
|
-
"grok-3-
|
|
1527
|
-
// AA Intelligence Index (composite score)
|
|
1528
|
-
intelligenceIndex: 32.1,
|
|
1529
|
-
normalizedScore: 46,
|
|
1530
|
-
|
|
1358
|
+
"grok-4.3-non-reasoning": {
|
|
1531
1359
|
// AA specific benchmarks
|
|
1532
|
-
codingIndex: 25.
|
|
1533
|
-
mathIndex:
|
|
1360
|
+
codingIndex: 25.1,
|
|
1361
|
+
mathIndex: undefined,
|
|
1534
1362
|
|
|
1535
1363
|
// Academic benchmarks
|
|
1536
|
-
mmluPro:
|
|
1537
|
-
gpqa: 0.
|
|
1538
|
-
hle: 0.
|
|
1364
|
+
mmluPro: undefined,
|
|
1365
|
+
gpqa: 0.658,
|
|
1366
|
+
hle: 0.065,
|
|
1539
1367
|
|
|
1540
1368
|
// Capabilities
|
|
1541
1369
|
contextWindow: 8192,
|
|
@@ -1543,13 +1371,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1543
1371
|
supportsVision: false,
|
|
1544
1372
|
|
|
1545
1373
|
// Metadata
|
|
1546
|
-
lastUpdated: "2026-
|
|
1374
|
+
lastUpdated: "2026-06-01",
|
|
1375
|
+
originalModel: "Grok 4.3 (Non-reasoning)",
|
|
1547
1376
|
},
|
|
1548
1377
|
"nova-micro": {
|
|
1549
|
-
// AA Intelligence Index (composite score)
|
|
1550
|
-
intelligenceIndex: 10.3,
|
|
1551
|
-
normalizedScore: 15,
|
|
1552
|
-
|
|
1553
1378
|
// AA specific benchmarks
|
|
1554
1379
|
codingIndex: 4.1,
|
|
1555
1380
|
mathIndex: 6,
|
|
@@ -1565,21 +1390,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1565
1390
|
supportsVision: false,
|
|
1566
1391
|
|
|
1567
1392
|
// Metadata
|
|
1568
|
-
lastUpdated: "2026-
|
|
1393
|
+
lastUpdated: "2026-06-01",
|
|
1394
|
+
originalModel: "Nova Micro",
|
|
1569
1395
|
},
|
|
1570
|
-
"nova-
|
|
1571
|
-
// AA Intelligence Index (composite score)
|
|
1572
|
-
intelligenceIndex: 19,
|
|
1573
|
-
normalizedScore: 27,
|
|
1574
|
-
|
|
1396
|
+
"nova-2.0-omni-low": {
|
|
1575
1397
|
// AA specific benchmarks
|
|
1576
|
-
codingIndex: 13.
|
|
1577
|
-
mathIndex:
|
|
1398
|
+
codingIndex: 13.9,
|
|
1399
|
+
mathIndex: 56,
|
|
1578
1400
|
|
|
1579
1401
|
// Academic benchmarks
|
|
1580
|
-
mmluPro: 0.
|
|
1581
|
-
gpqa: 0.
|
|
1582
|
-
hle: 0.
|
|
1402
|
+
mmluPro: 0.798,
|
|
1403
|
+
gpqa: 0.699,
|
|
1404
|
+
hle: 0.04,
|
|
1583
1405
|
|
|
1584
1406
|
// Capabilities
|
|
1585
1407
|
contextWindow: 8192,
|
|
@@ -1587,21 +1409,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1587
1409
|
supportsVision: false,
|
|
1588
1410
|
|
|
1589
1411
|
// Metadata
|
|
1590
|
-
lastUpdated: "2026-
|
|
1412
|
+
lastUpdated: "2026-06-01",
|
|
1413
|
+
originalModel: "Nova 2.0 Omni (low)",
|
|
1591
1414
|
},
|
|
1592
|
-
"nova-2-
|
|
1593
|
-
// AA Intelligence Index (composite score)
|
|
1594
|
-
intelligenceIndex: 24.6,
|
|
1595
|
-
normalizedScore: 35,
|
|
1596
|
-
|
|
1415
|
+
"nova-2.0-pro-preview-medium": {
|
|
1597
1416
|
// AA specific benchmarks
|
|
1598
|
-
codingIndex:
|
|
1599
|
-
mathIndex:
|
|
1417
|
+
codingIndex: 30.4,
|
|
1418
|
+
mathIndex: 89,
|
|
1600
1419
|
|
|
1601
1420
|
// Academic benchmarks
|
|
1602
|
-
mmluPro: 0.
|
|
1603
|
-
gpqa: 0.
|
|
1604
|
-
hle: 0.
|
|
1421
|
+
mmluPro: 0.83,
|
|
1422
|
+
gpqa: 0.785,
|
|
1423
|
+
hle: 0.089,
|
|
1605
1424
|
|
|
1606
1425
|
// Capabilities
|
|
1607
1426
|
contextWindow: 8192,
|
|
@@ -1609,13 +1428,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1609
1428
|
supportsVision: false,
|
|
1610
1429
|
|
|
1611
1430
|
// Metadata
|
|
1612
|
-
lastUpdated: "2026-
|
|
1431
|
+
lastUpdated: "2026-06-01",
|
|
1432
|
+
originalModel: "Nova 2.0 Pro Preview (medium)",
|
|
1613
1433
|
},
|
|
1614
|
-
"nova-2-lite-medium": {
|
|
1615
|
-
// AA Intelligence Index (composite score)
|
|
1616
|
-
intelligenceIndex: 29.7,
|
|
1617
|
-
normalizedScore: 42,
|
|
1618
|
-
|
|
1434
|
+
"nova-2.0-lite-medium": {
|
|
1619
1435
|
// AA specific benchmarks
|
|
1620
1436
|
codingIndex: 23.9,
|
|
1621
1437
|
mathIndex: 88.7,
|
|
@@ -1631,21 +1447,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1631
1447
|
supportsVision: false,
|
|
1632
1448
|
|
|
1633
1449
|
// Metadata
|
|
1634
|
-
lastUpdated: "2026-
|
|
1450
|
+
lastUpdated: "2026-06-01",
|
|
1451
|
+
originalModel: "Nova 2.0 Lite (medium)",
|
|
1635
1452
|
},
|
|
1636
|
-
"nova-2-
|
|
1637
|
-
// AA Intelligence Index (composite score)
|
|
1638
|
-
intelligenceIndex: 23.1,
|
|
1639
|
-
normalizedScore: 33,
|
|
1640
|
-
|
|
1453
|
+
"nova-2.0-lite-high": {
|
|
1641
1454
|
// AA specific benchmarks
|
|
1642
|
-
codingIndex:
|
|
1643
|
-
mathIndex:
|
|
1455
|
+
codingIndex: 23.4,
|
|
1456
|
+
mathIndex: 94.3,
|
|
1644
1457
|
|
|
1645
1458
|
// Academic benchmarks
|
|
1646
|
-
mmluPro: 0.
|
|
1647
|
-
gpqa: 0.
|
|
1648
|
-
hle: 0.
|
|
1459
|
+
mmluPro: 0.818,
|
|
1460
|
+
gpqa: 0.811,
|
|
1461
|
+
hle: 0.109,
|
|
1649
1462
|
|
|
1650
1463
|
// Capabilities
|
|
1651
1464
|
contextWindow: 8192,
|
|
@@ -1653,21 +1466,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1653
1466
|
supportsVision: false,
|
|
1654
1467
|
|
|
1655
1468
|
// Metadata
|
|
1656
|
-
lastUpdated: "2026-
|
|
1469
|
+
lastUpdated: "2026-06-01",
|
|
1470
|
+
originalModel: "Nova 2.0 Lite (high)",
|
|
1657
1471
|
},
|
|
1658
|
-
"nova-2-pro-preview-
|
|
1659
|
-
// AA Intelligence Index (composite score)
|
|
1660
|
-
intelligenceIndex: 31.9,
|
|
1661
|
-
normalizedScore: 46,
|
|
1662
|
-
|
|
1472
|
+
"nova-2.0-pro-preview-non-reasoning": {
|
|
1663
1473
|
// AA specific benchmarks
|
|
1664
|
-
codingIndex:
|
|
1665
|
-
mathIndex:
|
|
1474
|
+
codingIndex: 20.5,
|
|
1475
|
+
mathIndex: 30.7,
|
|
1666
1476
|
|
|
1667
1477
|
// Academic benchmarks
|
|
1668
|
-
mmluPro: 0.
|
|
1669
|
-
gpqa: 0.
|
|
1670
|
-
hle: 0.
|
|
1478
|
+
mmluPro: 0.772,
|
|
1479
|
+
gpqa: 0.636,
|
|
1480
|
+
hle: 0.04,
|
|
1671
1481
|
|
|
1672
1482
|
// Capabilities
|
|
1673
1483
|
contextWindow: 8192,
|
|
@@ -1675,21 +1485,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1675
1485
|
supportsVision: false,
|
|
1676
1486
|
|
|
1677
1487
|
// Metadata
|
|
1678
|
-
lastUpdated: "2026-
|
|
1488
|
+
lastUpdated: "2026-06-01",
|
|
1489
|
+
originalModel: "Nova 2.0 Pro Preview (Non-reasoning)",
|
|
1679
1490
|
},
|
|
1680
|
-
"nova-2-
|
|
1681
|
-
// AA Intelligence Index (composite score)
|
|
1682
|
-
intelligenceIndex: 16.6,
|
|
1683
|
-
normalizedScore: 24,
|
|
1684
|
-
|
|
1491
|
+
"nova-2.0-lite-low": {
|
|
1685
1492
|
// AA specific benchmarks
|
|
1686
|
-
codingIndex: 13.
|
|
1687
|
-
mathIndex:
|
|
1493
|
+
codingIndex: 13.6,
|
|
1494
|
+
mathIndex: 46.7,
|
|
1688
1495
|
|
|
1689
1496
|
// Academic benchmarks
|
|
1690
|
-
mmluPro: 0.
|
|
1691
|
-
gpqa: 0.
|
|
1692
|
-
hle: 0.
|
|
1497
|
+
mmluPro: 0.788,
|
|
1498
|
+
gpqa: 0.698,
|
|
1499
|
+
hle: 0.042,
|
|
1693
1500
|
|
|
1694
1501
|
// Capabilities
|
|
1695
1502
|
contextWindow: 8192,
|
|
@@ -1697,13 +1504,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1697
1504
|
supportsVision: false,
|
|
1698
1505
|
|
|
1699
1506
|
// Metadata
|
|
1700
|
-
lastUpdated: "2026-
|
|
1507
|
+
lastUpdated: "2026-06-01",
|
|
1508
|
+
originalModel: "Nova 2.0 Lite (low)",
|
|
1701
1509
|
},
|
|
1702
|
-
"nova-2-lite-non-reasoning": {
|
|
1703
|
-
// AA Intelligence Index (composite score)
|
|
1704
|
-
intelligenceIndex: 18,
|
|
1705
|
-
normalizedScore: 26,
|
|
1706
|
-
|
|
1510
|
+
"nova-2.0-lite-non-reasoning": {
|
|
1707
1511
|
// AA specific benchmarks
|
|
1708
1512
|
codingIndex: 12.5,
|
|
1709
1513
|
mathIndex: 33.7,
|
|
@@ -1719,21 +1523,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1719
1523
|
supportsVision: false,
|
|
1720
1524
|
|
|
1721
1525
|
// Metadata
|
|
1722
|
-
lastUpdated: "2026-
|
|
1526
|
+
lastUpdated: "2026-06-01",
|
|
1527
|
+
originalModel: "Nova 2.0 Lite (Non-reasoning)",
|
|
1723
1528
|
},
|
|
1724
|
-
"nova-
|
|
1725
|
-
// AA Intelligence Index (composite score)
|
|
1726
|
-
intelligenceIndex: 28,
|
|
1727
|
-
normalizedScore: 40,
|
|
1728
|
-
|
|
1529
|
+
"nova-premier": {
|
|
1729
1530
|
// AA specific benchmarks
|
|
1730
|
-
codingIndex:
|
|
1731
|
-
mathIndex:
|
|
1531
|
+
codingIndex: 13.8,
|
|
1532
|
+
mathIndex: 17.3,
|
|
1732
1533
|
|
|
1733
1534
|
// Academic benchmarks
|
|
1734
|
-
mmluPro: 0.
|
|
1735
|
-
gpqa: 0.
|
|
1736
|
-
hle: 0.
|
|
1535
|
+
mmluPro: 0.733,
|
|
1536
|
+
gpqa: 0.569,
|
|
1537
|
+
hle: 0.047,
|
|
1737
1538
|
|
|
1738
1539
|
// Capabilities
|
|
1739
1540
|
contextWindow: 8192,
|
|
@@ -1741,21 +1542,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1741
1542
|
supportsVision: false,
|
|
1742
1543
|
|
|
1743
1544
|
// Metadata
|
|
1744
|
-
lastUpdated: "2026-
|
|
1545
|
+
lastUpdated: "2026-06-01",
|
|
1546
|
+
originalModel: "Nova Premier",
|
|
1745
1547
|
},
|
|
1746
|
-
"nova-2-
|
|
1747
|
-
// AA Intelligence Index (composite score)
|
|
1748
|
-
intelligenceIndex: 35.7,
|
|
1749
|
-
normalizedScore: 51,
|
|
1750
|
-
|
|
1548
|
+
"nova-2.0-omni-non-reasoning": {
|
|
1751
1549
|
// AA specific benchmarks
|
|
1752
|
-
codingIndex:
|
|
1753
|
-
mathIndex:
|
|
1550
|
+
codingIndex: 13.8,
|
|
1551
|
+
mathIndex: 37,
|
|
1754
1552
|
|
|
1755
1553
|
// Academic benchmarks
|
|
1756
|
-
mmluPro: 0.
|
|
1757
|
-
gpqa: 0.
|
|
1758
|
-
hle: 0.
|
|
1554
|
+
mmluPro: 0.719,
|
|
1555
|
+
gpqa: 0.555,
|
|
1556
|
+
hle: 0.039,
|
|
1759
1557
|
|
|
1760
1558
|
// Capabilities
|
|
1761
1559
|
contextWindow: 8192,
|
|
@@ -1763,21 +1561,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1763
1561
|
supportsVision: false,
|
|
1764
1562
|
|
|
1765
1563
|
// Metadata
|
|
1766
|
-
lastUpdated: "2026-
|
|
1564
|
+
lastUpdated: "2026-06-01",
|
|
1565
|
+
originalModel: "Nova 2.0 Omni (Non-reasoning)",
|
|
1767
1566
|
},
|
|
1768
|
-
"nova-2-omni-
|
|
1769
|
-
// AA Intelligence Index (composite score)
|
|
1770
|
-
intelligenceIndex: 23.2,
|
|
1771
|
-
normalizedScore: 33,
|
|
1772
|
-
|
|
1567
|
+
"nova-2.0-omni-medium": {
|
|
1773
1568
|
// AA specific benchmarks
|
|
1774
|
-
codingIndex:
|
|
1775
|
-
mathIndex:
|
|
1569
|
+
codingIndex: 15.1,
|
|
1570
|
+
mathIndex: 89.7,
|
|
1776
1571
|
|
|
1777
1572
|
// Academic benchmarks
|
|
1778
|
-
mmluPro: 0.
|
|
1779
|
-
gpqa: 0.
|
|
1780
|
-
hle: 0.
|
|
1573
|
+
mmluPro: 0.809,
|
|
1574
|
+
gpqa: 0.76,
|
|
1575
|
+
hle: 0.068,
|
|
1781
1576
|
|
|
1782
1577
|
// Capabilities
|
|
1783
1578
|
contextWindow: 8192,
|
|
@@ -1785,21 +1580,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1785
1580
|
supportsVision: false,
|
|
1786
1581
|
|
|
1787
1582
|
// Metadata
|
|
1788
|
-
lastUpdated: "2026-
|
|
1583
|
+
lastUpdated: "2026-06-01",
|
|
1584
|
+
originalModel: "Nova 2.0 Omni (medium)",
|
|
1789
1585
|
},
|
|
1790
|
-
"
|
|
1791
|
-
// AA Intelligence Index (composite score)
|
|
1792
|
-
intelligenceIndex: 10.4,
|
|
1793
|
-
normalizedScore: 15,
|
|
1794
|
-
|
|
1586
|
+
"nova-2.0-pro-preview-low": {
|
|
1795
1587
|
// AA specific benchmarks
|
|
1796
|
-
codingIndex:
|
|
1797
|
-
mathIndex:
|
|
1588
|
+
codingIndex: 24.5,
|
|
1589
|
+
mathIndex: 63.3,
|
|
1798
1590
|
|
|
1799
1591
|
// Academic benchmarks
|
|
1800
|
-
mmluPro: 0.
|
|
1801
|
-
gpqa: 0.
|
|
1802
|
-
hle: 0.
|
|
1592
|
+
mmluPro: 0.822,
|
|
1593
|
+
gpqa: 0.751,
|
|
1594
|
+
hle: 0.052,
|
|
1803
1595
|
|
|
1804
1596
|
// Capabilities
|
|
1805
1597
|
contextWindow: 8192,
|
|
@@ -1807,21 +1599,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1807
1599
|
supportsVision: false,
|
|
1808
1600
|
|
|
1809
1601
|
// Metadata
|
|
1810
|
-
lastUpdated: "2026-
|
|
1602
|
+
lastUpdated: "2026-06-01",
|
|
1603
|
+
originalModel: "Nova 2.0 Pro Preview (low)",
|
|
1811
1604
|
},
|
|
1812
|
-
"phi-4
|
|
1813
|
-
// AA Intelligence Index (composite score)
|
|
1814
|
-
intelligenceIndex: 10,
|
|
1815
|
-
normalizedScore: 14,
|
|
1816
|
-
|
|
1605
|
+
"phi-4": {
|
|
1817
1606
|
// AA specific benchmarks
|
|
1818
|
-
codingIndex:
|
|
1819
|
-
mathIndex:
|
|
1607
|
+
codingIndex: 11.2,
|
|
1608
|
+
mathIndex: 18,
|
|
1820
1609
|
|
|
1821
1610
|
// Academic benchmarks
|
|
1822
|
-
mmluPro: 0.
|
|
1823
|
-
gpqa: 0.
|
|
1824
|
-
hle: 0.
|
|
1611
|
+
mmluPro: 0.714,
|
|
1612
|
+
gpqa: 0.575,
|
|
1613
|
+
hle: 0.041,
|
|
1825
1614
|
|
|
1826
1615
|
// Capabilities
|
|
1827
1616
|
contextWindow: 8192,
|
|
@@ -1829,13 +1618,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1829
1618
|
supportsVision: false,
|
|
1830
1619
|
|
|
1831
1620
|
// Metadata
|
|
1832
|
-
lastUpdated: "2026-
|
|
1621
|
+
lastUpdated: "2026-06-01",
|
|
1622
|
+
originalModel: "Phi-4",
|
|
1833
1623
|
},
|
|
1834
1624
|
"phi-4-mini-instruct": {
|
|
1835
|
-
// AA Intelligence Index (composite score)
|
|
1836
|
-
intelligenceIndex: 8.4,
|
|
1837
|
-
normalizedScore: 12,
|
|
1838
|
-
|
|
1839
1625
|
// AA specific benchmarks
|
|
1840
1626
|
codingIndex: 3.6,
|
|
1841
1627
|
mathIndex: 6.7,
|
|
@@ -1851,43 +1637,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1851
1637
|
supportsVision: false,
|
|
1852
1638
|
|
|
1853
1639
|
// Metadata
|
|
1854
|
-
lastUpdated: "2026-
|
|
1640
|
+
lastUpdated: "2026-06-01",
|
|
1641
|
+
originalModel: "Phi-4 Mini Instruct",
|
|
1855
1642
|
},
|
|
1856
|
-
"
|
|
1857
|
-
// AA Intelligence Index (composite score)
|
|
1858
|
-
intelligenceIndex: 8.1,
|
|
1859
|
-
normalizedScore: 12,
|
|
1860
|
-
|
|
1643
|
+
"phi-4-multimodal-instruct": {
|
|
1861
1644
|
// AA specific benchmarks
|
|
1862
|
-
codingIndex:
|
|
1645
|
+
codingIndex: undefined,
|
|
1863
1646
|
mathIndex: undefined,
|
|
1864
1647
|
|
|
1865
1648
|
// Academic benchmarks
|
|
1866
|
-
mmluPro:
|
|
1867
|
-
gpqa: 0.
|
|
1868
|
-
hle: 0.
|
|
1869
|
-
|
|
1870
|
-
// Capabilities
|
|
1871
|
-
contextWindow: 8192,
|
|
1872
|
-
supportsReasoning: false,
|
|
1873
|
-
supportsVision: false,
|
|
1874
|
-
|
|
1875
|
-
// Metadata
|
|
1876
|
-
lastUpdated: "2026-04-06",
|
|
1877
|
-
},
|
|
1878
|
-
"lfm2-8b-a1b": {
|
|
1879
|
-
// AA Intelligence Index (composite score)
|
|
1880
|
-
intelligenceIndex: 7,
|
|
1881
|
-
normalizedScore: 10,
|
|
1882
|
-
|
|
1883
|
-
// AA specific benchmarks
|
|
1884
|
-
codingIndex: 2.3,
|
|
1885
|
-
mathIndex: 25.3,
|
|
1886
|
-
|
|
1887
|
-
// Academic benchmarks
|
|
1888
|
-
mmluPro: 0.505,
|
|
1889
|
-
gpqa: 0.344,
|
|
1890
|
-
hle: 0.049,
|
|
1649
|
+
mmluPro: 0.485,
|
|
1650
|
+
gpqa: 0.315,
|
|
1651
|
+
hle: 0.044,
|
|
1891
1652
|
|
|
1892
1653
|
// Capabilities
|
|
1893
1654
|
contextWindow: 8192,
|
|
@@ -1895,21 +1656,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1895
1656
|
supportsVision: false,
|
|
1896
1657
|
|
|
1897
1658
|
// Metadata
|
|
1898
|
-
lastUpdated: "2026-
|
|
1659
|
+
lastUpdated: "2026-06-01",
|
|
1660
|
+
originalModel: "Phi-4 Multimodal Instruct",
|
|
1899
1661
|
},
|
|
1900
|
-
"lfm2.5-1.
|
|
1901
|
-
// AA Intelligence Index (composite score)
|
|
1902
|
-
intelligenceIndex: 8,
|
|
1903
|
-
normalizedScore: 11,
|
|
1904
|
-
|
|
1662
|
+
"lfm2.5-vl-1.6b": {
|
|
1905
1663
|
// AA specific benchmarks
|
|
1906
|
-
codingIndex:
|
|
1664
|
+
codingIndex: 1,
|
|
1907
1665
|
mathIndex: undefined,
|
|
1908
1666
|
|
|
1909
1667
|
// Academic benchmarks
|
|
1910
1668
|
mmluPro: undefined,
|
|
1911
|
-
gpqa: 0.
|
|
1912
|
-
hle: 0.
|
|
1669
|
+
gpqa: 0.289,
|
|
1670
|
+
hle: 0.051,
|
|
1913
1671
|
|
|
1914
1672
|
// Capabilities
|
|
1915
1673
|
contextWindow: 8192,
|
|
@@ -1917,13 +1675,10 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1917
1675
|
supportsVision: false,
|
|
1918
1676
|
|
|
1919
1677
|
// Metadata
|
|
1920
|
-
lastUpdated: "2026-
|
|
1678
|
+
lastUpdated: "2026-06-01",
|
|
1679
|
+
originalModel: "LFM2.5-VL-1.6B",
|
|
1921
1680
|
},
|
|
1922
1681
|
"lfm2-24b-a2b": {
|
|
1923
|
-
// AA Intelligence Index (composite score)
|
|
1924
|
-
intelligenceIndex: 10.5,
|
|
1925
|
-
normalizedScore: 15,
|
|
1926
|
-
|
|
1927
1682
|
// AA specific benchmarks
|
|
1928
1683
|
codingIndex: 3.6,
|
|
1929
1684
|
mathIndex: undefined,
|
|
@@ -1939,65 +1694,18 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
1939
1694
|
supportsVision: false,
|
|
1940
1695
|
|
|
1941
1696
|
// Metadata
|
|
1942
|
-
lastUpdated: "2026-
|
|
1943
|
-
|
|
1944
|
-
"lfm2-2.6b": {
|
|
1945
|
-
// AA Intelligence Index (composite score)
|
|
1946
|
-
intelligenceIndex: 8,
|
|
1947
|
-
normalizedScore: 11,
|
|
1948
|
-
|
|
1949
|
-
// AA specific benchmarks
|
|
1950
|
-
codingIndex: 1.4,
|
|
1951
|
-
mathIndex: 8.3,
|
|
1952
|
-
|
|
1953
|
-
// Academic benchmarks
|
|
1954
|
-
mmluPro: 0.298,
|
|
1955
|
-
gpqa: 0.306,
|
|
1956
|
-
hle: 0.052,
|
|
1957
|
-
|
|
1958
|
-
// Capabilities
|
|
1959
|
-
contextWindow: 8192,
|
|
1960
|
-
supportsReasoning: false,
|
|
1961
|
-
supportsVision: false,
|
|
1962
|
-
|
|
1963
|
-
// Metadata
|
|
1964
|
-
lastUpdated: "2026-04-06",
|
|
1697
|
+
lastUpdated: "2026-06-01",
|
|
1698
|
+
originalModel: "LFM2 24B A2B",
|
|
1965
1699
|
},
|
|
1966
|
-
"lfm2
|
|
1967
|
-
// AA Intelligence Index (composite score)
|
|
1968
|
-
intelligenceIndex: 6.2,
|
|
1969
|
-
normalizedScore: 9,
|
|
1970
|
-
|
|
1971
|
-
// AA specific benchmarks
|
|
1972
|
-
codingIndex: 1,
|
|
1973
|
-
mathIndex: undefined,
|
|
1974
|
-
|
|
1975
|
-
// Academic benchmarks
|
|
1976
|
-
mmluPro: undefined,
|
|
1977
|
-
gpqa: 0.289,
|
|
1978
|
-
hle: 0.051,
|
|
1979
|
-
|
|
1980
|
-
// Capabilities
|
|
1981
|
-
contextWindow: 8192,
|
|
1982
|
-
supportsReasoning: false,
|
|
1983
|
-
supportsVision: false,
|
|
1984
|
-
|
|
1985
|
-
// Metadata
|
|
1986
|
-
lastUpdated: "2026-04-06",
|
|
1987
|
-
},
|
|
1988
|
-
"solar-open-100b-reasoning": {
|
|
1989
|
-
// AA Intelligence Index (composite score)
|
|
1990
|
-
intelligenceIndex: 21.7,
|
|
1991
|
-
normalizedScore: 31,
|
|
1992
|
-
|
|
1700
|
+
"lfm2-8b-a1b": {
|
|
1993
1701
|
// AA specific benchmarks
|
|
1994
|
-
codingIndex:
|
|
1995
|
-
mathIndex:
|
|
1702
|
+
codingIndex: 2.3,
|
|
1703
|
+
mathIndex: 25.3,
|
|
1996
1704
|
|
|
1997
1705
|
// Academic benchmarks
|
|
1998
|
-
mmluPro:
|
|
1999
|
-
gpqa: 0.
|
|
2000
|
-
hle: 0.
|
|
1706
|
+
mmluPro: 0.505,
|
|
1707
|
+
gpqa: 0.344,
|
|
1708
|
+
hle: 0.049,
|
|
2001
1709
|
|
|
2002
1710
|
// Capabilities
|
|
2003
1711
|
contextWindow: 8192,
|
|
@@ -2005,6 +1713,7 @@ export const BENCHMARKS_CHUNK_0: Record<string, HardcodedBenchmark> = {
|
|
|
2005
1713
|
supportsVision: false,
|
|
2006
1714
|
|
|
2007
1715
|
// Metadata
|
|
2008
|
-
lastUpdated: "2026-
|
|
1716
|
+
lastUpdated: "2026-06-01",
|
|
1717
|
+
originalModel: "LFM2 8B A1B",
|
|
2009
1718
|
},
|
|
2010
1719
|
};
|