pi-free 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +9 -5
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/provider-compat.ts +33 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +200 -68
- package/providers/cline/cline.ts +3 -3
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +54 -16
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +391 -0
|
@@ -1,23 +1,20 @@
|
|
|
1
1
|
// Auto-generated benchmark data chunk 4
|
|
2
|
-
// Models:
|
|
2
|
+
// Models: mistral-medium-3.1 .. glm-4.5v-reasoning (90 entries)
|
|
3
|
+
// Last updated: 2026-06-01
|
|
3
4
|
// DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
|
|
4
5
|
|
|
5
6
|
import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
|
|
6
7
|
|
|
7
8
|
export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
8
|
-
"
|
|
9
|
-
// AA Intelligence Index (composite score)
|
|
10
|
-
intelligenceIndex: 8.8,
|
|
11
|
-
normalizedScore: 13,
|
|
12
|
-
|
|
9
|
+
"mistral-medium-3.1": {
|
|
13
10
|
// AA specific benchmarks
|
|
14
|
-
codingIndex:
|
|
15
|
-
mathIndex:
|
|
11
|
+
codingIndex: 18.3,
|
|
12
|
+
mathIndex: 38.3,
|
|
16
13
|
|
|
17
14
|
// Academic benchmarks
|
|
18
|
-
mmluPro: 0.
|
|
19
|
-
gpqa: 0.
|
|
20
|
-
hle: 0.
|
|
15
|
+
mmluPro: 0.683,
|
|
16
|
+
gpqa: 0.588,
|
|
17
|
+
hle: 0.044,
|
|
21
18
|
|
|
22
19
|
// Capabilities
|
|
23
20
|
contextWindow: 8192,
|
|
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
25
22
|
supportsVision: false,
|
|
26
23
|
|
|
27
24
|
// Metadata
|
|
28
|
-
lastUpdated: "2026-
|
|
25
|
+
lastUpdated: "2026-06-01",
|
|
26
|
+
originalModel: "Mistral Medium 3.1",
|
|
29
27
|
},
|
|
30
|
-
"
|
|
31
|
-
// AA Intelligence Index (composite score)
|
|
32
|
-
intelligenceIndex: 6.3,
|
|
33
|
-
normalizedScore: 9,
|
|
34
|
-
|
|
28
|
+
"deepseek-r1-distill-llama-70b": {
|
|
35
29
|
// AA specific benchmarks
|
|
36
|
-
codingIndex:
|
|
37
|
-
mathIndex:
|
|
30
|
+
codingIndex: 11.4,
|
|
31
|
+
mathIndex: 53.7,
|
|
38
32
|
|
|
39
33
|
// Academic benchmarks
|
|
40
|
-
mmluPro: 0.
|
|
41
|
-
gpqa: 0.
|
|
42
|
-
hle: 0.
|
|
34
|
+
mmluPro: 0.795,
|
|
35
|
+
gpqa: 0.402,
|
|
36
|
+
hle: 0.061,
|
|
43
37
|
|
|
44
38
|
// Capabilities
|
|
45
39
|
contextWindow: 8192,
|
|
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
47
41
|
supportsVision: false,
|
|
48
42
|
|
|
49
43
|
// Metadata
|
|
50
|
-
lastUpdated: "2026-
|
|
44
|
+
lastUpdated: "2026-06-01",
|
|
45
|
+
originalModel: "DeepSeek R1 Distill Llama 70B",
|
|
51
46
|
},
|
|
52
|
-
"
|
|
53
|
-
// AA Intelligence Index (composite score)
|
|
54
|
-
intelligenceIndex: 11.9,
|
|
55
|
-
normalizedScore: 17,
|
|
56
|
-
|
|
47
|
+
"deepseek-r1-distill-qwen-32b": {
|
|
57
48
|
// AA specific benchmarks
|
|
58
49
|
codingIndex: undefined,
|
|
59
|
-
mathIndex:
|
|
50
|
+
mathIndex: 63,
|
|
60
51
|
|
|
61
52
|
// Academic benchmarks
|
|
62
|
-
mmluPro:
|
|
63
|
-
gpqa:
|
|
64
|
-
hle:
|
|
53
|
+
mmluPro: 0.739,
|
|
54
|
+
gpqa: 0.615,
|
|
55
|
+
hle: 0.055,
|
|
65
56
|
|
|
66
57
|
// Capabilities
|
|
67
58
|
contextWindow: 8192,
|
|
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
69
60
|
supportsVision: false,
|
|
70
61
|
|
|
71
62
|
// Metadata
|
|
72
|
-
lastUpdated: "2026-
|
|
63
|
+
lastUpdated: "2026-06-01",
|
|
64
|
+
originalModel: "DeepSeek R1 Distill Qwen 32B",
|
|
73
65
|
},
|
|
74
|
-
"
|
|
75
|
-
// AA Intelligence Index (composite score)
|
|
76
|
-
intelligenceIndex: 16,
|
|
77
|
-
normalizedScore: 23,
|
|
78
|
-
|
|
66
|
+
"deepseek-v3-dec-24": {
|
|
79
67
|
// AA specific benchmarks
|
|
80
|
-
codingIndex:
|
|
81
|
-
mathIndex:
|
|
68
|
+
codingIndex: 16.4,
|
|
69
|
+
mathIndex: 26,
|
|
82
70
|
|
|
83
71
|
// Academic benchmarks
|
|
84
|
-
mmluPro: 0.
|
|
85
|
-
gpqa: 0.
|
|
86
|
-
hle: 0.
|
|
72
|
+
mmluPro: 0.752,
|
|
73
|
+
gpqa: 0.557,
|
|
74
|
+
hle: 0.036,
|
|
87
75
|
|
|
88
76
|
// Capabilities
|
|
89
77
|
contextWindow: 8192,
|
|
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
91
79
|
supportsVision: false,
|
|
92
80
|
|
|
93
81
|
// Metadata
|
|
94
|
-
lastUpdated: "2026-
|
|
82
|
+
lastUpdated: "2026-06-01",
|
|
83
|
+
originalModel: "DeepSeek V3 (Dec '24)",
|
|
95
84
|
},
|
|
96
|
-
"
|
|
97
|
-
// AA Intelligence Index (composite score)
|
|
98
|
-
intelligenceIndex: 18.8,
|
|
99
|
-
normalizedScore: 27,
|
|
100
|
-
|
|
85
|
+
"deepseek-r1-distill-qwen-14b": {
|
|
101
86
|
// AA specific benchmarks
|
|
102
87
|
codingIndex: undefined,
|
|
103
|
-
mathIndex:
|
|
88
|
+
mathIndex: 55.7,
|
|
104
89
|
|
|
105
90
|
// Academic benchmarks
|
|
106
|
-
mmluPro: 0.
|
|
107
|
-
gpqa: 0.
|
|
108
|
-
hle: 0.
|
|
91
|
+
mmluPro: 0.74,
|
|
92
|
+
gpqa: 0.484,
|
|
93
|
+
hle: 0.044,
|
|
109
94
|
|
|
110
95
|
// Capabilities
|
|
111
96
|
contextWindow: 8192,
|
|
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
113
98
|
supportsVision: false,
|
|
114
99
|
|
|
115
100
|
// Metadata
|
|
116
|
-
lastUpdated: "2026-
|
|
101
|
+
lastUpdated: "2026-06-01",
|
|
102
|
+
originalModel: "DeepSeek R1 Distill Qwen 14B",
|
|
117
103
|
},
|
|
118
|
-
"
|
|
119
|
-
// AA Intelligence Index (composite score)
|
|
120
|
-
intelligenceIndex: 8.3,
|
|
121
|
-
normalizedScore: 12,
|
|
122
|
-
|
|
104
|
+
"deepseek-v2.5-dec-24": {
|
|
123
105
|
// AA specific benchmarks
|
|
124
106
|
codingIndex: undefined,
|
|
125
107
|
mathIndex: undefined,
|
|
126
108
|
|
|
127
109
|
// Academic benchmarks
|
|
128
|
-
mmluPro:
|
|
129
|
-
gpqa:
|
|
130
|
-
hle:
|
|
110
|
+
mmluPro: undefined,
|
|
111
|
+
gpqa: undefined,
|
|
112
|
+
hle: undefined,
|
|
131
113
|
|
|
132
114
|
// Capabilities
|
|
133
115
|
contextWindow: 8192,
|
|
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
135
117
|
supportsVision: false,
|
|
136
118
|
|
|
137
119
|
// Metadata
|
|
138
|
-
lastUpdated: "2026-
|
|
120
|
+
lastUpdated: "2026-06-01",
|
|
121
|
+
originalModel: "DeepSeek-V2.5 (Dec '24)",
|
|
139
122
|
},
|
|
140
|
-
"
|
|
141
|
-
// AA Intelligence Index (composite score)
|
|
142
|
-
intelligenceIndex: 41.9,
|
|
143
|
-
normalizedScore: 60,
|
|
144
|
-
|
|
123
|
+
"deepseek-coder-v2": {
|
|
145
124
|
// AA specific benchmarks
|
|
146
|
-
codingIndex:
|
|
125
|
+
codingIndex: undefined,
|
|
147
126
|
mathIndex: undefined,
|
|
148
127
|
|
|
149
128
|
// Academic benchmarks
|
|
150
129
|
mmluPro: undefined,
|
|
151
|
-
gpqa:
|
|
152
|
-
hle:
|
|
130
|
+
gpqa: undefined,
|
|
131
|
+
hle: undefined,
|
|
153
132
|
|
|
154
133
|
// Capabilities
|
|
155
134
|
contextWindow: 8192,
|
|
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
157
136
|
supportsVision: false,
|
|
158
137
|
|
|
159
138
|
// Metadata
|
|
160
|
-
lastUpdated: "2026-
|
|
139
|
+
lastUpdated: "2026-06-01",
|
|
140
|
+
originalModel: "DeepSeek-Coder-V2",
|
|
161
141
|
},
|
|
162
|
-
"
|
|
163
|
-
// AA Intelligence Index (composite score)
|
|
164
|
-
intelligenceIndex: 39.4,
|
|
165
|
-
normalizedScore: 56,
|
|
166
|
-
|
|
142
|
+
"deepseek-r1-distill-llama-8b": {
|
|
167
143
|
// AA specific benchmarks
|
|
168
|
-
codingIndex:
|
|
169
|
-
mathIndex:
|
|
144
|
+
codingIndex: undefined,
|
|
145
|
+
mathIndex: 41.3,
|
|
170
146
|
|
|
171
147
|
// Academic benchmarks
|
|
172
|
-
mmluPro: 0.
|
|
173
|
-
gpqa: 0.
|
|
174
|
-
hle: 0.
|
|
148
|
+
mmluPro: 0.543,
|
|
149
|
+
gpqa: 0.302,
|
|
150
|
+
hle: 0.042,
|
|
175
151
|
|
|
176
152
|
// Capabilities
|
|
177
153
|
contextWindow: 8192,
|
|
@@ -179,21 +155,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
179
155
|
supportsVision: false,
|
|
180
156
|
|
|
181
157
|
// Metadata
|
|
182
|
-
lastUpdated: "2026-
|
|
158
|
+
lastUpdated: "2026-06-01",
|
|
159
|
+
originalModel: "DeepSeek R1 Distill Llama 8B",
|
|
183
160
|
},
|
|
184
|
-
"
|
|
185
|
-
// AA Intelligence Index (composite score)
|
|
186
|
-
intelligenceIndex: 24.4,
|
|
187
|
-
normalizedScore: 35,
|
|
188
|
-
|
|
161
|
+
"deepseek-llm-67b-chat-v1": {
|
|
189
162
|
// AA specific benchmarks
|
|
190
|
-
codingIndex:
|
|
191
|
-
mathIndex:
|
|
163
|
+
codingIndex: undefined,
|
|
164
|
+
mathIndex: undefined,
|
|
192
165
|
|
|
193
166
|
// Academic benchmarks
|
|
194
|
-
mmluPro:
|
|
195
|
-
gpqa:
|
|
196
|
-
hle:
|
|
167
|
+
mmluPro: undefined,
|
|
168
|
+
gpqa: undefined,
|
|
169
|
+
hle: undefined,
|
|
197
170
|
|
|
198
171
|
// Capabilities
|
|
199
172
|
contextWindow: 8192,
|
|
@@ -201,21 +174,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
201
174
|
supportsVision: false,
|
|
202
175
|
|
|
203
176
|
// Metadata
|
|
204
|
-
lastUpdated: "2026-
|
|
177
|
+
lastUpdated: "2026-06-01",
|
|
178
|
+
originalModel: "DeepSeek LLM 67B Chat (V1)",
|
|
205
179
|
},
|
|
206
|
-
"
|
|
207
|
-
// AA Intelligence Index (composite score)
|
|
208
|
-
intelligenceIndex: 36.1,
|
|
209
|
-
normalizedScore: 52,
|
|
210
|
-
|
|
180
|
+
"deepseek-r1-distill-qwen-1.5b": {
|
|
211
181
|
// AA specific benchmarks
|
|
212
|
-
codingIndex:
|
|
213
|
-
mathIndex:
|
|
182
|
+
codingIndex: undefined,
|
|
183
|
+
mathIndex: 22,
|
|
214
184
|
|
|
215
185
|
// Academic benchmarks
|
|
216
|
-
mmluPro: 0.
|
|
217
|
-
gpqa: 0.
|
|
218
|
-
hle: 0.
|
|
186
|
+
mmluPro: 0.269,
|
|
187
|
+
gpqa: 0.098,
|
|
188
|
+
hle: 0.033,
|
|
219
189
|
|
|
220
190
|
// Capabilities
|
|
221
191
|
contextWindow: 8192,
|
|
@@ -223,21 +193,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
223
193
|
supportsVision: false,
|
|
224
194
|
|
|
225
195
|
// Metadata
|
|
226
|
-
lastUpdated: "2026-
|
|
196
|
+
lastUpdated: "2026-06-01",
|
|
197
|
+
originalModel: "DeepSeek R1 Distill Qwen 1.5B",
|
|
227
198
|
},
|
|
228
|
-
"
|
|
229
|
-
// AA Intelligence Index (composite score)
|
|
230
|
-
intelligenceIndex: 20.9,
|
|
231
|
-
normalizedScore: 30,
|
|
232
|
-
|
|
199
|
+
"deepseek-v3-0324": {
|
|
233
200
|
// AA specific benchmarks
|
|
234
|
-
codingIndex:
|
|
235
|
-
mathIndex:
|
|
201
|
+
codingIndex: 22,
|
|
202
|
+
mathIndex: 41,
|
|
236
203
|
|
|
237
204
|
// Academic benchmarks
|
|
238
|
-
mmluPro: 0.
|
|
239
|
-
gpqa: 0.
|
|
240
|
-
hle: 0.
|
|
205
|
+
mmluPro: 0.819,
|
|
206
|
+
gpqa: 0.655,
|
|
207
|
+
hle: 0.052,
|
|
241
208
|
|
|
242
209
|
// Capabilities
|
|
243
210
|
contextWindow: 8192,
|
|
@@ -245,21 +212,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
245
212
|
supportsVision: false,
|
|
246
213
|
|
|
247
214
|
// Metadata
|
|
248
|
-
lastUpdated: "2026-
|
|
215
|
+
lastUpdated: "2026-06-01",
|
|
216
|
+
originalModel: "DeepSeek V3 0324",
|
|
249
217
|
},
|
|
250
|
-
"
|
|
251
|
-
// AA Intelligence Index (composite score)
|
|
252
|
-
intelligenceIndex: 40.9,
|
|
253
|
-
normalizedScore: 58,
|
|
254
|
-
|
|
218
|
+
"deepseek-v3.2-reasoning": {
|
|
255
219
|
// AA specific benchmarks
|
|
256
|
-
codingIndex:
|
|
257
|
-
mathIndex:
|
|
220
|
+
codingIndex: 36.7,
|
|
221
|
+
mathIndex: 92,
|
|
258
222
|
|
|
259
223
|
// Academic benchmarks
|
|
260
|
-
mmluPro: 0.
|
|
261
|
-
gpqa: 0.
|
|
262
|
-
hle: 0.
|
|
224
|
+
mmluPro: 0.862,
|
|
225
|
+
gpqa: 0.84,
|
|
226
|
+
hle: 0.222,
|
|
263
227
|
|
|
264
228
|
// Capabilities
|
|
265
229
|
contextWindow: 8192,
|
|
@@ -267,21 +231,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
267
231
|
supportsVision: false,
|
|
268
232
|
|
|
269
233
|
// Metadata
|
|
270
|
-
lastUpdated: "2026-
|
|
234
|
+
lastUpdated: "2026-06-01",
|
|
235
|
+
originalModel: "DeepSeek V3.2 (Reasoning)",
|
|
271
236
|
},
|
|
272
|
-
"
|
|
273
|
-
// AA Intelligence Index (composite score)
|
|
274
|
-
intelligenceIndex: 30.9,
|
|
275
|
-
normalizedScore: 44,
|
|
276
|
-
|
|
237
|
+
"deepseek-v3.2-non-reasoning": {
|
|
277
238
|
// AA specific benchmarks
|
|
278
|
-
codingIndex:
|
|
279
|
-
mathIndex:
|
|
239
|
+
codingIndex: 34.6,
|
|
240
|
+
mathIndex: 59,
|
|
280
241
|
|
|
281
242
|
// Academic benchmarks
|
|
282
|
-
mmluPro: 0.
|
|
283
|
-
gpqa: 0.
|
|
284
|
-
hle: 0.
|
|
243
|
+
mmluPro: 0.837,
|
|
244
|
+
gpqa: 0.751,
|
|
245
|
+
hle: 0.105,
|
|
285
246
|
|
|
286
247
|
// Capabilities
|
|
287
248
|
contextWindow: 8192,
|
|
@@ -289,21 +250,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
289
250
|
supportsVision: false,
|
|
290
251
|
|
|
291
252
|
// Metadata
|
|
292
|
-
lastUpdated: "2026-
|
|
253
|
+
lastUpdated: "2026-06-01",
|
|
254
|
+
originalModel: "DeepSeek V3.2 (Non-reasoning)",
|
|
293
255
|
},
|
|
294
|
-
"
|
|
295
|
-
// AA Intelligence Index (composite score)
|
|
296
|
-
intelligenceIndex: 26.3,
|
|
297
|
-
normalizedScore: 38,
|
|
298
|
-
|
|
256
|
+
"deepseek-r1-jan-25": {
|
|
299
257
|
// AA specific benchmarks
|
|
300
|
-
codingIndex:
|
|
301
|
-
mathIndex:
|
|
258
|
+
codingIndex: 15.9,
|
|
259
|
+
mathIndex: 68,
|
|
302
260
|
|
|
303
261
|
// Academic benchmarks
|
|
304
|
-
mmluPro: 0.
|
|
305
|
-
gpqa: 0.
|
|
306
|
-
hle: 0.
|
|
262
|
+
mmluPro: 0.844,
|
|
263
|
+
gpqa: 0.708,
|
|
264
|
+
hle: 0.093,
|
|
307
265
|
|
|
308
266
|
// Capabilities
|
|
309
267
|
contextWindow: 8192,
|
|
@@ -311,21 +269,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
311
269
|
supportsVision: false,
|
|
312
270
|
|
|
313
271
|
// Metadata
|
|
314
|
-
lastUpdated: "2026-
|
|
272
|
+
lastUpdated: "2026-06-01",
|
|
273
|
+
originalModel: "DeepSeek R1 (Jan '25)",
|
|
315
274
|
},
|
|
316
|
-
"
|
|
317
|
-
// AA Intelligence Index (composite score)
|
|
318
|
-
intelligenceIndex: 14.1,
|
|
319
|
-
normalizedScore: 20,
|
|
320
|
-
|
|
275
|
+
"deepseek-v3.1-non-reasoning": {
|
|
321
276
|
// AA specific benchmarks
|
|
322
|
-
codingIndex:
|
|
323
|
-
mathIndex:
|
|
277
|
+
codingIndex: 28.4,
|
|
278
|
+
mathIndex: 49.7,
|
|
324
279
|
|
|
325
280
|
// Academic benchmarks
|
|
326
|
-
mmluPro: 0.
|
|
327
|
-
gpqa: 0.
|
|
328
|
-
hle: 0.
|
|
281
|
+
mmluPro: 0.833,
|
|
282
|
+
gpqa: 0.735,
|
|
283
|
+
hle: 0.063,
|
|
329
284
|
|
|
330
285
|
// Capabilities
|
|
331
286
|
contextWindow: 8192,
|
|
@@ -333,21 +288,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
333
288
|
supportsVision: false,
|
|
334
289
|
|
|
335
290
|
// Metadata
|
|
336
|
-
lastUpdated: "2026-
|
|
291
|
+
lastUpdated: "2026-06-01",
|
|
292
|
+
originalModel: "DeepSeek V3.1 (Non-reasoning)",
|
|
337
293
|
},
|
|
338
|
-
"
|
|
339
|
-
// AA Intelligence Index (composite score)
|
|
340
|
-
intelligenceIndex: 9.3,
|
|
341
|
-
normalizedScore: 13,
|
|
342
|
-
|
|
294
|
+
"deepseek-r1-0528-may-25": {
|
|
343
295
|
// AA specific benchmarks
|
|
344
|
-
codingIndex:
|
|
345
|
-
mathIndex:
|
|
296
|
+
codingIndex: 24,
|
|
297
|
+
mathIndex: 76,
|
|
346
298
|
|
|
347
299
|
// Academic benchmarks
|
|
348
|
-
mmluPro: 0.
|
|
349
|
-
gpqa: 0.
|
|
350
|
-
hle: 0.
|
|
300
|
+
mmluPro: 0.849,
|
|
301
|
+
gpqa: 0.813,
|
|
302
|
+
hle: 0.149,
|
|
351
303
|
|
|
352
304
|
// Capabilities
|
|
353
305
|
contextWindow: 8192,
|
|
@@ -355,21 +307,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
355
307
|
supportsVision: false,
|
|
356
308
|
|
|
357
309
|
// Metadata
|
|
358
|
-
lastUpdated: "2026-
|
|
310
|
+
lastUpdated: "2026-06-01",
|
|
311
|
+
originalModel: "DeepSeek R1 0528 (May '25)",
|
|
359
312
|
},
|
|
360
|
-
"
|
|
361
|
-
// AA Intelligence Index (composite score)
|
|
362
|
-
intelligenceIndex: 10.6,
|
|
363
|
-
normalizedScore: 15,
|
|
364
|
-
|
|
313
|
+
"deepseek-v3.1-terminus-non-reasoning": {
|
|
365
314
|
// AA specific benchmarks
|
|
366
|
-
codingIndex:
|
|
367
|
-
mathIndex:
|
|
315
|
+
codingIndex: 31.9,
|
|
316
|
+
mathIndex: 53.7,
|
|
368
317
|
|
|
369
318
|
// Academic benchmarks
|
|
370
|
-
mmluPro: 0.
|
|
371
|
-
gpqa: 0.
|
|
372
|
-
hle: 0.
|
|
319
|
+
mmluPro: 0.836,
|
|
320
|
+
gpqa: 0.751,
|
|
321
|
+
hle: 0.084,
|
|
373
322
|
|
|
374
323
|
// Capabilities
|
|
375
324
|
contextWindow: 8192,
|
|
@@ -377,21 +326,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
377
326
|
supportsVision: false,
|
|
378
327
|
|
|
379
328
|
// Metadata
|
|
380
|
-
lastUpdated: "2026-
|
|
329
|
+
lastUpdated: "2026-06-01",
|
|
330
|
+
originalModel: "DeepSeek V3.1 Terminus (Non-reasoning)",
|
|
381
331
|
},
|
|
382
|
-
"
|
|
383
|
-
// AA Intelligence Index (composite score)
|
|
384
|
-
intelligenceIndex: 12.1,
|
|
385
|
-
normalizedScore: 17,
|
|
386
|
-
|
|
332
|
+
"deepseek-v3.1-reasoning": {
|
|
387
333
|
// AA specific benchmarks
|
|
388
|
-
codingIndex:
|
|
389
|
-
mathIndex:
|
|
334
|
+
codingIndex: 29.7,
|
|
335
|
+
mathIndex: 89.7,
|
|
390
336
|
|
|
391
337
|
// Academic benchmarks
|
|
392
|
-
mmluPro: 0.
|
|
393
|
-
gpqa: 0.
|
|
394
|
-
hle: 0.
|
|
338
|
+
mmluPro: 0.851,
|
|
339
|
+
gpqa: 0.779,
|
|
340
|
+
hle: 0.13,
|
|
395
341
|
|
|
396
342
|
// Capabilities
|
|
397
343
|
contextWindow: 8192,
|
|
@@ -399,21 +345,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
399
345
|
supportsVision: false,
|
|
400
346
|
|
|
401
347
|
// Metadata
|
|
402
|
-
lastUpdated: "2026-
|
|
348
|
+
lastUpdated: "2026-06-01",
|
|
349
|
+
originalModel: "DeepSeek V3.1 (Reasoning)",
|
|
403
350
|
},
|
|
404
|
-
"
|
|
405
|
-
// AA Intelligence Index (composite score)
|
|
406
|
-
intelligenceIndex: 7,
|
|
407
|
-
normalizedScore: 10,
|
|
408
|
-
|
|
351
|
+
"deepseek-v3.1-terminus-reasoning": {
|
|
409
352
|
// AA specific benchmarks
|
|
410
|
-
codingIndex:
|
|
411
|
-
mathIndex:
|
|
353
|
+
codingIndex: 33.7,
|
|
354
|
+
mathIndex: 89.7,
|
|
412
355
|
|
|
413
356
|
// Academic benchmarks
|
|
414
|
-
mmluPro: 0.
|
|
415
|
-
gpqa: 0.
|
|
416
|
-
hle: 0.
|
|
357
|
+
mmluPro: 0.851,
|
|
358
|
+
gpqa: 0.792,
|
|
359
|
+
hle: 0.152,
|
|
417
360
|
|
|
418
361
|
// Capabilities
|
|
419
362
|
contextWindow: 8192,
|
|
@@ -421,13 +364,29 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
421
364
|
supportsVision: false,
|
|
422
365
|
|
|
423
366
|
// Metadata
|
|
424
|
-
lastUpdated: "2026-
|
|
367
|
+
lastUpdated: "2026-06-01",
|
|
368
|
+
originalModel: "DeepSeek V3.1 Terminus (Reasoning)",
|
|
425
369
|
},
|
|
426
|
-
"
|
|
427
|
-
// AA
|
|
428
|
-
|
|
429
|
-
|
|
370
|
+
"deepseek-v3.2-exp-non-reasoning": {
|
|
371
|
+
// AA specific benchmarks
|
|
372
|
+
codingIndex: 30,
|
|
373
|
+
mathIndex: 57.7,
|
|
374
|
+
|
|
375
|
+
// Academic benchmarks
|
|
376
|
+
mmluPro: 0.836,
|
|
377
|
+
gpqa: 0.738,
|
|
378
|
+
hle: 0.086,
|
|
379
|
+
|
|
380
|
+
// Capabilities
|
|
381
|
+
contextWindow: 8192,
|
|
382
|
+
supportsReasoning: false,
|
|
383
|
+
supportsVision: false,
|
|
430
384
|
|
|
385
|
+
// Metadata
|
|
386
|
+
lastUpdated: "2026-06-01",
|
|
387
|
+
originalModel: "DeepSeek V3.2 Exp (Non-reasoning)",
|
|
388
|
+
},
|
|
389
|
+
"deepseek-v2.5": {
|
|
431
390
|
// AA specific benchmarks
|
|
432
391
|
codingIndex: undefined,
|
|
433
392
|
mathIndex: undefined,
|
|
@@ -443,21 +402,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
443
402
|
supportsVision: false,
|
|
444
403
|
|
|
445
404
|
// Metadata
|
|
446
|
-
lastUpdated: "2026-
|
|
405
|
+
lastUpdated: "2026-06-01",
|
|
406
|
+
originalModel: "DeepSeek-V2.5",
|
|
447
407
|
},
|
|
448
|
-
"
|
|
449
|
-
// AA Intelligence Index (composite score)
|
|
450
|
-
intelligenceIndex: 10.6,
|
|
451
|
-
normalizedScore: 15,
|
|
452
|
-
|
|
408
|
+
"deepseek-coder-v2-lite-instruct": {
|
|
453
409
|
// AA specific benchmarks
|
|
454
410
|
codingIndex: undefined,
|
|
455
411
|
mathIndex: undefined,
|
|
456
412
|
|
|
457
413
|
// Academic benchmarks
|
|
458
|
-
mmluPro: 0.
|
|
459
|
-
gpqa: 0.
|
|
460
|
-
hle: 0.
|
|
414
|
+
mmluPro: 0.429,
|
|
415
|
+
gpqa: 0.319,
|
|
416
|
+
hle: 0.053,
|
|
461
417
|
|
|
462
418
|
// Capabilities
|
|
463
419
|
contextWindow: 8192,
|
|
@@ -465,21 +421,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
465
421
|
supportsVision: false,
|
|
466
422
|
|
|
467
423
|
// Metadata
|
|
468
|
-
lastUpdated: "2026-
|
|
424
|
+
lastUpdated: "2026-06-01",
|
|
425
|
+
originalModel: "DeepSeek Coder V2 Lite Instruct",
|
|
469
426
|
},
|
|
470
|
-
"
|
|
471
|
-
// AA Intelligence Index (composite score)
|
|
472
|
-
intelligenceIndex: 39.2,
|
|
473
|
-
normalizedScore: 56,
|
|
474
|
-
|
|
427
|
+
"deepseek-r1-0528-qwen3-8b": {
|
|
475
428
|
// AA specific benchmarks
|
|
476
|
-
codingIndex:
|
|
477
|
-
mathIndex:
|
|
429
|
+
codingIndex: 7.8,
|
|
430
|
+
mathIndex: 63.7,
|
|
478
431
|
|
|
479
432
|
// Academic benchmarks
|
|
480
|
-
mmluPro: 0.
|
|
481
|
-
gpqa: 0.
|
|
482
|
-
hle: 0.
|
|
433
|
+
mmluPro: 0.739,
|
|
434
|
+
gpqa: 0.612,
|
|
435
|
+
hle: 0.056,
|
|
483
436
|
|
|
484
437
|
// Capabilities
|
|
485
438
|
contextWindow: 8192,
|
|
@@ -487,21 +440,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
487
440
|
supportsVision: false,
|
|
488
441
|
|
|
489
442
|
// Metadata
|
|
490
|
-
lastUpdated: "2026-
|
|
443
|
+
lastUpdated: "2026-06-01",
|
|
444
|
+
originalModel: "DeepSeek R1 0528 Qwen3 8B",
|
|
491
445
|
},
|
|
492
|
-
"
|
|
493
|
-
// AA Intelligence Index (composite score)
|
|
494
|
-
intelligenceIndex: 8.4,
|
|
495
|
-
normalizedScore: 12,
|
|
496
|
-
|
|
446
|
+
"deepseek-v3.2-exp-reasoning": {
|
|
497
447
|
// AA specific benchmarks
|
|
498
|
-
codingIndex:
|
|
499
|
-
mathIndex:
|
|
448
|
+
codingIndex: 33.3,
|
|
449
|
+
mathIndex: 87.7,
|
|
500
450
|
|
|
501
451
|
// Academic benchmarks
|
|
502
|
-
mmluPro: 0.
|
|
503
|
-
gpqa: 0.
|
|
504
|
-
hle: 0.
|
|
452
|
+
mmluPro: 0.85,
|
|
453
|
+
gpqa: 0.797,
|
|
454
|
+
hle: 0.138,
|
|
505
455
|
|
|
506
456
|
// Capabilities
|
|
507
457
|
contextWindow: 8192,
|
|
@@ -509,21 +459,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
509
459
|
supportsVision: false,
|
|
510
460
|
|
|
511
461
|
// Metadata
|
|
512
|
-
lastUpdated: "2026-
|
|
462
|
+
lastUpdated: "2026-06-01",
|
|
463
|
+
originalModel: "DeepSeek V3.2 Exp (Reasoning)",
|
|
513
464
|
},
|
|
514
|
-
"
|
|
515
|
-
// AA Intelligence Index (composite score)
|
|
516
|
-
intelligenceIndex: 30.2,
|
|
517
|
-
normalizedScore: 43,
|
|
518
|
-
|
|
465
|
+
"deepseek-v3.2-speciale": {
|
|
519
466
|
// AA specific benchmarks
|
|
520
|
-
codingIndex:
|
|
521
|
-
mathIndex:
|
|
467
|
+
codingIndex: 37.9,
|
|
468
|
+
mathIndex: 96.7,
|
|
522
469
|
|
|
523
470
|
// Academic benchmarks
|
|
524
|
-
mmluPro: 0.
|
|
525
|
-
gpqa: 0.
|
|
526
|
-
hle: 0.
|
|
471
|
+
mmluPro: 0.863,
|
|
472
|
+
gpqa: 0.871,
|
|
473
|
+
hle: 0.261,
|
|
527
474
|
|
|
528
475
|
// Capabilities
|
|
529
476
|
contextWindow: 8192,
|
|
@@ -531,21 +478,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
531
478
|
supportsVision: false,
|
|
532
479
|
|
|
533
480
|
// Metadata
|
|
534
|
-
lastUpdated: "2026-
|
|
481
|
+
lastUpdated: "2026-06-01",
|
|
482
|
+
originalModel: "DeepSeek V3.2 Speciale",
|
|
535
483
|
},
|
|
536
|
-
"
|
|
537
|
-
// AA Intelligence Index (composite score)
|
|
538
|
-
intelligenceIndex: 42.1,
|
|
539
|
-
normalizedScore: 60,
|
|
540
|
-
|
|
484
|
+
"deepseek-v2-chat": {
|
|
541
485
|
// AA specific benchmarks
|
|
542
|
-
codingIndex:
|
|
543
|
-
mathIndex:
|
|
486
|
+
codingIndex: undefined,
|
|
487
|
+
mathIndex: undefined,
|
|
544
488
|
|
|
545
489
|
// Academic benchmarks
|
|
546
|
-
mmluPro:
|
|
547
|
-
gpqa:
|
|
548
|
-
hle:
|
|
490
|
+
mmluPro: undefined,
|
|
491
|
+
gpqa: undefined,
|
|
492
|
+
hle: undefined,
|
|
549
493
|
|
|
550
494
|
// Capabilities
|
|
551
495
|
contextWindow: 8192,
|
|
@@ -553,21 +497,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
553
497
|
supportsVision: false,
|
|
554
498
|
|
|
555
499
|
// Metadata
|
|
556
|
-
lastUpdated: "2026-
|
|
500
|
+
lastUpdated: "2026-06-01",
|
|
501
|
+
originalModel: "DeepSeek-V2-Chat",
|
|
557
502
|
},
|
|
558
|
-
"
|
|
559
|
-
// AA Intelligence Index (composite score)
|
|
560
|
-
intelligenceIndex: 22.1,
|
|
561
|
-
normalizedScore: 32,
|
|
562
|
-
|
|
503
|
+
"sonar-pro": {
|
|
563
504
|
// AA specific benchmarks
|
|
564
|
-
codingIndex:
|
|
505
|
+
codingIndex: undefined,
|
|
565
506
|
mathIndex: undefined,
|
|
566
507
|
|
|
567
508
|
// Academic benchmarks
|
|
568
|
-
mmluPro:
|
|
569
|
-
gpqa: 0.
|
|
570
|
-
hle: 0.
|
|
509
|
+
mmluPro: 0.755,
|
|
510
|
+
gpqa: 0.578,
|
|
511
|
+
hle: 0.079,
|
|
571
512
|
|
|
572
513
|
// Capabilities
|
|
573
514
|
contextWindow: 8192,
|
|
@@ -575,21 +516,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
575
516
|
supportsVision: false,
|
|
576
517
|
|
|
577
518
|
// Metadata
|
|
578
|
-
lastUpdated: "2026-
|
|
519
|
+
lastUpdated: "2026-06-01",
|
|
520
|
+
originalModel: "Sonar Pro",
|
|
579
521
|
},
|
|
580
|
-
"
|
|
581
|
-
// AA Intelligence Index (composite score)
|
|
582
|
-
intelligenceIndex: 34.2,
|
|
583
|
-
normalizedScore: 49,
|
|
584
|
-
|
|
522
|
+
"sonar-reasoning-pro": {
|
|
585
523
|
// AA specific benchmarks
|
|
586
|
-
codingIndex:
|
|
587
|
-
mathIndex:
|
|
524
|
+
codingIndex: undefined,
|
|
525
|
+
mathIndex: undefined,
|
|
588
526
|
|
|
589
527
|
// Academic benchmarks
|
|
590
|
-
mmluPro:
|
|
591
|
-
gpqa:
|
|
592
|
-
hle:
|
|
528
|
+
mmluPro: undefined,
|
|
529
|
+
gpqa: undefined,
|
|
530
|
+
hle: undefined,
|
|
593
531
|
|
|
594
532
|
// Capabilities
|
|
595
533
|
contextWindow: 8192,
|
|
@@ -597,21 +535,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
597
535
|
supportsVision: false,
|
|
598
536
|
|
|
599
537
|
// Metadata
|
|
600
|
-
lastUpdated: "2026-
|
|
538
|
+
lastUpdated: "2026-06-01",
|
|
539
|
+
originalModel: "Sonar Reasoning Pro",
|
|
601
540
|
},
|
|
602
|
-
"
|
|
603
|
-
// AA Intelligence Index (composite score)
|
|
604
|
-
intelligenceIndex: 12.7,
|
|
605
|
-
normalizedScore: 18,
|
|
606
|
-
|
|
541
|
+
"sonar-reasoning": {
|
|
607
542
|
// AA specific benchmarks
|
|
608
|
-
codingIndex:
|
|
609
|
-
mathIndex:
|
|
543
|
+
codingIndex: undefined,
|
|
544
|
+
mathIndex: undefined,
|
|
610
545
|
|
|
611
546
|
// Academic benchmarks
|
|
612
|
-
mmluPro:
|
|
613
|
-
gpqa: 0.
|
|
614
|
-
hle:
|
|
547
|
+
mmluPro: undefined,
|
|
548
|
+
gpqa: 0.623,
|
|
549
|
+
hle: undefined,
|
|
615
550
|
|
|
616
551
|
// Capabilities
|
|
617
552
|
contextWindow: 8192,
|
|
@@ -619,21 +554,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
619
554
|
supportsVision: false,
|
|
620
555
|
|
|
621
556
|
// Metadata
|
|
622
|
-
lastUpdated: "2026-
|
|
557
|
+
lastUpdated: "2026-06-01",
|
|
558
|
+
originalModel: "Sonar Reasoning",
|
|
623
559
|
},
|
|
624
|
-
"
|
|
625
|
-
// AA Intelligence Index (composite score)
|
|
626
|
-
intelligenceIndex: 26.4,
|
|
627
|
-
normalizedScore: 38,
|
|
628
|
-
|
|
560
|
+
"sonar": {
|
|
629
561
|
// AA specific benchmarks
|
|
630
|
-
codingIndex:
|
|
631
|
-
mathIndex:
|
|
562
|
+
codingIndex: undefined,
|
|
563
|
+
mathIndex: undefined,
|
|
632
564
|
|
|
633
565
|
// Academic benchmarks
|
|
634
|
-
mmluPro: 0.
|
|
635
|
-
gpqa: 0.
|
|
636
|
-
hle: 0.
|
|
566
|
+
mmluPro: 0.689,
|
|
567
|
+
gpqa: 0.471,
|
|
568
|
+
hle: 0.073,
|
|
637
569
|
|
|
638
570
|
// Capabilities
|
|
639
571
|
contextWindow: 8192,
|
|
@@ -641,21 +573,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
641
573
|
supportsVision: false,
|
|
642
574
|
|
|
643
575
|
// Metadata
|
|
644
|
-
lastUpdated: "2026-
|
|
576
|
+
lastUpdated: "2026-06-01",
|
|
577
|
+
originalModel: "Sonar",
|
|
645
578
|
},
|
|
646
|
-
"
|
|
647
|
-
// AA Intelligence Index (composite score)
|
|
648
|
-
intelligenceIndex: 32.5,
|
|
649
|
-
normalizedScore: 46,
|
|
650
|
-
|
|
579
|
+
"grok-beta": {
|
|
651
580
|
// AA specific benchmarks
|
|
652
|
-
codingIndex:
|
|
653
|
-
mathIndex:
|
|
581
|
+
codingIndex: undefined,
|
|
582
|
+
mathIndex: undefined,
|
|
654
583
|
|
|
655
584
|
// Academic benchmarks
|
|
656
|
-
mmluPro: 0.
|
|
657
|
-
gpqa: 0.
|
|
658
|
-
hle: 0.
|
|
585
|
+
mmluPro: 0.703,
|
|
586
|
+
gpqa: 0.471,
|
|
587
|
+
hle: 0.047,
|
|
659
588
|
|
|
660
589
|
// Capabilities
|
|
661
590
|
contextWindow: 8192,
|
|
@@ -663,21 +592,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
663
592
|
supportsVision: false,
|
|
664
593
|
|
|
665
594
|
// Metadata
|
|
666
|
-
lastUpdated: "2026-
|
|
595
|
+
lastUpdated: "2026-06-01",
|
|
596
|
+
originalModel: "Grok Beta",
|
|
667
597
|
},
|
|
668
|
-
"
|
|
669
|
-
// AA Intelligence Index (composite score)
|
|
670
|
-
intelligenceIndex: 30.1,
|
|
671
|
-
normalizedScore: 43,
|
|
672
|
-
|
|
598
|
+
"grok-3": {
|
|
673
599
|
// AA specific benchmarks
|
|
674
|
-
codingIndex:
|
|
675
|
-
mathIndex:
|
|
600
|
+
codingIndex: 19.8,
|
|
601
|
+
mathIndex: 58,
|
|
676
602
|
|
|
677
603
|
// Academic benchmarks
|
|
678
|
-
mmluPro:
|
|
679
|
-
gpqa: 0.
|
|
680
|
-
hle: 0.
|
|
604
|
+
mmluPro: 0.799,
|
|
605
|
+
gpqa: 0.693,
|
|
606
|
+
hle: 0.051,
|
|
681
607
|
|
|
682
608
|
// Capabilities
|
|
683
609
|
contextWindow: 8192,
|
|
@@ -685,21 +611,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
685
611
|
supportsVision: false,
|
|
686
612
|
|
|
687
613
|
// Metadata
|
|
688
|
-
lastUpdated: "2026-
|
|
614
|
+
lastUpdated: "2026-06-01",
|
|
615
|
+
originalModel: "Grok 3",
|
|
689
616
|
},
|
|
690
|
-
"
|
|
691
|
-
// AA Intelligence Index (composite score)
|
|
692
|
-
intelligenceIndex: 15.1,
|
|
693
|
-
normalizedScore: 22,
|
|
694
|
-
|
|
617
|
+
"grok-3-mini-reasoning-high": {
|
|
695
618
|
// AA specific benchmarks
|
|
696
|
-
codingIndex:
|
|
697
|
-
mathIndex:
|
|
619
|
+
codingIndex: 25.2,
|
|
620
|
+
mathIndex: 84.7,
|
|
698
621
|
|
|
699
622
|
// Academic benchmarks
|
|
700
|
-
mmluPro: 0.
|
|
701
|
-
gpqa: 0.
|
|
702
|
-
hle: 0.
|
|
623
|
+
mmluPro: 0.828,
|
|
624
|
+
gpqa: 0.791,
|
|
625
|
+
hle: 0.111,
|
|
703
626
|
|
|
704
627
|
// Capabilities
|
|
705
628
|
contextWindow: 8192,
|
|
@@ -707,21 +630,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
707
630
|
supportsVision: false,
|
|
708
631
|
|
|
709
632
|
// Metadata
|
|
710
|
-
lastUpdated: "2026-
|
|
633
|
+
lastUpdated: "2026-06-01",
|
|
634
|
+
originalModel: "Grok 3 mini Reasoning (high)",
|
|
711
635
|
},
|
|
712
|
-
"
|
|
713
|
-
// AA Intelligence Index (composite score)
|
|
714
|
-
intelligenceIndex: 23.2,
|
|
715
|
-
normalizedScore: 33,
|
|
716
|
-
|
|
636
|
+
"grok-4.20-0309-v2-reasoning": {
|
|
717
637
|
// AA specific benchmarks
|
|
718
|
-
codingIndex:
|
|
719
|
-
mathIndex:
|
|
638
|
+
codingIndex: 40.5,
|
|
639
|
+
mathIndex: undefined,
|
|
720
640
|
|
|
721
641
|
// Academic benchmarks
|
|
722
|
-
mmluPro:
|
|
723
|
-
gpqa: 0.
|
|
724
|
-
hle: 0.
|
|
642
|
+
mmluPro: undefined,
|
|
643
|
+
gpqa: 0.911,
|
|
644
|
+
hle: 0.322,
|
|
725
645
|
|
|
726
646
|
// Capabilities
|
|
727
647
|
contextWindow: 8192,
|
|
@@ -729,21 +649,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
729
649
|
supportsVision: false,
|
|
730
650
|
|
|
731
651
|
// Metadata
|
|
732
|
-
lastUpdated: "2026-
|
|
652
|
+
lastUpdated: "2026-06-01",
|
|
653
|
+
originalModel: "Grok 4.20 0309 v2 (Reasoning)",
|
|
733
654
|
},
|
|
734
|
-
"
|
|
735
|
-
// AA Intelligence Index (composite score)
|
|
736
|
-
intelligenceIndex: 8.3,
|
|
737
|
-
normalizedScore: 12,
|
|
738
|
-
|
|
655
|
+
"grok-4": {
|
|
739
656
|
// AA specific benchmarks
|
|
740
|
-
codingIndex:
|
|
741
|
-
mathIndex:
|
|
657
|
+
codingIndex: 40.5,
|
|
658
|
+
mathIndex: 92.7,
|
|
742
659
|
|
|
743
660
|
// Academic benchmarks
|
|
744
|
-
mmluPro: 0.
|
|
745
|
-
gpqa: 0.
|
|
746
|
-
hle: 0.
|
|
661
|
+
mmluPro: 0.866,
|
|
662
|
+
gpqa: 0.877,
|
|
663
|
+
hle: 0.239,
|
|
747
664
|
|
|
748
665
|
// Capabilities
|
|
749
666
|
contextWindow: 8192,
|
|
@@ -751,21 +668,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
751
668
|
supportsVision: false,
|
|
752
669
|
|
|
753
670
|
// Metadata
|
|
754
|
-
lastUpdated: "2026-
|
|
671
|
+
lastUpdated: "2026-06-01",
|
|
672
|
+
originalModel: "Grok 4",
|
|
755
673
|
},
|
|
756
|
-
"
|
|
757
|
-
// AA Intelligence Index (composite score)
|
|
758
|
-
intelligenceIndex: 7.4,
|
|
759
|
-
normalizedScore: 11,
|
|
760
|
-
|
|
674
|
+
"grok-4-fast-non-reasoning": {
|
|
761
675
|
// AA specific benchmarks
|
|
762
|
-
codingIndex:
|
|
763
|
-
mathIndex:
|
|
676
|
+
codingIndex: 19,
|
|
677
|
+
mathIndex: 41.3,
|
|
764
678
|
|
|
765
679
|
// Academic benchmarks
|
|
766
|
-
mmluPro: 0.
|
|
767
|
-
gpqa: 0.
|
|
768
|
-
hle: 0.
|
|
680
|
+
mmluPro: 0.73,
|
|
681
|
+
gpqa: 0.606,
|
|
682
|
+
hle: 0.05,
|
|
769
683
|
|
|
770
684
|
// Capabilities
|
|
771
685
|
contextWindow: 8192,
|
|
@@ -773,21 +687,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
773
687
|
supportsVision: false,
|
|
774
688
|
|
|
775
689
|
// Metadata
|
|
776
|
-
lastUpdated: "2026-
|
|
690
|
+
lastUpdated: "2026-06-01",
|
|
691
|
+
originalModel: "Grok 4 Fast (Non-reasoning)",
|
|
777
692
|
},
|
|
778
|
-
"
|
|
779
|
-
// AA Intelligence Index (composite score)
|
|
780
|
-
intelligenceIndex: 28.3,
|
|
781
|
-
normalizedScore: 40,
|
|
782
|
-
|
|
693
|
+
"grok-4-fast-reasoning": {
|
|
783
694
|
// AA specific benchmarks
|
|
784
|
-
codingIndex:
|
|
785
|
-
mathIndex:
|
|
695
|
+
codingIndex: 27.4,
|
|
696
|
+
mathIndex: 89.7,
|
|
786
697
|
|
|
787
698
|
// Academic benchmarks
|
|
788
|
-
mmluPro: 0.
|
|
789
|
-
gpqa: 0.
|
|
790
|
-
hle: 0.
|
|
699
|
+
mmluPro: 0.85,
|
|
700
|
+
gpqa: 0.847,
|
|
701
|
+
hle: 0.17,
|
|
791
702
|
|
|
792
703
|
// Capabilities
|
|
793
704
|
contextWindow: 8192,
|
|
@@ -795,21 +706,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
795
706
|
supportsVision: false,
|
|
796
707
|
|
|
797
708
|
// Metadata
|
|
798
|
-
lastUpdated: "2026-
|
|
709
|
+
lastUpdated: "2026-06-01",
|
|
710
|
+
originalModel: "Grok 4 Fast (Reasoning)",
|
|
799
711
|
},
|
|
800
|
-
"
|
|
801
|
-
// AA Intelligence Index (composite score)
|
|
802
|
-
intelligenceIndex: 10.6,
|
|
803
|
-
normalizedScore: 15,
|
|
804
|
-
|
|
712
|
+
"grok-code-fast-1": {
|
|
805
713
|
// AA specific benchmarks
|
|
806
|
-
codingIndex:
|
|
807
|
-
mathIndex:
|
|
714
|
+
codingIndex: 23.7,
|
|
715
|
+
mathIndex: 43.3,
|
|
808
716
|
|
|
809
717
|
// Academic benchmarks
|
|
810
|
-
mmluPro: 0.
|
|
811
|
-
gpqa: 0.
|
|
812
|
-
hle: 0.
|
|
718
|
+
mmluPro: 0.793,
|
|
719
|
+
gpqa: 0.727,
|
|
720
|
+
hle: 0.075,
|
|
813
721
|
|
|
814
722
|
// Capabilities
|
|
815
723
|
contextWindow: 8192,
|
|
@@ -817,21 +725,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
817
725
|
supportsVision: false,
|
|
818
726
|
|
|
819
727
|
// Metadata
|
|
820
|
-
lastUpdated: "2026-
|
|
728
|
+
lastUpdated: "2026-06-01",
|
|
729
|
+
originalModel: "Grok Code Fast 1",
|
|
821
730
|
},
|
|
822
|
-
"
|
|
823
|
-
// AA Intelligence Index (composite score)
|
|
824
|
-
intelligenceIndex: 10.7,
|
|
825
|
-
normalizedScore: 15,
|
|
826
|
-
|
|
731
|
+
"grok-3-reasoning-beta": {
|
|
827
732
|
// AA specific benchmarks
|
|
828
733
|
codingIndex: undefined,
|
|
829
734
|
mathIndex: undefined,
|
|
830
735
|
|
|
831
736
|
// Academic benchmarks
|
|
832
|
-
mmluPro:
|
|
833
|
-
gpqa:
|
|
834
|
-
hle:
|
|
737
|
+
mmluPro: undefined,
|
|
738
|
+
gpqa: undefined,
|
|
739
|
+
hle: undefined,
|
|
835
740
|
|
|
836
741
|
// Capabilities
|
|
837
742
|
contextWindow: 8192,
|
|
@@ -839,21 +744,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
839
744
|
supportsVision: false,
|
|
840
745
|
|
|
841
746
|
// Metadata
|
|
842
|
-
lastUpdated: "2026-
|
|
747
|
+
lastUpdated: "2026-06-01",
|
|
748
|
+
originalModel: "Grok 3 Reasoning Beta",
|
|
843
749
|
},
|
|
844
|
-
"
|
|
845
|
-
// AA Intelligence Index (composite score)
|
|
846
|
-
intelligenceIndex: 8,
|
|
847
|
-
normalizedScore: 11,
|
|
848
|
-
|
|
750
|
+
"grok-4.20-0309-reasoning": {
|
|
849
751
|
// AA specific benchmarks
|
|
850
|
-
codingIndex:
|
|
752
|
+
codingIndex: 42.2,
|
|
851
753
|
mathIndex: undefined,
|
|
852
754
|
|
|
853
755
|
// Academic benchmarks
|
|
854
|
-
mmluPro:
|
|
855
|
-
gpqa: 0.
|
|
856
|
-
hle: 0.
|
|
756
|
+
mmluPro: undefined,
|
|
757
|
+
gpqa: 0.885,
|
|
758
|
+
hle: 0.3,
|
|
857
759
|
|
|
858
760
|
// Capabilities
|
|
859
761
|
contextWindow: 8192,
|
|
@@ -861,21 +763,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
861
763
|
supportsVision: false,
|
|
862
764
|
|
|
863
765
|
// Metadata
|
|
864
|
-
lastUpdated: "2026-
|
|
766
|
+
lastUpdated: "2026-06-01",
|
|
767
|
+
originalModel: "Grok 4.20 0309 (Reasoning)",
|
|
865
768
|
},
|
|
866
|
-
"
|
|
867
|
-
// AA Intelligence Index (composite score)
|
|
868
|
-
intelligenceIndex: 7.9,
|
|
869
|
-
normalizedScore: 11,
|
|
870
|
-
|
|
769
|
+
"grok-4.1-fast-reasoning": {
|
|
871
770
|
// AA specific benchmarks
|
|
872
|
-
codingIndex:
|
|
873
|
-
mathIndex:
|
|
771
|
+
codingIndex: 30.9,
|
|
772
|
+
mathIndex: 89.3,
|
|
874
773
|
|
|
875
774
|
// Academic benchmarks
|
|
876
|
-
mmluPro: 0.
|
|
877
|
-
gpqa: 0.
|
|
878
|
-
hle: 0.
|
|
775
|
+
mmluPro: 0.854,
|
|
776
|
+
gpqa: 0.853,
|
|
777
|
+
hle: 0.176,
|
|
879
778
|
|
|
880
779
|
// Capabilities
|
|
881
780
|
contextWindow: 8192,
|
|
@@ -883,21 +782,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
883
782
|
supportsVision: false,
|
|
884
783
|
|
|
885
784
|
// Metadata
|
|
886
|
-
lastUpdated: "2026-
|
|
785
|
+
lastUpdated: "2026-06-01",
|
|
786
|
+
originalModel: "Grok 4.1 Fast (Reasoning)",
|
|
887
787
|
},
|
|
888
|
-
"
|
|
889
|
-
// AA Intelligence Index (composite score)
|
|
890
|
-
intelligenceIndex: 8.8,
|
|
891
|
-
normalizedScore: 13,
|
|
892
|
-
|
|
788
|
+
"grok-2-dec-24": {
|
|
893
789
|
// AA specific benchmarks
|
|
894
790
|
codingIndex: undefined,
|
|
895
791
|
mathIndex: undefined,
|
|
896
792
|
|
|
897
793
|
// Academic benchmarks
|
|
898
|
-
mmluPro:
|
|
899
|
-
gpqa:
|
|
900
|
-
hle:
|
|
794
|
+
mmluPro: 0.709,
|
|
795
|
+
gpqa: 0.51,
|
|
796
|
+
hle: 0.038,
|
|
901
797
|
|
|
902
798
|
// Capabilities
|
|
903
799
|
contextWindow: 8192,
|
|
@@ -905,21 +801,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
905
801
|
supportsVision: false,
|
|
906
802
|
|
|
907
803
|
// Metadata
|
|
908
|
-
lastUpdated: "2026-
|
|
804
|
+
lastUpdated: "2026-06-01",
|
|
805
|
+
originalModel: "Grok 2 (Dec '24)",
|
|
909
806
|
},
|
|
910
|
-
"
|
|
911
|
-
// AA Intelligence Index (composite score)
|
|
912
|
-
intelligenceIndex: 16.3,
|
|
913
|
-
normalizedScore: 23,
|
|
914
|
-
|
|
807
|
+
"grok-4.1-fast-non-reasoning": {
|
|
915
808
|
// AA specific benchmarks
|
|
916
|
-
codingIndex:
|
|
917
|
-
mathIndex:
|
|
809
|
+
codingIndex: 19.5,
|
|
810
|
+
mathIndex: 34.3,
|
|
918
811
|
|
|
919
812
|
// Academic benchmarks
|
|
920
|
-
mmluPro: 0.
|
|
921
|
-
gpqa: 0.
|
|
922
|
-
hle: 0.
|
|
813
|
+
mmluPro: 0.743,
|
|
814
|
+
gpqa: 0.637,
|
|
815
|
+
hle: 0.05,
|
|
923
816
|
|
|
924
817
|
// Capabilities
|
|
925
818
|
contextWindow: 8192,
|
|
@@ -927,21 +820,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
927
820
|
supportsVision: false,
|
|
928
821
|
|
|
929
822
|
// Metadata
|
|
930
|
-
lastUpdated: "2026-
|
|
823
|
+
lastUpdated: "2026-06-01",
|
|
824
|
+
originalModel: "Grok 4.1 Fast (Non-reasoning)",
|
|
931
825
|
},
|
|
932
|
-
"
|
|
933
|
-
// AA Intelligence Index (composite score)
|
|
934
|
-
intelligenceIndex: 15.6,
|
|
935
|
-
normalizedScore: 22,
|
|
936
|
-
|
|
826
|
+
"grok-4.20-0309-non-reasoning": {
|
|
937
827
|
// AA specific benchmarks
|
|
938
|
-
codingIndex:
|
|
939
|
-
mathIndex:
|
|
828
|
+
codingIndex: 25.4,
|
|
829
|
+
mathIndex: undefined,
|
|
940
830
|
|
|
941
831
|
// Academic benchmarks
|
|
942
|
-
mmluPro:
|
|
943
|
-
gpqa: 0.
|
|
944
|
-
hle: 0.
|
|
832
|
+
mmluPro: undefined,
|
|
833
|
+
gpqa: 0.785,
|
|
834
|
+
hle: 0.225,
|
|
945
835
|
|
|
946
836
|
// Capabilities
|
|
947
837
|
contextWindow: 8192,
|
|
@@ -949,21 +839,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
949
839
|
supportsVision: false,
|
|
950
840
|
|
|
951
841
|
// Metadata
|
|
952
|
-
lastUpdated: "2026-
|
|
842
|
+
lastUpdated: "2026-06-01",
|
|
843
|
+
originalModel: "Grok 4.20 0309 (Non-reasoning)",
|
|
953
844
|
},
|
|
954
|
-
"
|
|
955
|
-
// AA Intelligence Index (composite score)
|
|
956
|
-
intelligenceIndex: 12.9,
|
|
957
|
-
normalizedScore: 18,
|
|
958
|
-
|
|
845
|
+
"grok-4.20-0309-v2-non-reasoning": {
|
|
959
846
|
// AA specific benchmarks
|
|
960
|
-
codingIndex:
|
|
847
|
+
codingIndex: 22,
|
|
961
848
|
mathIndex: undefined,
|
|
962
849
|
|
|
963
850
|
// Academic benchmarks
|
|
964
|
-
mmluPro:
|
|
965
|
-
gpqa: 0.
|
|
966
|
-
hle: 0.
|
|
851
|
+
mmluPro: undefined,
|
|
852
|
+
gpqa: 0.776,
|
|
853
|
+
hle: 0.242,
|
|
967
854
|
|
|
968
855
|
// Capabilities
|
|
969
856
|
contextWindow: 8192,
|
|
@@ -971,21 +858,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
971
858
|
supportsVision: false,
|
|
972
859
|
|
|
973
860
|
// Metadata
|
|
974
|
-
lastUpdated: "2026-
|
|
861
|
+
lastUpdated: "2026-06-01",
|
|
862
|
+
originalModel: "Grok 4.20 0309 v2 (Non-reasoning)",
|
|
975
863
|
},
|
|
976
|
-
"
|
|
977
|
-
// AA Intelligence Index (composite score)
|
|
978
|
-
intelligenceIndex: 12,
|
|
979
|
-
normalizedScore: 17,
|
|
980
|
-
|
|
864
|
+
"openchat-3.5-1210": {
|
|
981
865
|
// AA specific benchmarks
|
|
982
866
|
codingIndex: undefined,
|
|
983
867
|
mathIndex: undefined,
|
|
984
868
|
|
|
985
869
|
// Academic benchmarks
|
|
986
|
-
mmluPro: 0.
|
|
987
|
-
gpqa: 0.
|
|
988
|
-
hle: 0.
|
|
870
|
+
mmluPro: 0.31,
|
|
871
|
+
gpqa: 0.23,
|
|
872
|
+
hle: 0.048,
|
|
989
873
|
|
|
990
874
|
// Capabilities
|
|
991
875
|
contextWindow: 8192,
|
|
@@ -993,21 +877,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
993
877
|
supportsVision: false,
|
|
994
878
|
|
|
995
879
|
// Metadata
|
|
996
|
-
lastUpdated: "2026-
|
|
880
|
+
lastUpdated: "2026-06-01",
|
|
881
|
+
originalModel: "OpenChat 3.5 (1210)",
|
|
997
882
|
},
|
|
998
|
-
"
|
|
999
|
-
// AA Intelligence Index (composite score)
|
|
1000
|
-
intelligenceIndex: 11.7,
|
|
1001
|
-
normalizedScore: 17,
|
|
1002
|
-
|
|
883
|
+
"nova-pro": {
|
|
1003
884
|
// AA specific benchmarks
|
|
1004
|
-
codingIndex:
|
|
1005
|
-
mathIndex:
|
|
885
|
+
codingIndex: 11,
|
|
886
|
+
mathIndex: 7,
|
|
1006
887
|
|
|
1007
888
|
// Academic benchmarks
|
|
1008
|
-
mmluPro: 0.
|
|
1009
|
-
gpqa: 0.
|
|
1010
|
-
hle: 0.
|
|
889
|
+
mmluPro: 0.691,
|
|
890
|
+
gpqa: 0.499,
|
|
891
|
+
hle: 0.034,
|
|
1011
892
|
|
|
1012
893
|
// Capabilities
|
|
1013
894
|
contextWindow: 8192,
|
|
@@ -1015,21 +896,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1015
896
|
supportsVision: false,
|
|
1016
897
|
|
|
1017
898
|
// Metadata
|
|
1018
|
-
lastUpdated: "2026-
|
|
899
|
+
lastUpdated: "2026-06-01",
|
|
900
|
+
originalModel: "Nova Pro",
|
|
1019
901
|
},
|
|
1020
|
-
"
|
|
1021
|
-
// AA Intelligence Index (composite score)
|
|
1022
|
-
intelligenceIndex: 16.1,
|
|
1023
|
-
normalizedScore: 23,
|
|
1024
|
-
|
|
902
|
+
"nova-lite": {
|
|
1025
903
|
// AA specific benchmarks
|
|
1026
|
-
codingIndex:
|
|
1027
|
-
mathIndex:
|
|
904
|
+
codingIndex: 5.1,
|
|
905
|
+
mathIndex: 7,
|
|
1028
906
|
|
|
1029
907
|
// Academic benchmarks
|
|
1030
|
-
mmluPro: 0.
|
|
1031
|
-
gpqa: 0.
|
|
1032
|
-
hle: 0.
|
|
908
|
+
mmluPro: 0.59,
|
|
909
|
+
gpqa: 0.433,
|
|
910
|
+
hle: 0.046,
|
|
1033
911
|
|
|
1034
912
|
// Capabilities
|
|
1035
913
|
contextWindow: 8192,
|
|
@@ -1037,21 +915,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1037
915
|
supportsVision: false,
|
|
1038
916
|
|
|
1039
917
|
// Metadata
|
|
1040
|
-
lastUpdated: "2026-
|
|
918
|
+
lastUpdated: "2026-06-01",
|
|
919
|
+
originalModel: "Nova Lite",
|
|
1041
920
|
},
|
|
1042
|
-
"
|
|
1043
|
-
// AA Intelligence Index (composite score)
|
|
1044
|
-
intelligenceIndex: 25,
|
|
1045
|
-
normalizedScore: 36,
|
|
1046
|
-
|
|
921
|
+
"phi-3-mini-instruct-3.8b": {
|
|
1047
922
|
// AA specific benchmarks
|
|
1048
|
-
codingIndex:
|
|
1049
|
-
mathIndex:
|
|
923
|
+
codingIndex: 3,
|
|
924
|
+
mathIndex: 0.3,
|
|
1050
925
|
|
|
1051
926
|
// Academic benchmarks
|
|
1052
|
-
mmluPro: 0.
|
|
1053
|
-
gpqa: 0.
|
|
1054
|
-
hle: 0.
|
|
927
|
+
mmluPro: 0.435,
|
|
928
|
+
gpqa: 0.319,
|
|
929
|
+
hle: 0.044,
|
|
1055
930
|
|
|
1056
931
|
// Capabilities
|
|
1057
932
|
contextWindow: 8192,
|
|
@@ -1059,21 +934,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1059
934
|
supportsVision: false,
|
|
1060
935
|
|
|
1061
936
|
// Metadata
|
|
1062
|
-
lastUpdated: "2026-
|
|
937
|
+
lastUpdated: "2026-06-01",
|
|
938
|
+
originalModel: "Phi-3 Mini Instruct 3.8B",
|
|
1063
939
|
},
|
|
1064
|
-
"
|
|
1065
|
-
// AA Intelligence Index (composite score)
|
|
1066
|
-
intelligenceIndex: 14.5,
|
|
1067
|
-
normalizedScore: 21,
|
|
1068
|
-
|
|
940
|
+
"lfm-40b": {
|
|
1069
941
|
// AA specific benchmarks
|
|
1070
942
|
codingIndex: undefined,
|
|
1071
|
-
mathIndex:
|
|
943
|
+
mathIndex: undefined,
|
|
1072
944
|
|
|
1073
945
|
// Academic benchmarks
|
|
1074
|
-
mmluPro: 0.
|
|
1075
|
-
gpqa: 0.
|
|
1076
|
-
hle: 0.
|
|
946
|
+
mmluPro: 0.425,
|
|
947
|
+
gpqa: 0.327,
|
|
948
|
+
hle: 0.049,
|
|
1077
949
|
|
|
1078
950
|
// Capabilities
|
|
1079
951
|
contextWindow: 8192,
|
|
@@ -1081,21 +953,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1081
953
|
supportsVision: false,
|
|
1082
954
|
|
|
1083
955
|
// Metadata
|
|
1084
|
-
lastUpdated: "2026-
|
|
956
|
+
lastUpdated: "2026-06-01",
|
|
957
|
+
originalModel: "LFM 40B",
|
|
1085
958
|
},
|
|
1086
|
-
"
|
|
1087
|
-
// AA Intelligence Index (composite score)
|
|
1088
|
-
intelligenceIndex: 19.8,
|
|
1089
|
-
normalizedScore: 28,
|
|
1090
|
-
|
|
959
|
+
"lfm2-1.2b": {
|
|
1091
960
|
// AA specific benchmarks
|
|
1092
|
-
codingIndex:
|
|
1093
|
-
mathIndex:
|
|
961
|
+
codingIndex: 0.8,
|
|
962
|
+
mathIndex: 3.3,
|
|
1094
963
|
|
|
1095
964
|
// Academic benchmarks
|
|
1096
|
-
mmluPro: 0.
|
|
1097
|
-
gpqa: 0.
|
|
1098
|
-
hle: 0.
|
|
965
|
+
mmluPro: 0.257,
|
|
966
|
+
gpqa: 0.228,
|
|
967
|
+
hle: 0.057,
|
|
1099
968
|
|
|
1100
969
|
// Capabilities
|
|
1101
970
|
contextWindow: 8192,
|
|
@@ -1103,21 +972,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1103
972
|
supportsVision: false,
|
|
1104
973
|
|
|
1105
974
|
// Metadata
|
|
1106
|
-
lastUpdated: "2026-
|
|
975
|
+
lastUpdated: "2026-06-01",
|
|
976
|
+
originalModel: "LFM2 1.2B",
|
|
1107
977
|
},
|
|
1108
|
-
"
|
|
1109
|
-
// AA Intelligence Index (composite score)
|
|
1110
|
-
intelligenceIndex: 17,
|
|
1111
|
-
normalizedScore: 24,
|
|
1112
|
-
|
|
978
|
+
"solar-mini": {
|
|
1113
979
|
// AA specific benchmarks
|
|
1114
|
-
codingIndex:
|
|
1115
|
-
mathIndex:
|
|
980
|
+
codingIndex: undefined,
|
|
981
|
+
mathIndex: undefined,
|
|
1116
982
|
|
|
1117
983
|
// Academic benchmarks
|
|
1118
|
-
mmluPro:
|
|
1119
|
-
gpqa:
|
|
1120
|
-
hle:
|
|
984
|
+
mmluPro: undefined,
|
|
985
|
+
gpqa: undefined,
|
|
986
|
+
hle: undefined,
|
|
1121
987
|
|
|
1122
988
|
// Capabilities
|
|
1123
989
|
contextWindow: 8192,
|
|
@@ -1125,21 +991,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1125
991
|
supportsVision: false,
|
|
1126
992
|
|
|
1127
993
|
// Metadata
|
|
1128
|
-
lastUpdated: "2026-
|
|
994
|
+
lastUpdated: "2026-06-01",
|
|
995
|
+
originalModel: "Solar Mini",
|
|
1129
996
|
},
|
|
1130
|
-
"
|
|
1131
|
-
// AA Intelligence Index (composite score)
|
|
1132
|
-
intelligenceIndex: 22.4,
|
|
1133
|
-
normalizedScore: 32,
|
|
1134
|
-
|
|
997
|
+
"solar-pro-2-preview-reasoning": {
|
|
1135
998
|
// AA specific benchmarks
|
|
1136
|
-
codingIndex:
|
|
1137
|
-
mathIndex:
|
|
999
|
+
codingIndex: undefined,
|
|
1000
|
+
mathIndex: undefined,
|
|
1138
1001
|
|
|
1139
1002
|
// Academic benchmarks
|
|
1140
|
-
mmluPro: 0.
|
|
1141
|
-
gpqa: 0.
|
|
1142
|
-
hle: 0.
|
|
1003
|
+
mmluPro: 0.768,
|
|
1004
|
+
gpqa: 0.578,
|
|
1005
|
+
hle: 0.057,
|
|
1143
1006
|
|
|
1144
1007
|
// Capabilities
|
|
1145
1008
|
contextWindow: 8192,
|
|
@@ -1147,21 +1010,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1147
1010
|
supportsVision: false,
|
|
1148
1011
|
|
|
1149
1012
|
// Metadata
|
|
1150
|
-
lastUpdated: "2026-
|
|
1013
|
+
lastUpdated: "2026-06-01",
|
|
1014
|
+
originalModel: "Solar Pro 2 (Preview) (Reasoning)",
|
|
1151
1015
|
},
|
|
1152
|
-
"
|
|
1153
|
-
// AA Intelligence Index (composite score)
|
|
1154
|
-
intelligenceIndex: 20.8,
|
|
1155
|
-
normalizedScore: 30,
|
|
1156
|
-
|
|
1016
|
+
"solar-pro-2-preview-non-reasoning": {
|
|
1157
1017
|
// AA specific benchmarks
|
|
1158
|
-
codingIndex:
|
|
1159
|
-
mathIndex:
|
|
1018
|
+
codingIndex: undefined,
|
|
1019
|
+
mathIndex: undefined,
|
|
1160
1020
|
|
|
1161
1021
|
// Academic benchmarks
|
|
1162
|
-
mmluPro: 0.
|
|
1163
|
-
gpqa: 0.
|
|
1164
|
-
hle: 0.
|
|
1022
|
+
mmluPro: 0.725,
|
|
1023
|
+
gpqa: 0.544,
|
|
1024
|
+
hle: 0.038,
|
|
1165
1025
|
|
|
1166
1026
|
// Capabilities
|
|
1167
1027
|
contextWindow: 8192,
|
|
@@ -1169,21 +1029,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1169
1029
|
supportsVision: false,
|
|
1170
1030
|
|
|
1171
1031
|
// Metadata
|
|
1172
|
-
lastUpdated: "2026-
|
|
1032
|
+
lastUpdated: "2026-06-01",
|
|
1033
|
+
originalModel: "Solar Pro 2 (Preview) (Non-reasoning)",
|
|
1173
1034
|
},
|
|
1174
|
-
"
|
|
1175
|
-
// AA Intelligence Index (composite score)
|
|
1176
|
-
intelligenceIndex: 6.5,
|
|
1177
|
-
normalizedScore: 9,
|
|
1178
|
-
|
|
1035
|
+
"dbrx-instruct": {
|
|
1179
1036
|
// AA specific benchmarks
|
|
1180
|
-
codingIndex:
|
|
1181
|
-
mathIndex:
|
|
1037
|
+
codingIndex: undefined,
|
|
1038
|
+
mathIndex: undefined,
|
|
1182
1039
|
|
|
1183
1040
|
// Academic benchmarks
|
|
1184
|
-
mmluPro: 0.
|
|
1185
|
-
gpqa: 0.
|
|
1186
|
-
hle: 0.
|
|
1041
|
+
mmluPro: 0.397,
|
|
1042
|
+
gpqa: 0.331,
|
|
1043
|
+
hle: 0.066,
|
|
1187
1044
|
|
|
1188
1045
|
// Capabilities
|
|
1189
1046
|
contextWindow: 8192,
|
|
@@ -1191,21 +1048,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1191
1048
|
supportsVision: false,
|
|
1192
1049
|
|
|
1193
1050
|
// Metadata
|
|
1194
|
-
lastUpdated: "2026-
|
|
1051
|
+
lastUpdated: "2026-06-01",
|
|
1052
|
+
originalModel: "DBRX Instruct",
|
|
1195
1053
|
},
|
|
1196
|
-
"
|
|
1197
|
-
// AA Intelligence Index (composite score)
|
|
1198
|
-
intelligenceIndex: 29.5,
|
|
1199
|
-
normalizedScore: 42,
|
|
1200
|
-
|
|
1054
|
+
"minimax-m2.1": {
|
|
1201
1055
|
// AA specific benchmarks
|
|
1202
|
-
codingIndex:
|
|
1203
|
-
mathIndex:
|
|
1056
|
+
codingIndex: 32.8,
|
|
1057
|
+
mathIndex: 82.7,
|
|
1204
1058
|
|
|
1205
1059
|
// Academic benchmarks
|
|
1206
|
-
mmluPro: 0.
|
|
1207
|
-
gpqa: 0.
|
|
1208
|
-
hle: 0.
|
|
1060
|
+
mmluPro: 0.875,
|
|
1061
|
+
gpqa: 0.83,
|
|
1062
|
+
hle: 0.222,
|
|
1209
1063
|
|
|
1210
1064
|
// Capabilities
|
|
1211
1065
|
contextWindow: 8192,
|
|
@@ -1213,21 +1067,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1213
1067
|
supportsVision: false,
|
|
1214
1068
|
|
|
1215
1069
|
// Metadata
|
|
1216
|
-
lastUpdated: "2026-
|
|
1070
|
+
lastUpdated: "2026-06-01",
|
|
1071
|
+
originalModel: "MiniMax-M2.1",
|
|
1217
1072
|
},
|
|
1218
|
-
"
|
|
1219
|
-
// AA Intelligence Index (composite score)
|
|
1220
|
-
intelligenceIndex: 10.6,
|
|
1221
|
-
normalizedScore: 15,
|
|
1222
|
-
|
|
1073
|
+
"minimax-m2.5": {
|
|
1223
1074
|
// AA specific benchmarks
|
|
1224
|
-
codingIndex:
|
|
1225
|
-
mathIndex:
|
|
1075
|
+
codingIndex: 37.4,
|
|
1076
|
+
mathIndex: undefined,
|
|
1226
1077
|
|
|
1227
1078
|
// Academic benchmarks
|
|
1228
|
-
mmluPro:
|
|
1229
|
-
gpqa: 0.
|
|
1230
|
-
hle: 0.
|
|
1079
|
+
mmluPro: undefined,
|
|
1080
|
+
gpqa: 0.848,
|
|
1081
|
+
hle: 0.191,
|
|
1231
1082
|
|
|
1232
1083
|
// Capabilities
|
|
1233
1084
|
contextWindow: 8192,
|
|
@@ -1235,21 +1086,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1235
1086
|
supportsVision: false,
|
|
1236
1087
|
|
|
1237
1088
|
// Metadata
|
|
1238
|
-
lastUpdated: "2026-
|
|
1089
|
+
lastUpdated: "2026-06-01",
|
|
1090
|
+
originalModel: "MiniMax-M2.5",
|
|
1239
1091
|
},
|
|
1240
|
-
"
|
|
1241
|
-
// AA Intelligence Index (composite score)
|
|
1242
|
-
intelligenceIndex: 18.2,
|
|
1243
|
-
normalizedScore: 26,
|
|
1244
|
-
|
|
1092
|
+
"minimax-m2": {
|
|
1245
1093
|
// AA specific benchmarks
|
|
1246
|
-
codingIndex:
|
|
1247
|
-
mathIndex:
|
|
1094
|
+
codingIndex: 29.2,
|
|
1095
|
+
mathIndex: 78.3,
|
|
1248
1096
|
|
|
1249
1097
|
// Academic benchmarks
|
|
1250
|
-
mmluPro: 0.
|
|
1251
|
-
gpqa: 0.
|
|
1252
|
-
hle: 0.
|
|
1098
|
+
mmluPro: 0.82,
|
|
1099
|
+
gpqa: 0.777,
|
|
1100
|
+
hle: 0.125,
|
|
1253
1101
|
|
|
1254
1102
|
// Capabilities
|
|
1255
1103
|
contextWindow: 8192,
|
|
@@ -1257,20 +1105,17 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1257
1105
|
supportsVision: false,
|
|
1258
1106
|
|
|
1259
1107
|
// Metadata
|
|
1260
|
-
lastUpdated: "2026-
|
|
1108
|
+
lastUpdated: "2026-06-01",
|
|
1109
|
+
originalModel: "MiniMax-M2",
|
|
1261
1110
|
},
|
|
1262
|
-
"
|
|
1263
|
-
// AA Intelligence Index (composite score)
|
|
1264
|
-
intelligenceIndex: 19.7,
|
|
1265
|
-
normalizedScore: 28,
|
|
1266
|
-
|
|
1111
|
+
"minimax-m1-80k": {
|
|
1267
1112
|
// AA specific benchmarks
|
|
1268
|
-
codingIndex:
|
|
1269
|
-
mathIndex:
|
|
1113
|
+
codingIndex: 14.5,
|
|
1114
|
+
mathIndex: 61,
|
|
1270
1115
|
|
|
1271
1116
|
// Academic benchmarks
|
|
1272
|
-
mmluPro: 0.
|
|
1273
|
-
gpqa: 0.
|
|
1117
|
+
mmluPro: 0.816,
|
|
1118
|
+
gpqa: 0.697,
|
|
1274
1119
|
hle: 0.082,
|
|
1275
1120
|
|
|
1276
1121
|
// Capabilities
|
|
@@ -1279,21 +1124,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1279
1124
|
supportsVision: false,
|
|
1280
1125
|
|
|
1281
1126
|
// Metadata
|
|
1282
|
-
lastUpdated: "2026-
|
|
1127
|
+
lastUpdated: "2026-06-01",
|
|
1128
|
+
originalModel: "MiniMax M1 80k",
|
|
1283
1129
|
},
|
|
1284
|
-
"
|
|
1285
|
-
// AA Intelligence Index (composite score)
|
|
1286
|
-
intelligenceIndex: 16.5,
|
|
1287
|
-
normalizedScore: 24,
|
|
1288
|
-
|
|
1130
|
+
"minimax-m1-40k": {
|
|
1289
1131
|
// AA specific benchmarks
|
|
1290
|
-
codingIndex:
|
|
1291
|
-
mathIndex:
|
|
1132
|
+
codingIndex: 14.1,
|
|
1133
|
+
mathIndex: 13.7,
|
|
1292
1134
|
|
|
1293
1135
|
// Academic benchmarks
|
|
1294
|
-
mmluPro: 0.
|
|
1295
|
-
gpqa: 0.
|
|
1296
|
-
hle: 0.
|
|
1136
|
+
mmluPro: 0.808,
|
|
1137
|
+
gpqa: 0.682,
|
|
1138
|
+
hle: 0.075,
|
|
1297
1139
|
|
|
1298
1140
|
// Capabilities
|
|
1299
1141
|
contextWindow: 8192,
|
|
@@ -1301,21 +1143,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1301
1143
|
supportsVision: false,
|
|
1302
1144
|
|
|
1303
1145
|
// Metadata
|
|
1304
|
-
lastUpdated: "2026-
|
|
1146
|
+
lastUpdated: "2026-06-01",
|
|
1147
|
+
originalModel: "MiniMax M1 40k",
|
|
1305
1148
|
},
|
|
1306
|
-
"
|
|
1307
|
-
// AA Intelligence Index (composite score)
|
|
1308
|
-
intelligenceIndex: 12.9,
|
|
1309
|
-
normalizedScore: 18,
|
|
1310
|
-
|
|
1149
|
+
"llama-3.3-nemotron-super-49b-v1-reasoning": {
|
|
1311
1150
|
// AA specific benchmarks
|
|
1312
|
-
codingIndex: 9.
|
|
1313
|
-
mathIndex:
|
|
1151
|
+
codingIndex: 9.4,
|
|
1152
|
+
mathIndex: 54.7,
|
|
1314
1153
|
|
|
1315
1154
|
// Academic benchmarks
|
|
1316
|
-
mmluPro: 0.
|
|
1317
|
-
gpqa: 0.
|
|
1318
|
-
hle: 0.
|
|
1155
|
+
mmluPro: 0.785,
|
|
1156
|
+
gpqa: 0.643,
|
|
1157
|
+
hle: 0.065,
|
|
1319
1158
|
|
|
1320
1159
|
// Capabilities
|
|
1321
1160
|
contextWindow: 8192,
|
|
@@ -1323,21 +1162,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1323
1162
|
supportsVision: false,
|
|
1324
1163
|
|
|
1325
1164
|
// Metadata
|
|
1326
|
-
lastUpdated: "2026-
|
|
1165
|
+
lastUpdated: "2026-06-01",
|
|
1166
|
+
originalModel: "Llama 3.3 Nemotron Super 49B v1 (Reasoning)",
|
|
1327
1167
|
},
|
|
1328
|
-
"
|
|
1329
|
-
// AA Intelligence Index (composite score)
|
|
1330
|
-
intelligenceIndex: 15,
|
|
1331
|
-
normalizedScore: 21,
|
|
1332
|
-
|
|
1168
|
+
"llama-3.3-nemotron-super-49b-v1-non-reasoning": {
|
|
1333
1169
|
// AA specific benchmarks
|
|
1334
|
-
codingIndex:
|
|
1335
|
-
mathIndex:
|
|
1170
|
+
codingIndex: 7.6,
|
|
1171
|
+
mathIndex: 7.7,
|
|
1336
1172
|
|
|
1337
1173
|
// Academic benchmarks
|
|
1338
|
-
mmluPro: 0.
|
|
1339
|
-
gpqa: 0.
|
|
1340
|
-
hle: 0.
|
|
1174
|
+
mmluPro: 0.698,
|
|
1175
|
+
gpqa: 0.517,
|
|
1176
|
+
hle: 0.035,
|
|
1341
1177
|
|
|
1342
1178
|
// Capabilities
|
|
1343
1179
|
contextWindow: 8192,
|
|
@@ -1345,21 +1181,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1345
1181
|
supportsVision: false,
|
|
1346
1182
|
|
|
1347
1183
|
// Metadata
|
|
1348
|
-
lastUpdated: "2026-
|
|
1184
|
+
lastUpdated: "2026-06-01",
|
|
1185
|
+
originalModel: "Llama 3.3 Nemotron Super 49B v1 (Non-reasoning)",
|
|
1349
1186
|
},
|
|
1350
|
-
"
|
|
1351
|
-
// AA Intelligence Index (composite score)
|
|
1352
|
-
intelligenceIndex: 12.8,
|
|
1353
|
-
normalizedScore: 18,
|
|
1354
|
-
|
|
1187
|
+
"llama-3.1-nemotron-nano-4b-v1.1-reasoning": {
|
|
1355
1188
|
// AA specific benchmarks
|
|
1356
|
-
codingIndex:
|
|
1357
|
-
mathIndex:
|
|
1189
|
+
codingIndex: undefined,
|
|
1190
|
+
mathIndex: 50,
|
|
1358
1191
|
|
|
1359
1192
|
// Academic benchmarks
|
|
1360
|
-
mmluPro: 0.
|
|
1361
|
-
gpqa: 0.
|
|
1362
|
-
hle: 0.
|
|
1193
|
+
mmluPro: 0.556,
|
|
1194
|
+
gpqa: 0.408,
|
|
1195
|
+
hle: 0.051,
|
|
1363
1196
|
|
|
1364
1197
|
// Capabilities
|
|
1365
1198
|
contextWindow: 8192,
|
|
@@ -1367,21 +1200,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1367
1200
|
supportsVision: false,
|
|
1368
1201
|
|
|
1369
1202
|
// Metadata
|
|
1370
|
-
lastUpdated: "2026-
|
|
1203
|
+
lastUpdated: "2026-06-01",
|
|
1204
|
+
originalModel: "Llama 3.1 Nemotron Nano 4B v1.1 (Reasoning)",
|
|
1371
1205
|
},
|
|
1372
|
-
"
|
|
1373
|
-
// AA Intelligence Index (composite score)
|
|
1374
|
-
intelligenceIndex: 9.6,
|
|
1375
|
-
normalizedScore: 14,
|
|
1376
|
-
|
|
1206
|
+
"kimi-k2.5-reasoning": {
|
|
1377
1207
|
// AA specific benchmarks
|
|
1378
|
-
codingIndex:
|
|
1379
|
-
mathIndex:
|
|
1208
|
+
codingIndex: 39.6,
|
|
1209
|
+
mathIndex: undefined,
|
|
1380
1210
|
|
|
1381
1211
|
// Academic benchmarks
|
|
1382
|
-
mmluPro:
|
|
1383
|
-
gpqa: 0.
|
|
1384
|
-
hle: 0.
|
|
1212
|
+
mmluPro: undefined,
|
|
1213
|
+
gpqa: 0.879,
|
|
1214
|
+
hle: 0.294,
|
|
1385
1215
|
|
|
1386
1216
|
// Capabilities
|
|
1387
1217
|
contextWindow: 8192,
|
|
@@ -1389,21 +1219,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1389
1219
|
supportsVision: false,
|
|
1390
1220
|
|
|
1391
1221
|
// Metadata
|
|
1392
|
-
lastUpdated: "2026-
|
|
1222
|
+
lastUpdated: "2026-06-01",
|
|
1223
|
+
originalModel: "Kimi K2.5 (Reasoning)",
|
|
1393
1224
|
},
|
|
1394
|
-
"
|
|
1395
|
-
// AA Intelligence Index (composite score)
|
|
1396
|
-
intelligenceIndex: 10,
|
|
1397
|
-
normalizedScore: 14,
|
|
1398
|
-
|
|
1225
|
+
"kimi-k2-0905": {
|
|
1399
1226
|
// AA specific benchmarks
|
|
1400
|
-
codingIndex:
|
|
1401
|
-
mathIndex:
|
|
1227
|
+
codingIndex: 25.9,
|
|
1228
|
+
mathIndex: 57.3,
|
|
1402
1229
|
|
|
1403
1230
|
// Academic benchmarks
|
|
1404
|
-
mmluPro: 0.
|
|
1405
|
-
gpqa: 0.
|
|
1406
|
-
hle: 0.
|
|
1231
|
+
mmluPro: 0.819,
|
|
1232
|
+
gpqa: 0.767,
|
|
1233
|
+
hle: 0.063,
|
|
1407
1234
|
|
|
1408
1235
|
// Capabilities
|
|
1409
1236
|
contextWindow: 8192,
|
|
@@ -1411,21 +1238,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1411
1238
|
supportsVision: false,
|
|
1412
1239
|
|
|
1413
1240
|
// Metadata
|
|
1414
|
-
lastUpdated: "2026-
|
|
1241
|
+
lastUpdated: "2026-06-01",
|
|
1242
|
+
originalModel: "Kimi K2 0905",
|
|
1415
1243
|
},
|
|
1416
|
-
"
|
|
1417
|
-
// AA Intelligence Index (composite score)
|
|
1418
|
-
intelligenceIndex: 15.3,
|
|
1419
|
-
normalizedScore: 22,
|
|
1420
|
-
|
|
1244
|
+
"kimi-k2": {
|
|
1421
1245
|
// AA specific benchmarks
|
|
1422
|
-
codingIndex:
|
|
1423
|
-
mathIndex:
|
|
1246
|
+
codingIndex: 22.1,
|
|
1247
|
+
mathIndex: 57,
|
|
1424
1248
|
|
|
1425
1249
|
// Academic benchmarks
|
|
1426
|
-
mmluPro: 0.
|
|
1427
|
-
gpqa: 0.
|
|
1428
|
-
hle: 0.
|
|
1250
|
+
mmluPro: 0.824,
|
|
1251
|
+
gpqa: 0.766,
|
|
1252
|
+
hle: 0.07,
|
|
1429
1253
|
|
|
1430
1254
|
// Capabilities
|
|
1431
1255
|
contextWindow: 8192,
|
|
@@ -1433,21 +1257,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1433
1257
|
supportsVision: false,
|
|
1434
1258
|
|
|
1435
1259
|
// Metadata
|
|
1436
|
-
lastUpdated: "2026-
|
|
1260
|
+
lastUpdated: "2026-06-01",
|
|
1261
|
+
originalModel: "Kimi K2",
|
|
1437
1262
|
},
|
|
1438
|
-
"
|
|
1439
|
-
// AA Intelligence Index (composite score)
|
|
1440
|
-
intelligenceIndex: 15.2,
|
|
1441
|
-
normalizedScore: 22,
|
|
1442
|
-
|
|
1263
|
+
"kimi-k2-thinking": {
|
|
1443
1264
|
// AA specific benchmarks
|
|
1444
|
-
codingIndex:
|
|
1445
|
-
mathIndex:
|
|
1265
|
+
codingIndex: 34.8,
|
|
1266
|
+
mathIndex: 94.7,
|
|
1446
1267
|
|
|
1447
1268
|
// Academic benchmarks
|
|
1448
|
-
mmluPro: 0.
|
|
1449
|
-
gpqa: 0.
|
|
1450
|
-
hle: 0.
|
|
1269
|
+
mmluPro: 0.848,
|
|
1270
|
+
gpqa: 0.838,
|
|
1271
|
+
hle: 0.223,
|
|
1451
1272
|
|
|
1452
1273
|
// Capabilities
|
|
1453
1274
|
contextWindow: 8192,
|
|
@@ -1455,21 +1276,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1455
1276
|
supportsVision: false,
|
|
1456
1277
|
|
|
1457
1278
|
// Metadata
|
|
1458
|
-
lastUpdated: "2026-
|
|
1279
|
+
lastUpdated: "2026-06-01",
|
|
1280
|
+
originalModel: "Kimi K2 Thinking",
|
|
1459
1281
|
},
|
|
1460
|
-
"
|
|
1461
|
-
// AA Intelligence Index (composite score)
|
|
1462
|
-
intelligenceIndex: 17.2,
|
|
1463
|
-
normalizedScore: 25,
|
|
1464
|
-
|
|
1282
|
+
"kimi-k2.5-non-reasoning": {
|
|
1465
1283
|
// AA specific benchmarks
|
|
1466
|
-
codingIndex:
|
|
1467
|
-
mathIndex:
|
|
1284
|
+
codingIndex: 25.8,
|
|
1285
|
+
mathIndex: undefined,
|
|
1468
1286
|
|
|
1469
1287
|
// Academic benchmarks
|
|
1470
|
-
mmluPro:
|
|
1471
|
-
gpqa: 0.
|
|
1472
|
-
hle: 0.
|
|
1288
|
+
mmluPro: undefined,
|
|
1289
|
+
gpqa: 0.789,
|
|
1290
|
+
hle: 0.123,
|
|
1473
1291
|
|
|
1474
1292
|
// Capabilities
|
|
1475
1293
|
contextWindow: 8192,
|
|
@@ -1477,21 +1295,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1477
1295
|
supportsVision: false,
|
|
1478
1296
|
|
|
1479
1297
|
// Metadata
|
|
1480
|
-
lastUpdated: "2026-
|
|
1298
|
+
lastUpdated: "2026-06-01",
|
|
1299
|
+
originalModel: "Kimi K2.5 (Non-reasoning)",
|
|
1481
1300
|
},
|
|
1482
|
-
"
|
|
1483
|
-
// AA Intelligence Index (composite score)
|
|
1484
|
-
intelligenceIndex: 6.8,
|
|
1485
|
-
normalizedScore: 10,
|
|
1486
|
-
|
|
1301
|
+
"step-3.5-flash": {
|
|
1487
1302
|
// AA specific benchmarks
|
|
1488
|
-
codingIndex:
|
|
1489
|
-
mathIndex:
|
|
1303
|
+
codingIndex: 31.6,
|
|
1304
|
+
mathIndex: undefined,
|
|
1490
1305
|
|
|
1491
1306
|
// Academic benchmarks
|
|
1492
|
-
mmluPro:
|
|
1493
|
-
gpqa: 0.
|
|
1494
|
-
hle: 0.
|
|
1307
|
+
mmluPro: undefined,
|
|
1308
|
+
gpqa: 0.831,
|
|
1309
|
+
hle: 0.191,
|
|
1495
1310
|
|
|
1496
1311
|
// Capabilities
|
|
1497
1312
|
contextWindow: 8192,
|
|
@@ -1499,21 +1314,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1499
1314
|
supportsVision: false,
|
|
1500
1315
|
|
|
1501
1316
|
// Metadata
|
|
1502
|
-
lastUpdated: "2026-
|
|
1317
|
+
lastUpdated: "2026-06-01",
|
|
1318
|
+
originalModel: "Step 3.5 Flash",
|
|
1503
1319
|
},
|
|
1504
|
-
"
|
|
1505
|
-
// AA Intelligence Index (composite score)
|
|
1506
|
-
intelligenceIndex: 13.2,
|
|
1507
|
-
normalizedScore: 19,
|
|
1508
|
-
|
|
1320
|
+
"llama-3.1-tulu3-405b": {
|
|
1509
1321
|
// AA specific benchmarks
|
|
1510
1322
|
codingIndex: undefined,
|
|
1511
1323
|
mathIndex: undefined,
|
|
1512
1324
|
|
|
1513
1325
|
// Academic benchmarks
|
|
1514
|
-
mmluPro: 0.
|
|
1515
|
-
gpqa: 0.
|
|
1516
|
-
hle: 0.
|
|
1326
|
+
mmluPro: 0.716,
|
|
1327
|
+
gpqa: 0.516,
|
|
1328
|
+
hle: 0.035,
|
|
1517
1329
|
|
|
1518
1330
|
// Capabilities
|
|
1519
1331
|
contextWindow: 8192,
|
|
@@ -1521,21 +1333,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1521
1333
|
supportsVision: false,
|
|
1522
1334
|
|
|
1523
1335
|
// Metadata
|
|
1524
|
-
lastUpdated: "2026-
|
|
1336
|
+
lastUpdated: "2026-06-01",
|
|
1337
|
+
originalModel: "Llama 3.1 Tulu3 405B",
|
|
1525
1338
|
},
|
|
1526
|
-
"
|
|
1527
|
-
// AA Intelligence Index (composite score)
|
|
1528
|
-
intelligenceIndex: 12.5,
|
|
1529
|
-
normalizedScore: 18,
|
|
1530
|
-
|
|
1339
|
+
"olmo-2-7b": {
|
|
1531
1340
|
// AA specific benchmarks
|
|
1532
|
-
codingIndex:
|
|
1533
|
-
mathIndex:
|
|
1341
|
+
codingIndex: 1.2,
|
|
1342
|
+
mathIndex: 0.7,
|
|
1534
1343
|
|
|
1535
1344
|
// Academic benchmarks
|
|
1536
|
-
mmluPro: 0.
|
|
1537
|
-
gpqa: 0.
|
|
1538
|
-
hle: 0.
|
|
1345
|
+
mmluPro: 0.282,
|
|
1346
|
+
gpqa: 0.288,
|
|
1347
|
+
hle: 0.055,
|
|
1539
1348
|
|
|
1540
1349
|
// Capabilities
|
|
1541
1350
|
contextWindow: 8192,
|
|
@@ -1543,21 +1352,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1543
1352
|
supportsVision: false,
|
|
1544
1353
|
|
|
1545
1354
|
// Metadata
|
|
1546
|
-
lastUpdated: "2026-
|
|
1355
|
+
lastUpdated: "2026-06-01",
|
|
1356
|
+
originalModel: "OLMo 2 7B",
|
|
1547
1357
|
},
|
|
1548
|
-
"
|
|
1549
|
-
// AA Intelligence Index (composite score)
|
|
1550
|
-
intelligenceIndex: 19.7,
|
|
1551
|
-
normalizedScore: 28,
|
|
1552
|
-
|
|
1358
|
+
"olmo-3-32b-think": {
|
|
1553
1359
|
// AA specific benchmarks
|
|
1554
|
-
codingIndex:
|
|
1555
|
-
mathIndex:
|
|
1360
|
+
codingIndex: 10.5,
|
|
1361
|
+
mathIndex: 73.7,
|
|
1556
1362
|
|
|
1557
1363
|
// Academic benchmarks
|
|
1558
|
-
mmluPro: 0.
|
|
1559
|
-
gpqa: 0.
|
|
1560
|
-
hle: 0.
|
|
1364
|
+
mmluPro: 0.759,
|
|
1365
|
+
gpqa: 0.61,
|
|
1366
|
+
hle: 0.059,
|
|
1561
1367
|
|
|
1562
1368
|
// Capabilities
|
|
1563
1369
|
contextWindow: 8192,
|
|
@@ -1565,21 +1371,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1565
1371
|
supportsVision: false,
|
|
1566
1372
|
|
|
1567
1373
|
// Metadata
|
|
1568
|
-
lastUpdated: "2026-
|
|
1374
|
+
lastUpdated: "2026-06-01",
|
|
1375
|
+
originalModel: "Olmo 3 32B Think",
|
|
1569
1376
|
},
|
|
1570
|
-
"
|
|
1571
|
-
// AA Intelligence Index (composite score)
|
|
1572
|
-
intelligenceIndex: 16.7,
|
|
1573
|
-
normalizedScore: 24,
|
|
1574
|
-
|
|
1377
|
+
"olmo-2-32b": {
|
|
1575
1378
|
// AA specific benchmarks
|
|
1576
|
-
codingIndex:
|
|
1577
|
-
mathIndex:
|
|
1379
|
+
codingIndex: 2.7,
|
|
1380
|
+
mathIndex: 3.3,
|
|
1578
1381
|
|
|
1579
1382
|
// Academic benchmarks
|
|
1580
|
-
mmluPro: 0.
|
|
1581
|
-
gpqa: 0.
|
|
1582
|
-
hle: 0.
|
|
1383
|
+
mmluPro: 0.511,
|
|
1384
|
+
gpqa: 0.328,
|
|
1385
|
+
hle: 0.037,
|
|
1583
1386
|
|
|
1584
1387
|
// Capabilities
|
|
1585
1388
|
contextWindow: 8192,
|
|
@@ -1587,20 +1390,17 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1587
1390
|
supportsVision: false,
|
|
1588
1391
|
|
|
1589
1392
|
// Metadata
|
|
1590
|
-
lastUpdated: "2026-
|
|
1393
|
+
lastUpdated: "2026-06-01",
|
|
1394
|
+
originalModel: "OLMo 2 32B",
|
|
1591
1395
|
},
|
|
1592
|
-
"
|
|
1593
|
-
// AA Intelligence Index (composite score)
|
|
1594
|
-
intelligenceIndex: 13.2,
|
|
1595
|
-
normalizedScore: 19,
|
|
1596
|
-
|
|
1396
|
+
"granite-3.3-8b-non-reasoning": {
|
|
1597
1397
|
// AA specific benchmarks
|
|
1598
|
-
codingIndex:
|
|
1599
|
-
mathIndex:
|
|
1398
|
+
codingIndex: 3.4,
|
|
1399
|
+
mathIndex: 6.7,
|
|
1600
1400
|
|
|
1601
1401
|
// Academic benchmarks
|
|
1602
|
-
mmluPro: 0.
|
|
1603
|
-
gpqa: 0.
|
|
1402
|
+
mmluPro: 0.468,
|
|
1403
|
+
gpqa: 0.338,
|
|
1604
1404
|
hle: 0.042,
|
|
1605
1405
|
|
|
1606
1406
|
// Capabilities
|
|
@@ -1609,21 +1409,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1609
1409
|
supportsVision: false,
|
|
1610
1410
|
|
|
1611
1411
|
// Metadata
|
|
1612
|
-
lastUpdated: "2026-
|
|
1412
|
+
lastUpdated: "2026-06-01",
|
|
1413
|
+
originalModel: "Granite 3.3 8B (Non-reasoning)",
|
|
1613
1414
|
},
|
|
1614
|
-
"
|
|
1615
|
-
// AA Intelligence Index (composite score)
|
|
1616
|
-
intelligenceIndex: 27.6,
|
|
1617
|
-
normalizedScore: 39,
|
|
1618
|
-
|
|
1415
|
+
"reka-flash-sep-24": {
|
|
1619
1416
|
// AA specific benchmarks
|
|
1620
|
-
codingIndex:
|
|
1621
|
-
mathIndex:
|
|
1417
|
+
codingIndex: undefined,
|
|
1418
|
+
mathIndex: undefined,
|
|
1622
1419
|
|
|
1623
1420
|
// Academic benchmarks
|
|
1624
|
-
mmluPro:
|
|
1625
|
-
gpqa:
|
|
1626
|
-
hle:
|
|
1421
|
+
mmluPro: undefined,
|
|
1422
|
+
gpqa: undefined,
|
|
1423
|
+
hle: undefined,
|
|
1627
1424
|
|
|
1628
1425
|
// Capabilities
|
|
1629
1426
|
contextWindow: 8192,
|
|
@@ -1631,21 +1428,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1631
1428
|
supportsVision: false,
|
|
1632
1429
|
|
|
1633
1430
|
// Metadata
|
|
1634
|
-
lastUpdated: "2026-
|
|
1431
|
+
lastUpdated: "2026-06-01",
|
|
1432
|
+
originalModel: "Reka Flash (Sep '24)",
|
|
1635
1433
|
},
|
|
1636
|
-
"
|
|
1637
|
-
// AA Intelligence Index (composite score)
|
|
1638
|
-
intelligenceIndex: 14.2,
|
|
1639
|
-
normalizedScore: 20,
|
|
1640
|
-
|
|
1434
|
+
"hermes-3---llama-3.1-70b": {
|
|
1641
1435
|
// AA specific benchmarks
|
|
1642
1436
|
codingIndex: undefined,
|
|
1643
|
-
mathIndex:
|
|
1437
|
+
mathIndex: undefined,
|
|
1644
1438
|
|
|
1645
1439
|
// Academic benchmarks
|
|
1646
|
-
mmluPro: 0.
|
|
1647
|
-
gpqa: 0.
|
|
1648
|
-
hle: 0.
|
|
1440
|
+
mmluPro: 0.571,
|
|
1441
|
+
gpqa: 0.401,
|
|
1442
|
+
hle: 0.041,
|
|
1649
1443
|
|
|
1650
1444
|
// Capabilities
|
|
1651
1445
|
contextWindow: 8192,
|
|
@@ -1653,21 +1447,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1653
1447
|
supportsVision: false,
|
|
1654
1448
|
|
|
1655
1449
|
// Metadata
|
|
1656
|
-
lastUpdated: "2026-
|
|
1450
|
+
lastUpdated: "2026-06-01",
|
|
1451
|
+
originalModel: "Hermes 3 - Llama-3.1 70B",
|
|
1657
1452
|
},
|
|
1658
|
-
"
|
|
1659
|
-
// AA Intelligence Index (composite score)
|
|
1660
|
-
intelligenceIndex: 5.7,
|
|
1661
|
-
normalizedScore: 8,
|
|
1662
|
-
|
|
1453
|
+
"mimo-v2-flash-reasoning": {
|
|
1663
1454
|
// AA specific benchmarks
|
|
1664
|
-
codingIndex:
|
|
1665
|
-
mathIndex:
|
|
1455
|
+
codingIndex: 31.8,
|
|
1456
|
+
mathIndex: 96.3,
|
|
1666
1457
|
|
|
1667
1458
|
// Academic benchmarks
|
|
1668
|
-
mmluPro: 0.
|
|
1669
|
-
gpqa: 0.
|
|
1670
|
-
hle: 0.
|
|
1459
|
+
mmluPro: 0.843,
|
|
1460
|
+
gpqa: 0.846,
|
|
1461
|
+
hle: 0.211,
|
|
1671
1462
|
|
|
1672
1463
|
// Capabilities
|
|
1673
1464
|
contextWindow: 8192,
|
|
@@ -1675,21 +1466,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1675
1466
|
supportsVision: false,
|
|
1676
1467
|
|
|
1677
1468
|
// Metadata
|
|
1678
|
-
lastUpdated: "2026-
|
|
1469
|
+
lastUpdated: "2026-06-01",
|
|
1470
|
+
originalModel: "MiMo-V2-Flash (Reasoning)",
|
|
1679
1471
|
},
|
|
1680
|
-
"
|
|
1681
|
-
// AA Intelligence Index (composite score)
|
|
1682
|
-
intelligenceIndex: 12.5,
|
|
1683
|
-
normalizedScore: 18,
|
|
1684
|
-
|
|
1472
|
+
"mimo-v2-pro": {
|
|
1685
1473
|
// AA specific benchmarks
|
|
1686
|
-
codingIndex:
|
|
1687
|
-
mathIndex:
|
|
1474
|
+
codingIndex: 41.4,
|
|
1475
|
+
mathIndex: undefined,
|
|
1688
1476
|
|
|
1689
1477
|
// Academic benchmarks
|
|
1690
|
-
mmluPro:
|
|
1691
|
-
gpqa: 0.
|
|
1692
|
-
hle: 0.
|
|
1478
|
+
mmluPro: undefined,
|
|
1479
|
+
gpqa: 0.87,
|
|
1480
|
+
hle: 0.283,
|
|
1693
1481
|
|
|
1694
1482
|
// Capabilities
|
|
1695
1483
|
contextWindow: 8192,
|
|
@@ -1697,21 +1485,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1697
1485
|
supportsVision: false,
|
|
1698
1486
|
|
|
1699
1487
|
// Metadata
|
|
1700
|
-
lastUpdated: "2026-
|
|
1488
|
+
lastUpdated: "2026-06-01",
|
|
1489
|
+
originalModel: "MiMo-V2-Pro",
|
|
1701
1490
|
},
|
|
1702
|
-
"
|
|
1703
|
-
// AA Intelligence Index (composite score)
|
|
1704
|
-
intelligenceIndex: 16.2,
|
|
1705
|
-
normalizedScore: 23,
|
|
1706
|
-
|
|
1491
|
+
"sarvam-m-reasoning": {
|
|
1707
1492
|
// AA specific benchmarks
|
|
1708
|
-
codingIndex:
|
|
1709
|
-
mathIndex:
|
|
1493
|
+
codingIndex: 7.5,
|
|
1494
|
+
mathIndex: undefined,
|
|
1710
1495
|
|
|
1711
1496
|
// Academic benchmarks
|
|
1712
|
-
mmluPro: 0.
|
|
1713
|
-
gpqa: 0.
|
|
1714
|
-
hle: 0.
|
|
1497
|
+
mmluPro: 0.696,
|
|
1498
|
+
gpqa: 0.416,
|
|
1499
|
+
hle: 0.033,
|
|
1715
1500
|
|
|
1716
1501
|
// Capabilities
|
|
1717
1502
|
contextWindow: 8192,
|
|
@@ -1719,21 +1504,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1719
1504
|
supportsVision: false,
|
|
1720
1505
|
|
|
1721
1506
|
// Metadata
|
|
1722
|
-
lastUpdated: "2026-
|
|
1507
|
+
lastUpdated: "2026-06-01",
|
|
1508
|
+
originalModel: "Sarvam M (Reasoning)",
|
|
1723
1509
|
},
|
|
1724
|
-
"
|
|
1725
|
-
// AA Intelligence Index (composite score)
|
|
1726
|
-
intelligenceIndex: 8,
|
|
1727
|
-
normalizedScore: 11,
|
|
1728
|
-
|
|
1510
|
+
"glm-4.6-non-reasoning": {
|
|
1729
1511
|
// AA specific benchmarks
|
|
1730
|
-
codingIndex:
|
|
1731
|
-
mathIndex:
|
|
1512
|
+
codingIndex: 30.2,
|
|
1513
|
+
mathIndex: 44.3,
|
|
1732
1514
|
|
|
1733
1515
|
// Academic benchmarks
|
|
1734
|
-
mmluPro: 0.
|
|
1735
|
-
gpqa: 0.
|
|
1736
|
-
hle: 0.
|
|
1516
|
+
mmluPro: 0.784,
|
|
1517
|
+
gpqa: 0.632,
|
|
1518
|
+
hle: 0.052,
|
|
1737
1519
|
|
|
1738
1520
|
// Capabilities
|
|
1739
1521
|
contextWindow: 8192,
|
|
@@ -1741,21 +1523,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1741
1523
|
supportsVision: false,
|
|
1742
1524
|
|
|
1743
1525
|
// Metadata
|
|
1744
|
-
lastUpdated: "2026-
|
|
1526
|
+
lastUpdated: "2026-06-01",
|
|
1527
|
+
originalModel: "GLM-4.6 (Non-reasoning)",
|
|
1745
1528
|
},
|
|
1746
|
-
"
|
|
1747
|
-
// AA Intelligence Index (composite score)
|
|
1748
|
-
intelligenceIndex: 31.4,
|
|
1749
|
-
normalizedScore: 45,
|
|
1750
|
-
|
|
1529
|
+
"glm-4.7-reasoning": {
|
|
1751
1530
|
// AA specific benchmarks
|
|
1752
|
-
codingIndex:
|
|
1753
|
-
mathIndex:
|
|
1531
|
+
codingIndex: 36.3,
|
|
1532
|
+
mathIndex: 95,
|
|
1754
1533
|
|
|
1755
1534
|
// Academic benchmarks
|
|
1756
|
-
mmluPro: 0.
|
|
1757
|
-
gpqa: 0.
|
|
1758
|
-
hle: 0.
|
|
1535
|
+
mmluPro: 0.856,
|
|
1536
|
+
gpqa: 0.859,
|
|
1537
|
+
hle: 0.251,
|
|
1759
1538
|
|
|
1760
1539
|
// Capabilities
|
|
1761
1540
|
contextWindow: 8192,
|
|
@@ -1763,21 +1542,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1763
1542
|
supportsVision: false,
|
|
1764
1543
|
|
|
1765
1544
|
// Metadata
|
|
1766
|
-
lastUpdated: "2026-
|
|
1545
|
+
lastUpdated: "2026-06-01",
|
|
1546
|
+
originalModel: "GLM-4.7 (Reasoning)",
|
|
1767
1547
|
},
|
|
1768
|
-
"
|
|
1769
|
-
// AA Intelligence Index (composite score)
|
|
1770
|
-
intelligenceIndex: 20,
|
|
1771
|
-
normalizedScore: 29,
|
|
1772
|
-
|
|
1548
|
+
"glm-5-reasoning": {
|
|
1773
1549
|
// AA specific benchmarks
|
|
1774
|
-
codingIndex:
|
|
1775
|
-
mathIndex:
|
|
1550
|
+
codingIndex: 44.2,
|
|
1551
|
+
mathIndex: undefined,
|
|
1776
1552
|
|
|
1777
1553
|
// Academic benchmarks
|
|
1778
|
-
mmluPro:
|
|
1779
|
-
gpqa: 0.
|
|
1780
|
-
hle: 0.
|
|
1554
|
+
mmluPro: undefined,
|
|
1555
|
+
gpqa: 0.82,
|
|
1556
|
+
hle: 0.272,
|
|
1781
1557
|
|
|
1782
1558
|
// Capabilities
|
|
1783
1559
|
contextWindow: 8192,
|
|
@@ -1785,21 +1561,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1785
1561
|
supportsVision: false,
|
|
1786
1562
|
|
|
1787
1563
|
// Metadata
|
|
1788
|
-
lastUpdated: "2026-
|
|
1564
|
+
lastUpdated: "2026-06-01",
|
|
1565
|
+
originalModel: "GLM-5 (Reasoning)",
|
|
1789
1566
|
},
|
|
1790
|
-
"
|
|
1791
|
-
// AA Intelligence Index (composite score)
|
|
1792
|
-
intelligenceIndex: 14.3,
|
|
1793
|
-
normalizedScore: 20,
|
|
1794
|
-
|
|
1567
|
+
"glm-4.6-reasoning": {
|
|
1795
1568
|
// AA specific benchmarks
|
|
1796
|
-
codingIndex:
|
|
1797
|
-
mathIndex:
|
|
1569
|
+
codingIndex: 29.5,
|
|
1570
|
+
mathIndex: 86,
|
|
1798
1571
|
|
|
1799
1572
|
// Academic benchmarks
|
|
1800
|
-
mmluPro: 0.
|
|
1801
|
-
gpqa: 0.
|
|
1802
|
-
hle: 0.
|
|
1573
|
+
mmluPro: 0.829,
|
|
1574
|
+
gpqa: 0.78,
|
|
1575
|
+
hle: 0.133,
|
|
1803
1576
|
|
|
1804
1577
|
// Capabilities
|
|
1805
1578
|
contextWindow: 8192,
|
|
@@ -1807,21 +1580,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1807
1580
|
supportsVision: false,
|
|
1808
1581
|
|
|
1809
1582
|
// Metadata
|
|
1810
|
-
lastUpdated: "2026-
|
|
1583
|
+
lastUpdated: "2026-06-01",
|
|
1584
|
+
originalModel: "GLM-4.6 (Reasoning)",
|
|
1811
1585
|
},
|
|
1812
|
-
"
|
|
1813
|
-
// AA Intelligence Index (composite score)
|
|
1814
|
-
intelligenceIndex: 9.5,
|
|
1815
|
-
normalizedScore: 14,
|
|
1816
|
-
|
|
1586
|
+
"glm-4.7-non-reasoning": {
|
|
1817
1587
|
// AA specific benchmarks
|
|
1818
|
-
codingIndex:
|
|
1819
|
-
mathIndex:
|
|
1588
|
+
codingIndex: 32,
|
|
1589
|
+
mathIndex: 48,
|
|
1820
1590
|
|
|
1821
1591
|
// Academic benchmarks
|
|
1822
|
-
mmluPro:
|
|
1823
|
-
gpqa: 0.
|
|
1824
|
-
hle:
|
|
1592
|
+
mmluPro: 0.794,
|
|
1593
|
+
gpqa: 0.664,
|
|
1594
|
+
hle: 0.061,
|
|
1825
1595
|
|
|
1826
1596
|
// Capabilities
|
|
1827
1597
|
contextWindow: 8192,
|
|
@@ -1829,21 +1599,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1829
1599
|
supportsVision: false,
|
|
1830
1600
|
|
|
1831
1601
|
// Metadata
|
|
1832
|
-
lastUpdated: "2026-
|
|
1602
|
+
lastUpdated: "2026-06-01",
|
|
1603
|
+
originalModel: "GLM-4.7 (Non-reasoning)",
|
|
1833
1604
|
},
|
|
1834
|
-
"
|
|
1835
|
-
// AA Intelligence Index (composite score)
|
|
1836
|
-
intelligenceIndex: 26.1,
|
|
1837
|
-
normalizedScore: 37,
|
|
1838
|
-
|
|
1605
|
+
"glm-4.5-reasoning": {
|
|
1839
1606
|
// AA specific benchmarks
|
|
1840
|
-
codingIndex:
|
|
1841
|
-
mathIndex:
|
|
1607
|
+
codingIndex: 26.3,
|
|
1608
|
+
mathIndex: 73.7,
|
|
1842
1609
|
|
|
1843
1610
|
// Academic benchmarks
|
|
1844
|
-
mmluPro: 0.
|
|
1845
|
-
gpqa: 0.
|
|
1846
|
-
hle: 0.
|
|
1611
|
+
mmluPro: 0.835,
|
|
1612
|
+
gpqa: 0.782,
|
|
1613
|
+
hle: 0.122,
|
|
1847
1614
|
|
|
1848
1615
|
// Capabilities
|
|
1849
1616
|
contextWindow: 8192,
|
|
@@ -1851,21 +1618,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1851
1618
|
supportsVision: false,
|
|
1852
1619
|
|
|
1853
1620
|
// Metadata
|
|
1854
|
-
lastUpdated: "2026-
|
|
1621
|
+
lastUpdated: "2026-06-01",
|
|
1622
|
+
originalModel: "GLM-4.5 (Reasoning)",
|
|
1855
1623
|
},
|
|
1856
|
-
"
|
|
1857
|
-
// AA Intelligence Index (composite score)
|
|
1858
|
-
intelligenceIndex: 13.7,
|
|
1859
|
-
normalizedScore: 20,
|
|
1860
|
-
|
|
1624
|
+
"glm-4.7-flash-reasoning": {
|
|
1861
1625
|
// AA specific benchmarks
|
|
1862
|
-
codingIndex:
|
|
1863
|
-
mathIndex:
|
|
1626
|
+
codingIndex: 25.9,
|
|
1627
|
+
mathIndex: undefined,
|
|
1864
1628
|
|
|
1865
1629
|
// Academic benchmarks
|
|
1866
|
-
mmluPro:
|
|
1867
|
-
gpqa: 0.
|
|
1868
|
-
hle: 0.
|
|
1630
|
+
mmluPro: undefined,
|
|
1631
|
+
gpqa: 0.581,
|
|
1632
|
+
hle: 0.071,
|
|
1869
1633
|
|
|
1870
1634
|
// Capabilities
|
|
1871
1635
|
contextWindow: 8192,
|
|
@@ -1873,21 +1637,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1873
1637
|
supportsVision: false,
|
|
1874
1638
|
|
|
1875
1639
|
// Metadata
|
|
1876
|
-
lastUpdated: "2026-
|
|
1640
|
+
lastUpdated: "2026-06-01",
|
|
1641
|
+
originalModel: "GLM-4.7-Flash (Reasoning)",
|
|
1877
1642
|
},
|
|
1878
|
-
"
|
|
1879
|
-
// AA Intelligence Index (composite score)
|
|
1880
|
-
intelligenceIndex: 32.5,
|
|
1881
|
-
normalizedScore: 46,
|
|
1882
|
-
|
|
1643
|
+
"glm-5-non-reasoning": {
|
|
1883
1644
|
// AA specific benchmarks
|
|
1884
|
-
codingIndex:
|
|
1885
|
-
mathIndex:
|
|
1645
|
+
codingIndex: 39,
|
|
1646
|
+
mathIndex: undefined,
|
|
1886
1647
|
|
|
1887
1648
|
// Academic benchmarks
|
|
1888
|
-
mmluPro:
|
|
1889
|
-
gpqa: 0.
|
|
1890
|
-
hle: 0.
|
|
1649
|
+
mmluPro: undefined,
|
|
1650
|
+
gpqa: 0.666,
|
|
1651
|
+
hle: 0.072,
|
|
1891
1652
|
|
|
1892
1653
|
// Capabilities
|
|
1893
1654
|
contextWindow: 8192,
|
|
@@ -1895,21 +1656,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1895
1656
|
supportsVision: false,
|
|
1896
1657
|
|
|
1897
1658
|
// Metadata
|
|
1898
|
-
lastUpdated: "2026-
|
|
1659
|
+
lastUpdated: "2026-06-01",
|
|
1660
|
+
originalModel: "GLM-5 (Non-reasoning)",
|
|
1899
1661
|
},
|
|
1900
|
-
"
|
|
1901
|
-
// AA Intelligence Index (composite score)
|
|
1902
|
-
intelligenceIndex: 24.7,
|
|
1903
|
-
normalizedScore: 35,
|
|
1904
|
-
|
|
1662
|
+
"glm-4.7-flash-non-reasoning": {
|
|
1905
1663
|
// AA specific benchmarks
|
|
1906
|
-
codingIndex:
|
|
1907
|
-
mathIndex:
|
|
1664
|
+
codingIndex: 11,
|
|
1665
|
+
mathIndex: undefined,
|
|
1908
1666
|
|
|
1909
1667
|
// Academic benchmarks
|
|
1910
|
-
mmluPro:
|
|
1911
|
-
gpqa: 0.
|
|
1912
|
-
hle: 0.
|
|
1668
|
+
mmluPro: undefined,
|
|
1669
|
+
gpqa: 0.452,
|
|
1670
|
+
hle: 0.049,
|
|
1913
1671
|
|
|
1914
1672
|
// Capabilities
|
|
1915
1673
|
contextWindow: 8192,
|
|
@@ -1917,21 +1675,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1917
1675
|
supportsVision: false,
|
|
1918
1676
|
|
|
1919
1677
|
// Metadata
|
|
1920
|
-
lastUpdated: "2026-
|
|
1678
|
+
lastUpdated: "2026-06-01",
|
|
1679
|
+
originalModel: "GLM-4.7-Flash (Non-reasoning)",
|
|
1921
1680
|
},
|
|
1922
|
-
"
|
|
1923
|
-
// AA Intelligence Index (composite score)
|
|
1924
|
-
intelligenceIndex: 8.8,
|
|
1925
|
-
normalizedScore: 13,
|
|
1926
|
-
|
|
1681
|
+
"glm-4.6v-non-reasoning": {
|
|
1927
1682
|
// AA specific benchmarks
|
|
1928
|
-
codingIndex:
|
|
1929
|
-
mathIndex:
|
|
1683
|
+
codingIndex: 11.1,
|
|
1684
|
+
mathIndex: 26.3,
|
|
1930
1685
|
|
|
1931
1686
|
// Academic benchmarks
|
|
1932
|
-
mmluPro:
|
|
1933
|
-
gpqa:
|
|
1934
|
-
hle:
|
|
1687
|
+
mmluPro: 0.752,
|
|
1688
|
+
gpqa: 0.566,
|
|
1689
|
+
hle: 0.037,
|
|
1935
1690
|
|
|
1936
1691
|
// Capabilities
|
|
1937
1692
|
contextWindow: 8192,
|
|
@@ -1939,21 +1694,18 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1939
1694
|
supportsVision: false,
|
|
1940
1695
|
|
|
1941
1696
|
// Metadata
|
|
1942
|
-
lastUpdated: "2026-
|
|
1697
|
+
lastUpdated: "2026-06-01",
|
|
1698
|
+
originalModel: "GLM-4.6V (Non-reasoning)",
|
|
1943
1699
|
},
|
|
1944
|
-
"
|
|
1945
|
-
// AA Intelligence Index (composite score)
|
|
1946
|
-
intelligenceIndex: 25.2,
|
|
1947
|
-
normalizedScore: 36,
|
|
1948
|
-
|
|
1700
|
+
"glm-4.5v-reasoning": {
|
|
1949
1701
|
// AA specific benchmarks
|
|
1950
|
-
codingIndex:
|
|
1951
|
-
mathIndex:
|
|
1702
|
+
codingIndex: 10.9,
|
|
1703
|
+
mathIndex: 73,
|
|
1952
1704
|
|
|
1953
1705
|
// Academic benchmarks
|
|
1954
|
-
mmluPro: 0.
|
|
1955
|
-
gpqa: 0.
|
|
1956
|
-
hle: 0.
|
|
1706
|
+
mmluPro: 0.788,
|
|
1707
|
+
gpqa: 0.684,
|
|
1708
|
+
hle: 0.059,
|
|
1957
1709
|
|
|
1958
1710
|
// Capabilities
|
|
1959
1711
|
contextWindow: 8192,
|
|
@@ -1961,9 +1713,7 @@ export const BENCHMARKS_CHUNK_4: Record<string, HardcodedBenchmark> = {
|
|
|
1961
1713
|
supportsVision: false,
|
|
1962
1714
|
|
|
1963
1715
|
// Metadata
|
|
1964
|
-
lastUpdated: "2026-
|
|
1716
|
+
lastUpdated: "2026-06-01",
|
|
1717
|
+
originalModel: "GLM-4.5V (Reasoning)",
|
|
1965
1718
|
},
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
1719
|
};
|