pi-free 2.0.13 → 2.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +4 -1
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +196 -68
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +47 -15
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +213 -0
|
@@ -1,23 +1,20 @@
|
|
|
1
1
|
// Auto-generated benchmark data chunk 1
|
|
2
|
-
// Models:
|
|
2
|
+
// Models: lfm2-2.6b .. minicpm-v-4.6-1.3b (90 entries)
|
|
3
|
+
// Last updated: 2026-06-01
|
|
3
4
|
// DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
|
|
4
5
|
|
|
5
6
|
import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
|
|
6
7
|
|
|
7
8
|
export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
8
|
-
"
|
|
9
|
-
// AA Intelligence Index (composite score)
|
|
10
|
-
intelligenceIndex: 14.9,
|
|
11
|
-
normalizedScore: 21,
|
|
12
|
-
|
|
9
|
+
"lfm2-2.6b": {
|
|
13
10
|
// AA specific benchmarks
|
|
14
|
-
codingIndex:
|
|
15
|
-
mathIndex:
|
|
11
|
+
codingIndex: 1.4,
|
|
12
|
+
mathIndex: 8.3,
|
|
16
13
|
|
|
17
14
|
// Academic benchmarks
|
|
18
|
-
mmluPro: 0.
|
|
19
|
-
gpqa: 0.
|
|
20
|
-
hle: 0.
|
|
15
|
+
mmluPro: 0.298,
|
|
16
|
+
gpqa: 0.306,
|
|
17
|
+
hle: 0.052,
|
|
21
18
|
|
|
22
19
|
// Capabilities
|
|
23
20
|
contextWindow: 8192,
|
|
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
25
22
|
supportsVision: false,
|
|
26
23
|
|
|
27
24
|
// Metadata
|
|
28
|
-
lastUpdated: "2026-
|
|
25
|
+
lastUpdated: "2026-06-01",
|
|
26
|
+
originalModel: "LFM2 2.6B",
|
|
29
27
|
},
|
|
30
|
-
"
|
|
31
|
-
// AA Intelligence Index (composite score)
|
|
32
|
-
intelligenceIndex: 13.6,
|
|
33
|
-
normalizedScore: 19,
|
|
34
|
-
|
|
28
|
+
"lfm2.5-1.2b-thinking": {
|
|
35
29
|
// AA specific benchmarks
|
|
36
|
-
codingIndex:
|
|
37
|
-
mathIndex:
|
|
30
|
+
codingIndex: 1.4,
|
|
31
|
+
mathIndex: undefined,
|
|
38
32
|
|
|
39
33
|
// Academic benchmarks
|
|
40
|
-
mmluPro:
|
|
41
|
-
gpqa: 0.
|
|
42
|
-
hle: 0.
|
|
34
|
+
mmluPro: undefined,
|
|
35
|
+
gpqa: 0.339,
|
|
36
|
+
hle: 0.061,
|
|
43
37
|
|
|
44
38
|
// Capabilities
|
|
45
39
|
contextWindow: 8192,
|
|
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
47
41
|
supportsVision: false,
|
|
48
42
|
|
|
49
43
|
// Metadata
|
|
50
|
-
lastUpdated: "2026-
|
|
44
|
+
lastUpdated: "2026-06-01",
|
|
45
|
+
originalModel: "LFM2.5-1.2B-Thinking",
|
|
51
46
|
},
|
|
52
|
-
"
|
|
53
|
-
// AA Intelligence Index (composite score)
|
|
54
|
-
intelligenceIndex: 49.6,
|
|
55
|
-
normalizedScore: 71,
|
|
56
|
-
|
|
47
|
+
"lfm2.5-1.2b-instruct": {
|
|
57
48
|
// AA specific benchmarks
|
|
58
|
-
codingIndex:
|
|
49
|
+
codingIndex: 0.8,
|
|
59
50
|
mathIndex: undefined,
|
|
60
51
|
|
|
61
52
|
// Academic benchmarks
|
|
62
53
|
mmluPro: undefined,
|
|
63
|
-
gpqa: 0.
|
|
64
|
-
hle: 0.
|
|
54
|
+
gpqa: 0.326,
|
|
55
|
+
hle: 0.068,
|
|
65
56
|
|
|
66
57
|
// Capabilities
|
|
67
58
|
contextWindow: 8192,
|
|
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
69
60
|
supportsVision: false,
|
|
70
61
|
|
|
71
62
|
// Metadata
|
|
72
|
-
lastUpdated: "2026-
|
|
63
|
+
lastUpdated: "2026-06-01",
|
|
64
|
+
originalModel: "LFM2.5-1.2B-Instruct",
|
|
73
65
|
},
|
|
74
|
-
"
|
|
75
|
-
// AA Intelligence Index (composite score)
|
|
76
|
-
intelligenceIndex: 13.4,
|
|
77
|
-
normalizedScore: 19,
|
|
78
|
-
|
|
66
|
+
"solar-pro-3": {
|
|
79
67
|
// AA specific benchmarks
|
|
80
|
-
codingIndex:
|
|
81
|
-
mathIndex:
|
|
68
|
+
codingIndex: 13.3,
|
|
69
|
+
mathIndex: undefined,
|
|
82
70
|
|
|
83
71
|
// Academic benchmarks
|
|
84
|
-
mmluPro:
|
|
85
|
-
gpqa: 0.
|
|
86
|
-
hle: 0.
|
|
72
|
+
mmluPro: undefined,
|
|
73
|
+
gpqa: 0.724,
|
|
74
|
+
hle: 0.101,
|
|
87
75
|
|
|
88
76
|
// Capabilities
|
|
89
77
|
contextWindow: 8192,
|
|
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
91
79
|
supportsVision: false,
|
|
92
80
|
|
|
93
81
|
// Metadata
|
|
94
|
-
lastUpdated: "2026-
|
|
82
|
+
lastUpdated: "2026-06-01",
|
|
83
|
+
originalModel: "Solar Pro 3",
|
|
95
84
|
},
|
|
96
|
-
"
|
|
97
|
-
// AA Intelligence Index (composite score)
|
|
98
|
-
intelligenceIndex: 14.8,
|
|
99
|
-
normalizedScore: 21,
|
|
100
|
-
|
|
85
|
+
"solar-open-100b-reasoning": {
|
|
101
86
|
// AA specific benchmarks
|
|
102
|
-
codingIndex:
|
|
103
|
-
mathIndex:
|
|
87
|
+
codingIndex: 10.5,
|
|
88
|
+
mathIndex: undefined,
|
|
104
89
|
|
|
105
90
|
// Academic benchmarks
|
|
106
|
-
mmluPro:
|
|
107
|
-
gpqa: 0.
|
|
108
|
-
hle: 0.
|
|
91
|
+
mmluPro: undefined,
|
|
92
|
+
gpqa: 0.657,
|
|
93
|
+
hle: 0.092,
|
|
109
94
|
|
|
110
95
|
// Capabilities
|
|
111
96
|
contextWindow: 8192,
|
|
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
113
98
|
supportsVision: false,
|
|
114
99
|
|
|
115
100
|
// Metadata
|
|
116
|
-
lastUpdated: "2026-
|
|
101
|
+
lastUpdated: "2026-06-01",
|
|
102
|
+
originalModel: "Solar Open 100B (Reasoning)",
|
|
117
103
|
},
|
|
118
|
-
"
|
|
119
|
-
// AA Intelligence Index (composite score)
|
|
120
|
-
intelligenceIndex: 10.1,
|
|
121
|
-
normalizedScore: 14,
|
|
122
|
-
|
|
104
|
+
"solar-pro-2-non-reasoning": {
|
|
123
105
|
// AA specific benchmarks
|
|
124
|
-
codingIndex:
|
|
125
|
-
mathIndex:
|
|
106
|
+
codingIndex: 11.3,
|
|
107
|
+
mathIndex: 30,
|
|
126
108
|
|
|
127
109
|
// Academic benchmarks
|
|
128
|
-
mmluPro: 0.
|
|
129
|
-
gpqa: 0.
|
|
130
|
-
hle: 0.
|
|
110
|
+
mmluPro: 0.75,
|
|
111
|
+
gpqa: 0.561,
|
|
112
|
+
hle: 0.038,
|
|
131
113
|
|
|
132
114
|
// Capabilities
|
|
133
115
|
contextWindow: 8192,
|
|
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
135
117
|
supportsVision: false,
|
|
136
118
|
|
|
137
119
|
// Metadata
|
|
138
|
-
lastUpdated: "2026-
|
|
120
|
+
lastUpdated: "2026-06-01",
|
|
121
|
+
originalModel: "Solar Pro 2 (Non-reasoning)",
|
|
139
122
|
},
|
|
140
|
-
"
|
|
141
|
-
// AA Intelligence Index (composite score)
|
|
142
|
-
intelligenceIndex: 18.7,
|
|
143
|
-
normalizedScore: 27,
|
|
144
|
-
|
|
123
|
+
"solar-pro-2-reasoning": {
|
|
145
124
|
// AA specific benchmarks
|
|
146
|
-
codingIndex:
|
|
147
|
-
mathIndex:
|
|
125
|
+
codingIndex: 12.1,
|
|
126
|
+
mathIndex: 61.3,
|
|
148
127
|
|
|
149
128
|
// Academic benchmarks
|
|
150
|
-
mmluPro: 0.
|
|
151
|
-
gpqa: 0.
|
|
152
|
-
hle: 0.
|
|
129
|
+
mmluPro: 0.805,
|
|
130
|
+
gpqa: 0.687,
|
|
131
|
+
hle: 0.07,
|
|
153
132
|
|
|
154
133
|
// Capabilities
|
|
155
134
|
contextWindow: 8192,
|
|
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
157
136
|
supportsVision: false,
|
|
158
137
|
|
|
159
138
|
// Metadata
|
|
160
|
-
lastUpdated: "2026-
|
|
139
|
+
lastUpdated: "2026-06-01",
|
|
140
|
+
originalModel: "Solar Pro 2 (Reasoning)",
|
|
161
141
|
},
|
|
162
|
-
"
|
|
163
|
-
// AA Intelligence Index (composite score)
|
|
164
|
-
intelligenceIndex: 27.7,
|
|
165
|
-
normalizedScore: 40,
|
|
166
|
-
|
|
142
|
+
"minimax-m2.7": {
|
|
167
143
|
// AA specific benchmarks
|
|
168
|
-
codingIndex:
|
|
144
|
+
codingIndex: 41.9,
|
|
169
145
|
mathIndex: undefined,
|
|
170
146
|
|
|
171
147
|
// Academic benchmarks
|
|
172
148
|
mmluPro: undefined,
|
|
173
|
-
gpqa: 0.
|
|
174
|
-
hle: 0.
|
|
149
|
+
gpqa: 0.874,
|
|
150
|
+
hle: 0.281,
|
|
175
151
|
|
|
176
152
|
// Capabilities
|
|
177
153
|
contextWindow: 8192,
|
|
@@ -179,13 +155,48 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
179
155
|
supportsVision: false,
|
|
180
156
|
|
|
181
157
|
// Metadata
|
|
182
|
-
lastUpdated: "2026-
|
|
158
|
+
lastUpdated: "2026-06-01",
|
|
159
|
+
originalModel: "MiniMax-M2.7",
|
|
183
160
|
},
|
|
184
|
-
"
|
|
185
|
-
// AA
|
|
186
|
-
|
|
187
|
-
|
|
161
|
+
"llama-3.1-nemotron-instruct-70b": {
|
|
162
|
+
// AA specific benchmarks
|
|
163
|
+
codingIndex: 10.8,
|
|
164
|
+
mathIndex: 11,
|
|
165
|
+
|
|
166
|
+
// Academic benchmarks
|
|
167
|
+
mmluPro: 0.69,
|
|
168
|
+
gpqa: 0.465,
|
|
169
|
+
hle: 0.046,
|
|
170
|
+
|
|
171
|
+
// Capabilities
|
|
172
|
+
contextWindow: 8192,
|
|
173
|
+
supportsReasoning: false,
|
|
174
|
+
supportsVision: false,
|
|
175
|
+
|
|
176
|
+
// Metadata
|
|
177
|
+
lastUpdated: "2026-06-01",
|
|
178
|
+
originalModel: "Llama 3.1 Nemotron Instruct 70B",
|
|
179
|
+
},
|
|
180
|
+
"nvidia-nemotron-nano-12b-v2-vl-non-reasoning": {
|
|
181
|
+
// AA specific benchmarks
|
|
182
|
+
codingIndex: 5.9,
|
|
183
|
+
mathIndex: 26.7,
|
|
184
|
+
|
|
185
|
+
// Academic benchmarks
|
|
186
|
+
mmluPro: 0.649,
|
|
187
|
+
gpqa: 0.439,
|
|
188
|
+
hle: 0.045,
|
|
189
|
+
|
|
190
|
+
// Capabilities
|
|
191
|
+
contextWindow: 8192,
|
|
192
|
+
supportsReasoning: false,
|
|
193
|
+
supportsVision: false,
|
|
188
194
|
|
|
195
|
+
// Metadata
|
|
196
|
+
lastUpdated: "2026-06-01",
|
|
197
|
+
originalModel: "NVIDIA Nemotron Nano 12B v2 VL (Non-reasoning)",
|
|
198
|
+
},
|
|
199
|
+
"nvidia-nemotron-3-super-120b-a12b-reasoning": {
|
|
189
200
|
// AA specific benchmarks
|
|
190
201
|
codingIndex: 31.2,
|
|
191
202
|
mathIndex: undefined,
|
|
@@ -201,13 +212,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
201
212
|
supportsVision: false,
|
|
202
213
|
|
|
203
214
|
// Metadata
|
|
204
|
-
lastUpdated: "2026-
|
|
215
|
+
lastUpdated: "2026-06-01",
|
|
216
|
+
originalModel: "NVIDIA Nemotron 3 Super 120B A12B (Reasoning)",
|
|
205
217
|
},
|
|
206
|
-
"nvidia-nemotron-nano-
|
|
207
|
-
// AA
|
|
208
|
-
|
|
209
|
-
|
|
218
|
+
"nvidia-nemotron-3-nano-30b-a3b-non-reasoning": {
|
|
219
|
+
// AA specific benchmarks
|
|
220
|
+
codingIndex: 15.8,
|
|
221
|
+
mathIndex: 13.3,
|
|
222
|
+
|
|
223
|
+
// Academic benchmarks
|
|
224
|
+
mmluPro: 0.579,
|
|
225
|
+
gpqa: 0.399,
|
|
226
|
+
hle: 0.046,
|
|
227
|
+
|
|
228
|
+
// Capabilities
|
|
229
|
+
contextWindow: 8192,
|
|
230
|
+
supportsReasoning: false,
|
|
231
|
+
supportsVision: false,
|
|
210
232
|
|
|
233
|
+
// Metadata
|
|
234
|
+
lastUpdated: "2026-06-01",
|
|
235
|
+
originalModel: "NVIDIA Nemotron 3 Nano 30B A3B (Non-reasoning)",
|
|
236
|
+
},
|
|
237
|
+
"nvidia-nemotron-nano-9b-v2-non-reasoning": {
|
|
211
238
|
// AA specific benchmarks
|
|
212
239
|
codingIndex: 7.5,
|
|
213
240
|
mathIndex: 62.3,
|
|
@@ -223,21 +250,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
223
250
|
supportsVision: false,
|
|
224
251
|
|
|
225
252
|
// Metadata
|
|
226
|
-
lastUpdated: "2026-
|
|
253
|
+
lastUpdated: "2026-06-01",
|
|
254
|
+
originalModel: "NVIDIA Nemotron Nano 9B V2 (Non-reasoning)",
|
|
227
255
|
},
|
|
228
|
-
"
|
|
229
|
-
// AA Intelligence Index (composite score)
|
|
230
|
-
intelligenceIndex: 15,
|
|
231
|
-
normalizedScore: 21,
|
|
232
|
-
|
|
256
|
+
"nemotron-3-nano-omni-30b-a3b-reasoning": {
|
|
233
257
|
// AA specific benchmarks
|
|
234
|
-
codingIndex:
|
|
235
|
-
mathIndex:
|
|
258
|
+
codingIndex: 14.8,
|
|
259
|
+
mathIndex: undefined,
|
|
236
260
|
|
|
237
261
|
// Academic benchmarks
|
|
238
|
-
mmluPro:
|
|
239
|
-
gpqa: 0.
|
|
240
|
-
hle: 0.
|
|
262
|
+
mmluPro: undefined,
|
|
263
|
+
gpqa: 0.469,
|
|
264
|
+
hle: 0.053,
|
|
241
265
|
|
|
242
266
|
// Capabilities
|
|
243
267
|
contextWindow: 8192,
|
|
@@ -245,21 +269,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
245
269
|
supportsVision: false,
|
|
246
270
|
|
|
247
271
|
// Metadata
|
|
248
|
-
lastUpdated: "2026-
|
|
272
|
+
lastUpdated: "2026-06-01",
|
|
273
|
+
originalModel: "Nemotron 3 Nano Omni 30B A3B Reasoning",
|
|
249
274
|
},
|
|
250
|
-
"llama-
|
|
251
|
-
// AA Intelligence Index (composite score)
|
|
252
|
-
intelligenceIndex: 14.4,
|
|
253
|
-
normalizedScore: 21,
|
|
254
|
-
|
|
275
|
+
"llama-nemotron-super-49b-v1.5-non-reasoning": {
|
|
255
276
|
// AA specific benchmarks
|
|
256
|
-
codingIndex:
|
|
257
|
-
mathIndex:
|
|
277
|
+
codingIndex: 10.5,
|
|
278
|
+
mathIndex: 8,
|
|
258
279
|
|
|
259
280
|
// Academic benchmarks
|
|
260
|
-
mmluPro: 0.
|
|
261
|
-
gpqa: 0.
|
|
262
|
-
hle: 0.
|
|
281
|
+
mmluPro: 0.692,
|
|
282
|
+
gpqa: 0.481,
|
|
283
|
+
hle: 0.043,
|
|
263
284
|
|
|
264
285
|
// Capabilities
|
|
265
286
|
contextWindow: 8192,
|
|
@@ -267,21 +288,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
267
288
|
supportsVision: false,
|
|
268
289
|
|
|
269
290
|
// Metadata
|
|
270
|
-
lastUpdated: "2026-
|
|
291
|
+
lastUpdated: "2026-06-01",
|
|
292
|
+
originalModel: "Llama Nemotron Super 49B v1.5 (Non-reasoning)",
|
|
271
293
|
},
|
|
272
|
-
"
|
|
273
|
-
// AA Intelligence Index (composite score)
|
|
274
|
-
intelligenceIndex: 14.9,
|
|
275
|
-
normalizedScore: 21,
|
|
276
|
-
|
|
294
|
+
"nemotron-cascade-2-30b-a3b": {
|
|
277
295
|
// AA specific benchmarks
|
|
278
|
-
codingIndex:
|
|
279
|
-
mathIndex:
|
|
296
|
+
codingIndex: 25.8,
|
|
297
|
+
mathIndex: undefined,
|
|
280
298
|
|
|
281
299
|
// Academic benchmarks
|
|
282
|
-
mmluPro:
|
|
283
|
-
gpqa: 0.
|
|
284
|
-
hle: 0.
|
|
300
|
+
mmluPro: undefined,
|
|
301
|
+
gpqa: 0.758,
|
|
302
|
+
hle: 0.114,
|
|
285
303
|
|
|
286
304
|
// Capabilities
|
|
287
305
|
contextWindow: 8192,
|
|
@@ -289,21 +307,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
289
307
|
supportsVision: false,
|
|
290
308
|
|
|
291
309
|
// Metadata
|
|
292
|
-
lastUpdated: "2026-
|
|
310
|
+
lastUpdated: "2026-06-01",
|
|
311
|
+
originalModel: "Nemotron Cascade 2 30B A3B",
|
|
293
312
|
},
|
|
294
|
-
"nvidia-nemotron-
|
|
295
|
-
// AA Intelligence Index (composite score)
|
|
296
|
-
intelligenceIndex: 13.2,
|
|
297
|
-
normalizedScore: 19,
|
|
298
|
-
|
|
313
|
+
"nvidia-nemotron-nano-12b-v2-vl-reasoning": {
|
|
299
314
|
// AA specific benchmarks
|
|
300
|
-
codingIndex:
|
|
301
|
-
mathIndex:
|
|
315
|
+
codingIndex: 11.7,
|
|
316
|
+
mathIndex: 75,
|
|
302
317
|
|
|
303
318
|
// Academic benchmarks
|
|
304
|
-
mmluPro: 0.
|
|
305
|
-
gpqa: 0.
|
|
306
|
-
hle: 0.
|
|
319
|
+
mmluPro: 0.759,
|
|
320
|
+
gpqa: 0.572,
|
|
321
|
+
hle: 0.053,
|
|
307
322
|
|
|
308
323
|
// Capabilities
|
|
309
324
|
contextWindow: 8192,
|
|
@@ -311,21 +326,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
311
326
|
supportsVision: false,
|
|
312
327
|
|
|
313
328
|
// Metadata
|
|
314
|
-
lastUpdated: "2026-
|
|
329
|
+
lastUpdated: "2026-06-01",
|
|
330
|
+
originalModel: "NVIDIA Nemotron Nano 12B v2 VL (Reasoning)",
|
|
315
331
|
},
|
|
316
|
-
"
|
|
317
|
-
// AA Intelligence Index (composite score)
|
|
318
|
-
intelligenceIndex: 18.5,
|
|
319
|
-
normalizedScore: 26,
|
|
320
|
-
|
|
332
|
+
"nvidia-nemotron-3-nano-4b": {
|
|
321
333
|
// AA specific benchmarks
|
|
322
|
-
codingIndex:
|
|
323
|
-
mathIndex:
|
|
334
|
+
codingIndex: 10,
|
|
335
|
+
mathIndex: undefined,
|
|
324
336
|
|
|
325
337
|
// Academic benchmarks
|
|
326
|
-
mmluPro:
|
|
327
|
-
gpqa: 0.
|
|
328
|
-
hle: 0.
|
|
338
|
+
mmluPro: undefined,
|
|
339
|
+
gpqa: 0.513,
|
|
340
|
+
hle: 0.048,
|
|
329
341
|
|
|
330
342
|
// Capabilities
|
|
331
343
|
contextWindow: 8192,
|
|
@@ -333,13 +345,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
333
345
|
supportsVision: false,
|
|
334
346
|
|
|
335
347
|
// Metadata
|
|
336
|
-
lastUpdated: "2026-
|
|
348
|
+
lastUpdated: "2026-06-01",
|
|
349
|
+
originalModel: "NVIDIA Nemotron 3 Nano 4B",
|
|
337
350
|
},
|
|
338
351
|
"nvidia-nemotron-3-nano-30b-a3b-reasoning": {
|
|
339
|
-
// AA Intelligence Index (composite score)
|
|
340
|
-
intelligenceIndex: 24.3,
|
|
341
|
-
normalizedScore: 35,
|
|
342
|
-
|
|
343
352
|
// AA specific benchmarks
|
|
344
353
|
codingIndex: 19,
|
|
345
354
|
mathIndex: 91,
|
|
@@ -355,21 +364,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
355
364
|
supportsVision: false,
|
|
356
365
|
|
|
357
366
|
// Metadata
|
|
358
|
-
lastUpdated: "2026-
|
|
367
|
+
lastUpdated: "2026-06-01",
|
|
368
|
+
originalModel: "NVIDIA Nemotron 3 Nano 30B A3B (Reasoning)",
|
|
359
369
|
},
|
|
360
|
-
"
|
|
361
|
-
// AA Intelligence Index (composite score)
|
|
362
|
-
intelligenceIndex: 14.3,
|
|
363
|
-
normalizedScore: 20,
|
|
364
|
-
|
|
370
|
+
"nvidia-nemotron-nano-9b-v2-reasoning": {
|
|
365
371
|
// AA specific benchmarks
|
|
366
|
-
codingIndex:
|
|
367
|
-
mathIndex:
|
|
372
|
+
codingIndex: 8.3,
|
|
373
|
+
mathIndex: 69.7,
|
|
368
374
|
|
|
369
375
|
// Academic benchmarks
|
|
370
|
-
mmluPro: 0.
|
|
371
|
-
gpqa: 0.
|
|
372
|
-
hle: 0.
|
|
376
|
+
mmluPro: 0.742,
|
|
377
|
+
gpqa: 0.57,
|
|
378
|
+
hle: 0.046,
|
|
373
379
|
|
|
374
380
|
// Capabilities
|
|
375
381
|
contextWindow: 8192,
|
|
@@ -377,21 +383,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
377
383
|
supportsVision: false,
|
|
378
384
|
|
|
379
385
|
// Metadata
|
|
380
|
-
lastUpdated: "2026-
|
|
386
|
+
lastUpdated: "2026-06-01",
|
|
387
|
+
originalModel: "NVIDIA Nemotron Nano 9B V2 (Reasoning)",
|
|
381
388
|
},
|
|
382
|
-
"
|
|
383
|
-
// AA Intelligence Index (composite score)
|
|
384
|
-
intelligenceIndex: 14.7,
|
|
385
|
-
normalizedScore: 21,
|
|
386
|
-
|
|
389
|
+
"llama-nemotron-super-49b-v1.5-reasoning": {
|
|
387
390
|
// AA specific benchmarks
|
|
388
|
-
codingIndex:
|
|
389
|
-
mathIndex:
|
|
391
|
+
codingIndex: 15.1,
|
|
392
|
+
mathIndex: 76.7,
|
|
390
393
|
|
|
391
394
|
// Academic benchmarks
|
|
392
|
-
mmluPro:
|
|
393
|
-
gpqa: 0.
|
|
394
|
-
hle: 0.
|
|
395
|
+
mmluPro: 0.814,
|
|
396
|
+
gpqa: 0.748,
|
|
397
|
+
hle: 0.068,
|
|
395
398
|
|
|
396
399
|
// Capabilities
|
|
397
400
|
contextWindow: 8192,
|
|
@@ -399,21 +402,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
399
402
|
supportsVision: false,
|
|
400
403
|
|
|
401
404
|
// Metadata
|
|
402
|
-
lastUpdated: "2026-
|
|
405
|
+
lastUpdated: "2026-06-01",
|
|
406
|
+
originalModel: "Llama Nemotron Super 49B v1.5 (Reasoning)",
|
|
403
407
|
},
|
|
404
|
-
"llama-nemotron-
|
|
405
|
-
// AA Intelligence Index (composite score)
|
|
406
|
-
intelligenceIndex: 14.6,
|
|
407
|
-
normalizedScore: 21,
|
|
408
|
-
|
|
408
|
+
"llama-3.1-nemotron-ultra-253b-v1-reasoning": {
|
|
409
409
|
// AA specific benchmarks
|
|
410
|
-
codingIndex:
|
|
411
|
-
mathIndex:
|
|
410
|
+
codingIndex: 13.1,
|
|
411
|
+
mathIndex: 63.7,
|
|
412
412
|
|
|
413
413
|
// Academic benchmarks
|
|
414
|
-
mmluPro: 0.
|
|
415
|
-
gpqa: 0.
|
|
416
|
-
hle: 0.
|
|
414
|
+
mmluPro: 0.825,
|
|
415
|
+
gpqa: 0.728,
|
|
416
|
+
hle: 0.081,
|
|
417
417
|
|
|
418
418
|
// Capabilities
|
|
419
419
|
contextWindow: 8192,
|
|
@@ -421,21 +421,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
421
421
|
supportsVision: false,
|
|
422
422
|
|
|
423
423
|
// Metadata
|
|
424
|
-
lastUpdated: "2026-
|
|
424
|
+
lastUpdated: "2026-06-01",
|
|
425
|
+
originalModel: "Llama 3.1 Nemotron Ultra 253B v1 (Reasoning)",
|
|
425
426
|
},
|
|
426
|
-
"kimi-k2.
|
|
427
|
-
// AA Intelligence Index (composite score)
|
|
428
|
-
intelligenceIndex: 37.3,
|
|
429
|
-
normalizedScore: 53,
|
|
430
|
-
|
|
427
|
+
"kimi-k2.6-non-reasoning": {
|
|
431
428
|
// AA specific benchmarks
|
|
432
|
-
codingIndex:
|
|
429
|
+
codingIndex: 38.4,
|
|
433
430
|
mathIndex: undefined,
|
|
434
431
|
|
|
435
432
|
// Academic benchmarks
|
|
436
433
|
mmluPro: undefined,
|
|
437
|
-
gpqa: 0.
|
|
438
|
-
hle: 0.
|
|
434
|
+
gpqa: 0.788,
|
|
435
|
+
hle: 0.182,
|
|
439
436
|
|
|
440
437
|
// Capabilities
|
|
441
438
|
contextWindow: 8192,
|
|
@@ -443,21 +440,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
443
440
|
supportsVision: false,
|
|
444
441
|
|
|
445
442
|
// Metadata
|
|
446
|
-
lastUpdated: "2026-
|
|
443
|
+
lastUpdated: "2026-06-01",
|
|
444
|
+
originalModel: "Kimi K2.6 (Non-reasoning)",
|
|
447
445
|
},
|
|
448
|
-
"kimi-k2.
|
|
449
|
-
// AA Intelligence Index (composite score)
|
|
450
|
-
intelligenceIndex: 46.8,
|
|
451
|
-
normalizedScore: 67,
|
|
452
|
-
|
|
446
|
+
"kimi-k2.6": {
|
|
453
447
|
// AA specific benchmarks
|
|
454
|
-
codingIndex:
|
|
448
|
+
codingIndex: 47.1,
|
|
455
449
|
mathIndex: undefined,
|
|
456
450
|
|
|
457
451
|
// Academic benchmarks
|
|
458
452
|
mmluPro: undefined,
|
|
459
|
-
gpqa: 0.
|
|
460
|
-
hle: 0.
|
|
453
|
+
gpqa: 0.911,
|
|
454
|
+
hle: 0.359,
|
|
461
455
|
|
|
462
456
|
// Capabilities
|
|
463
457
|
contextWindow: 8192,
|
|
@@ -465,13 +459,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
465
459
|
supportsVision: false,
|
|
466
460
|
|
|
467
461
|
// Metadata
|
|
468
|
-
lastUpdated: "2026-
|
|
462
|
+
lastUpdated: "2026-06-01",
|
|
463
|
+
originalModel: "Kimi K2.6",
|
|
469
464
|
},
|
|
470
465
|
"kimi-linear-48b-a3b-instruct": {
|
|
471
|
-
// AA Intelligence Index (composite score)
|
|
472
|
-
intelligenceIndex: 14.4,
|
|
473
|
-
normalizedScore: 21,
|
|
474
|
-
|
|
475
466
|
// AA specific benchmarks
|
|
476
467
|
codingIndex: 14.2,
|
|
477
468
|
mathIndex: 36.3,
|
|
@@ -487,21 +478,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
487
478
|
supportsVision: false,
|
|
488
479
|
|
|
489
480
|
// Metadata
|
|
490
|
-
lastUpdated: "2026-
|
|
481
|
+
lastUpdated: "2026-06-01",
|
|
482
|
+
originalModel: "Kimi Linear 48B A3B Instruct",
|
|
491
483
|
},
|
|
492
|
-
"
|
|
493
|
-
// AA
|
|
494
|
-
|
|
495
|
-
|
|
484
|
+
"llama-65b": {
|
|
485
|
+
// AA specific benchmarks
|
|
486
|
+
codingIndex: undefined,
|
|
487
|
+
mathIndex: undefined,
|
|
488
|
+
|
|
489
|
+
// Academic benchmarks
|
|
490
|
+
mmluPro: undefined,
|
|
491
|
+
gpqa: undefined,
|
|
492
|
+
hle: undefined,
|
|
493
|
+
|
|
494
|
+
// Capabilities
|
|
495
|
+
contextWindow: 8192,
|
|
496
|
+
supportsReasoning: false,
|
|
497
|
+
supportsVision: false,
|
|
496
498
|
|
|
499
|
+
// Metadata
|
|
500
|
+
lastUpdated: "2026-06-01",
|
|
501
|
+
originalModel: "Llama 65B",
|
|
502
|
+
},
|
|
503
|
+
"step-3.5-flash-2603": {
|
|
497
504
|
// AA specific benchmarks
|
|
498
|
-
codingIndex:
|
|
505
|
+
codingIndex: 34.6,
|
|
499
506
|
mathIndex: undefined,
|
|
500
507
|
|
|
501
508
|
// Academic benchmarks
|
|
502
509
|
mmluPro: undefined,
|
|
503
|
-
gpqa: 0.
|
|
504
|
-
hle: 0.
|
|
510
|
+
gpqa: 0.826,
|
|
511
|
+
hle: 0.226,
|
|
505
512
|
|
|
506
513
|
// Capabilities
|
|
507
514
|
contextWindow: 8192,
|
|
@@ -509,13 +516,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
509
516
|
supportsVision: false,
|
|
510
517
|
|
|
511
518
|
// Metadata
|
|
512
|
-
lastUpdated: "2026-
|
|
519
|
+
lastUpdated: "2026-06-01",
|
|
520
|
+
originalModel: "Step 3.5 Flash 2603",
|
|
513
521
|
},
|
|
514
522
|
"step3-vl-10b": {
|
|
515
|
-
// AA Intelligence Index (composite score)
|
|
516
|
-
intelligenceIndex: 15.4,
|
|
517
|
-
normalizedScore: 22,
|
|
518
|
-
|
|
519
523
|
// AA specific benchmarks
|
|
520
524
|
codingIndex: 13.9,
|
|
521
525
|
mathIndex: undefined,
|
|
@@ -531,21 +535,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
531
535
|
supportsVision: false,
|
|
532
536
|
|
|
533
537
|
// Metadata
|
|
534
|
-
lastUpdated: "2026-
|
|
538
|
+
lastUpdated: "2026-06-01",
|
|
539
|
+
originalModel: "Step3 VL 10B",
|
|
535
540
|
},
|
|
536
|
-
"
|
|
537
|
-
// AA Intelligence Index (composite score)
|
|
538
|
-
intelligenceIndex: 13.9,
|
|
539
|
-
normalizedScore: 20,
|
|
540
|
-
|
|
541
|
+
"molmo-7b-d": {
|
|
541
542
|
// AA specific benchmarks
|
|
542
|
-
codingIndex:
|
|
543
|
-
mathIndex:
|
|
543
|
+
codingIndex: 1.2,
|
|
544
|
+
mathIndex: 0,
|
|
544
545
|
|
|
545
546
|
// Academic benchmarks
|
|
546
|
-
mmluPro: 0.
|
|
547
|
-
gpqa: 0.
|
|
548
|
-
hle: 0.
|
|
547
|
+
mmluPro: 0.371,
|
|
548
|
+
gpqa: 0.24,
|
|
549
|
+
hle: 0.051,
|
|
549
550
|
|
|
550
551
|
// Capabilities
|
|
551
552
|
contextWindow: 8192,
|
|
@@ -553,16 +554,32 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
553
554
|
supportsVision: false,
|
|
554
555
|
|
|
555
556
|
// Metadata
|
|
556
|
-
lastUpdated: "2026-
|
|
557
|
+
lastUpdated: "2026-06-01",
|
|
558
|
+
originalModel: "Molmo 7B-D",
|
|
557
559
|
},
|
|
558
|
-
"
|
|
559
|
-
// AA Intelligence Index (composite score)
|
|
560
|
-
intelligenceIndex: 8.2,
|
|
561
|
-
normalizedScore: 12,
|
|
562
|
-
|
|
560
|
+
"molmo2-8b": {
|
|
563
561
|
// AA specific benchmarks
|
|
564
|
-
codingIndex:
|
|
565
|
-
mathIndex:
|
|
562
|
+
codingIndex: 4.4,
|
|
563
|
+
mathIndex: undefined,
|
|
564
|
+
|
|
565
|
+
// Academic benchmarks
|
|
566
|
+
mmluPro: undefined,
|
|
567
|
+
gpqa: 0.425,
|
|
568
|
+
hle: 0.044,
|
|
569
|
+
|
|
570
|
+
// Capabilities
|
|
571
|
+
contextWindow: 8192,
|
|
572
|
+
supportsReasoning: false,
|
|
573
|
+
supportsVision: false,
|
|
574
|
+
|
|
575
|
+
// Metadata
|
|
576
|
+
lastUpdated: "2026-06-01",
|
|
577
|
+
originalModel: "Molmo2-8B",
|
|
578
|
+
},
|
|
579
|
+
"olmo-3-7b-instruct": {
|
|
580
|
+
// AA specific benchmarks
|
|
581
|
+
codingIndex: 3.4,
|
|
582
|
+
mathIndex: 41.3,
|
|
566
583
|
|
|
567
584
|
// Academic benchmarks
|
|
568
585
|
mmluPro: 0.522,
|
|
@@ -575,13 +592,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
575
592
|
supportsVision: false,
|
|
576
593
|
|
|
577
594
|
// Metadata
|
|
578
|
-
lastUpdated: "2026-
|
|
595
|
+
lastUpdated: "2026-06-01",
|
|
596
|
+
originalModel: "Olmo 3 7B Instruct",
|
|
579
597
|
},
|
|
580
598
|
"olmo-3-7b-think": {
|
|
581
|
-
// AA Intelligence Index (composite score)
|
|
582
|
-
intelligenceIndex: 9.4,
|
|
583
|
-
normalizedScore: 13,
|
|
584
|
-
|
|
585
599
|
// AA specific benchmarks
|
|
586
600
|
codingIndex: 7.6,
|
|
587
601
|
mathIndex: 70.7,
|
|
@@ -597,21 +611,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
597
611
|
supportsVision: false,
|
|
598
612
|
|
|
599
613
|
// Metadata
|
|
600
|
-
lastUpdated: "2026-
|
|
614
|
+
lastUpdated: "2026-06-01",
|
|
615
|
+
originalModel: "Olmo 3 7B Think",
|
|
601
616
|
},
|
|
602
|
-
"
|
|
603
|
-
// AA Intelligence Index (composite score)
|
|
604
|
-
intelligenceIndex: 7.3,
|
|
605
|
-
normalizedScore: 10,
|
|
606
|
-
|
|
617
|
+
"olmo-3.1-32b-instruct": {
|
|
607
618
|
// AA specific benchmarks
|
|
608
|
-
codingIndex:
|
|
619
|
+
codingIndex: 5.6,
|
|
609
620
|
mathIndex: undefined,
|
|
610
621
|
|
|
611
622
|
// Academic benchmarks
|
|
612
623
|
mmluPro: undefined,
|
|
613
|
-
gpqa: 0.
|
|
614
|
-
hle: 0.
|
|
624
|
+
gpqa: 0.539,
|
|
625
|
+
hle: 0.049,
|
|
615
626
|
|
|
616
627
|
// Capabilities
|
|
617
628
|
contextWindow: 8192,
|
|
@@ -619,21 +630,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
619
630
|
supportsVision: false,
|
|
620
631
|
|
|
621
632
|
// Metadata
|
|
622
|
-
lastUpdated: "2026-
|
|
633
|
+
lastUpdated: "2026-06-01",
|
|
634
|
+
originalModel: "Olmo 3.1 32B Instruct",
|
|
623
635
|
},
|
|
624
|
-
"
|
|
625
|
-
// AA Intelligence Index (composite score)
|
|
626
|
-
intelligenceIndex: 9.2,
|
|
627
|
-
normalizedScore: 13,
|
|
628
|
-
|
|
636
|
+
"olmo-3.1-32b-think": {
|
|
629
637
|
// AA specific benchmarks
|
|
630
|
-
codingIndex:
|
|
631
|
-
mathIndex:
|
|
638
|
+
codingIndex: 9.8,
|
|
639
|
+
mathIndex: 77.3,
|
|
632
640
|
|
|
633
641
|
// Academic benchmarks
|
|
634
|
-
mmluPro: 0.
|
|
635
|
-
gpqa: 0.
|
|
636
|
-
hle: 0.
|
|
642
|
+
mmluPro: 0.763,
|
|
643
|
+
gpqa: 0.591,
|
|
644
|
+
hle: 0.06,
|
|
637
645
|
|
|
638
646
|
// Capabilities
|
|
639
647
|
contextWindow: 8192,
|
|
@@ -641,21 +649,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
641
649
|
supportsVision: false,
|
|
642
650
|
|
|
643
651
|
// Metadata
|
|
644
|
-
lastUpdated: "2026-
|
|
652
|
+
lastUpdated: "2026-06-01",
|
|
653
|
+
originalModel: "Olmo 3.1 32B Think",
|
|
645
654
|
},
|
|
646
|
-
"
|
|
647
|
-
// AA Intelligence Index (composite score)
|
|
648
|
-
intelligenceIndex: 12.2,
|
|
649
|
-
normalizedScore: 17,
|
|
650
|
-
|
|
655
|
+
"granite-4.1-8b": {
|
|
651
656
|
// AA specific benchmarks
|
|
652
|
-
codingIndex:
|
|
657
|
+
codingIndex: 7.3,
|
|
653
658
|
mathIndex: undefined,
|
|
654
659
|
|
|
655
660
|
// Academic benchmarks
|
|
656
661
|
mmluPro: undefined,
|
|
657
|
-
gpqa: 0.
|
|
658
|
-
hle: 0.
|
|
662
|
+
gpqa: 0.433,
|
|
663
|
+
hle: 0.038,
|
|
659
664
|
|
|
660
665
|
// Capabilities
|
|
661
666
|
contextWindow: 8192,
|
|
@@ -663,21 +668,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
663
668
|
supportsVision: false,
|
|
664
669
|
|
|
665
670
|
// Metadata
|
|
666
|
-
lastUpdated: "2026-
|
|
671
|
+
lastUpdated: "2026-06-01",
|
|
672
|
+
originalModel: "Granite 4.1 8B",
|
|
667
673
|
},
|
|
668
|
-
"granite-4-
|
|
669
|
-
// AA Intelligence Index (composite score)
|
|
670
|
-
intelligenceIndex: 7.3,
|
|
671
|
-
normalizedScore: 10,
|
|
672
|
-
|
|
674
|
+
"granite-4.0-350m": {
|
|
673
675
|
// AA specific benchmarks
|
|
674
|
-
codingIndex:
|
|
675
|
-
mathIndex:
|
|
676
|
+
codingIndex: 0.3,
|
|
677
|
+
mathIndex: 0,
|
|
676
678
|
|
|
677
679
|
// Academic benchmarks
|
|
678
|
-
mmluPro: 0.
|
|
679
|
-
gpqa: 0.
|
|
680
|
-
hle: 0.
|
|
680
|
+
mmluPro: 0.124,
|
|
681
|
+
gpqa: 0.261,
|
|
682
|
+
hle: 0.057,
|
|
681
683
|
|
|
682
684
|
// Capabilities
|
|
683
685
|
contextWindow: 8192,
|
|
@@ -685,20 +687,36 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
685
687
|
supportsVision: false,
|
|
686
688
|
|
|
687
689
|
// Metadata
|
|
688
|
-
lastUpdated: "2026-
|
|
690
|
+
lastUpdated: "2026-06-01",
|
|
691
|
+
originalModel: "Granite 4.0 350M",
|
|
689
692
|
},
|
|
690
|
-
"granite-4-
|
|
691
|
-
// AA
|
|
692
|
-
|
|
693
|
-
|
|
693
|
+
"granite-4.1-3b": {
|
|
694
|
+
// AA specific benchmarks
|
|
695
|
+
codingIndex: 5.5,
|
|
696
|
+
mathIndex: undefined,
|
|
697
|
+
|
|
698
|
+
// Academic benchmarks
|
|
699
|
+
mmluPro: undefined,
|
|
700
|
+
gpqa: 0.314,
|
|
701
|
+
hle: 0.034,
|
|
694
702
|
|
|
703
|
+
// Capabilities
|
|
704
|
+
contextWindow: 8192,
|
|
705
|
+
supportsReasoning: false,
|
|
706
|
+
supportsVision: false,
|
|
707
|
+
|
|
708
|
+
// Metadata
|
|
709
|
+
lastUpdated: "2026-06-01",
|
|
710
|
+
originalModel: "Granite 4.1 3B",
|
|
711
|
+
},
|
|
712
|
+
"granite-4.0-1b": {
|
|
695
713
|
// AA specific benchmarks
|
|
696
|
-
codingIndex:
|
|
697
|
-
mathIndex: 6,
|
|
714
|
+
codingIndex: 2.9,
|
|
715
|
+
mathIndex: 6.3,
|
|
698
716
|
|
|
699
717
|
// Academic benchmarks
|
|
700
|
-
mmluPro: 0.
|
|
701
|
-
gpqa: 0.
|
|
718
|
+
mmluPro: 0.325,
|
|
719
|
+
gpqa: 0.281,
|
|
702
720
|
hle: 0.051,
|
|
703
721
|
|
|
704
722
|
// Capabilities
|
|
@@ -707,13 +725,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
707
725
|
supportsVision: false,
|
|
708
726
|
|
|
709
727
|
// Metadata
|
|
710
|
-
lastUpdated: "2026-
|
|
728
|
+
lastUpdated: "2026-06-01",
|
|
729
|
+
originalModel: "Granite 4.0 1B",
|
|
711
730
|
},
|
|
712
|
-
"granite-4-h-350m": {
|
|
713
|
-
// AA Intelligence Index (composite score)
|
|
714
|
-
intelligenceIndex: 5.4,
|
|
715
|
-
normalizedScore: 8,
|
|
716
|
-
|
|
731
|
+
"granite-4.0-h-350m": {
|
|
717
732
|
// AA specific benchmarks
|
|
718
733
|
codingIndex: 0.6,
|
|
719
734
|
mathIndex: 1.3,
|
|
@@ -729,21 +744,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
729
744
|
supportsVision: false,
|
|
730
745
|
|
|
731
746
|
// Metadata
|
|
732
|
-
lastUpdated: "2026-
|
|
747
|
+
lastUpdated: "2026-06-01",
|
|
748
|
+
originalModel: "Granite 4.0 H 350M",
|
|
733
749
|
},
|
|
734
|
-
"
|
|
735
|
-
// AA Intelligence Index (composite score)
|
|
736
|
-
intelligenceIndex: 7.4,
|
|
737
|
-
normalizedScore: 11,
|
|
738
|
-
|
|
750
|
+
"granite-4.1-30b": {
|
|
739
751
|
// AA specific benchmarks
|
|
740
|
-
codingIndex:
|
|
752
|
+
codingIndex: 10.1,
|
|
741
753
|
mathIndex: undefined,
|
|
742
754
|
|
|
743
755
|
// Academic benchmarks
|
|
744
756
|
mmluPro: undefined,
|
|
745
|
-
gpqa:
|
|
746
|
-
hle:
|
|
757
|
+
gpqa: 0.481,
|
|
758
|
+
hle: 0.042,
|
|
747
759
|
|
|
748
760
|
// Capabilities
|
|
749
761
|
contextWindow: 8192,
|
|
@@ -751,21 +763,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
751
763
|
supportsVision: false,
|
|
752
764
|
|
|
753
765
|
// Metadata
|
|
754
|
-
lastUpdated: "2026-
|
|
766
|
+
lastUpdated: "2026-06-01",
|
|
767
|
+
originalModel: "Granite 4.1 30B",
|
|
755
768
|
},
|
|
756
|
-
"granite-4-h-
|
|
757
|
-
// AA Intelligence Index (composite score)
|
|
758
|
-
intelligenceIndex: 10.8,
|
|
759
|
-
normalizedScore: 15,
|
|
760
|
-
|
|
769
|
+
"granite-4.0-h-1b": {
|
|
761
770
|
// AA specific benchmarks
|
|
762
|
-
codingIndex:
|
|
763
|
-
mathIndex:
|
|
771
|
+
codingIndex: 2.7,
|
|
772
|
+
mathIndex: 6.3,
|
|
764
773
|
|
|
765
774
|
// Academic benchmarks
|
|
766
|
-
mmluPro: 0.
|
|
767
|
-
gpqa: 0.
|
|
768
|
-
hle: 0.
|
|
775
|
+
mmluPro: 0.277,
|
|
776
|
+
gpqa: 0.263,
|
|
777
|
+
hle: 0.05,
|
|
769
778
|
|
|
770
779
|
// Capabilities
|
|
771
780
|
contextWindow: 8192,
|
|
@@ -773,21 +782,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
773
782
|
supportsVision: false,
|
|
774
783
|
|
|
775
784
|
// Metadata
|
|
776
|
-
lastUpdated: "2026-
|
|
785
|
+
lastUpdated: "2026-06-01",
|
|
786
|
+
originalModel: "Granite 4.0 H 1B",
|
|
777
787
|
},
|
|
778
|
-
"granite-4-h-
|
|
779
|
-
// AA Intelligence Index (composite score)
|
|
780
|
-
intelligenceIndex: 8,
|
|
781
|
-
normalizedScore: 11,
|
|
782
|
-
|
|
788
|
+
"granite-4.0-h-small": {
|
|
783
789
|
// AA specific benchmarks
|
|
784
|
-
codingIndex:
|
|
785
|
-
mathIndex:
|
|
790
|
+
codingIndex: 8.5,
|
|
791
|
+
mathIndex: 13.7,
|
|
786
792
|
|
|
787
793
|
// Academic benchmarks
|
|
788
|
-
mmluPro: 0.
|
|
789
|
-
gpqa: 0.
|
|
790
|
-
hle: 0.
|
|
794
|
+
mmluPro: 0.624,
|
|
795
|
+
gpqa: 0.416,
|
|
796
|
+
hle: 0.037,
|
|
791
797
|
|
|
792
798
|
// Capabilities
|
|
793
799
|
contextWindow: 8192,
|
|
@@ -795,21 +801,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
795
801
|
supportsVision: false,
|
|
796
802
|
|
|
797
803
|
// Metadata
|
|
798
|
-
lastUpdated: "2026-
|
|
804
|
+
lastUpdated: "2026-06-01",
|
|
805
|
+
originalModel: "Granite 4.0 H Small",
|
|
799
806
|
},
|
|
800
|
-
"granite-4-
|
|
801
|
-
// AA Intelligence Index (composite score)
|
|
802
|
-
intelligenceIndex: 6.1,
|
|
803
|
-
normalizedScore: 9,
|
|
804
|
-
|
|
807
|
+
"granite-4.0-micro": {
|
|
805
808
|
// AA specific benchmarks
|
|
806
|
-
codingIndex:
|
|
807
|
-
mathIndex:
|
|
809
|
+
codingIndex: 5,
|
|
810
|
+
mathIndex: 6,
|
|
808
811
|
|
|
809
812
|
// Academic benchmarks
|
|
810
|
-
mmluPro: 0.
|
|
811
|
-
gpqa: 0.
|
|
812
|
-
hle: 0.
|
|
813
|
+
mmluPro: 0.447,
|
|
814
|
+
gpqa: 0.336,
|
|
815
|
+
hle: 0.051,
|
|
813
816
|
|
|
814
817
|
// Capabilities
|
|
815
818
|
contextWindow: 8192,
|
|
@@ -817,13 +820,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
817
820
|
supportsVision: false,
|
|
818
821
|
|
|
819
822
|
// Metadata
|
|
820
|
-
lastUpdated: "2026-
|
|
823
|
+
lastUpdated: "2026-06-01",
|
|
824
|
+
originalModel: "Granite 4.0 Micro",
|
|
821
825
|
},
|
|
822
826
|
"mercury-2": {
|
|
823
|
-
// AA Intelligence Index (composite score)
|
|
824
|
-
intelligenceIndex: 32.8,
|
|
825
|
-
normalizedScore: 47,
|
|
826
|
-
|
|
827
827
|
// AA specific benchmarks
|
|
828
828
|
codingIndex: 30.6,
|
|
829
829
|
mathIndex: undefined,
|
|
@@ -839,13 +839,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
839
839
|
supportsVision: false,
|
|
840
840
|
|
|
841
841
|
// Metadata
|
|
842
|
-
lastUpdated: "2026-
|
|
842
|
+
lastUpdated: "2026-06-01",
|
|
843
|
+
originalModel: "Mercury 2",
|
|
843
844
|
},
|
|
844
845
|
"reka-flash-3": {
|
|
845
|
-
// AA Intelligence Index (composite score)
|
|
846
|
-
intelligenceIndex: 9.5,
|
|
847
|
-
normalizedScore: 14,
|
|
848
|
-
|
|
849
846
|
// AA specific benchmarks
|
|
850
847
|
codingIndex: 8.9,
|
|
851
848
|
mathIndex: 33.7,
|
|
@@ -861,21 +858,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
861
858
|
supportsVision: false,
|
|
862
859
|
|
|
863
860
|
// Metadata
|
|
864
|
-
lastUpdated: "2026-
|
|
861
|
+
lastUpdated: "2026-06-01",
|
|
862
|
+
originalModel: "Reka Flash 3",
|
|
865
863
|
},
|
|
866
|
-
"
|
|
867
|
-
// AA Intelligence Index (composite score)
|
|
868
|
-
intelligenceIndex: 12.6,
|
|
869
|
-
normalizedScore: 18,
|
|
870
|
-
|
|
864
|
+
"deephermes-3---llama-3.1-8b-preview-non-reasoning": {
|
|
871
865
|
// AA specific benchmarks
|
|
872
|
-
codingIndex:
|
|
873
|
-
mathIndex:
|
|
866
|
+
codingIndex: undefined,
|
|
867
|
+
mathIndex: undefined,
|
|
874
868
|
|
|
875
869
|
// Academic benchmarks
|
|
876
|
-
mmluPro: 0.
|
|
877
|
-
gpqa: 0.
|
|
878
|
-
hle: 0.
|
|
870
|
+
mmluPro: 0.365,
|
|
871
|
+
gpqa: 0.27,
|
|
872
|
+
hle: 0.043,
|
|
879
873
|
|
|
880
874
|
// Capabilities
|
|
881
875
|
contextWindow: 8192,
|
|
@@ -883,13 +877,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
883
877
|
supportsVision: false,
|
|
884
878
|
|
|
885
879
|
// Metadata
|
|
886
|
-
lastUpdated: "2026-
|
|
880
|
+
lastUpdated: "2026-06-01",
|
|
881
|
+
originalModel: "DeepHermes 3 - Llama-3.1 8B Preview (Non-reasoning)",
|
|
887
882
|
},
|
|
888
883
|
"hermes-4---llama-3.1-405b-reasoning": {
|
|
889
|
-
// AA Intelligence Index (composite score)
|
|
890
|
-
intelligenceIndex: 18.6,
|
|
891
|
-
normalizedScore: 27,
|
|
892
|
-
|
|
893
884
|
// AA specific benchmarks
|
|
894
885
|
codingIndex: 16,
|
|
895
886
|
mathIndex: 69.7,
|
|
@@ -905,13 +896,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
905
896
|
supportsVision: false,
|
|
906
897
|
|
|
907
898
|
// Metadata
|
|
908
|
-
lastUpdated: "2026-
|
|
899
|
+
lastUpdated: "2026-06-01",
|
|
900
|
+
originalModel: "Hermes 4 - Llama-3.1 405B (Reasoning)",
|
|
909
901
|
},
|
|
910
|
-
"
|
|
911
|
-
// AA
|
|
912
|
-
|
|
913
|
-
|
|
902
|
+
"hermes-4---llama-3.1-405b-non-reasoning": {
|
|
903
|
+
// AA specific benchmarks
|
|
904
|
+
codingIndex: 18.1,
|
|
905
|
+
mathIndex: 15.3,
|
|
906
|
+
|
|
907
|
+
// Academic benchmarks
|
|
908
|
+
mmluPro: 0.729,
|
|
909
|
+
gpqa: 0.536,
|
|
910
|
+
hle: 0.042,
|
|
911
|
+
|
|
912
|
+
// Capabilities
|
|
913
|
+
contextWindow: 8192,
|
|
914
|
+
supportsReasoning: false,
|
|
915
|
+
supportsVision: false,
|
|
914
916
|
|
|
917
|
+
// Metadata
|
|
918
|
+
lastUpdated: "2026-06-01",
|
|
919
|
+
originalModel: "Hermes 4 - Llama-3.1 405B (Non-reasoning)",
|
|
920
|
+
},
|
|
921
|
+
"deephermes-3---mistral-24b-preview-non-reasoning": {
|
|
915
922
|
// AA specific benchmarks
|
|
916
923
|
codingIndex: undefined,
|
|
917
924
|
mathIndex: undefined,
|
|
@@ -927,13 +934,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
927
934
|
supportsVision: false,
|
|
928
935
|
|
|
929
936
|
// Metadata
|
|
930
|
-
lastUpdated: "2026-
|
|
937
|
+
lastUpdated: "2026-06-01",
|
|
938
|
+
originalModel: "DeepHermes 3 - Mistral 24B Preview (Non-reasoning)",
|
|
931
939
|
},
|
|
932
940
|
"hermes-4---llama-3.1-70b-reasoning": {
|
|
933
|
-
// AA Intelligence Index (composite score)
|
|
934
|
-
intelligenceIndex: 16,
|
|
935
|
-
normalizedScore: 23,
|
|
936
|
-
|
|
937
941
|
// AA specific benchmarks
|
|
938
942
|
codingIndex: 14.4,
|
|
939
943
|
mathIndex: 68.7,
|
|
@@ -949,21 +953,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
949
953
|
supportsVision: false,
|
|
950
954
|
|
|
951
955
|
// Metadata
|
|
952
|
-
lastUpdated: "2026-
|
|
956
|
+
lastUpdated: "2026-06-01",
|
|
957
|
+
originalModel: "Hermes 4 - Llama-3.1 70B (Reasoning)",
|
|
953
958
|
},
|
|
954
|
-
"hermes-4---llama-3.1-
|
|
955
|
-
// AA
|
|
956
|
-
|
|
957
|
-
|
|
959
|
+
"hermes-4---llama-3.1-70b-non-reasoning": {
|
|
960
|
+
// AA specific benchmarks
|
|
961
|
+
codingIndex: 9.2,
|
|
962
|
+
mathIndex: 11.3,
|
|
958
963
|
|
|
964
|
+
// Academic benchmarks
|
|
965
|
+
mmluPro: 0.664,
|
|
966
|
+
gpqa: 0.491,
|
|
967
|
+
hle: 0.036,
|
|
968
|
+
|
|
969
|
+
// Capabilities
|
|
970
|
+
contextWindow: 8192,
|
|
971
|
+
supportsReasoning: false,
|
|
972
|
+
supportsVision: false,
|
|
973
|
+
|
|
974
|
+
// Metadata
|
|
975
|
+
lastUpdated: "2026-06-01",
|
|
976
|
+
originalModel: "Hermes 4 - Llama-3.1 70B (Non-reasoning)",
|
|
977
|
+
},
|
|
978
|
+
"exaone-4.0-1.2b-reasoning": {
|
|
959
979
|
// AA specific benchmarks
|
|
960
|
-
codingIndex:
|
|
961
|
-
mathIndex:
|
|
980
|
+
codingIndex: 3.1,
|
|
981
|
+
mathIndex: 50.3,
|
|
962
982
|
|
|
963
983
|
// Academic benchmarks
|
|
964
|
-
mmluPro: 0.
|
|
965
|
-
gpqa: 0.
|
|
966
|
-
hle: 0.
|
|
984
|
+
mmluPro: 0.588,
|
|
985
|
+
gpqa: 0.515,
|
|
986
|
+
hle: 0.058,
|
|
967
987
|
|
|
968
988
|
// Capabilities
|
|
969
989
|
contextWindow: 8192,
|
|
@@ -971,21 +991,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
971
991
|
supportsVision: false,
|
|
972
992
|
|
|
973
993
|
// Metadata
|
|
974
|
-
lastUpdated: "2026-
|
|
994
|
+
lastUpdated: "2026-06-01",
|
|
995
|
+
originalModel: "Exaone 4.0 1.2B (Reasoning)",
|
|
975
996
|
},
|
|
976
|
-
"
|
|
977
|
-
// AA
|
|
978
|
-
|
|
979
|
-
|
|
997
|
+
"exaone-4.0-32b-reasoning": {
|
|
998
|
+
// AA specific benchmarks
|
|
999
|
+
codingIndex: 14,
|
|
1000
|
+
mathIndex: 80,
|
|
1001
|
+
|
|
1002
|
+
// Academic benchmarks
|
|
1003
|
+
mmluPro: 0.818,
|
|
1004
|
+
gpqa: 0.739,
|
|
1005
|
+
hle: 0.105,
|
|
1006
|
+
|
|
1007
|
+
// Capabilities
|
|
1008
|
+
contextWindow: 8192,
|
|
1009
|
+
supportsReasoning: false,
|
|
1010
|
+
supportsVision: false,
|
|
980
1011
|
|
|
1012
|
+
// Metadata
|
|
1013
|
+
lastUpdated: "2026-06-01",
|
|
1014
|
+
originalModel: "EXAONE 4.0 32B (Reasoning)",
|
|
1015
|
+
},
|
|
1016
|
+
"k-exaone-reasoning": {
|
|
981
1017
|
// AA specific benchmarks
|
|
982
|
-
codingIndex:
|
|
983
|
-
mathIndex:
|
|
1018
|
+
codingIndex: 27,
|
|
1019
|
+
mathIndex: 90.3,
|
|
984
1020
|
|
|
985
1021
|
// Academic benchmarks
|
|
986
|
-
mmluPro: 0.
|
|
987
|
-
gpqa: 0.
|
|
988
|
-
hle: 0.
|
|
1022
|
+
mmluPro: 0.838,
|
|
1023
|
+
gpqa: 0.783,
|
|
1024
|
+
hle: 0.131,
|
|
989
1025
|
|
|
990
1026
|
// Capabilities
|
|
991
1027
|
contextWindow: 8192,
|
|
@@ -993,13 +1029,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
993
1029
|
supportsVision: false,
|
|
994
1030
|
|
|
995
1031
|
// Metadata
|
|
996
|
-
lastUpdated: "2026-
|
|
1032
|
+
lastUpdated: "2026-06-01",
|
|
1033
|
+
originalModel: "K-EXAONE (Reasoning)",
|
|
997
1034
|
},
|
|
998
1035
|
"k-exaone-non-reasoning": {
|
|
999
|
-
// AA Intelligence Index (composite score)
|
|
1000
|
-
intelligenceIndex: 23.4,
|
|
1001
|
-
normalizedScore: 33,
|
|
1002
|
-
|
|
1003
1036
|
// AA specific benchmarks
|
|
1004
1037
|
codingIndex: 13.5,
|
|
1005
1038
|
mathIndex: 44,
|
|
@@ -1015,21 +1048,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1015
1048
|
supportsVision: false,
|
|
1016
1049
|
|
|
1017
1050
|
// Metadata
|
|
1018
|
-
lastUpdated: "2026-
|
|
1051
|
+
lastUpdated: "2026-06-01",
|
|
1052
|
+
originalModel: "K-EXAONE (Non-reasoning)",
|
|
1019
1053
|
},
|
|
1020
|
-
"exaone-4-
|
|
1021
|
-
// AA Intelligence Index (composite score)
|
|
1022
|
-
intelligenceIndex: 11.7,
|
|
1023
|
-
normalizedScore: 17,
|
|
1024
|
-
|
|
1054
|
+
"exaone-4.5-33b": {
|
|
1025
1055
|
// AA specific benchmarks
|
|
1026
|
-
codingIndex:
|
|
1027
|
-
mathIndex:
|
|
1056
|
+
codingIndex: 23,
|
|
1057
|
+
mathIndex: undefined,
|
|
1028
1058
|
|
|
1029
1059
|
// Academic benchmarks
|
|
1030
|
-
mmluPro:
|
|
1031
|
-
gpqa: 0.
|
|
1032
|
-
hle: 0.
|
|
1060
|
+
mmluPro: undefined,
|
|
1061
|
+
gpqa: 0.794,
|
|
1062
|
+
hle: 0.116,
|
|
1033
1063
|
|
|
1034
1064
|
// Capabilities
|
|
1035
1065
|
contextWindow: 8192,
|
|
@@ -1037,21 +1067,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1037
1067
|
supportsVision: false,
|
|
1038
1068
|
|
|
1039
1069
|
// Metadata
|
|
1040
|
-
lastUpdated: "2026-
|
|
1070
|
+
lastUpdated: "2026-06-01",
|
|
1071
|
+
originalModel: "EXAONE 4.5 33B",
|
|
1041
1072
|
},
|
|
1042
|
-
"
|
|
1043
|
-
// AA Intelligence Index (composite score)
|
|
1044
|
-
intelligenceIndex: 32.1,
|
|
1045
|
-
normalizedScore: 46,
|
|
1046
|
-
|
|
1073
|
+
"exaone-4.0-32b-non-reasoning": {
|
|
1047
1074
|
// AA specific benchmarks
|
|
1048
|
-
codingIndex:
|
|
1049
|
-
mathIndex:
|
|
1075
|
+
codingIndex: 9.4,
|
|
1076
|
+
mathIndex: 39.3,
|
|
1050
1077
|
|
|
1051
1078
|
// Academic benchmarks
|
|
1052
|
-
mmluPro: 0.
|
|
1053
|
-
gpqa: 0.
|
|
1054
|
-
hle: 0.
|
|
1079
|
+
mmluPro: 0.768,
|
|
1080
|
+
gpqa: 0.628,
|
|
1081
|
+
hle: 0.049,
|
|
1055
1082
|
|
|
1056
1083
|
// Capabilities
|
|
1057
1084
|
contextWindow: 8192,
|
|
@@ -1059,13 +1086,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1059
1086
|
supportsVision: false,
|
|
1060
1087
|
|
|
1061
1088
|
// Metadata
|
|
1062
|
-
lastUpdated: "2026-
|
|
1089
|
+
lastUpdated: "2026-06-01",
|
|
1090
|
+
originalModel: "EXAONE 4.0 32B (Non-reasoning)",
|
|
1063
1091
|
},
|
|
1064
|
-
"exaone-4-1.2b-non-reasoning": {
|
|
1065
|
-
// AA Intelligence Index (composite score)
|
|
1066
|
-
intelligenceIndex: 8.1,
|
|
1067
|
-
normalizedScore: 12,
|
|
1068
|
-
|
|
1092
|
+
"exaone-4.0-1.2b-non-reasoning": {
|
|
1069
1093
|
// AA specific benchmarks
|
|
1070
1094
|
codingIndex: 2.5,
|
|
1071
1095
|
mathIndex: 24,
|
|
@@ -1081,21 +1105,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1081
1105
|
supportsVision: false,
|
|
1082
1106
|
|
|
1083
1107
|
// Metadata
|
|
1084
|
-
lastUpdated: "2026-
|
|
1108
|
+
lastUpdated: "2026-06-01",
|
|
1109
|
+
originalModel: "Exaone 4.0 1.2B (Non-reasoning)",
|
|
1085
1110
|
},
|
|
1086
|
-
"
|
|
1087
|
-
// AA Intelligence Index (composite score)
|
|
1088
|
-
intelligenceIndex: 16.7,
|
|
1089
|
-
normalizedScore: 24,
|
|
1090
|
-
|
|
1111
|
+
"mimo-v2-flash-non-reasoning": {
|
|
1091
1112
|
// AA specific benchmarks
|
|
1092
|
-
codingIndex:
|
|
1093
|
-
mathIndex:
|
|
1113
|
+
codingIndex: 25.8,
|
|
1114
|
+
mathIndex: 67.7,
|
|
1094
1115
|
|
|
1095
1116
|
// Academic benchmarks
|
|
1096
|
-
mmluPro: 0.
|
|
1097
|
-
gpqa: 0.
|
|
1098
|
-
hle: 0.
|
|
1117
|
+
mmluPro: 0.744,
|
|
1118
|
+
gpqa: 0.656,
|
|
1119
|
+
hle: 0.08,
|
|
1099
1120
|
|
|
1100
1121
|
// Capabilities
|
|
1101
1122
|
contextWindow: 8192,
|
|
@@ -1103,21 +1124,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1103
1124
|
supportsVision: false,
|
|
1104
1125
|
|
|
1105
1126
|
// Metadata
|
|
1106
|
-
lastUpdated: "2026-
|
|
1127
|
+
lastUpdated: "2026-06-01",
|
|
1128
|
+
originalModel: "MiMo-V2-Flash (Non-reasoning)",
|
|
1107
1129
|
},
|
|
1108
|
-
"
|
|
1109
|
-
// AA Intelligence Index (composite score)
|
|
1110
|
-
intelligenceIndex: 8.3,
|
|
1111
|
-
normalizedScore: 12,
|
|
1112
|
-
|
|
1130
|
+
"mimo-v2.5-pro-non-reasoning": {
|
|
1113
1131
|
// AA specific benchmarks
|
|
1114
|
-
codingIndex:
|
|
1115
|
-
mathIndex:
|
|
1132
|
+
codingIndex: 36.8,
|
|
1133
|
+
mathIndex: undefined,
|
|
1116
1134
|
|
|
1117
1135
|
// Academic benchmarks
|
|
1118
|
-
mmluPro:
|
|
1119
|
-
gpqa: 0.
|
|
1120
|
-
hle: 0.
|
|
1136
|
+
mmluPro: undefined,
|
|
1137
|
+
gpqa: 0.762,
|
|
1138
|
+
hle: 0.133,
|
|
1121
1139
|
|
|
1122
1140
|
// Capabilities
|
|
1123
1141
|
contextWindow: 8192,
|
|
@@ -1125,21 +1143,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1125
1143
|
supportsVision: false,
|
|
1126
1144
|
|
|
1127
1145
|
// Metadata
|
|
1128
|
-
lastUpdated: "2026-
|
|
1146
|
+
lastUpdated: "2026-06-01",
|
|
1147
|
+
originalModel: "MiMo-V2.5-Pro (Non-reasoning)",
|
|
1129
1148
|
},
|
|
1130
|
-
"mimo-v2
|
|
1131
|
-
// AA Intelligence Index (composite score)
|
|
1132
|
-
intelligenceIndex: 49.2,
|
|
1133
|
-
normalizedScore: 70,
|
|
1134
|
-
|
|
1149
|
+
"mimo-v2.5": {
|
|
1135
1150
|
// AA specific benchmarks
|
|
1136
|
-
codingIndex:
|
|
1151
|
+
codingIndex: 42.1,
|
|
1137
1152
|
mathIndex: undefined,
|
|
1138
1153
|
|
|
1139
1154
|
// Academic benchmarks
|
|
1140
1155
|
mmluPro: undefined,
|
|
1141
|
-
gpqa: 0.
|
|
1142
|
-
hle: 0.
|
|
1156
|
+
gpqa: 0.849,
|
|
1157
|
+
hle: 0.252,
|
|
1143
1158
|
|
|
1144
1159
|
// Capabilities
|
|
1145
1160
|
contextWindow: 8192,
|
|
@@ -1147,13 +1162,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1147
1162
|
supportsVision: false,
|
|
1148
1163
|
|
|
1149
1164
|
// Metadata
|
|
1150
|
-
lastUpdated: "2026-
|
|
1165
|
+
lastUpdated: "2026-06-01",
|
|
1166
|
+
originalModel: "MiMo-V2.5",
|
|
1151
1167
|
},
|
|
1152
|
-
"mimo-v2-
|
|
1153
|
-
// AA
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1168
|
+
"mimo-v2.5-pro": {
|
|
1169
|
+
// AA specific benchmarks
|
|
1170
|
+
codingIndex: 45.5,
|
|
1171
|
+
mathIndex: undefined,
|
|
1172
|
+
|
|
1173
|
+
// Academic benchmarks
|
|
1174
|
+
mmluPro: undefined,
|
|
1175
|
+
gpqa: 0.866,
|
|
1176
|
+
hle: 0.338,
|
|
1177
|
+
|
|
1178
|
+
// Capabilities
|
|
1179
|
+
contextWindow: 8192,
|
|
1180
|
+
supportsReasoning: false,
|
|
1181
|
+
supportsVision: false,
|
|
1182
|
+
|
|
1183
|
+
// Metadata
|
|
1184
|
+
lastUpdated: "2026-06-01",
|
|
1185
|
+
originalModel: "MiMo-V2.5-Pro",
|
|
1186
|
+
},
|
|
1187
|
+
"mimo-v2-flash-feb-2026": {
|
|
1157
1188
|
// AA specific benchmarks
|
|
1158
1189
|
codingIndex: 33.5,
|
|
1159
1190
|
mathIndex: undefined,
|
|
@@ -1169,21 +1200,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1169
1200
|
supportsVision: false,
|
|
1170
1201
|
|
|
1171
1202
|
// Metadata
|
|
1172
|
-
lastUpdated: "2026-
|
|
1203
|
+
lastUpdated: "2026-06-01",
|
|
1204
|
+
originalModel: "MiMo-V2-Flash (Feb 2026)",
|
|
1173
1205
|
},
|
|
1174
|
-
"mimo-v2-
|
|
1175
|
-
// AA Intelligence Index (composite score)
|
|
1176
|
-
intelligenceIndex: 30.4,
|
|
1177
|
-
normalizedScore: 43,
|
|
1178
|
-
|
|
1206
|
+
"mimo-v2-omni-0327": {
|
|
1179
1207
|
// AA specific benchmarks
|
|
1180
|
-
codingIndex:
|
|
1181
|
-
mathIndex:
|
|
1208
|
+
codingIndex: 36.9,
|
|
1209
|
+
mathIndex: undefined,
|
|
1182
1210
|
|
|
1183
1211
|
// Academic benchmarks
|
|
1184
|
-
mmluPro:
|
|
1185
|
-
gpqa: 0.
|
|
1186
|
-
hle: 0.
|
|
1212
|
+
mmluPro: undefined,
|
|
1213
|
+
gpqa: 0.855,
|
|
1214
|
+
hle: 0.204,
|
|
1187
1215
|
|
|
1188
1216
|
// Capabilities
|
|
1189
1217
|
contextWindow: 8192,
|
|
@@ -1191,13 +1219,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1191
1219
|
supportsVision: false,
|
|
1192
1220
|
|
|
1193
1221
|
// Metadata
|
|
1194
|
-
lastUpdated: "2026-
|
|
1222
|
+
lastUpdated: "2026-06-01",
|
|
1223
|
+
originalModel: "MiMo-V2-Omni-0327",
|
|
1195
1224
|
},
|
|
1196
1225
|
"mimo-v2-omni": {
|
|
1197
|
-
// AA Intelligence Index (composite score)
|
|
1198
|
-
intelligenceIndex: 43.4,
|
|
1199
|
-
normalizedScore: 62,
|
|
1200
|
-
|
|
1201
1226
|
// AA specific benchmarks
|
|
1202
1227
|
codingIndex: 35.5,
|
|
1203
1228
|
mathIndex: undefined,
|
|
@@ -1213,13 +1238,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1213
1238
|
supportsVision: false,
|
|
1214
1239
|
|
|
1215
1240
|
// Metadata
|
|
1216
|
-
lastUpdated: "2026-
|
|
1241
|
+
lastUpdated: "2026-06-01",
|
|
1242
|
+
originalModel: "MiMo-V2-Omni",
|
|
1217
1243
|
},
|
|
1218
1244
|
"ernie-4.5-300b-a47b": {
|
|
1219
|
-
// AA Intelligence Index (composite score)
|
|
1220
|
-
intelligenceIndex: 15,
|
|
1221
|
-
normalizedScore: 21,
|
|
1222
|
-
|
|
1223
1245
|
// AA specific benchmarks
|
|
1224
1246
|
codingIndex: 14.5,
|
|
1225
1247
|
mathIndex: 41.3,
|
|
@@ -1235,13 +1257,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1235
1257
|
supportsVision: false,
|
|
1236
1258
|
|
|
1237
1259
|
// Metadata
|
|
1238
|
-
lastUpdated: "2026-
|
|
1260
|
+
lastUpdated: "2026-06-01",
|
|
1261
|
+
originalModel: "ERNIE 4.5 300B A47B",
|
|
1239
1262
|
},
|
|
1240
|
-
"ernie-5-thinking-preview": {
|
|
1241
|
-
// AA Intelligence Index (composite score)
|
|
1242
|
-
intelligenceIndex: 29.1,
|
|
1243
|
-
normalizedScore: 42,
|
|
1244
|
-
|
|
1263
|
+
"ernie-5.0-thinking-preview": {
|
|
1245
1264
|
// AA specific benchmarks
|
|
1246
1265
|
codingIndex: 29.2,
|
|
1247
1266
|
mathIndex: 85,
|
|
@@ -1257,13 +1276,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1257
1276
|
supportsVision: false,
|
|
1258
1277
|
|
|
1259
1278
|
// Metadata
|
|
1260
|
-
lastUpdated: "2026-
|
|
1279
|
+
lastUpdated: "2026-06-01",
|
|
1280
|
+
originalModel: "ERNIE 5.0 Thinking Preview",
|
|
1261
1281
|
},
|
|
1262
1282
|
"sarvam-30b-high": {
|
|
1263
|
-
// AA Intelligence Index (composite score)
|
|
1264
|
-
intelligenceIndex: 12.3,
|
|
1265
|
-
normalizedScore: 18,
|
|
1266
|
-
|
|
1267
1283
|
// AA specific benchmarks
|
|
1268
1284
|
codingIndex: 7.9,
|
|
1269
1285
|
mathIndex: undefined,
|
|
@@ -1279,13 +1295,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1279
1295
|
supportsVision: false,
|
|
1280
1296
|
|
|
1281
1297
|
// Metadata
|
|
1282
|
-
lastUpdated: "2026-
|
|
1298
|
+
lastUpdated: "2026-06-01",
|
|
1299
|
+
originalModel: "Sarvam 30B (high)",
|
|
1283
1300
|
},
|
|
1284
1301
|
"sarvam-105b-high": {
|
|
1285
|
-
// AA Intelligence Index (composite score)
|
|
1286
|
-
intelligenceIndex: 18.2,
|
|
1287
|
-
normalizedScore: 26,
|
|
1288
|
-
|
|
1289
1302
|
// AA specific benchmarks
|
|
1290
1303
|
codingIndex: 9.8,
|
|
1291
1304
|
mathIndex: undefined,
|
|
@@ -1301,21 +1314,37 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1301
1314
|
supportsVision: false,
|
|
1302
1315
|
|
|
1303
1316
|
// Metadata
|
|
1304
|
-
lastUpdated: "2026-
|
|
1317
|
+
lastUpdated: "2026-06-01",
|
|
1318
|
+
originalModel: "Sarvam 105B (high)",
|
|
1305
1319
|
},
|
|
1306
|
-
"
|
|
1307
|
-
// AA
|
|
1308
|
-
|
|
1309
|
-
|
|
1320
|
+
"qwen-chat-14b": {
|
|
1321
|
+
// AA specific benchmarks
|
|
1322
|
+
codingIndex: undefined,
|
|
1323
|
+
mathIndex: undefined,
|
|
1324
|
+
|
|
1325
|
+
// Academic benchmarks
|
|
1326
|
+
mmluPro: undefined,
|
|
1327
|
+
gpqa: undefined,
|
|
1328
|
+
hle: undefined,
|
|
1310
1329
|
|
|
1330
|
+
// Capabilities
|
|
1331
|
+
contextWindow: 8192,
|
|
1332
|
+
supportsReasoning: false,
|
|
1333
|
+
supportsVision: false,
|
|
1334
|
+
|
|
1335
|
+
// Metadata
|
|
1336
|
+
lastUpdated: "2026-06-01",
|
|
1337
|
+
originalModel: "Qwen Chat 14B",
|
|
1338
|
+
},
|
|
1339
|
+
"hy3-preview-reasoning": {
|
|
1311
1340
|
// AA specific benchmarks
|
|
1312
|
-
codingIndex:
|
|
1313
|
-
mathIndex:
|
|
1341
|
+
codingIndex: 36.5,
|
|
1342
|
+
mathIndex: undefined,
|
|
1314
1343
|
|
|
1315
1344
|
// Academic benchmarks
|
|
1316
|
-
mmluPro:
|
|
1317
|
-
gpqa: 0.
|
|
1318
|
-
hle: 0.
|
|
1345
|
+
mmluPro: undefined,
|
|
1346
|
+
gpqa: 0.867,
|
|
1347
|
+
hle: 0.255,
|
|
1319
1348
|
|
|
1320
1349
|
// Capabilities
|
|
1321
1350
|
contextWindow: 8192,
|
|
@@ -1323,13 +1352,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1323
1352
|
supportsVision: false,
|
|
1324
1353
|
|
|
1325
1354
|
// Metadata
|
|
1326
|
-
lastUpdated: "2026-
|
|
1355
|
+
lastUpdated: "2026-06-01",
|
|
1356
|
+
originalModel: "Hy3-preview (Reasoning)",
|
|
1327
1357
|
},
|
|
1328
|
-
"
|
|
1329
|
-
// AA
|
|
1330
|
-
|
|
1331
|
-
|
|
1358
|
+
"hy3-preview-non-reasoning": {
|
|
1359
|
+
// AA specific benchmarks
|
|
1360
|
+
codingIndex: 34.3,
|
|
1361
|
+
mathIndex: undefined,
|
|
1362
|
+
|
|
1363
|
+
// Academic benchmarks
|
|
1364
|
+
mmluPro: undefined,
|
|
1365
|
+
gpqa: 0.732,
|
|
1366
|
+
hle: 0.063,
|
|
1367
|
+
|
|
1368
|
+
// Capabilities
|
|
1369
|
+
contextWindow: 8192,
|
|
1370
|
+
supportsReasoning: false,
|
|
1371
|
+
supportsVision: false,
|
|
1332
1372
|
|
|
1373
|
+
// Metadata
|
|
1374
|
+
lastUpdated: "2026-06-01",
|
|
1375
|
+
originalModel: "Hy3-preview (Non-reasoning)",
|
|
1376
|
+
},
|
|
1377
|
+
"kat-coder-pro-v2": {
|
|
1333
1378
|
// AA specific benchmarks
|
|
1334
1379
|
codingIndex: 45.6,
|
|
1335
1380
|
mathIndex: undefined,
|
|
@@ -1345,13 +1390,29 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1345
1390
|
supportsVision: false,
|
|
1346
1391
|
|
|
1347
1392
|
// Metadata
|
|
1348
|
-
lastUpdated: "2026-
|
|
1393
|
+
lastUpdated: "2026-06-01",
|
|
1394
|
+
originalModel: "KAT Coder Pro V2",
|
|
1349
1395
|
},
|
|
1350
|
-
"
|
|
1351
|
-
// AA
|
|
1352
|
-
|
|
1353
|
-
|
|
1396
|
+
"kat-coder-pro-v1": {
|
|
1397
|
+
// AA specific benchmarks
|
|
1398
|
+
codingIndex: 18.3,
|
|
1399
|
+
mathIndex: 94.7,
|
|
1354
1400
|
|
|
1401
|
+
// Academic benchmarks
|
|
1402
|
+
mmluPro: 0.813,
|
|
1403
|
+
gpqa: 0.764,
|
|
1404
|
+
hle: 0.334,
|
|
1405
|
+
|
|
1406
|
+
// Capabilities
|
|
1407
|
+
contextWindow: 8192,
|
|
1408
|
+
supportsReasoning: false,
|
|
1409
|
+
supportsVision: false,
|
|
1410
|
+
|
|
1411
|
+
// Metadata
|
|
1412
|
+
lastUpdated: "2026-06-01",
|
|
1413
|
+
originalModel: "KAT-Coder-Pro V1",
|
|
1414
|
+
},
|
|
1415
|
+
"intellect-3": {
|
|
1355
1416
|
// AA specific benchmarks
|
|
1356
1417
|
codingIndex: 19.1,
|
|
1357
1418
|
mathIndex: 88,
|
|
@@ -1367,13 +1428,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1367
1428
|
supportsVision: false,
|
|
1368
1429
|
|
|
1369
1430
|
// Metadata
|
|
1370
|
-
lastUpdated: "2026-
|
|
1431
|
+
lastUpdated: "2026-06-01",
|
|
1432
|
+
originalModel: "INTELLECT-3",
|
|
1371
1433
|
},
|
|
1372
1434
|
"motif-2-12.7b-reasoning": {
|
|
1373
|
-
// AA Intelligence Index (composite score)
|
|
1374
|
-
intelligenceIndex: 19.1,
|
|
1375
|
-
normalizedScore: 27,
|
|
1376
|
-
|
|
1377
1435
|
// AA specific benchmarks
|
|
1378
1436
|
codingIndex: 11.9,
|
|
1379
1437
|
mathIndex: 80.3,
|
|
@@ -1389,21 +1447,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1389
1447
|
supportsVision: false,
|
|
1390
1448
|
|
|
1391
1449
|
// Metadata
|
|
1392
|
-
lastUpdated: "2026-
|
|
1450
|
+
lastUpdated: "2026-06-01",
|
|
1451
|
+
originalModel: "Motif-2-12.7B-Reasoning",
|
|
1393
1452
|
},
|
|
1394
|
-
"k2-v2
|
|
1395
|
-
// AA Intelligence Index (composite score)
|
|
1396
|
-
intelligenceIndex: 14.4,
|
|
1397
|
-
normalizedScore: 21,
|
|
1398
|
-
|
|
1453
|
+
"k2-think-v2": {
|
|
1399
1454
|
// AA specific benchmarks
|
|
1400
|
-
codingIndex:
|
|
1401
|
-
mathIndex:
|
|
1455
|
+
codingIndex: 15.5,
|
|
1456
|
+
mathIndex: undefined,
|
|
1402
1457
|
|
|
1403
1458
|
// Academic benchmarks
|
|
1404
|
-
mmluPro:
|
|
1405
|
-
gpqa: 0.
|
|
1406
|
-
hle: 0.
|
|
1459
|
+
mmluPro: undefined,
|
|
1460
|
+
gpqa: 0.713,
|
|
1461
|
+
hle: 0.095,
|
|
1407
1462
|
|
|
1408
1463
|
// Capabilities
|
|
1409
1464
|
contextWindow: 8192,
|
|
@@ -1411,21 +1466,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1411
1466
|
supportsVision: false,
|
|
1412
1467
|
|
|
1413
1468
|
// Metadata
|
|
1414
|
-
lastUpdated: "2026-
|
|
1469
|
+
lastUpdated: "2026-06-01",
|
|
1470
|
+
originalModel: "K2 Think V2",
|
|
1415
1471
|
},
|
|
1416
|
-
"k2-v2-
|
|
1417
|
-
// AA Intelligence Index (composite score)
|
|
1418
|
-
intelligenceIndex: 18.7,
|
|
1419
|
-
normalizedScore: 27,
|
|
1420
|
-
|
|
1472
|
+
"k2-v2-high": {
|
|
1421
1473
|
// AA specific benchmarks
|
|
1422
|
-
codingIndex:
|
|
1423
|
-
mathIndex:
|
|
1474
|
+
codingIndex: 16.1,
|
|
1475
|
+
mathIndex: 78.3,
|
|
1424
1476
|
|
|
1425
1477
|
// Academic benchmarks
|
|
1426
|
-
mmluPro: 0.
|
|
1427
|
-
gpqa: 0.
|
|
1428
|
-
hle: 0.
|
|
1478
|
+
mmluPro: 0.786,
|
|
1479
|
+
gpqa: 0.681,
|
|
1480
|
+
hle: 0.098,
|
|
1429
1481
|
|
|
1430
1482
|
// Capabilities
|
|
1431
1483
|
contextWindow: 8192,
|
|
@@ -1433,21 +1485,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1433
1485
|
supportsVision: false,
|
|
1434
1486
|
|
|
1435
1487
|
// Metadata
|
|
1436
|
-
lastUpdated: "2026-
|
|
1488
|
+
lastUpdated: "2026-06-01",
|
|
1489
|
+
originalModel: "K2-V2 (high)",
|
|
1437
1490
|
},
|
|
1438
|
-
"k2-v2-
|
|
1439
|
-
// AA Intelligence Index (composite score)
|
|
1440
|
-
intelligenceIndex: 20.6,
|
|
1441
|
-
normalizedScore: 29,
|
|
1442
|
-
|
|
1491
|
+
"k2-v2-low": {
|
|
1443
1492
|
// AA specific benchmarks
|
|
1444
|
-
codingIndex:
|
|
1445
|
-
mathIndex:
|
|
1493
|
+
codingIndex: 10.5,
|
|
1494
|
+
mathIndex: 35.3,
|
|
1446
1495
|
|
|
1447
1496
|
// Academic benchmarks
|
|
1448
|
-
mmluPro: 0.
|
|
1449
|
-
gpqa: 0.
|
|
1450
|
-
hle: 0.
|
|
1497
|
+
mmluPro: 0.713,
|
|
1498
|
+
gpqa: 0.541,
|
|
1499
|
+
hle: 0.039,
|
|
1451
1500
|
|
|
1452
1501
|
// Capabilities
|
|
1453
1502
|
contextWindow: 8192,
|
|
@@ -1455,21 +1504,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1455
1504
|
supportsVision: false,
|
|
1456
1505
|
|
|
1457
1506
|
// Metadata
|
|
1458
|
-
lastUpdated: "2026-
|
|
1507
|
+
lastUpdated: "2026-06-01",
|
|
1508
|
+
originalModel: "K2-V2 (low)",
|
|
1459
1509
|
},
|
|
1460
|
-
"k2-
|
|
1461
|
-
// AA Intelligence Index (composite score)
|
|
1462
|
-
intelligenceIndex: 24.1,
|
|
1463
|
-
normalizedScore: 34,
|
|
1464
|
-
|
|
1510
|
+
"k2-v2-medium": {
|
|
1465
1511
|
// AA specific benchmarks
|
|
1466
|
-
codingIndex:
|
|
1467
|
-
mathIndex:
|
|
1512
|
+
codingIndex: 14,
|
|
1513
|
+
mathIndex: 64.7,
|
|
1468
1514
|
|
|
1469
1515
|
// Academic benchmarks
|
|
1470
|
-
mmluPro:
|
|
1471
|
-
gpqa: 0.
|
|
1472
|
-
hle: 0.
|
|
1516
|
+
mmluPro: 0.761,
|
|
1517
|
+
gpqa: 0.598,
|
|
1518
|
+
hle: 0.044,
|
|
1473
1519
|
|
|
1474
1520
|
// Capabilities
|
|
1475
1521
|
contextWindow: 8192,
|
|
@@ -1477,13 +1523,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1477
1523
|
supportsVision: false,
|
|
1478
1524
|
|
|
1479
1525
|
// Metadata
|
|
1480
|
-
lastUpdated: "2026-
|
|
1526
|
+
lastUpdated: "2026-06-01",
|
|
1527
|
+
originalModel: "K2-V2 (medium)",
|
|
1481
1528
|
},
|
|
1482
1529
|
"mi-dm-k-2.5-pro": {
|
|
1483
|
-
// AA Intelligence Index (composite score)
|
|
1484
|
-
intelligenceIndex: 23.1,
|
|
1485
|
-
normalizedScore: 33,
|
|
1486
|
-
|
|
1487
1530
|
// AA specific benchmarks
|
|
1488
1531
|
codingIndex: 12.6,
|
|
1489
1532
|
mathIndex: 76.7,
|
|
@@ -1499,13 +1542,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1499
1542
|
supportsVision: false,
|
|
1500
1543
|
|
|
1501
1544
|
// Metadata
|
|
1502
|
-
lastUpdated: "2026-
|
|
1545
|
+
lastUpdated: "2026-06-01",
|
|
1546
|
+
originalModel: "Mi:dm K 2.5 Pro",
|
|
1503
1547
|
},
|
|
1504
1548
|
"hyperclova-x-seed-think-32b": {
|
|
1505
|
-
// AA Intelligence Index (composite score)
|
|
1506
|
-
intelligenceIndex: 23.7,
|
|
1507
|
-
normalizedScore: 34,
|
|
1508
|
-
|
|
1509
1549
|
// AA specific benchmarks
|
|
1510
1550
|
codingIndex: 17.5,
|
|
1511
1551
|
mathIndex: 59,
|
|
@@ -1521,13 +1561,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1521
1561
|
supportsVision: false,
|
|
1522
1562
|
|
|
1523
1563
|
// Metadata
|
|
1524
|
-
lastUpdated: "2026-
|
|
1564
|
+
lastUpdated: "2026-06-01",
|
|
1565
|
+
originalModel: "HyperCLOVA X SEED Think (32B)",
|
|
1525
1566
|
},
|
|
1526
1567
|
"longcat-flash-lite": {
|
|
1527
|
-
// AA Intelligence Index (composite score)
|
|
1528
|
-
intelligenceIndex: 23.9,
|
|
1529
|
-
normalizedScore: 34,
|
|
1530
|
-
|
|
1531
1568
|
// AA specific benchmarks
|
|
1532
1569
|
codingIndex: 16.5,
|
|
1533
1570
|
mathIndex: undefined,
|
|
@@ -1543,13 +1580,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1543
1580
|
supportsVision: false,
|
|
1544
1581
|
|
|
1545
1582
|
// Metadata
|
|
1546
|
-
lastUpdated: "2026-
|
|
1583
|
+
lastUpdated: "2026-06-01",
|
|
1584
|
+
originalModel: "LongCat Flash Lite",
|
|
1547
1585
|
},
|
|
1548
1586
|
"tri-21b-think": {
|
|
1549
|
-
// AA Intelligence Index (composite score)
|
|
1550
|
-
intelligenceIndex: 18.6,
|
|
1551
|
-
normalizedScore: 27,
|
|
1552
|
-
|
|
1553
1587
|
// AA specific benchmarks
|
|
1554
1588
|
codingIndex: 6.3,
|
|
1555
1589
|
mathIndex: undefined,
|
|
@@ -1565,13 +1599,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1565
1599
|
supportsVision: false,
|
|
1566
1600
|
|
|
1567
1601
|
// Metadata
|
|
1568
|
-
lastUpdated: "2026-
|
|
1602
|
+
lastUpdated: "2026-06-01",
|
|
1603
|
+
originalModel: "Tri-21B-Think",
|
|
1569
1604
|
},
|
|
1570
1605
|
"tri-21b-think-preview": {
|
|
1571
|
-
// AA Intelligence Index (composite score)
|
|
1572
|
-
intelligenceIndex: 20,
|
|
1573
|
-
normalizedScore: 29,
|
|
1574
|
-
|
|
1575
1606
|
// AA specific benchmarks
|
|
1576
1607
|
codingIndex: 7.4,
|
|
1577
1608
|
mathIndex: undefined,
|
|
@@ -1587,13 +1618,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1587
1618
|
supportsVision: false,
|
|
1588
1619
|
|
|
1589
1620
|
// Metadata
|
|
1590
|
-
lastUpdated: "2026-
|
|
1621
|
+
lastUpdated: "2026-06-01",
|
|
1622
|
+
originalModel: "Tri-21B-think Preview",
|
|
1591
1623
|
},
|
|
1592
1624
|
"nanbeige4.1-3b": {
|
|
1593
|
-
// AA Intelligence Index (composite score)
|
|
1594
|
-
intelligenceIndex: 16.1,
|
|
1595
|
-
normalizedScore: 23,
|
|
1596
|
-
|
|
1597
1625
|
// AA specific benchmarks
|
|
1598
1626
|
codingIndex: 8.9,
|
|
1599
1627
|
mathIndex: undefined,
|
|
@@ -1609,35 +1637,10 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1609
1637
|
supportsVision: false,
|
|
1610
1638
|
|
|
1611
1639
|
// Metadata
|
|
1612
|
-
lastUpdated: "2026-
|
|
1613
|
-
|
|
1614
|
-
"apertus-70b-instruct": {
|
|
1615
|
-
// AA Intelligence Index (composite score)
|
|
1616
|
-
intelligenceIndex: 7.7,
|
|
1617
|
-
normalizedScore: 11,
|
|
1618
|
-
|
|
1619
|
-
// AA specific benchmarks
|
|
1620
|
-
codingIndex: 1.9,
|
|
1621
|
-
mathIndex: undefined,
|
|
1622
|
-
|
|
1623
|
-
// Academic benchmarks
|
|
1624
|
-
mmluPro: undefined,
|
|
1625
|
-
gpqa: 0.272,
|
|
1626
|
-
hle: 0.055,
|
|
1627
|
-
|
|
1628
|
-
// Capabilities
|
|
1629
|
-
contextWindow: 8192,
|
|
1630
|
-
supportsReasoning: false,
|
|
1631
|
-
supportsVision: false,
|
|
1632
|
-
|
|
1633
|
-
// Metadata
|
|
1634
|
-
lastUpdated: "2026-04-06",
|
|
1640
|
+
lastUpdated: "2026-06-01",
|
|
1641
|
+
originalModel: "Nanbeige4.1-3B",
|
|
1635
1642
|
},
|
|
1636
1643
|
"apertus-8b-instruct": {
|
|
1637
|
-
// AA Intelligence Index (composite score)
|
|
1638
|
-
intelligenceIndex: 5.9,
|
|
1639
|
-
normalizedScore: 8,
|
|
1640
|
-
|
|
1641
1644
|
// AA specific benchmarks
|
|
1642
1645
|
codingIndex: 1.4,
|
|
1643
1646
|
mathIndex: undefined,
|
|
@@ -1653,131 +1656,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1653
1656
|
supportsVision: false,
|
|
1654
1657
|
|
|
1655
1658
|
// Metadata
|
|
1656
|
-
lastUpdated: "2026-
|
|
1657
|
-
|
|
1658
|
-
"qwen-chat-14b": {
|
|
1659
|
-
// AA Intelligence Index (composite score)
|
|
1660
|
-
intelligenceIndex: 7.4,
|
|
1661
|
-
normalizedScore: 11,
|
|
1662
|
-
|
|
1663
|
-
// AA specific benchmarks
|
|
1664
|
-
codingIndex: undefined,
|
|
1665
|
-
mathIndex: undefined,
|
|
1666
|
-
|
|
1667
|
-
// Academic benchmarks
|
|
1668
|
-
mmluPro: undefined,
|
|
1669
|
-
gpqa: undefined,
|
|
1670
|
-
hle: undefined,
|
|
1671
|
-
|
|
1672
|
-
// Capabilities
|
|
1673
|
-
contextWindow: 8192,
|
|
1674
|
-
supportsReasoning: false,
|
|
1675
|
-
supportsVision: false,
|
|
1676
|
-
|
|
1677
|
-
// Metadata
|
|
1678
|
-
lastUpdated: "2026-04-06",
|
|
1659
|
+
lastUpdated: "2026-06-01",
|
|
1660
|
+
originalModel: "Apertus 8B Instruct",
|
|
1679
1661
|
},
|
|
1680
|
-
"
|
|
1681
|
-
// AA Intelligence Index (composite score)
|
|
1682
|
-
intelligenceIndex: 23.4,
|
|
1683
|
-
normalizedScore: 33,
|
|
1684
|
-
|
|
1685
|
-
// AA specific benchmarks
|
|
1686
|
-
codingIndex: 19.7,
|
|
1687
|
-
mathIndex: 85.3,
|
|
1688
|
-
|
|
1689
|
-
// Academic benchmarks
|
|
1690
|
-
mmluPro: 0.799,
|
|
1691
|
-
gpqa: 0.719,
|
|
1692
|
-
hle: 0.089,
|
|
1693
|
-
|
|
1694
|
-
// Capabilities
|
|
1695
|
-
contextWindow: 8192,
|
|
1696
|
-
supportsReasoning: false,
|
|
1697
|
-
supportsVision: false,
|
|
1698
|
-
|
|
1699
|
-
// Metadata
|
|
1700
|
-
lastUpdated: "2026-04-06",
|
|
1701
|
-
},
|
|
1702
|
-
"glm-5-turbo": {
|
|
1703
|
-
// AA Intelligence Index (composite score)
|
|
1704
|
-
intelligenceIndex: 46.8,
|
|
1705
|
-
normalizedScore: 67,
|
|
1706
|
-
|
|
1707
|
-
// AA specific benchmarks
|
|
1708
|
-
codingIndex: 36.8,
|
|
1709
|
-
mathIndex: undefined,
|
|
1710
|
-
|
|
1711
|
-
// Academic benchmarks
|
|
1712
|
-
mmluPro: undefined,
|
|
1713
|
-
gpqa: 0.847,
|
|
1714
|
-
hle: 0.254,
|
|
1715
|
-
|
|
1716
|
-
// Capabilities
|
|
1717
|
-
contextWindow: 8192,
|
|
1718
|
-
supportsReasoning: false,
|
|
1719
|
-
supportsVision: false,
|
|
1720
|
-
|
|
1721
|
-
// Metadata
|
|
1722
|
-
lastUpdated: "2026-04-06",
|
|
1723
|
-
},
|
|
1724
|
-
"glm-4.6v-non-reasoning": {
|
|
1725
|
-
// AA Intelligence Index (composite score)
|
|
1726
|
-
intelligenceIndex: 17.1,
|
|
1727
|
-
normalizedScore: 24,
|
|
1728
|
-
|
|
1729
|
-
// AA specific benchmarks
|
|
1730
|
-
codingIndex: 11.1,
|
|
1731
|
-
mathIndex: 26.3,
|
|
1732
|
-
|
|
1733
|
-
// Academic benchmarks
|
|
1734
|
-
mmluPro: 0.752,
|
|
1735
|
-
gpqa: 0.566,
|
|
1736
|
-
hle: 0.037,
|
|
1737
|
-
|
|
1738
|
-
// Capabilities
|
|
1739
|
-
contextWindow: 8192,
|
|
1740
|
-
supportsReasoning: false,
|
|
1741
|
-
supportsVision: false,
|
|
1742
|
-
|
|
1743
|
-
// Metadata
|
|
1744
|
-
lastUpdated: "2026-04-06",
|
|
1745
|
-
},
|
|
1746
|
-
"glm-5-non-reasoning": {
|
|
1747
|
-
// AA Intelligence Index (composite score)
|
|
1748
|
-
intelligenceIndex: 40.6,
|
|
1749
|
-
normalizedScore: 58,
|
|
1750
|
-
|
|
1751
|
-
// AA specific benchmarks
|
|
1752
|
-
codingIndex: 39,
|
|
1753
|
-
mathIndex: undefined,
|
|
1754
|
-
|
|
1755
|
-
// Academic benchmarks
|
|
1756
|
-
mmluPro: undefined,
|
|
1757
|
-
gpqa: 0.666,
|
|
1758
|
-
hle: 0.072,
|
|
1759
|
-
|
|
1760
|
-
// Capabilities
|
|
1761
|
-
contextWindow: 8192,
|
|
1762
|
-
supportsReasoning: false,
|
|
1763
|
-
supportsVision: false,
|
|
1764
|
-
|
|
1765
|
-
// Metadata
|
|
1766
|
-
lastUpdated: "2026-04-06",
|
|
1767
|
-
},
|
|
1768
|
-
"glm-5-reasoning": {
|
|
1769
|
-
// AA Intelligence Index (composite score)
|
|
1770
|
-
intelligenceIndex: 49.8,
|
|
1771
|
-
normalizedScore: 71,
|
|
1772
|
-
|
|
1662
|
+
"apertus-70b-instruct": {
|
|
1773
1663
|
// AA specific benchmarks
|
|
1774
|
-
codingIndex:
|
|
1664
|
+
codingIndex: 1.9,
|
|
1775
1665
|
mathIndex: undefined,
|
|
1776
1666
|
|
|
1777
1667
|
// Academic benchmarks
|
|
1778
1668
|
mmluPro: undefined,
|
|
1779
|
-
gpqa: 0.
|
|
1780
|
-
hle: 0.
|
|
1669
|
+
gpqa: 0.272,
|
|
1670
|
+
hle: 0.055,
|
|
1781
1671
|
|
|
1782
1672
|
// Capabilities
|
|
1783
1673
|
contextWindow: 8192,
|
|
@@ -1785,21 +1675,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1785
1675
|
supportsVision: false,
|
|
1786
1676
|
|
|
1787
1677
|
// Metadata
|
|
1788
|
-
lastUpdated: "2026-
|
|
1678
|
+
lastUpdated: "2026-06-01",
|
|
1679
|
+
originalModel: "Apertus 70B Instruct",
|
|
1789
1680
|
},
|
|
1790
|
-
"
|
|
1791
|
-
// AA Intelligence Index (composite score)
|
|
1792
|
-
intelligenceIndex: 42.9,
|
|
1793
|
-
normalizedScore: 61,
|
|
1794
|
-
|
|
1681
|
+
"minicpm5-1b-non-reasoning": {
|
|
1795
1682
|
// AA specific benchmarks
|
|
1796
|
-
codingIndex:
|
|
1683
|
+
codingIndex: 0.5,
|
|
1797
1684
|
mathIndex: undefined,
|
|
1798
1685
|
|
|
1799
1686
|
// Academic benchmarks
|
|
1800
1687
|
mmluPro: undefined,
|
|
1801
|
-
gpqa: 0.
|
|
1802
|
-
hle: 0.
|
|
1688
|
+
gpqa: 0.269,
|
|
1689
|
+
hle: 0.046,
|
|
1803
1690
|
|
|
1804
1691
|
// Capabilities
|
|
1805
1692
|
contextWindow: 8192,
|
|
@@ -1807,175 +1694,18 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1807
1694
|
supportsVision: false,
|
|
1808
1695
|
|
|
1809
1696
|
// Metadata
|
|
1810
|
-
lastUpdated: "2026-
|
|
1697
|
+
lastUpdated: "2026-06-01",
|
|
1698
|
+
originalModel: "MiniCPM5-1B (Non-reasoning)",
|
|
1811
1699
|
},
|
|
1812
|
-
"
|
|
1813
|
-
// AA Intelligence Index (composite score)
|
|
1814
|
-
intelligenceIndex: 4.7,
|
|
1815
|
-
normalizedScore: 7,
|
|
1816
|
-
|
|
1700
|
+
"minicpm-v-4.6-1.3b": {
|
|
1817
1701
|
// AA specific benchmarks
|
|
1818
|
-
codingIndex:
|
|
1702
|
+
codingIndex: 0.7,
|
|
1819
1703
|
mathIndex: undefined,
|
|
1820
1704
|
|
|
1821
1705
|
// Academic benchmarks
|
|
1822
1706
|
mmluPro: undefined,
|
|
1823
1707
|
gpqa: 0.305,
|
|
1824
|
-
hle: 0.
|
|
1825
|
-
|
|
1826
|
-
// Capabilities
|
|
1827
|
-
contextWindow: 8192,
|
|
1828
|
-
supportsReasoning: false,
|
|
1829
|
-
supportsVision: false,
|
|
1830
|
-
|
|
1831
|
-
// Metadata
|
|
1832
|
-
lastUpdated: "2026-04-06",
|
|
1833
|
-
},
|
|
1834
|
-
"command-a": {
|
|
1835
|
-
// AA Intelligence Index (composite score)
|
|
1836
|
-
intelligenceIndex: 13.5,
|
|
1837
|
-
normalizedScore: 19,
|
|
1838
|
-
|
|
1839
|
-
// AA specific benchmarks
|
|
1840
|
-
codingIndex: 9.9,
|
|
1841
|
-
mathIndex: 13,
|
|
1842
|
-
|
|
1843
|
-
// Academic benchmarks
|
|
1844
|
-
mmluPro: 0.712,
|
|
1845
|
-
gpqa: 0.527,
|
|
1846
|
-
hle: 0.046,
|
|
1847
|
-
|
|
1848
|
-
// Capabilities
|
|
1849
|
-
contextWindow: 8192,
|
|
1850
|
-
supportsReasoning: false,
|
|
1851
|
-
supportsVision: false,
|
|
1852
|
-
|
|
1853
|
-
// Metadata
|
|
1854
|
-
lastUpdated: "2026-04-06",
|
|
1855
|
-
},
|
|
1856
|
-
"apriel-v1.6-15b-thinker": {
|
|
1857
|
-
// AA Intelligence Index (composite score)
|
|
1858
|
-
intelligenceIndex: 27.6,
|
|
1859
|
-
normalizedScore: 39,
|
|
1860
|
-
|
|
1861
|
-
// AA specific benchmarks
|
|
1862
|
-
codingIndex: 22,
|
|
1863
|
-
mathIndex: 88,
|
|
1864
|
-
|
|
1865
|
-
// Academic benchmarks
|
|
1866
|
-
mmluPro: 0.79,
|
|
1867
|
-
gpqa: 0.733,
|
|
1868
|
-
hle: 0.098,
|
|
1869
|
-
|
|
1870
|
-
// Capabilities
|
|
1871
|
-
contextWindow: 8192,
|
|
1872
|
-
supportsReasoning: false,
|
|
1873
|
-
supportsVision: false,
|
|
1874
|
-
|
|
1875
|
-
// Metadata
|
|
1876
|
-
lastUpdated: "2026-04-06",
|
|
1877
|
-
},
|
|
1878
|
-
"jamba-reasoning-3b": {
|
|
1879
|
-
// AA Intelligence Index (composite score)
|
|
1880
|
-
intelligenceIndex: 9.6,
|
|
1881
|
-
normalizedScore: 14,
|
|
1882
|
-
|
|
1883
|
-
// AA specific benchmarks
|
|
1884
|
-
codingIndex: 2.5,
|
|
1885
|
-
mathIndex: 10.7,
|
|
1886
|
-
|
|
1887
|
-
// Academic benchmarks
|
|
1888
|
-
mmluPro: 0.577,
|
|
1889
|
-
gpqa: 0.333,
|
|
1890
|
-
hle: 0.046,
|
|
1891
|
-
|
|
1892
|
-
// Capabilities
|
|
1893
|
-
contextWindow: 8192,
|
|
1894
|
-
supportsReasoning: false,
|
|
1895
|
-
supportsVision: false,
|
|
1896
|
-
|
|
1897
|
-
// Metadata
|
|
1898
|
-
lastUpdated: "2026-04-06",
|
|
1899
|
-
},
|
|
1900
|
-
"jamba-1.7-large": {
|
|
1901
|
-
// AA Intelligence Index (composite score)
|
|
1902
|
-
intelligenceIndex: 10.9,
|
|
1903
|
-
normalizedScore: 16,
|
|
1904
|
-
|
|
1905
|
-
// AA specific benchmarks
|
|
1906
|
-
codingIndex: 7.8,
|
|
1907
|
-
mathIndex: 2.3,
|
|
1908
|
-
|
|
1909
|
-
// Academic benchmarks
|
|
1910
|
-
mmluPro: 0.577,
|
|
1911
|
-
gpqa: 0.39,
|
|
1912
|
-
hle: 0.038,
|
|
1913
|
-
|
|
1914
|
-
// Capabilities
|
|
1915
|
-
contextWindow: 8192,
|
|
1916
|
-
supportsReasoning: false,
|
|
1917
|
-
supportsVision: false,
|
|
1918
|
-
|
|
1919
|
-
// Metadata
|
|
1920
|
-
lastUpdated: "2026-04-06",
|
|
1921
|
-
},
|
|
1922
|
-
"jamba-1.7-mini": {
|
|
1923
|
-
// AA Intelligence Index (composite score)
|
|
1924
|
-
intelligenceIndex: 8.1,
|
|
1925
|
-
normalizedScore: 12,
|
|
1926
|
-
|
|
1927
|
-
// AA specific benchmarks
|
|
1928
|
-
codingIndex: 3.1,
|
|
1929
|
-
mathIndex: 0.3,
|
|
1930
|
-
|
|
1931
|
-
// Academic benchmarks
|
|
1932
|
-
mmluPro: 0.388,
|
|
1933
|
-
gpqa: 0.322,
|
|
1934
|
-
hle: 0.045,
|
|
1935
|
-
|
|
1936
|
-
// Capabilities
|
|
1937
|
-
contextWindow: 8192,
|
|
1938
|
-
supportsReasoning: false,
|
|
1939
|
-
supportsVision: false,
|
|
1940
|
-
|
|
1941
|
-
// Metadata
|
|
1942
|
-
lastUpdated: "2026-04-06",
|
|
1943
|
-
},
|
|
1944
|
-
"qwen3-next-80b-a3b-reasoning": {
|
|
1945
|
-
// AA Intelligence Index (composite score)
|
|
1946
|
-
intelligenceIndex: 26.7,
|
|
1947
|
-
normalizedScore: 38,
|
|
1948
|
-
|
|
1949
|
-
// AA specific benchmarks
|
|
1950
|
-
codingIndex: 19.5,
|
|
1951
|
-
mathIndex: 84.3,
|
|
1952
|
-
|
|
1953
|
-
// Academic benchmarks
|
|
1954
|
-
mmluPro: 0.824,
|
|
1955
|
-
gpqa: 0.759,
|
|
1956
|
-
hle: 0.117,
|
|
1957
|
-
|
|
1958
|
-
// Capabilities
|
|
1959
|
-
contextWindow: 8192,
|
|
1960
|
-
supportsReasoning: false,
|
|
1961
|
-
supportsVision: false,
|
|
1962
|
-
|
|
1963
|
-
// Metadata
|
|
1964
|
-
lastUpdated: "2026-04-06",
|
|
1965
|
-
},
|
|
1966
|
-
"qwen3-coder-480b-a35b-instruct": {
|
|
1967
|
-
// AA Intelligence Index (composite score)
|
|
1968
|
-
intelligenceIndex: 24.8,
|
|
1969
|
-
normalizedScore: 35,
|
|
1970
|
-
|
|
1971
|
-
// AA specific benchmarks
|
|
1972
|
-
codingIndex: 24.6,
|
|
1973
|
-
mathIndex: 39.3,
|
|
1974
|
-
|
|
1975
|
-
// Academic benchmarks
|
|
1976
|
-
mmluPro: 0.788,
|
|
1977
|
-
gpqa: 0.618,
|
|
1978
|
-
hle: 0.044,
|
|
1708
|
+
hle: 0.049,
|
|
1979
1709
|
|
|
1980
1710
|
// Capabilities
|
|
1981
1711
|
contextWindow: 8192,
|
|
@@ -1983,6 +1713,7 @@ export const BENCHMARKS_CHUNK_1: Record<string, HardcodedBenchmark> = {
|
|
|
1983
1713
|
supportsVision: false,
|
|
1984
1714
|
|
|
1985
1715
|
// Metadata
|
|
1986
|
-
lastUpdated: "2026-
|
|
1716
|
+
lastUpdated: "2026-06-01",
|
|
1717
|
+
originalModel: "MiniCPM-V 4.6 1.3B",
|
|
1987
1718
|
},
|
|
1988
1719
|
};
|