pi-free 2.0.13 → 2.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +4 -1
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +196 -68
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +47 -15
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +213 -0
|
@@ -1,23 +1,20 @@
|
|
|
1
1
|
// Auto-generated benchmark data chunk 2
|
|
2
|
-
// Models:
|
|
2
|
+
// Models: trinity-large-thinking .. o3-mini (90 entries)
|
|
3
|
+
// Last updated: 2026-06-01
|
|
3
4
|
// DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
|
|
4
5
|
|
|
5
6
|
import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
|
|
6
7
|
|
|
7
8
|
export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
8
|
-
"
|
|
9
|
-
// AA Intelligence Index (composite score)
|
|
10
|
-
intelligenceIndex: 41.6,
|
|
11
|
-
normalizedScore: 59,
|
|
12
|
-
|
|
9
|
+
"trinity-large-thinking": {
|
|
13
10
|
// AA specific benchmarks
|
|
14
|
-
codingIndex:
|
|
11
|
+
codingIndex: 27.2,
|
|
15
12
|
mathIndex: undefined,
|
|
16
13
|
|
|
17
14
|
// Academic benchmarks
|
|
18
15
|
mmluPro: undefined,
|
|
19
|
-
gpqa: 0.
|
|
20
|
-
hle: 0.
|
|
16
|
+
gpqa: 0.752,
|
|
17
|
+
hle: 0.147,
|
|
21
18
|
|
|
22
19
|
// Capabilities
|
|
23
20
|
contextWindow: 8192,
|
|
@@ -25,21 +22,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
25
22
|
supportsVision: false,
|
|
26
23
|
|
|
27
24
|
// Metadata
|
|
28
|
-
lastUpdated: "2026-
|
|
25
|
+
lastUpdated: "2026-06-01",
|
|
26
|
+
originalModel: "Trinity Large Thinking",
|
|
29
27
|
},
|
|
30
|
-
"
|
|
31
|
-
// AA Intelligence Index (composite score)
|
|
32
|
-
intelligenceIndex: 38.6,
|
|
33
|
-
normalizedScore: 55,
|
|
34
|
-
|
|
28
|
+
"jt-mini": {
|
|
35
29
|
// AA specific benchmarks
|
|
36
|
-
codingIndex:
|
|
30
|
+
codingIndex: 21.2,
|
|
37
31
|
mathIndex: undefined,
|
|
38
32
|
|
|
39
33
|
// Academic benchmarks
|
|
40
34
|
mmluPro: undefined,
|
|
41
|
-
gpqa: 0.
|
|
42
|
-
hle: 0.
|
|
35
|
+
gpqa: 0.676,
|
|
36
|
+
hle: 0.066,
|
|
43
37
|
|
|
44
38
|
// Capabilities
|
|
45
39
|
contextWindow: 8192,
|
|
@@ -47,21 +41,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
47
41
|
supportsVision: false,
|
|
48
42
|
|
|
49
43
|
// Metadata
|
|
50
|
-
lastUpdated: "2026-
|
|
44
|
+
lastUpdated: "2026-06-01",
|
|
45
|
+
originalModel: "JT-MINI",
|
|
51
46
|
},
|
|
52
|
-
"
|
|
53
|
-
// AA Intelligence Index (composite score)
|
|
54
|
-
intelligenceIndex: 45,
|
|
55
|
-
normalizedScore: 64,
|
|
56
|
-
|
|
47
|
+
"jt-35b-flash": {
|
|
57
48
|
// AA specific benchmarks
|
|
58
|
-
codingIndex:
|
|
49
|
+
codingIndex: 28.9,
|
|
59
50
|
mathIndex: undefined,
|
|
60
51
|
|
|
61
52
|
// Academic benchmarks
|
|
62
53
|
mmluPro: undefined,
|
|
63
|
-
gpqa: 0.
|
|
64
|
-
hle: 0.
|
|
54
|
+
gpqa: 0.829,
|
|
55
|
+
hle: 0.061,
|
|
65
56
|
|
|
66
57
|
// Capabilities
|
|
67
58
|
contextWindow: 8192,
|
|
@@ -69,21 +60,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
69
60
|
supportsVision: false,
|
|
70
61
|
|
|
71
62
|
// Metadata
|
|
72
|
-
lastUpdated: "2026-
|
|
63
|
+
lastUpdated: "2026-06-01",
|
|
64
|
+
originalModel: "JT-35B-Flash",
|
|
73
65
|
},
|
|
74
|
-
"
|
|
75
|
-
// AA Intelligence Index (composite score)
|
|
76
|
-
intelligenceIndex: 40.1,
|
|
77
|
-
normalizedScore: 57,
|
|
78
|
-
|
|
66
|
+
"glm-5.1-reasoning": {
|
|
79
67
|
// AA specific benchmarks
|
|
80
|
-
codingIndex:
|
|
68
|
+
codingIndex: 43.4,
|
|
81
69
|
mathIndex: undefined,
|
|
82
70
|
|
|
83
71
|
// Academic benchmarks
|
|
84
72
|
mmluPro: undefined,
|
|
85
|
-
gpqa: 0.
|
|
86
|
-
hle: 0.
|
|
73
|
+
gpqa: 0.868,
|
|
74
|
+
hle: 0.28,
|
|
87
75
|
|
|
88
76
|
// Capabilities
|
|
89
77
|
contextWindow: 8192,
|
|
@@ -91,21 +79,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
91
79
|
supportsVision: false,
|
|
92
80
|
|
|
93
81
|
// Metadata
|
|
94
|
-
lastUpdated: "2026-
|
|
82
|
+
lastUpdated: "2026-06-01",
|
|
83
|
+
originalModel: "GLM-5.1 (Reasoning)",
|
|
95
84
|
},
|
|
96
|
-
"
|
|
97
|
-
// AA Intelligence Index (composite score)
|
|
98
|
-
intelligenceIndex: 37.1,
|
|
99
|
-
normalizedScore: 53,
|
|
100
|
-
|
|
85
|
+
"glm-5.1-non-reasoning": {
|
|
101
86
|
// AA specific benchmarks
|
|
102
|
-
codingIndex:
|
|
87
|
+
codingIndex: 35.8,
|
|
103
88
|
mathIndex: undefined,
|
|
104
89
|
|
|
105
90
|
// Academic benchmarks
|
|
106
91
|
mmluPro: undefined,
|
|
107
|
-
gpqa: 0.
|
|
108
|
-
hle: 0.
|
|
92
|
+
gpqa: 0.839,
|
|
93
|
+
hle: 0.256,
|
|
109
94
|
|
|
110
95
|
// Capabilities
|
|
111
96
|
contextWindow: 8192,
|
|
@@ -113,21 +98,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
113
98
|
supportsVision: false,
|
|
114
99
|
|
|
115
100
|
// Metadata
|
|
116
|
-
lastUpdated: "2026-
|
|
101
|
+
lastUpdated: "2026-06-01",
|
|
102
|
+
originalModel: "GLM-5.1 (Non-reasoning)",
|
|
117
103
|
},
|
|
118
|
-
"
|
|
119
|
-
// AA Intelligence Index (composite score)
|
|
120
|
-
intelligenceIndex: 28.3,
|
|
121
|
-
normalizedScore: 40,
|
|
122
|
-
|
|
104
|
+
"glm-5v-turbo-reasoning": {
|
|
123
105
|
// AA specific benchmarks
|
|
124
|
-
codingIndex:
|
|
106
|
+
codingIndex: 36.2,
|
|
125
107
|
mathIndex: undefined,
|
|
126
108
|
|
|
127
109
|
// Academic benchmarks
|
|
128
110
|
mmluPro: undefined,
|
|
129
|
-
gpqa: 0.
|
|
130
|
-
hle: 0.
|
|
111
|
+
gpqa: 0.809,
|
|
112
|
+
hle: 0.158,
|
|
131
113
|
|
|
132
114
|
// Capabilities
|
|
133
115
|
contextWindow: 8192,
|
|
@@ -135,21 +117,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
135
117
|
supportsVision: false,
|
|
136
118
|
|
|
137
119
|
// Metadata
|
|
138
|
-
lastUpdated: "2026-
|
|
120
|
+
lastUpdated: "2026-06-01",
|
|
121
|
+
originalModel: "GLM 5V Turbo (Reasoning)",
|
|
139
122
|
},
|
|
140
|
-
"
|
|
141
|
-
// AA Intelligence Index (composite score)
|
|
142
|
-
intelligenceIndex: 20.1,
|
|
143
|
-
normalizedScore: 29,
|
|
144
|
-
|
|
123
|
+
"glm-5-turbo": {
|
|
145
124
|
// AA specific benchmarks
|
|
146
|
-
codingIndex:
|
|
147
|
-
mathIndex:
|
|
125
|
+
codingIndex: 36.8,
|
|
126
|
+
mathIndex: undefined,
|
|
148
127
|
|
|
149
128
|
// Academic benchmarks
|
|
150
|
-
mmluPro:
|
|
151
|
-
gpqa: 0.
|
|
152
|
-
hle: 0.
|
|
129
|
+
mmluPro: undefined,
|
|
130
|
+
gpqa: 0.847,
|
|
131
|
+
hle: 0.254,
|
|
153
132
|
|
|
154
133
|
// Capabilities
|
|
155
134
|
contextWindow: 8192,
|
|
@@ -157,21 +136,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
157
136
|
supportsVision: false,
|
|
158
137
|
|
|
159
138
|
// Metadata
|
|
160
|
-
lastUpdated: "2026-
|
|
139
|
+
lastUpdated: "2026-06-01",
|
|
140
|
+
originalModel: "GLM-5-Turbo",
|
|
161
141
|
},
|
|
162
|
-
"
|
|
163
|
-
// AA Intelligence Index (composite score)
|
|
164
|
-
intelligenceIndex: 9.9,
|
|
165
|
-
normalizedScore: 14,
|
|
166
|
-
|
|
142
|
+
"command-a": {
|
|
167
143
|
// AA specific benchmarks
|
|
168
|
-
codingIndex:
|
|
169
|
-
mathIndex:
|
|
144
|
+
codingIndex: 9.9,
|
|
145
|
+
mathIndex: 13,
|
|
170
146
|
|
|
171
147
|
// Academic benchmarks
|
|
172
|
-
mmluPro:
|
|
173
|
-
gpqa: 0.
|
|
174
|
-
hle: 0.
|
|
148
|
+
mmluPro: 0.712,
|
|
149
|
+
gpqa: 0.527,
|
|
150
|
+
hle: 0.046,
|
|
175
151
|
|
|
176
152
|
// Capabilities
|
|
177
153
|
contextWindow: 8192,
|
|
@@ -179,21 +155,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
179
155
|
supportsVision: false,
|
|
180
156
|
|
|
181
157
|
// Metadata
|
|
182
|
-
lastUpdated: "2026-
|
|
158
|
+
lastUpdated: "2026-06-01",
|
|
159
|
+
originalModel: "Command A",
|
|
183
160
|
},
|
|
184
|
-
"
|
|
185
|
-
// AA Intelligence Index (composite score)
|
|
186
|
-
intelligenceIndex: 14.7,
|
|
187
|
-
normalizedScore: 21,
|
|
188
|
-
|
|
161
|
+
"tiny-aya-global": {
|
|
189
162
|
// AA specific benchmarks
|
|
190
|
-
codingIndex:
|
|
163
|
+
codingIndex: 1.2,
|
|
191
164
|
mathIndex: undefined,
|
|
192
165
|
|
|
193
166
|
// Academic benchmarks
|
|
194
167
|
mmluPro: undefined,
|
|
195
|
-
gpqa: 0.
|
|
196
|
-
hle: 0.
|
|
168
|
+
gpqa: 0.305,
|
|
169
|
+
hle: 0.052,
|
|
197
170
|
|
|
198
171
|
// Capabilities
|
|
199
172
|
contextWindow: 8192,
|
|
@@ -201,21 +174,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
201
174
|
supportsVision: false,
|
|
202
175
|
|
|
203
176
|
// Metadata
|
|
204
|
-
lastUpdated: "2026-
|
|
177
|
+
lastUpdated: "2026-06-01",
|
|
178
|
+
originalModel: "Tiny Aya Global",
|
|
205
179
|
},
|
|
206
|
-
"
|
|
207
|
-
// AA Intelligence Index (composite score)
|
|
208
|
-
intelligenceIndex: 22.6,
|
|
209
|
-
normalizedScore: 32,
|
|
210
|
-
|
|
180
|
+
"command-a+": {
|
|
211
181
|
// AA specific benchmarks
|
|
212
|
-
codingIndex:
|
|
182
|
+
codingIndex: 29.3,
|
|
213
183
|
mathIndex: undefined,
|
|
214
184
|
|
|
215
185
|
// Academic benchmarks
|
|
216
186
|
mmluPro: undefined,
|
|
217
|
-
gpqa: 0.
|
|
218
|
-
hle: 0.
|
|
187
|
+
gpqa: 0.761,
|
|
188
|
+
hle: 0.114,
|
|
219
189
|
|
|
220
190
|
// Capabilities
|
|
221
191
|
contextWindow: 8192,
|
|
@@ -223,21 +193,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
223
193
|
supportsVision: false,
|
|
224
194
|
|
|
225
195
|
// Metadata
|
|
226
|
-
lastUpdated: "2026-
|
|
196
|
+
lastUpdated: "2026-06-01",
|
|
197
|
+
originalModel: "Command A+",
|
|
227
198
|
},
|
|
228
|
-
"
|
|
229
|
-
// AA Intelligence Index (composite score)
|
|
230
|
-
intelligenceIndex: 37.2,
|
|
231
|
-
normalizedScore: 53,
|
|
232
|
-
|
|
199
|
+
"apriel-v1.6-15b-thinker": {
|
|
233
200
|
// AA specific benchmarks
|
|
234
|
-
codingIndex:
|
|
235
|
-
mathIndex:
|
|
201
|
+
codingIndex: 22,
|
|
202
|
+
mathIndex: 88,
|
|
236
203
|
|
|
237
204
|
// Academic benchmarks
|
|
238
|
-
mmluPro:
|
|
239
|
-
gpqa: 0.
|
|
240
|
-
hle: 0.
|
|
205
|
+
mmluPro: 0.79,
|
|
206
|
+
gpqa: 0.733,
|
|
207
|
+
hle: 0.098,
|
|
241
208
|
|
|
242
209
|
// Capabilities
|
|
243
210
|
contextWindow: 8192,
|
|
@@ -245,21 +212,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
245
212
|
supportsVision: false,
|
|
246
213
|
|
|
247
214
|
// Metadata
|
|
248
|
-
lastUpdated: "2026-
|
|
215
|
+
lastUpdated: "2026-06-01",
|
|
216
|
+
originalModel: "Apriel-v1.6-15B-Thinker",
|
|
249
217
|
},
|
|
250
|
-
"
|
|
251
|
-
// AA Intelligence Index (composite score)
|
|
252
|
-
intelligenceIndex: 27.3,
|
|
253
|
-
normalizedScore: 39,
|
|
254
|
-
|
|
218
|
+
"jamba-1.7-large": {
|
|
255
219
|
// AA specific benchmarks
|
|
256
|
-
codingIndex:
|
|
257
|
-
mathIndex:
|
|
220
|
+
codingIndex: 7.8,
|
|
221
|
+
mathIndex: 2.3,
|
|
258
222
|
|
|
259
223
|
// Academic benchmarks
|
|
260
|
-
mmluPro:
|
|
261
|
-
gpqa: 0.
|
|
262
|
-
hle: 0.
|
|
224
|
+
mmluPro: 0.577,
|
|
225
|
+
gpqa: 0.39,
|
|
226
|
+
hle: 0.038,
|
|
263
227
|
|
|
264
228
|
// Capabilities
|
|
265
229
|
contextWindow: 8192,
|
|
@@ -267,21 +231,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
267
231
|
supportsVision: false,
|
|
268
232
|
|
|
269
233
|
// Metadata
|
|
270
|
-
lastUpdated: "2026-
|
|
234
|
+
lastUpdated: "2026-06-01",
|
|
235
|
+
originalModel: "Jamba 1.7 Large",
|
|
271
236
|
},
|
|
272
|
-
"
|
|
273
|
-
// AA Intelligence Index (composite score)
|
|
274
|
-
intelligenceIndex: 30.7,
|
|
275
|
-
normalizedScore: 44,
|
|
276
|
-
|
|
237
|
+
"jamba-1.7-mini": {
|
|
277
238
|
// AA specific benchmarks
|
|
278
|
-
codingIndex:
|
|
279
|
-
mathIndex:
|
|
239
|
+
codingIndex: 3.1,
|
|
240
|
+
mathIndex: 0.3,
|
|
280
241
|
|
|
281
242
|
// Academic benchmarks
|
|
282
|
-
mmluPro:
|
|
283
|
-
gpqa: 0.
|
|
284
|
-
hle: 0.
|
|
243
|
+
mmluPro: 0.388,
|
|
244
|
+
gpqa: 0.322,
|
|
245
|
+
hle: 0.045,
|
|
285
246
|
|
|
286
247
|
// Capabilities
|
|
287
248
|
contextWindow: 8192,
|
|
@@ -289,21 +250,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
289
250
|
supportsVision: false,
|
|
290
251
|
|
|
291
252
|
// Metadata
|
|
292
|
-
lastUpdated: "2026-
|
|
253
|
+
lastUpdated: "2026-06-01",
|
|
254
|
+
originalModel: "Jamba 1.7 Mini",
|
|
293
255
|
},
|
|
294
|
-
"
|
|
295
|
-
// AA Intelligence Index (composite score)
|
|
296
|
-
intelligenceIndex: 32.4,
|
|
297
|
-
normalizedScore: 46,
|
|
298
|
-
|
|
256
|
+
"jamba-reasoning-3b": {
|
|
299
257
|
// AA specific benchmarks
|
|
300
|
-
codingIndex:
|
|
301
|
-
mathIndex:
|
|
258
|
+
codingIndex: 2.5,
|
|
259
|
+
mathIndex: 10.7,
|
|
302
260
|
|
|
303
261
|
// Academic benchmarks
|
|
304
|
-
mmluPro:
|
|
305
|
-
gpqa: 0.
|
|
306
|
-
hle: 0.
|
|
262
|
+
mmluPro: 0.577,
|
|
263
|
+
gpqa: 0.333,
|
|
264
|
+
hle: 0.046,
|
|
307
265
|
|
|
308
266
|
// Capabilities
|
|
309
267
|
contextWindow: 8192,
|
|
@@ -311,21 +269,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
311
269
|
supportsVision: false,
|
|
312
270
|
|
|
313
271
|
// Metadata
|
|
314
|
-
lastUpdated: "2026-
|
|
272
|
+
lastUpdated: "2026-06-01",
|
|
273
|
+
originalModel: "Jamba Reasoning 3B",
|
|
315
274
|
},
|
|
316
|
-
"qwen3
|
|
317
|
-
// AA Intelligence Index (composite score)
|
|
318
|
-
intelligenceIndex: 42.1,
|
|
319
|
-
normalizedScore: 60,
|
|
320
|
-
|
|
275
|
+
"qwen3-next-80b-a3b-reasoning": {
|
|
321
276
|
// AA specific benchmarks
|
|
322
|
-
codingIndex:
|
|
323
|
-
mathIndex:
|
|
277
|
+
codingIndex: 19.5,
|
|
278
|
+
mathIndex: 84.3,
|
|
324
279
|
|
|
325
280
|
// Academic benchmarks
|
|
326
|
-
mmluPro:
|
|
327
|
-
gpqa: 0.
|
|
328
|
-
hle: 0.
|
|
281
|
+
mmluPro: 0.824,
|
|
282
|
+
gpqa: 0.759,
|
|
283
|
+
hle: 0.117,
|
|
329
284
|
|
|
330
285
|
// Capabilities
|
|
331
286
|
contextWindow: 8192,
|
|
@@ -333,21 +288,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
333
288
|
supportsVision: false,
|
|
334
289
|
|
|
335
290
|
// Metadata
|
|
336
|
-
lastUpdated: "2026-
|
|
291
|
+
lastUpdated: "2026-06-01",
|
|
292
|
+
originalModel: "Qwen3 Next 80B A3B (Reasoning)",
|
|
337
293
|
},
|
|
338
|
-
"qwen3.
|
|
339
|
-
// AA Intelligence Index (composite score)
|
|
340
|
-
intelligenceIndex: 10.5,
|
|
341
|
-
normalizedScore: 15,
|
|
342
|
-
|
|
294
|
+
"qwen3.6-35b-a3b-reasoning": {
|
|
343
295
|
// AA specific benchmarks
|
|
344
|
-
codingIndex:
|
|
296
|
+
codingIndex: 35.2,
|
|
345
297
|
mathIndex: undefined,
|
|
346
298
|
|
|
347
299
|
// Academic benchmarks
|
|
348
300
|
mmluPro: undefined,
|
|
349
|
-
gpqa: 0.
|
|
350
|
-
hle: 0.
|
|
301
|
+
gpqa: 0.841,
|
|
302
|
+
hle: 0.202,
|
|
351
303
|
|
|
352
304
|
// Capabilities
|
|
353
305
|
contextWindow: 8192,
|
|
@@ -355,21 +307,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
355
307
|
supportsVision: false,
|
|
356
308
|
|
|
357
309
|
// Metadata
|
|
358
|
-
lastUpdated: "2026-
|
|
310
|
+
lastUpdated: "2026-06-01",
|
|
311
|
+
originalModel: "Qwen3.6 35B A3B (Reasoning)",
|
|
359
312
|
},
|
|
360
|
-
"qwen3-
|
|
361
|
-
// AA Intelligence Index (composite score)
|
|
362
|
-
intelligenceIndex: 15.6,
|
|
363
|
-
normalizedScore: 22,
|
|
364
|
-
|
|
313
|
+
"qwen3.5-122b-a10b-reasoning": {
|
|
365
314
|
// AA specific benchmarks
|
|
366
|
-
codingIndex:
|
|
367
|
-
mathIndex:
|
|
315
|
+
codingIndex: 34.7,
|
|
316
|
+
mathIndex: undefined,
|
|
368
317
|
|
|
369
318
|
// Academic benchmarks
|
|
370
|
-
mmluPro:
|
|
371
|
-
gpqa: 0.
|
|
372
|
-
hle: 0.
|
|
319
|
+
mmluPro: undefined,
|
|
320
|
+
gpqa: 0.857,
|
|
321
|
+
hle: 0.234,
|
|
373
322
|
|
|
374
323
|
// Capabilities
|
|
375
324
|
contextWindow: 8192,
|
|
@@ -377,21 +326,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
377
326
|
supportsVision: false,
|
|
378
327
|
|
|
379
328
|
// Metadata
|
|
380
|
-
lastUpdated: "2026-
|
|
329
|
+
lastUpdated: "2026-06-01",
|
|
330
|
+
originalModel: "Qwen3.5 122B A10B (Reasoning)",
|
|
381
331
|
},
|
|
382
|
-
"qwen3-
|
|
383
|
-
// AA Intelligence Index (composite score)
|
|
384
|
-
intelligenceIndex: 10.7,
|
|
385
|
-
normalizedScore: 15,
|
|
386
|
-
|
|
332
|
+
"qwen3.5-9b-non-reasoning": {
|
|
387
333
|
// AA specific benchmarks
|
|
388
|
-
codingIndex:
|
|
389
|
-
mathIndex:
|
|
334
|
+
codingIndex: 21.3,
|
|
335
|
+
mathIndex: undefined,
|
|
390
336
|
|
|
391
337
|
// Academic benchmarks
|
|
392
|
-
mmluPro:
|
|
393
|
-
gpqa: 0.
|
|
394
|
-
hle: 0.
|
|
338
|
+
mmluPro: undefined,
|
|
339
|
+
gpqa: 0.786,
|
|
340
|
+
hle: 0.086,
|
|
395
341
|
|
|
396
342
|
// Capabilities
|
|
397
343
|
contextWindow: 8192,
|
|
@@ -399,21 +345,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
399
345
|
supportsVision: false,
|
|
400
346
|
|
|
401
347
|
// Metadata
|
|
402
|
-
lastUpdated: "2026-
|
|
348
|
+
lastUpdated: "2026-06-01",
|
|
349
|
+
originalModel: "Qwen3.5 9B (Non-reasoning)",
|
|
403
350
|
},
|
|
404
|
-
"qwen3.5-
|
|
405
|
-
// AA Intelligence Index (composite score)
|
|
406
|
-
intelligenceIndex: 27.1,
|
|
407
|
-
normalizedScore: 39,
|
|
408
|
-
|
|
351
|
+
"qwen3.5-9b-reasoning": {
|
|
409
352
|
// AA specific benchmarks
|
|
410
|
-
codingIndex:
|
|
353
|
+
codingIndex: 25.3,
|
|
411
354
|
mathIndex: undefined,
|
|
412
355
|
|
|
413
356
|
// Academic benchmarks
|
|
414
357
|
mmluPro: undefined,
|
|
415
|
-
gpqa: 0.
|
|
416
|
-
hle: 0.
|
|
358
|
+
gpqa: 0.806,
|
|
359
|
+
hle: 0.133,
|
|
417
360
|
|
|
418
361
|
// Capabilities
|
|
419
362
|
contextWindow: 8192,
|
|
@@ -421,21 +364,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
421
364
|
supportsVision: false,
|
|
422
365
|
|
|
423
366
|
// Metadata
|
|
424
|
-
lastUpdated: "2026-
|
|
367
|
+
lastUpdated: "2026-06-01",
|
|
368
|
+
originalModel: "Qwen3.5 9B (Reasoning)",
|
|
425
369
|
},
|
|
426
|
-
"qwen3.
|
|
427
|
-
// AA Intelligence Index (composite score)
|
|
428
|
-
intelligenceIndex: 35.9,
|
|
429
|
-
normalizedScore: 51,
|
|
430
|
-
|
|
370
|
+
"qwen3.6-27b-reasoning": {
|
|
431
371
|
// AA specific benchmarks
|
|
432
|
-
codingIndex:
|
|
372
|
+
codingIndex: 36.5,
|
|
433
373
|
mathIndex: undefined,
|
|
434
374
|
|
|
435
375
|
// Academic benchmarks
|
|
436
376
|
mmluPro: undefined,
|
|
437
|
-
gpqa: 0.
|
|
438
|
-
hle: 0.
|
|
377
|
+
gpqa: 0.842,
|
|
378
|
+
hle: 0.216,
|
|
439
379
|
|
|
440
380
|
// Capabilities
|
|
441
381
|
contextWindow: 8192,
|
|
@@ -443,21 +383,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
443
383
|
supportsVision: false,
|
|
444
384
|
|
|
445
385
|
// Metadata
|
|
446
|
-
lastUpdated: "2026-
|
|
386
|
+
lastUpdated: "2026-06-01",
|
|
387
|
+
originalModel: "Qwen3.6 27B (Reasoning)",
|
|
447
388
|
},
|
|
448
|
-
"qwen3.5-
|
|
449
|
-
// AA Intelligence Index (composite score)
|
|
450
|
-
intelligenceIndex: 16.3,
|
|
451
|
-
normalizedScore: 23,
|
|
452
|
-
|
|
389
|
+
"qwen3.5-397b-a17b-reasoning": {
|
|
453
390
|
// AA specific benchmarks
|
|
454
|
-
codingIndex: 3
|
|
391
|
+
codingIndex: 41.3,
|
|
455
392
|
mathIndex: undefined,
|
|
456
393
|
|
|
457
394
|
// Academic benchmarks
|
|
458
395
|
mmluPro: undefined,
|
|
459
|
-
gpqa: 0.
|
|
460
|
-
hle: 0.
|
|
396
|
+
gpqa: 0.893,
|
|
397
|
+
hle: 0.273,
|
|
461
398
|
|
|
462
399
|
// Capabilities
|
|
463
400
|
contextWindow: 8192,
|
|
@@ -465,21 +402,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
465
402
|
supportsVision: false,
|
|
466
403
|
|
|
467
404
|
// Metadata
|
|
468
|
-
lastUpdated: "2026-
|
|
405
|
+
lastUpdated: "2026-06-01",
|
|
406
|
+
originalModel: "Qwen3.5 397B A17B (Reasoning)",
|
|
469
407
|
},
|
|
470
|
-
"qwen3-
|
|
471
|
-
// AA Intelligence Index (composite score)
|
|
472
|
-
intelligenceIndex: 39.9,
|
|
473
|
-
normalizedScore: 57,
|
|
474
|
-
|
|
408
|
+
"qwen3.5-397b-a17b-non-reasoning": {
|
|
475
409
|
// AA specific benchmarks
|
|
476
|
-
codingIndex:
|
|
410
|
+
codingIndex: 37.4,
|
|
477
411
|
mathIndex: undefined,
|
|
478
412
|
|
|
479
413
|
// Academic benchmarks
|
|
480
414
|
mmluPro: undefined,
|
|
481
415
|
gpqa: 0.861,
|
|
482
|
-
hle: 0.
|
|
416
|
+
hle: 0.188,
|
|
483
417
|
|
|
484
418
|
// Capabilities
|
|
485
419
|
contextWindow: 8192,
|
|
@@ -487,21 +421,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
487
421
|
supportsVision: false,
|
|
488
422
|
|
|
489
423
|
// Metadata
|
|
490
|
-
lastUpdated: "2026-
|
|
424
|
+
lastUpdated: "2026-06-01",
|
|
425
|
+
originalModel: "Qwen3.5 397B A17B (Non-reasoning)",
|
|
491
426
|
},
|
|
492
|
-
"
|
|
493
|
-
// AA Intelligence Index (composite score)
|
|
494
|
-
intelligenceIndex: 9.2,
|
|
495
|
-
normalizedScore: 13,
|
|
496
|
-
|
|
427
|
+
"qwen3.5-0.8b-reasoning": {
|
|
497
428
|
// AA specific benchmarks
|
|
498
|
-
codingIndex:
|
|
499
|
-
mathIndex:
|
|
429
|
+
codingIndex: 0,
|
|
430
|
+
mathIndex: undefined,
|
|
500
431
|
|
|
501
432
|
// Academic benchmarks
|
|
502
|
-
mmluPro:
|
|
503
|
-
gpqa: 0.
|
|
504
|
-
hle: 0.
|
|
433
|
+
mmluPro: undefined,
|
|
434
|
+
gpqa: 0.111,
|
|
435
|
+
hle: 0.012,
|
|
505
436
|
|
|
506
437
|
// Capabilities
|
|
507
438
|
contextWindow: 8192,
|
|
@@ -509,21 +440,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
509
440
|
supportsVision: false,
|
|
510
441
|
|
|
511
442
|
// Metadata
|
|
512
|
-
lastUpdated: "2026-
|
|
443
|
+
lastUpdated: "2026-06-01",
|
|
444
|
+
originalModel: "Qwen3.5 0.8B (Reasoning)",
|
|
513
445
|
},
|
|
514
|
-
"
|
|
515
|
-
// AA Intelligence Index (composite score)
|
|
516
|
-
intelligenceIndex: 22.8,
|
|
517
|
-
normalizedScore: 33,
|
|
518
|
-
|
|
446
|
+
"qwen3.5-2b-non-reasoning": {
|
|
519
447
|
// AA specific benchmarks
|
|
520
|
-
codingIndex:
|
|
521
|
-
mathIndex:
|
|
448
|
+
codingIndex: 4.9,
|
|
449
|
+
mathIndex: undefined,
|
|
522
450
|
|
|
523
451
|
// Academic benchmarks
|
|
524
|
-
mmluPro:
|
|
525
|
-
gpqa: 0.
|
|
526
|
-
hle: 0.
|
|
452
|
+
mmluPro: undefined,
|
|
453
|
+
gpqa: 0.438,
|
|
454
|
+
hle: 0.049,
|
|
527
455
|
|
|
528
456
|
// Capabilities
|
|
529
457
|
contextWindow: 8192,
|
|
@@ -531,21 +459,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
531
459
|
supportsVision: false,
|
|
532
460
|
|
|
533
461
|
// Metadata
|
|
534
|
-
lastUpdated: "2026-
|
|
462
|
+
lastUpdated: "2026-06-01",
|
|
463
|
+
originalModel: "Qwen3.5 2B (Non-reasoning)",
|
|
535
464
|
},
|
|
536
|
-
"
|
|
537
|
-
// AA Intelligence Index (composite score)
|
|
538
|
-
intelligenceIndex: 19,
|
|
539
|
-
normalizedScore: 27,
|
|
540
|
-
|
|
465
|
+
"qwen3.6-27b-non-reasoning": {
|
|
541
466
|
// AA specific benchmarks
|
|
542
|
-
codingIndex:
|
|
543
|
-
mathIndex:
|
|
467
|
+
codingIndex: 26.6,
|
|
468
|
+
mathIndex: undefined,
|
|
544
469
|
|
|
545
470
|
// Academic benchmarks
|
|
546
|
-
mmluPro:
|
|
547
|
-
gpqa: 0.
|
|
548
|
-
hle: 0.
|
|
471
|
+
mmluPro: undefined,
|
|
472
|
+
gpqa: 0.829,
|
|
473
|
+
hle: 0.136,
|
|
549
474
|
|
|
550
475
|
// Capabilities
|
|
551
476
|
contextWindow: 8192,
|
|
@@ -553,21 +478,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
553
478
|
supportsVision: false,
|
|
554
479
|
|
|
555
480
|
// Metadata
|
|
556
|
-
lastUpdated: "2026-
|
|
481
|
+
lastUpdated: "2026-06-01",
|
|
482
|
+
originalModel: "Qwen3.6 27B (Non-reasoning)",
|
|
557
483
|
},
|
|
558
|
-
"
|
|
559
|
-
// AA Intelligence Index (composite score)
|
|
560
|
-
intelligenceIndex: 14,
|
|
561
|
-
normalizedScore: 20,
|
|
562
|
-
|
|
484
|
+
"qwen3.5-122b-a10b-non-reasoning": {
|
|
563
485
|
// AA specific benchmarks
|
|
564
|
-
codingIndex:
|
|
565
|
-
mathIndex:
|
|
486
|
+
codingIndex: 31.6,
|
|
487
|
+
mathIndex: undefined,
|
|
566
488
|
|
|
567
489
|
// Academic benchmarks
|
|
568
|
-
mmluPro:
|
|
569
|
-
gpqa: 0.
|
|
570
|
-
hle: 0.
|
|
490
|
+
mmluPro: undefined,
|
|
491
|
+
gpqa: 0.827,
|
|
492
|
+
hle: 0.148,
|
|
571
493
|
|
|
572
494
|
// Capabilities
|
|
573
495
|
contextWindow: 8192,
|
|
@@ -575,21 +497,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
575
497
|
supportsVision: false,
|
|
576
498
|
|
|
577
499
|
// Metadata
|
|
578
|
-
lastUpdated: "2026-
|
|
500
|
+
lastUpdated: "2026-06-01",
|
|
501
|
+
originalModel: "Qwen3.5 122B A10B (Non-reasoning)",
|
|
579
502
|
},
|
|
580
|
-
"
|
|
581
|
-
// AA Intelligence Index (composite score)
|
|
582
|
-
intelligenceIndex: 15.7,
|
|
583
|
-
normalizedScore: 22,
|
|
584
|
-
|
|
503
|
+
"qwen3.6-35b-a3b-non-reasoning": {
|
|
585
504
|
// AA specific benchmarks
|
|
586
|
-
codingIndex:
|
|
587
|
-
mathIndex:
|
|
505
|
+
codingIndex: 17.6,
|
|
506
|
+
mathIndex: undefined,
|
|
588
507
|
|
|
589
508
|
// Academic benchmarks
|
|
590
|
-
mmluPro:
|
|
591
|
-
gpqa: 0.
|
|
592
|
-
hle: 0.
|
|
509
|
+
mmluPro: undefined,
|
|
510
|
+
gpqa: 0.817,
|
|
511
|
+
hle: 0.125,
|
|
593
512
|
|
|
594
513
|
// Capabilities
|
|
595
514
|
contextWindow: 8192,
|
|
@@ -597,21 +516,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
597
516
|
supportsVision: false,
|
|
598
517
|
|
|
599
518
|
// Metadata
|
|
600
|
-
lastUpdated: "2026-
|
|
519
|
+
lastUpdated: "2026-06-01",
|
|
520
|
+
originalModel: "Qwen3.6 35B A3B (Non-reasoning)",
|
|
601
521
|
},
|
|
602
|
-
"
|
|
603
|
-
// AA Intelligence Index (composite score)
|
|
604
|
-
intelligenceIndex: 33.5,
|
|
605
|
-
normalizedScore: 48,
|
|
606
|
-
|
|
522
|
+
"qwen3.6-plus": {
|
|
607
523
|
// AA specific benchmarks
|
|
608
|
-
codingIndex:
|
|
609
|
-
mathIndex:
|
|
524
|
+
codingIndex: 42.9,
|
|
525
|
+
mathIndex: undefined,
|
|
610
526
|
|
|
611
527
|
// Academic benchmarks
|
|
612
|
-
mmluPro:
|
|
613
|
-
gpqa: 0.
|
|
614
|
-
hle: 0.
|
|
528
|
+
mmluPro: undefined,
|
|
529
|
+
gpqa: 0.882,
|
|
530
|
+
hle: 0.257,
|
|
615
531
|
|
|
616
532
|
// Capabilities
|
|
617
533
|
contextWindow: 8192,
|
|
@@ -619,21 +535,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
619
535
|
supportsVision: false,
|
|
620
536
|
|
|
621
537
|
// Metadata
|
|
622
|
-
lastUpdated: "2026-
|
|
538
|
+
lastUpdated: "2026-06-01",
|
|
539
|
+
originalModel: "Qwen3.6 Plus",
|
|
623
540
|
},
|
|
624
|
-
|
|
625
|
-
// AA Intelligence Index (composite score)
|
|
626
|
-
intelligenceIndex: 30.8,
|
|
627
|
-
normalizedScore: 44,
|
|
628
|
-
|
|
541
|
+
"qwen3-omni-30b-a3b-reasoning": {
|
|
629
542
|
// AA specific benchmarks
|
|
630
|
-
codingIndex:
|
|
631
|
-
mathIndex:
|
|
543
|
+
codingIndex: 12.7,
|
|
544
|
+
mathIndex: 74,
|
|
632
545
|
|
|
633
546
|
// Academic benchmarks
|
|
634
|
-
mmluPro: 0.
|
|
635
|
-
gpqa: 0.
|
|
636
|
-
hle: 0.
|
|
547
|
+
mmluPro: 0.792,
|
|
548
|
+
gpqa: 0.726,
|
|
549
|
+
hle: 0.073,
|
|
637
550
|
|
|
638
551
|
// Capabilities
|
|
639
552
|
contextWindow: 8192,
|
|
@@ -641,21 +554,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
641
554
|
supportsVision: false,
|
|
642
555
|
|
|
643
556
|
// Metadata
|
|
644
|
-
lastUpdated: "2026-
|
|
557
|
+
lastUpdated: "2026-06-01",
|
|
558
|
+
originalModel: "Qwen3 Omni 30B A3B (Reasoning)",
|
|
645
559
|
},
|
|
646
|
-
"
|
|
647
|
-
// AA Intelligence Index (composite score)
|
|
648
|
-
intelligenceIndex: 23.7,
|
|
649
|
-
normalizedScore: 34,
|
|
650
|
-
|
|
560
|
+
"qwen3.5-omni-plus": {
|
|
651
561
|
// AA specific benchmarks
|
|
652
|
-
codingIndex:
|
|
562
|
+
codingIndex: 27.6,
|
|
653
563
|
mathIndex: undefined,
|
|
654
564
|
|
|
655
565
|
// Academic benchmarks
|
|
656
566
|
mmluPro: undefined,
|
|
657
|
-
gpqa:
|
|
658
|
-
hle:
|
|
567
|
+
gpqa: 0.826,
|
|
568
|
+
hle: 0.139,
|
|
659
569
|
|
|
660
570
|
// Capabilities
|
|
661
571
|
contextWindow: 8192,
|
|
@@ -663,21 +573,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
663
573
|
supportsVision: false,
|
|
664
574
|
|
|
665
575
|
// Metadata
|
|
666
|
-
lastUpdated: "2026-
|
|
576
|
+
lastUpdated: "2026-06-01",
|
|
577
|
+
originalModel: "Qwen3.5 Omni Plus",
|
|
667
578
|
},
|
|
668
|
-
"
|
|
669
|
-
// AA Intelligence Index (composite score)
|
|
670
|
-
intelligenceIndex: 20.4,
|
|
671
|
-
normalizedScore: 29,
|
|
672
|
-
|
|
579
|
+
"qwen3-omni-30b-a3b-instruct": {
|
|
673
580
|
// AA specific benchmarks
|
|
674
|
-
codingIndex:
|
|
675
|
-
mathIndex:
|
|
581
|
+
codingIndex: 7.2,
|
|
582
|
+
mathIndex: 52.3,
|
|
676
583
|
|
|
677
584
|
// Academic benchmarks
|
|
678
|
-
mmluPro: 0.
|
|
679
|
-
gpqa: 0.
|
|
680
|
-
hle: 0.
|
|
585
|
+
mmluPro: 0.725,
|
|
586
|
+
gpqa: 0.62,
|
|
587
|
+
hle: 0.051,
|
|
681
588
|
|
|
682
589
|
// Capabilities
|
|
683
590
|
contextWindow: 8192,
|
|
@@ -685,21 +592,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
685
592
|
supportsVision: false,
|
|
686
593
|
|
|
687
594
|
// Metadata
|
|
688
|
-
lastUpdated: "2026-
|
|
595
|
+
lastUpdated: "2026-06-01",
|
|
596
|
+
originalModel: "Qwen3 Omni 30B A3B Instruct",
|
|
689
597
|
},
|
|
690
|
-
"
|
|
691
|
-
// AA Intelligence Index (composite score)
|
|
692
|
-
intelligenceIndex: 18.6,
|
|
693
|
-
normalizedScore: 27,
|
|
694
|
-
|
|
598
|
+
"qwen3-next-80b-a3b-instruct": {
|
|
695
599
|
// AA specific benchmarks
|
|
696
|
-
codingIndex:
|
|
697
|
-
mathIndex:
|
|
600
|
+
codingIndex: 15.3,
|
|
601
|
+
mathIndex: 66.3,
|
|
698
602
|
|
|
699
603
|
// Academic benchmarks
|
|
700
|
-
mmluPro:
|
|
701
|
-
gpqa: 0.
|
|
702
|
-
hle: 0.
|
|
604
|
+
mmluPro: 0.819,
|
|
605
|
+
gpqa: 0.738,
|
|
606
|
+
hle: 0.073,
|
|
703
607
|
|
|
704
608
|
// Capabilities
|
|
705
609
|
contextWindow: 8192,
|
|
@@ -707,21 +611,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
707
611
|
supportsVision: false,
|
|
708
612
|
|
|
709
613
|
// Metadata
|
|
710
|
-
lastUpdated: "2026-
|
|
614
|
+
lastUpdated: "2026-06-01",
|
|
615
|
+
originalModel: "Qwen3 Next 80B A3B Instruct",
|
|
711
616
|
},
|
|
712
|
-
"
|
|
713
|
-
// AA Intelligence Index (composite score)
|
|
714
|
-
intelligenceIndex: 14.5,
|
|
715
|
-
normalizedScore: 21,
|
|
716
|
-
|
|
617
|
+
"qwen3.7-max": {
|
|
717
618
|
// AA specific benchmarks
|
|
718
|
-
codingIndex:
|
|
619
|
+
codingIndex: 50.1,
|
|
719
620
|
mathIndex: undefined,
|
|
720
621
|
|
|
721
622
|
// Academic benchmarks
|
|
722
|
-
mmluPro:
|
|
723
|
-
gpqa: 0.
|
|
724
|
-
hle: 0.
|
|
623
|
+
mmluPro: undefined,
|
|
624
|
+
gpqa: 0.923,
|
|
625
|
+
hle: 0.381,
|
|
725
626
|
|
|
726
627
|
// Capabilities
|
|
727
628
|
contextWindow: 8192,
|
|
@@ -729,43 +630,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
729
630
|
supportsVision: false,
|
|
730
631
|
|
|
731
632
|
// Metadata
|
|
732
|
-
lastUpdated: "2026-
|
|
633
|
+
lastUpdated: "2026-06-01",
|
|
634
|
+
originalModel: "Qwen3.7 Max",
|
|
733
635
|
},
|
|
734
|
-
"
|
|
735
|
-
// AA Intelligence Index (composite score)
|
|
736
|
-
intelligenceIndex: 13.7,
|
|
737
|
-
normalizedScore: 20,
|
|
738
|
-
|
|
636
|
+
"qwen3.5-omni-flash": {
|
|
739
637
|
// AA specific benchmarks
|
|
740
|
-
codingIndex:
|
|
638
|
+
codingIndex: 14,
|
|
741
639
|
mathIndex: undefined,
|
|
742
640
|
|
|
743
641
|
// Academic benchmarks
|
|
744
|
-
mmluPro:
|
|
745
|
-
gpqa:
|
|
746
|
-
hle: 0.
|
|
747
|
-
|
|
748
|
-
// Capabilities
|
|
749
|
-
contextWindow: 8192,
|
|
750
|
-
supportsReasoning: false,
|
|
751
|
-
supportsVision: false,
|
|
752
|
-
|
|
753
|
-
// Metadata
|
|
754
|
-
lastUpdated: "2026-04-06",
|
|
755
|
-
},
|
|
756
|
-
"gpt-4o-nov-24": {
|
|
757
|
-
// AA Intelligence Index (composite score)
|
|
758
|
-
intelligenceIndex: 17.3,
|
|
759
|
-
normalizedScore: 25,
|
|
760
|
-
|
|
761
|
-
// AA specific benchmarks
|
|
762
|
-
codingIndex: 16.7,
|
|
763
|
-
mathIndex: 6,
|
|
764
|
-
|
|
765
|
-
// Academic benchmarks
|
|
766
|
-
mmluPro: 0.748,
|
|
767
|
-
gpqa: 0.543,
|
|
768
|
-
hle: 0.033,
|
|
642
|
+
mmluPro: undefined,
|
|
643
|
+
gpqa: 0.742,
|
|
644
|
+
hle: 0.071,
|
|
769
645
|
|
|
770
646
|
// Capabilities
|
|
771
647
|
contextWindow: 8192,
|
|
@@ -773,21 +649,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
773
649
|
supportsVision: false,
|
|
774
650
|
|
|
775
651
|
// Metadata
|
|
776
|
-
lastUpdated: "2026-
|
|
652
|
+
lastUpdated: "2026-06-01",
|
|
653
|
+
originalModel: "Qwen3.5 Omni Flash",
|
|
777
654
|
},
|
|
778
|
-
"
|
|
779
|
-
// AA Intelligence Index (composite score)
|
|
780
|
-
intelligenceIndex: 12.6,
|
|
781
|
-
normalizedScore: 18,
|
|
782
|
-
|
|
655
|
+
"qwen3.5-35b-a3b-non-reasoning": {
|
|
783
656
|
// AA specific benchmarks
|
|
784
|
-
codingIndex:
|
|
785
|
-
mathIndex:
|
|
657
|
+
codingIndex: 16.8,
|
|
658
|
+
mathIndex: undefined,
|
|
786
659
|
|
|
787
660
|
// Academic benchmarks
|
|
788
|
-
mmluPro:
|
|
789
|
-
gpqa: 0.
|
|
790
|
-
hle: 0.
|
|
661
|
+
mmluPro: undefined,
|
|
662
|
+
gpqa: 0.819,
|
|
663
|
+
hle: 0.128,
|
|
791
664
|
|
|
792
665
|
// Capabilities
|
|
793
666
|
contextWindow: 8192,
|
|
@@ -795,21 +668,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
795
668
|
supportsVision: false,
|
|
796
669
|
|
|
797
670
|
// Metadata
|
|
798
|
-
lastUpdated: "2026-
|
|
671
|
+
lastUpdated: "2026-06-01",
|
|
672
|
+
originalModel: "Qwen3.5 35B A3B (Non-reasoning)",
|
|
799
673
|
},
|
|
800
|
-
"
|
|
801
|
-
// AA Intelligence Index (composite score)
|
|
802
|
-
intelligenceIndex: 9,
|
|
803
|
-
normalizedScore: 13,
|
|
804
|
-
|
|
674
|
+
"qwen3-coder-next": {
|
|
805
675
|
// AA specific benchmarks
|
|
806
|
-
codingIndex:
|
|
676
|
+
codingIndex: 22.9,
|
|
807
677
|
mathIndex: undefined,
|
|
808
678
|
|
|
809
679
|
// Academic benchmarks
|
|
810
|
-
mmluPro:
|
|
811
|
-
gpqa: 0.
|
|
812
|
-
hle:
|
|
680
|
+
mmluPro: undefined,
|
|
681
|
+
gpqa: 0.737,
|
|
682
|
+
hle: 0.093,
|
|
813
683
|
|
|
814
684
|
// Capabilities
|
|
815
685
|
contextWindow: 8192,
|
|
@@ -817,21 +687,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
817
687
|
supportsVision: false,
|
|
818
688
|
|
|
819
689
|
// Metadata
|
|
820
|
-
lastUpdated: "2026-
|
|
690
|
+
lastUpdated: "2026-06-01",
|
|
691
|
+
originalModel: "Qwen3 Coder Next",
|
|
821
692
|
},
|
|
822
|
-
"
|
|
823
|
-
// AA Intelligence Index (composite score)
|
|
824
|
-
intelligenceIndex: 38.9,
|
|
825
|
-
normalizedScore: 56,
|
|
826
|
-
|
|
693
|
+
"qwen3.5-4b-reasoning": {
|
|
827
694
|
// AA specific benchmarks
|
|
828
|
-
codingIndex:
|
|
829
|
-
mathIndex:
|
|
695
|
+
codingIndex: 17.5,
|
|
696
|
+
mathIndex: undefined,
|
|
830
697
|
|
|
831
698
|
// Academic benchmarks
|
|
832
|
-
mmluPro:
|
|
833
|
-
gpqa: 0.
|
|
834
|
-
hle: 0.
|
|
699
|
+
mmluPro: undefined,
|
|
700
|
+
gpqa: 0.771,
|
|
701
|
+
hle: 0.078,
|
|
835
702
|
|
|
836
703
|
// Capabilities
|
|
837
704
|
contextWindow: 8192,
|
|
@@ -839,21 +706,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
839
706
|
supportsVision: false,
|
|
840
707
|
|
|
841
708
|
// Metadata
|
|
842
|
-
lastUpdated: "2026-
|
|
709
|
+
lastUpdated: "2026-06-01",
|
|
710
|
+
originalModel: "Qwen3.5 4B (Reasoning)",
|
|
843
711
|
},
|
|
844
|
-
"
|
|
845
|
-
// AA Intelligence Index (composite score)
|
|
846
|
-
intelligenceIndex: 41.2,
|
|
847
|
-
normalizedScore: 59,
|
|
848
|
-
|
|
712
|
+
"qwen3.5-2b-reasoning": {
|
|
849
713
|
// AA specific benchmarks
|
|
850
|
-
codingIndex:
|
|
851
|
-
mathIndex:
|
|
714
|
+
codingIndex: 3.5,
|
|
715
|
+
mathIndex: undefined,
|
|
852
716
|
|
|
853
717
|
// Academic benchmarks
|
|
854
|
-
mmluPro:
|
|
855
|
-
gpqa: 0.
|
|
856
|
-
hle: 0.
|
|
718
|
+
mmluPro: undefined,
|
|
719
|
+
gpqa: 0.456,
|
|
720
|
+
hle: 0.021,
|
|
857
721
|
|
|
858
722
|
// Capabilities
|
|
859
723
|
contextWindow: 8192,
|
|
@@ -861,21 +725,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
861
725
|
supportsVision: false,
|
|
862
726
|
|
|
863
727
|
// Metadata
|
|
864
|
-
lastUpdated: "2026-
|
|
728
|
+
lastUpdated: "2026-06-01",
|
|
729
|
+
originalModel: "Qwen3.5 2B (Reasoning)",
|
|
865
730
|
},
|
|
866
|
-
"
|
|
867
|
-
// AA Intelligence Index (composite score)
|
|
868
|
-
intelligenceIndex: 43.1,
|
|
869
|
-
normalizedScore: 62,
|
|
870
|
-
|
|
731
|
+
"qwen3.5-0.8b-non-reasoning": {
|
|
871
732
|
// AA specific benchmarks
|
|
872
|
-
codingIndex:
|
|
873
|
-
mathIndex:
|
|
733
|
+
codingIndex: 1,
|
|
734
|
+
mathIndex: undefined,
|
|
874
735
|
|
|
875
736
|
// Academic benchmarks
|
|
876
|
-
mmluPro:
|
|
877
|
-
gpqa: 0.
|
|
878
|
-
hle: 0.
|
|
737
|
+
mmluPro: undefined,
|
|
738
|
+
gpqa: 0.236,
|
|
739
|
+
hle: 0.049,
|
|
879
740
|
|
|
880
741
|
// Capabilities
|
|
881
742
|
contextWindow: 8192,
|
|
@@ -883,21 +744,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
883
744
|
supportsVision: false,
|
|
884
745
|
|
|
885
746
|
// Metadata
|
|
886
|
-
lastUpdated: "2026-
|
|
747
|
+
lastUpdated: "2026-06-01",
|
|
748
|
+
originalModel: "Qwen3.5 0.8B (Non-reasoning)",
|
|
887
749
|
},
|
|
888
|
-
"
|
|
889
|
-
// AA Intelligence Index (composite score)
|
|
890
|
-
intelligenceIndex: 23.9,
|
|
891
|
-
normalizedScore: 34,
|
|
892
|
-
|
|
750
|
+
"qwen3.5-4b-non-reasoning": {
|
|
893
751
|
// AA specific benchmarks
|
|
894
|
-
codingIndex:
|
|
895
|
-
mathIndex:
|
|
752
|
+
codingIndex: 13.7,
|
|
753
|
+
mathIndex: undefined,
|
|
896
754
|
|
|
897
755
|
// Academic benchmarks
|
|
898
|
-
mmluPro:
|
|
899
|
-
gpqa: 0.
|
|
900
|
-
hle: 0.
|
|
756
|
+
mmluPro: undefined,
|
|
757
|
+
gpqa: 0.712,
|
|
758
|
+
hle: 0.075,
|
|
901
759
|
|
|
902
760
|
// Capabilities
|
|
903
761
|
contextWindow: 8192,
|
|
@@ -905,21 +763,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
905
763
|
supportsVision: false,
|
|
906
764
|
|
|
907
765
|
// Metadata
|
|
908
|
-
lastUpdated: "2026-
|
|
766
|
+
lastUpdated: "2026-06-01",
|
|
767
|
+
originalModel: "Qwen3.5 4B (Non-reasoning)",
|
|
909
768
|
},
|
|
910
|
-
"
|
|
911
|
-
// AA Intelligence Index (composite score)
|
|
912
|
-
intelligenceIndex: 25.8,
|
|
913
|
-
normalizedScore: 37,
|
|
914
|
-
|
|
769
|
+
"ring-flash-2.0": {
|
|
915
770
|
// AA specific benchmarks
|
|
916
|
-
codingIndex:
|
|
917
|
-
mathIndex:
|
|
771
|
+
codingIndex: 10.6,
|
|
772
|
+
mathIndex: 83.7,
|
|
918
773
|
|
|
919
774
|
// Academic benchmarks
|
|
920
|
-
mmluPro:
|
|
921
|
-
gpqa:
|
|
922
|
-
hle:
|
|
775
|
+
mmluPro: 0.793,
|
|
776
|
+
gpqa: 0.725,
|
|
777
|
+
hle: 0.089,
|
|
923
778
|
|
|
924
779
|
// Capabilities
|
|
925
780
|
contextWindow: 8192,
|
|
@@ -927,21 +782,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
927
782
|
supportsVision: false,
|
|
928
783
|
|
|
929
784
|
// Metadata
|
|
930
|
-
lastUpdated: "2026-
|
|
785
|
+
lastUpdated: "2026-06-01",
|
|
786
|
+
originalModel: "Ring-flash-2.0",
|
|
931
787
|
},
|
|
932
|
-
"
|
|
933
|
-
// AA Intelligence Index (composite score)
|
|
934
|
-
intelligenceIndex: 22.9,
|
|
935
|
-
normalizedScore: 33,
|
|
936
|
-
|
|
788
|
+
"ling-mini-2.0": {
|
|
937
789
|
// AA specific benchmarks
|
|
938
|
-
codingIndex:
|
|
939
|
-
mathIndex:
|
|
790
|
+
codingIndex: 5,
|
|
791
|
+
mathIndex: 49.3,
|
|
940
792
|
|
|
941
793
|
// Academic benchmarks
|
|
942
|
-
mmluPro: 0.
|
|
943
|
-
gpqa: 0.
|
|
944
|
-
hle: 0.
|
|
794
|
+
mmluPro: 0.671,
|
|
795
|
+
gpqa: 0.562,
|
|
796
|
+
hle: 0.05,
|
|
945
797
|
|
|
946
798
|
// Capabilities
|
|
947
799
|
contextWindow: 8192,
|
|
@@ -949,21 +801,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
949
801
|
supportsVision: false,
|
|
950
802
|
|
|
951
803
|
// Metadata
|
|
952
|
-
lastUpdated: "2026-
|
|
804
|
+
lastUpdated: "2026-06-01",
|
|
805
|
+
originalModel: "Ling-mini-2.0",
|
|
953
806
|
},
|
|
954
|
-
"
|
|
955
|
-
// AA Intelligence Index (composite score)
|
|
956
|
-
intelligenceIndex: 33.6,
|
|
957
|
-
normalizedScore: 48,
|
|
958
|
-
|
|
807
|
+
"ling-2.6-1t": {
|
|
959
808
|
// AA specific benchmarks
|
|
960
|
-
codingIndex:
|
|
961
|
-
mathIndex:
|
|
809
|
+
codingIndex: 33.1,
|
|
810
|
+
mathIndex: undefined,
|
|
962
811
|
|
|
963
812
|
// Academic benchmarks
|
|
964
|
-
mmluPro:
|
|
965
|
-
gpqa: 0.
|
|
966
|
-
hle: 0.
|
|
813
|
+
mmluPro: undefined,
|
|
814
|
+
gpqa: 0.752,
|
|
815
|
+
hle: 0.082,
|
|
967
816
|
|
|
968
817
|
// Capabilities
|
|
969
818
|
contextWindow: 8192,
|
|
@@ -971,21 +820,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
971
820
|
supportsVision: false,
|
|
972
821
|
|
|
973
822
|
// Metadata
|
|
974
|
-
lastUpdated: "2026-
|
|
823
|
+
lastUpdated: "2026-06-01",
|
|
824
|
+
originalModel: "Ling-2.6-1T",
|
|
975
825
|
},
|
|
976
|
-
"
|
|
977
|
-
// AA Intelligence Index (composite score)
|
|
978
|
-
intelligenceIndex: 33.1,
|
|
979
|
-
normalizedScore: 47,
|
|
980
|
-
|
|
826
|
+
"ling-2.6-flash": {
|
|
981
827
|
// AA specific benchmarks
|
|
982
|
-
codingIndex:
|
|
983
|
-
mathIndex:
|
|
828
|
+
codingIndex: 23.2,
|
|
829
|
+
mathIndex: undefined,
|
|
984
830
|
|
|
985
831
|
// Academic benchmarks
|
|
986
|
-
mmluPro:
|
|
987
|
-
gpqa: 0.
|
|
988
|
-
hle: 0.
|
|
832
|
+
mmluPro: undefined,
|
|
833
|
+
gpqa: 0.593,
|
|
834
|
+
hle: 0.062,
|
|
989
835
|
|
|
990
836
|
// Capabilities
|
|
991
837
|
contextWindow: 8192,
|
|
@@ -993,21 +839,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
993
839
|
supportsVision: false,
|
|
994
840
|
|
|
995
841
|
// Metadata
|
|
996
|
-
lastUpdated: "2026-
|
|
842
|
+
lastUpdated: "2026-06-01",
|
|
843
|
+
originalModel: "Ling 2.6 Flash",
|
|
997
844
|
},
|
|
998
|
-
"
|
|
999
|
-
// AA Intelligence Index (composite score)
|
|
1000
|
-
intelligenceIndex: 51.3,
|
|
1001
|
-
normalizedScore: 73,
|
|
1002
|
-
|
|
845
|
+
"ring-2.6-1t": {
|
|
1003
846
|
// AA specific benchmarks
|
|
1004
|
-
codingIndex:
|
|
1005
|
-
mathIndex:
|
|
847
|
+
codingIndex: 33.3,
|
|
848
|
+
mathIndex: undefined,
|
|
1006
849
|
|
|
1007
850
|
// Academic benchmarks
|
|
1008
|
-
mmluPro:
|
|
1009
|
-
gpqa: 0.
|
|
1010
|
-
hle: 0.
|
|
851
|
+
mmluPro: undefined,
|
|
852
|
+
gpqa: 0.857,
|
|
853
|
+
hle: 0.183,
|
|
1011
854
|
|
|
1012
855
|
// Capabilities
|
|
1013
856
|
contextWindow: 8192,
|
|
@@ -1015,21 +858,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1015
858
|
supportsVision: false,
|
|
1016
859
|
|
|
1017
860
|
// Metadata
|
|
1018
|
-
lastUpdated: "2026-
|
|
861
|
+
lastUpdated: "2026-06-01",
|
|
862
|
+
originalModel: "Ring-2.6-1T",
|
|
1019
863
|
},
|
|
1020
|
-
"
|
|
1021
|
-
// AA Intelligence Index (composite score)
|
|
1022
|
-
intelligenceIndex: 25.2,
|
|
1023
|
-
normalizedScore: 36,
|
|
1024
|
-
|
|
864
|
+
"doubao-seed-code": {
|
|
1025
865
|
// AA specific benchmarks
|
|
1026
|
-
codingIndex:
|
|
1027
|
-
mathIndex:
|
|
866
|
+
codingIndex: 31.3,
|
|
867
|
+
mathIndex: 79.3,
|
|
1028
868
|
|
|
1029
869
|
// Academic benchmarks
|
|
1030
|
-
mmluPro: 0.
|
|
1031
|
-
gpqa: 0.
|
|
1032
|
-
hle: 0.
|
|
870
|
+
mmluPro: 0.854,
|
|
871
|
+
gpqa: 0.764,
|
|
872
|
+
hle: 0.133,
|
|
1033
873
|
|
|
1034
874
|
// Capabilities
|
|
1035
875
|
contextWindow: 8192,
|
|
@@ -1037,21 +877,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1037
877
|
supportsVision: false,
|
|
1038
878
|
|
|
1039
879
|
// Metadata
|
|
1040
|
-
lastUpdated: "2026-
|
|
880
|
+
lastUpdated: "2026-06-01",
|
|
881
|
+
originalModel: "Doubao Seed Code",
|
|
1041
882
|
},
|
|
1042
|
-
"
|
|
1043
|
-
// AA Intelligence Index (composite score)
|
|
1044
|
-
intelligenceIndex: 39.2,
|
|
1045
|
-
normalizedScore: 56,
|
|
1046
|
-
|
|
883
|
+
"o1": {
|
|
1047
884
|
// AA specific benchmarks
|
|
1048
|
-
codingIndex:
|
|
1049
|
-
mathIndex:
|
|
885
|
+
codingIndex: 20.5,
|
|
886
|
+
mathIndex: undefined,
|
|
1050
887
|
|
|
1051
888
|
// Academic benchmarks
|
|
1052
|
-
mmluPro: 0.
|
|
1053
|
-
gpqa: 0.
|
|
1054
|
-
hle: 0.
|
|
889
|
+
mmluPro: 0.841,
|
|
890
|
+
gpqa: 0.747,
|
|
891
|
+
hle: 0.077,
|
|
1055
892
|
|
|
1056
893
|
// Capabilities
|
|
1057
894
|
contextWindow: 8192,
|
|
@@ -1059,21 +896,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1059
896
|
supportsVision: false,
|
|
1060
897
|
|
|
1061
898
|
// Metadata
|
|
1062
|
-
lastUpdated: "2026-
|
|
899
|
+
lastUpdated: "2026-06-01",
|
|
900
|
+
originalModel: "o1",
|
|
1063
901
|
},
|
|
1064
|
-
"
|
|
1065
|
-
// AA Intelligence Index (composite score)
|
|
1066
|
-
intelligenceIndex: 47.7,
|
|
1067
|
-
normalizedScore: 68,
|
|
1068
|
-
|
|
902
|
+
"o1-preview": {
|
|
1069
903
|
// AA specific benchmarks
|
|
1070
|
-
codingIndex:
|
|
1071
|
-
mathIndex:
|
|
904
|
+
codingIndex: 34,
|
|
905
|
+
mathIndex: undefined,
|
|
1072
906
|
|
|
1073
907
|
// Academic benchmarks
|
|
1074
|
-
mmluPro:
|
|
1075
|
-
gpqa:
|
|
1076
|
-
hle:
|
|
908
|
+
mmluPro: undefined,
|
|
909
|
+
gpqa: undefined,
|
|
910
|
+
hle: undefined,
|
|
1077
911
|
|
|
1078
912
|
// Capabilities
|
|
1079
913
|
contextWindow: 8192,
|
|
@@ -1081,21 +915,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1081
915
|
supportsVision: false,
|
|
1082
916
|
|
|
1083
917
|
// Metadata
|
|
1084
|
-
lastUpdated: "2026-
|
|
918
|
+
lastUpdated: "2026-06-01",
|
|
919
|
+
originalModel: "o1-preview",
|
|
1085
920
|
},
|
|
1086
|
-
"
|
|
1087
|
-
// AA Intelligence Index (composite score)
|
|
1088
|
-
intelligenceIndex: 26.3,
|
|
1089
|
-
normalizedScore: 38,
|
|
1090
|
-
|
|
921
|
+
"o1-mini": {
|
|
1091
922
|
// AA specific benchmarks
|
|
1092
|
-
codingIndex:
|
|
1093
|
-
mathIndex:
|
|
923
|
+
codingIndex: undefined,
|
|
924
|
+
mathIndex: undefined,
|
|
1094
925
|
|
|
1095
926
|
// Academic benchmarks
|
|
1096
|
-
mmluPro: 0.
|
|
1097
|
-
gpqa: 0.
|
|
1098
|
-
hle: 0.
|
|
927
|
+
mmluPro: 0.742,
|
|
928
|
+
gpqa: 0.603,
|
|
929
|
+
hle: 0.049,
|
|
1099
930
|
|
|
1100
931
|
// Capabilities
|
|
1101
932
|
contextWindow: 8192,
|
|
@@ -1103,21 +934,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1103
934
|
supportsVision: false,
|
|
1104
935
|
|
|
1105
936
|
// Metadata
|
|
1106
|
-
lastUpdated: "2026-
|
|
937
|
+
lastUpdated: "2026-06-01",
|
|
938
|
+
originalModel: "o1-mini",
|
|
1107
939
|
},
|
|
1108
|
-
"gpt-
|
|
1109
|
-
// AA Intelligence Index (composite score)
|
|
1110
|
-
intelligenceIndex: 49,
|
|
1111
|
-
normalizedScore: 70,
|
|
1112
|
-
|
|
940
|
+
"gpt-4o-aug-24": {
|
|
1113
941
|
// AA specific benchmarks
|
|
1114
|
-
codingIndex:
|
|
942
|
+
codingIndex: 16.6,
|
|
1115
943
|
mathIndex: undefined,
|
|
1116
944
|
|
|
1117
945
|
// Academic benchmarks
|
|
1118
946
|
mmluPro: undefined,
|
|
1119
|
-
gpqa: 0.
|
|
1120
|
-
hle: 0.
|
|
947
|
+
gpqa: 0.521,
|
|
948
|
+
hle: 0.029,
|
|
1121
949
|
|
|
1122
950
|
// Capabilities
|
|
1123
951
|
contextWindow: 8192,
|
|
@@ -1125,21 +953,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1125
953
|
supportsVision: false,
|
|
1126
954
|
|
|
1127
955
|
// Metadata
|
|
1128
|
-
lastUpdated: "2026-
|
|
956
|
+
lastUpdated: "2026-06-01",
|
|
957
|
+
originalModel: "GPT-4o (Aug '24)",
|
|
1129
958
|
},
|
|
1130
|
-
"
|
|
1131
|
-
// AA Intelligence Index (composite score)
|
|
1132
|
-
intelligenceIndex: 25.9,
|
|
1133
|
-
normalizedScore: 37,
|
|
1134
|
-
|
|
959
|
+
"gpt-4o-may-24": {
|
|
1135
960
|
// AA specific benchmarks
|
|
1136
|
-
codingIndex:
|
|
961
|
+
codingIndex: 24.2,
|
|
1137
962
|
mathIndex: undefined,
|
|
1138
963
|
|
|
1139
964
|
// Academic benchmarks
|
|
1140
|
-
mmluPro: 0.
|
|
1141
|
-
gpqa: 0.
|
|
1142
|
-
hle: 0.
|
|
965
|
+
mmluPro: 0.74,
|
|
966
|
+
gpqa: 0.526,
|
|
967
|
+
hle: 0.028,
|
|
1143
968
|
|
|
1144
969
|
// Capabilities
|
|
1145
970
|
contextWindow: 8192,
|
|
@@ -1147,21 +972,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1147
972
|
supportsVision: false,
|
|
1148
973
|
|
|
1149
974
|
// Metadata
|
|
1150
|
-
lastUpdated: "2026-
|
|
975
|
+
lastUpdated: "2026-06-01",
|
|
976
|
+
originalModel: "GPT-4o (May '24)",
|
|
1151
977
|
},
|
|
1152
|
-
"gpt-4
|
|
1153
|
-
// AA Intelligence Index (composite score)
|
|
1154
|
-
intelligenceIndex: 13,
|
|
1155
|
-
normalizedScore: 19,
|
|
1156
|
-
|
|
978
|
+
"gpt-4-turbo": {
|
|
1157
979
|
// AA specific benchmarks
|
|
1158
|
-
codingIndex:
|
|
1159
|
-
mathIndex:
|
|
980
|
+
codingIndex: 21.5,
|
|
981
|
+
mathIndex: undefined,
|
|
1160
982
|
|
|
1161
983
|
// Academic benchmarks
|
|
1162
|
-
mmluPro: 0.
|
|
1163
|
-
gpqa:
|
|
1164
|
-
hle: 0.
|
|
984
|
+
mmluPro: 0.694,
|
|
985
|
+
gpqa: undefined,
|
|
986
|
+
hle: 0.033,
|
|
1165
987
|
|
|
1166
988
|
// Capabilities
|
|
1167
989
|
contextWindow: 8192,
|
|
@@ -1169,21 +991,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1169
991
|
supportsVision: false,
|
|
1170
992
|
|
|
1171
993
|
// Metadata
|
|
1172
|
-
lastUpdated: "2026-
|
|
994
|
+
lastUpdated: "2026-06-01",
|
|
995
|
+
originalModel: "GPT-4 Turbo",
|
|
1173
996
|
},
|
|
1174
|
-
"gpt-
|
|
1175
|
-
// AA Intelligence Index (composite score)
|
|
1176
|
-
intelligenceIndex: 26.8,
|
|
1177
|
-
normalizedScore: 38,
|
|
1178
|
-
|
|
997
|
+
"gpt-4o-nov-24": {
|
|
1179
998
|
// AA specific benchmarks
|
|
1180
|
-
codingIndex:
|
|
1181
|
-
mathIndex:
|
|
999
|
+
codingIndex: 16.7,
|
|
1000
|
+
mathIndex: 6,
|
|
1182
1001
|
|
|
1183
1002
|
// Academic benchmarks
|
|
1184
|
-
mmluPro: 0.
|
|
1185
|
-
gpqa: 0.
|
|
1186
|
-
hle: 0.
|
|
1003
|
+
mmluPro: 0.748,
|
|
1004
|
+
gpqa: 0.543,
|
|
1005
|
+
hle: 0.033,
|
|
1187
1006
|
|
|
1188
1007
|
// Capabilities
|
|
1189
1008
|
contextWindow: 8192,
|
|
@@ -1191,21 +1010,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1191
1010
|
supportsVision: false,
|
|
1192
1011
|
|
|
1193
1012
|
// Metadata
|
|
1194
|
-
lastUpdated: "2026-
|
|
1013
|
+
lastUpdated: "2026-06-01",
|
|
1014
|
+
originalModel: "GPT-4o (Nov '24)",
|
|
1195
1015
|
},
|
|
1196
|
-
"gpt-
|
|
1197
|
-
// AA Intelligence Index (composite score)
|
|
1198
|
-
intelligenceIndex: 38.6,
|
|
1199
|
-
normalizedScore: 55,
|
|
1200
|
-
|
|
1016
|
+
"gpt-4o-mini": {
|
|
1201
1017
|
// AA specific benchmarks
|
|
1202
|
-
codingIndex:
|
|
1203
|
-
mathIndex:
|
|
1018
|
+
codingIndex: undefined,
|
|
1019
|
+
mathIndex: 14.7,
|
|
1204
1020
|
|
|
1205
1021
|
// Academic benchmarks
|
|
1206
|
-
mmluPro: 0.
|
|
1207
|
-
gpqa: 0.
|
|
1208
|
-
hle: 0.
|
|
1022
|
+
mmluPro: 0.648,
|
|
1023
|
+
gpqa: 0.426,
|
|
1024
|
+
hle: 0.04,
|
|
1209
1025
|
|
|
1210
1026
|
// Capabilities
|
|
1211
1027
|
contextWindow: 8192,
|
|
@@ -1213,21 +1029,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1213
1029
|
supportsVision: false,
|
|
1214
1030
|
|
|
1215
1031
|
// Metadata
|
|
1216
|
-
lastUpdated: "2026-
|
|
1032
|
+
lastUpdated: "2026-06-01",
|
|
1033
|
+
originalModel: "GPT-4o mini",
|
|
1217
1034
|
},
|
|
1218
|
-
"gpt-5
|
|
1219
|
-
// AA Intelligence Index (composite score)
|
|
1220
|
-
intelligenceIndex: 46.6,
|
|
1221
|
-
normalizedScore: 67,
|
|
1222
|
-
|
|
1035
|
+
"gpt-3.5-turbo": {
|
|
1223
1036
|
// AA specific benchmarks
|
|
1224
|
-
codingIndex:
|
|
1225
|
-
mathIndex:
|
|
1037
|
+
codingIndex: 10.7,
|
|
1038
|
+
mathIndex: undefined,
|
|
1226
1039
|
|
|
1227
1040
|
// Academic benchmarks
|
|
1228
|
-
mmluPro: 0.
|
|
1229
|
-
gpqa: 0.
|
|
1230
|
-
hle:
|
|
1041
|
+
mmluPro: 0.462,
|
|
1042
|
+
gpqa: 0.297,
|
|
1043
|
+
hle: undefined,
|
|
1231
1044
|
|
|
1232
1045
|
// Capabilities
|
|
1233
1046
|
contextWindow: 8192,
|
|
@@ -1235,21 +1048,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1235
1048
|
supportsVision: false,
|
|
1236
1049
|
|
|
1237
1050
|
// Metadata
|
|
1238
|
-
lastUpdated: "2026-
|
|
1051
|
+
lastUpdated: "2026-06-01",
|
|
1052
|
+
originalModel: "GPT-3.5 Turbo",
|
|
1239
1053
|
},
|
|
1240
|
-
"gpt-
|
|
1241
|
-
// AA Intelligence Index (composite score)
|
|
1242
|
-
intelligenceIndex: 42,
|
|
1243
|
-
normalizedScore: 60,
|
|
1244
|
-
|
|
1054
|
+
"gpt-4.1": {
|
|
1245
1055
|
// AA specific benchmarks
|
|
1246
|
-
codingIndex:
|
|
1247
|
-
mathIndex:
|
|
1056
|
+
codingIndex: 21.8,
|
|
1057
|
+
mathIndex: 34.7,
|
|
1248
1058
|
|
|
1249
1059
|
// Academic benchmarks
|
|
1250
|
-
mmluPro: 0.
|
|
1251
|
-
gpqa: 0.
|
|
1252
|
-
hle: 0.
|
|
1060
|
+
mmluPro: 0.806,
|
|
1061
|
+
gpqa: 0.666,
|
|
1062
|
+
hle: 0.046,
|
|
1253
1063
|
|
|
1254
1064
|
// Capabilities
|
|
1255
1065
|
contextWindow: 8192,
|
|
@@ -1257,21 +1067,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1257
1067
|
supportsVision: false,
|
|
1258
1068
|
|
|
1259
1069
|
// Metadata
|
|
1260
|
-
lastUpdated: "2026-
|
|
1070
|
+
lastUpdated: "2026-06-01",
|
|
1071
|
+
originalModel: "GPT-4.1",
|
|
1261
1072
|
},
|
|
1262
|
-
"gpt-
|
|
1263
|
-
// AA Intelligence Index (composite score)
|
|
1264
|
-
intelligenceIndex: 12.8,
|
|
1265
|
-
normalizedScore: 18,
|
|
1266
|
-
|
|
1073
|
+
"gpt-5-mini-high": {
|
|
1267
1074
|
// AA specific benchmarks
|
|
1268
|
-
codingIndex:
|
|
1269
|
-
mathIndex:
|
|
1075
|
+
codingIndex: 35.3,
|
|
1076
|
+
mathIndex: 90.7,
|
|
1270
1077
|
|
|
1271
1078
|
// Academic benchmarks
|
|
1272
|
-
mmluPro:
|
|
1273
|
-
gpqa:
|
|
1274
|
-
hle:
|
|
1079
|
+
mmluPro: 0.837,
|
|
1080
|
+
gpqa: 0.828,
|
|
1081
|
+
hle: 0.197,
|
|
1275
1082
|
|
|
1276
1083
|
// Capabilities
|
|
1277
1084
|
contextWindow: 8192,
|
|
@@ -1279,21 +1086,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1279
1086
|
supportsVision: false,
|
|
1280
1087
|
|
|
1281
1088
|
// Metadata
|
|
1282
|
-
lastUpdated: "2026-
|
|
1089
|
+
lastUpdated: "2026-06-01",
|
|
1090
|
+
originalModel: "GPT-5 mini (high)",
|
|
1283
1091
|
},
|
|
1284
|
-
"gpt-
|
|
1285
|
-
// AA Intelligence Index (composite score)
|
|
1286
|
-
intelligenceIndex: 18.6,
|
|
1287
|
-
normalizedScore: 27,
|
|
1288
|
-
|
|
1092
|
+
"gpt-5-high": {
|
|
1289
1093
|
// AA specific benchmarks
|
|
1290
|
-
codingIndex:
|
|
1291
|
-
mathIndex:
|
|
1094
|
+
codingIndex: 36,
|
|
1095
|
+
mathIndex: 94.3,
|
|
1292
1096
|
|
|
1293
1097
|
// Academic benchmarks
|
|
1294
|
-
mmluPro: 0.
|
|
1295
|
-
gpqa: 0.
|
|
1296
|
-
hle: 0.
|
|
1098
|
+
mmluPro: 0.871,
|
|
1099
|
+
gpqa: 0.854,
|
|
1100
|
+
hle: 0.265,
|
|
1297
1101
|
|
|
1298
1102
|
// Capabilities
|
|
1299
1103
|
contextWindow: 8192,
|
|
@@ -1301,21 +1105,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1301
1105
|
supportsVision: false,
|
|
1302
1106
|
|
|
1303
1107
|
// Metadata
|
|
1304
|
-
lastUpdated: "2026-
|
|
1108
|
+
lastUpdated: "2026-06-01",
|
|
1109
|
+
originalModel: "GPT-5 (high)",
|
|
1305
1110
|
},
|
|
1306
|
-
"gpt-5-
|
|
1307
|
-
// AA Intelligence Index (composite score)
|
|
1308
|
-
intelligenceIndex: 44.6,
|
|
1309
|
-
normalizedScore: 64,
|
|
1310
|
-
|
|
1111
|
+
"gpt-5.2-xhigh": {
|
|
1311
1112
|
// AA specific benchmarks
|
|
1312
|
-
codingIndex:
|
|
1313
|
-
mathIndex:
|
|
1113
|
+
codingIndex: 48.7,
|
|
1114
|
+
mathIndex: 99,
|
|
1314
1115
|
|
|
1315
1116
|
// Academic benchmarks
|
|
1316
|
-
mmluPro: 0.
|
|
1317
|
-
gpqa: 0.
|
|
1318
|
-
hle: 0.
|
|
1117
|
+
mmluPro: 0.874,
|
|
1118
|
+
gpqa: 0.903,
|
|
1119
|
+
hle: 0.354,
|
|
1319
1120
|
|
|
1320
1121
|
// Capabilities
|
|
1321
1122
|
contextWindow: 8192,
|
|
@@ -1323,21 +1124,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1323
1124
|
supportsVision: false,
|
|
1324
1125
|
|
|
1325
1126
|
// Metadata
|
|
1326
|
-
lastUpdated: "2026-
|
|
1127
|
+
lastUpdated: "2026-06-01",
|
|
1128
|
+
originalModel: "GPT-5.2 (xhigh)",
|
|
1327
1129
|
},
|
|
1328
|
-
"gpt-5-mini-
|
|
1329
|
-
// AA Intelligence Index (composite score)
|
|
1330
|
-
intelligenceIndex: 20.7,
|
|
1331
|
-
normalizedScore: 30,
|
|
1332
|
-
|
|
1130
|
+
"gpt-5.1-codex-mini-high": {
|
|
1333
1131
|
// AA specific benchmarks
|
|
1334
|
-
codingIndex:
|
|
1335
|
-
mathIndex:
|
|
1132
|
+
codingIndex: 36.4,
|
|
1133
|
+
mathIndex: 91.7,
|
|
1336
1134
|
|
|
1337
1135
|
// Academic benchmarks
|
|
1338
|
-
mmluPro: 0.
|
|
1339
|
-
gpqa: 0.
|
|
1340
|
-
hle: 0.
|
|
1136
|
+
mmluPro: 0.82,
|
|
1137
|
+
gpqa: 0.813,
|
|
1138
|
+
hle: 0.169,
|
|
1341
1139
|
|
|
1342
1140
|
// Capabilities
|
|
1343
1141
|
contextWindow: 8192,
|
|
@@ -1345,21 +1143,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1345
1143
|
supportsVision: false,
|
|
1346
1144
|
|
|
1347
1145
|
// Metadata
|
|
1348
|
-
lastUpdated: "2026-
|
|
1146
|
+
lastUpdated: "2026-06-01",
|
|
1147
|
+
originalModel: "GPT-5.1 Codex mini (high)",
|
|
1349
1148
|
},
|
|
1350
|
-
"gpt-5-
|
|
1351
|
-
// AA Intelligence Index (composite score)
|
|
1352
|
-
intelligenceIndex: 13.8,
|
|
1353
|
-
normalizedScore: 20,
|
|
1354
|
-
|
|
1149
|
+
"gpt-5.1-high": {
|
|
1355
1150
|
// AA specific benchmarks
|
|
1356
|
-
codingIndex:
|
|
1357
|
-
mathIndex:
|
|
1151
|
+
codingIndex: 44.7,
|
|
1152
|
+
mathIndex: 94,
|
|
1358
1153
|
|
|
1359
1154
|
// Academic benchmarks
|
|
1360
|
-
mmluPro: 0.
|
|
1361
|
-
gpqa: 0.
|
|
1362
|
-
hle: 0.
|
|
1155
|
+
mmluPro: 0.87,
|
|
1156
|
+
gpqa: 0.873,
|
|
1157
|
+
hle: 0.265,
|
|
1363
1158
|
|
|
1364
1159
|
// Capabilities
|
|
1365
1160
|
contextWindow: 8192,
|
|
@@ -1367,21 +1162,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1367
1162
|
supportsVision: false,
|
|
1368
1163
|
|
|
1369
1164
|
// Metadata
|
|
1370
|
-
lastUpdated: "2026-
|
|
1165
|
+
lastUpdated: "2026-06-01",
|
|
1166
|
+
originalModel: "GPT-5.1 (high)",
|
|
1371
1167
|
},
|
|
1372
|
-
"gpt-5-
|
|
1373
|
-
// AA Intelligence Index (composite score)
|
|
1374
|
-
intelligenceIndex: 44.6,
|
|
1375
|
-
normalizedScore: 64,
|
|
1376
|
-
|
|
1168
|
+
"gpt-5-nano-high": {
|
|
1377
1169
|
// AA specific benchmarks
|
|
1378
|
-
codingIndex:
|
|
1379
|
-
mathIndex:
|
|
1170
|
+
codingIndex: 20.3,
|
|
1171
|
+
mathIndex: 83.7,
|
|
1380
1172
|
|
|
1381
1173
|
// Academic benchmarks
|
|
1382
|
-
mmluPro: 0.
|
|
1383
|
-
gpqa: 0.
|
|
1384
|
-
hle: 0.
|
|
1174
|
+
mmluPro: 0.78,
|
|
1175
|
+
gpqa: 0.676,
|
|
1176
|
+
hle: 0.082,
|
|
1385
1177
|
|
|
1386
1178
|
// Capabilities
|
|
1387
1179
|
contextWindow: 8192,
|
|
@@ -1389,21 +1181,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1389
1181
|
supportsVision: false,
|
|
1390
1182
|
|
|
1391
1183
|
// Metadata
|
|
1392
|
-
lastUpdated: "2026-
|
|
1184
|
+
lastUpdated: "2026-06-01",
|
|
1185
|
+
originalModel: "GPT-5 nano (high)",
|
|
1393
1186
|
},
|
|
1394
|
-
"gpt-5-
|
|
1395
|
-
// AA Intelligence Index (composite score)
|
|
1396
|
-
intelligenceIndex: 25.9,
|
|
1397
|
-
normalizedScore: 37,
|
|
1398
|
-
|
|
1187
|
+
"gpt-5.1-non-reasoning": {
|
|
1399
1188
|
// AA specific benchmarks
|
|
1400
|
-
codingIndex:
|
|
1401
|
-
mathIndex:
|
|
1189
|
+
codingIndex: 27.3,
|
|
1190
|
+
mathIndex: 38,
|
|
1402
1191
|
|
|
1403
1192
|
// Academic benchmarks
|
|
1404
|
-
mmluPro: 0.
|
|
1405
|
-
gpqa: 0.
|
|
1406
|
-
hle: 0.
|
|
1193
|
+
mmluPro: 0.801,
|
|
1194
|
+
gpqa: 0.643,
|
|
1195
|
+
hle: 0.052,
|
|
1407
1196
|
|
|
1408
1197
|
// Capabilities
|
|
1409
1198
|
contextWindow: 8192,
|
|
@@ -1411,21 +1200,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1411
1200
|
supportsVision: false,
|
|
1412
1201
|
|
|
1413
1202
|
// Metadata
|
|
1414
|
-
lastUpdated: "2026-
|
|
1203
|
+
lastUpdated: "2026-06-01",
|
|
1204
|
+
originalModel: "GPT-5.1 (Non-reasoning)",
|
|
1415
1205
|
},
|
|
1416
|
-
"
|
|
1417
|
-
// AA Intelligence Index (composite score)
|
|
1418
|
-
intelligenceIndex: 14.1,
|
|
1419
|
-
normalizedScore: 20,
|
|
1420
|
-
|
|
1206
|
+
"o3-pro": {
|
|
1421
1207
|
// AA specific benchmarks
|
|
1422
1208
|
codingIndex: undefined,
|
|
1423
1209
|
mathIndex: undefined,
|
|
1424
1210
|
|
|
1425
1211
|
// Academic benchmarks
|
|
1426
|
-
mmluPro:
|
|
1427
|
-
gpqa: 0.
|
|
1428
|
-
hle:
|
|
1212
|
+
mmluPro: undefined,
|
|
1213
|
+
gpqa: 0.845,
|
|
1214
|
+
hle: undefined,
|
|
1429
1215
|
|
|
1430
1216
|
// Capabilities
|
|
1431
1217
|
contextWindow: 8192,
|
|
@@ -1433,21 +1219,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1433
1219
|
supportsVision: false,
|
|
1434
1220
|
|
|
1435
1221
|
// Metadata
|
|
1436
|
-
lastUpdated: "2026-
|
|
1222
|
+
lastUpdated: "2026-06-01",
|
|
1223
|
+
originalModel: "o3-pro",
|
|
1437
1224
|
},
|
|
1438
|
-
"
|
|
1439
|
-
// AA Intelligence Index (composite score)
|
|
1440
|
-
intelligenceIndex: 40.7,
|
|
1441
|
-
normalizedScore: 58,
|
|
1442
|
-
|
|
1225
|
+
"gpt-5-minimal": {
|
|
1443
1226
|
// AA specific benchmarks
|
|
1444
|
-
codingIndex:
|
|
1445
|
-
mathIndex:
|
|
1227
|
+
codingIndex: 25,
|
|
1228
|
+
mathIndex: 31.7,
|
|
1446
1229
|
|
|
1447
1230
|
// Academic benchmarks
|
|
1448
|
-
mmluPro:
|
|
1449
|
-
gpqa: 0.
|
|
1450
|
-
hle:
|
|
1231
|
+
mmluPro: 0.806,
|
|
1232
|
+
gpqa: 0.673,
|
|
1233
|
+
hle: 0.054,
|
|
1451
1234
|
|
|
1452
1235
|
// Capabilities
|
|
1453
1236
|
contextWindow: 8192,
|
|
@@ -1455,21 +1238,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1455
1238
|
supportsVision: false,
|
|
1456
1239
|
|
|
1457
1240
|
// Metadata
|
|
1458
|
-
lastUpdated: "2026-
|
|
1241
|
+
lastUpdated: "2026-06-01",
|
|
1242
|
+
originalModel: "GPT-5 (minimal)",
|
|
1459
1243
|
},
|
|
1460
|
-
"gpt-5.
|
|
1461
|
-
// AA Intelligence Index (composite score)
|
|
1462
|
-
intelligenceIndex: 27.4,
|
|
1463
|
-
normalizedScore: 39,
|
|
1464
|
-
|
|
1244
|
+
"gpt-5.2-codex-xhigh": {
|
|
1465
1245
|
// AA specific benchmarks
|
|
1466
|
-
codingIndex:
|
|
1467
|
-
mathIndex:
|
|
1246
|
+
codingIndex: 43,
|
|
1247
|
+
mathIndex: undefined,
|
|
1468
1248
|
|
|
1469
1249
|
// Academic benchmarks
|
|
1470
|
-
mmluPro:
|
|
1471
|
-
gpqa: 0.
|
|
1472
|
-
hle: 0.
|
|
1250
|
+
mmluPro: undefined,
|
|
1251
|
+
gpqa: 0.899,
|
|
1252
|
+
hle: 0.335,
|
|
1473
1253
|
|
|
1474
1254
|
// Capabilities
|
|
1475
1255
|
contextWindow: 8192,
|
|
@@ -1477,21 +1257,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1477
1257
|
supportsVision: false,
|
|
1478
1258
|
|
|
1479
1259
|
// Metadata
|
|
1480
|
-
lastUpdated: "2026-
|
|
1260
|
+
lastUpdated: "2026-06-01",
|
|
1261
|
+
originalModel: "GPT-5.2 Codex (xhigh)",
|
|
1481
1262
|
},
|
|
1482
|
-
"gpt-5-
|
|
1483
|
-
// AA Intelligence Index (composite score)
|
|
1484
|
-
intelligenceIndex: 21.8,
|
|
1485
|
-
normalizedScore: 31,
|
|
1486
|
-
|
|
1263
|
+
"gpt-5.1-codex-high": {
|
|
1487
1264
|
// AA specific benchmarks
|
|
1488
|
-
codingIndex:
|
|
1489
|
-
mathIndex:
|
|
1265
|
+
codingIndex: 36.6,
|
|
1266
|
+
mathIndex: 95.7,
|
|
1490
1267
|
|
|
1491
1268
|
// Academic benchmarks
|
|
1492
|
-
mmluPro: 0.
|
|
1493
|
-
gpqa: 0.
|
|
1494
|
-
hle: 0.
|
|
1269
|
+
mmluPro: 0.86,
|
|
1270
|
+
gpqa: 0.86,
|
|
1271
|
+
hle: 0.234,
|
|
1495
1272
|
|
|
1496
1273
|
// Capabilities
|
|
1497
1274
|
contextWindow: 8192,
|
|
@@ -1499,21 +1276,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1499
1276
|
supportsVision: false,
|
|
1500
1277
|
|
|
1501
1278
|
// Metadata
|
|
1502
|
-
lastUpdated: "2026-
|
|
1279
|
+
lastUpdated: "2026-06-01",
|
|
1280
|
+
originalModel: "GPT-5.1 Codex (high)",
|
|
1503
1281
|
},
|
|
1504
|
-
"gpt-
|
|
1505
|
-
// AA Intelligence Index (composite score)
|
|
1506
|
-
intelligenceIndex: 20,
|
|
1507
|
-
normalizedScore: 29,
|
|
1508
|
-
|
|
1282
|
+
"gpt-5-nano-minimal": {
|
|
1509
1283
|
// AA specific benchmarks
|
|
1510
|
-
codingIndex:
|
|
1511
|
-
mathIndex:
|
|
1284
|
+
codingIndex: 14.2,
|
|
1285
|
+
mathIndex: 27.3,
|
|
1512
1286
|
|
|
1513
1287
|
// Academic benchmarks
|
|
1514
|
-
mmluPro:
|
|
1515
|
-
gpqa:
|
|
1516
|
-
hle:
|
|
1288
|
+
mmluPro: 0.556,
|
|
1289
|
+
gpqa: 0.428,
|
|
1290
|
+
hle: 0.041,
|
|
1517
1291
|
|
|
1518
1292
|
// Capabilities
|
|
1519
1293
|
contextWindow: 8192,
|
|
@@ -1521,21 +1295,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1521
1295
|
supportsVision: false,
|
|
1522
1296
|
|
|
1523
1297
|
// Metadata
|
|
1524
|
-
lastUpdated: "2026-
|
|
1298
|
+
lastUpdated: "2026-06-01",
|
|
1299
|
+
originalModel: "GPT-5 nano (minimal)",
|
|
1525
1300
|
},
|
|
1526
|
-
"
|
|
1527
|
-
// AA Intelligence Index (composite score)
|
|
1528
|
-
intelligenceIndex: 12.5,
|
|
1529
|
-
normalizedScore: 18,
|
|
1530
|
-
|
|
1301
|
+
"o3-mini-high": {
|
|
1531
1302
|
// AA specific benchmarks
|
|
1532
|
-
codingIndex:
|
|
1533
|
-
mathIndex:
|
|
1303
|
+
codingIndex: 17.3,
|
|
1304
|
+
mathIndex: undefined,
|
|
1534
1305
|
|
|
1535
1306
|
// Academic benchmarks
|
|
1536
|
-
mmluPro: 0.
|
|
1537
|
-
gpqa: 0.
|
|
1538
|
-
hle: 0.
|
|
1307
|
+
mmluPro: 0.802,
|
|
1308
|
+
gpqa: 0.773,
|
|
1309
|
+
hle: 0.123,
|
|
1539
1310
|
|
|
1540
1311
|
// Capabilities
|
|
1541
1312
|
contextWindow: 8192,
|
|
@@ -1543,21 +1314,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1543
1314
|
supportsVision: false,
|
|
1544
1315
|
|
|
1545
1316
|
// Metadata
|
|
1546
|
-
lastUpdated: "2026-
|
|
1317
|
+
lastUpdated: "2026-06-01",
|
|
1318
|
+
originalModel: "o3-mini (high)",
|
|
1547
1319
|
},
|
|
1548
|
-
"
|
|
1549
|
-
// AA Intelligence Index (composite score)
|
|
1550
|
-
intelligenceIndex: 11.8,
|
|
1551
|
-
normalizedScore: 17,
|
|
1552
|
-
|
|
1320
|
+
"gpt-5.4-non-reasoning": {
|
|
1553
1321
|
// AA specific benchmarks
|
|
1554
|
-
codingIndex:
|
|
1555
|
-
mathIndex:
|
|
1322
|
+
codingIndex: 41,
|
|
1323
|
+
mathIndex: undefined,
|
|
1556
1324
|
|
|
1557
1325
|
// Academic benchmarks
|
|
1558
|
-
mmluPro:
|
|
1559
|
-
gpqa: 0.
|
|
1560
|
-
hle: 0.
|
|
1326
|
+
mmluPro: undefined,
|
|
1327
|
+
gpqa: 0.748,
|
|
1328
|
+
hle: 0.106,
|
|
1561
1329
|
|
|
1562
1330
|
// Capabilities
|
|
1563
1331
|
contextWindow: 8192,
|
|
@@ -1565,21 +1333,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1565
1333
|
supportsVision: false,
|
|
1566
1334
|
|
|
1567
1335
|
// Metadata
|
|
1568
|
-
lastUpdated: "2026-
|
|
1336
|
+
lastUpdated: "2026-06-01",
|
|
1337
|
+
originalModel: "GPT-5.4 (Non-reasoning)",
|
|
1569
1338
|
},
|
|
1570
|
-
"
|
|
1571
|
-
// AA Intelligence Index (composite score)
|
|
1572
|
-
intelligenceIndex: 9.7,
|
|
1573
|
-
normalizedScore: 14,
|
|
1574
|
-
|
|
1339
|
+
"gpt-5.4-low": {
|
|
1575
1340
|
// AA specific benchmarks
|
|
1576
|
-
codingIndex:
|
|
1577
|
-
mathIndex:
|
|
1341
|
+
codingIndex: 45.6,
|
|
1342
|
+
mathIndex: undefined,
|
|
1578
1343
|
|
|
1579
1344
|
// Academic benchmarks
|
|
1580
|
-
mmluPro:
|
|
1581
|
-
gpqa: 0.
|
|
1582
|
-
hle: 0.
|
|
1345
|
+
mmluPro: undefined,
|
|
1346
|
+
gpqa: 0.871,
|
|
1347
|
+
hle: 0.289,
|
|
1583
1348
|
|
|
1584
1349
|
// Capabilities
|
|
1585
1350
|
contextWindow: 8192,
|
|
@@ -1587,21 +1352,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1587
1352
|
supportsVision: false,
|
|
1588
1353
|
|
|
1589
1354
|
// Metadata
|
|
1590
|
-
lastUpdated: "2026-
|
|
1355
|
+
lastUpdated: "2026-06-01",
|
|
1356
|
+
originalModel: "GPT-5.4 (low)",
|
|
1591
1357
|
},
|
|
1592
|
-
"
|
|
1593
|
-
// AA Intelligence Index (composite score)
|
|
1594
|
-
intelligenceIndex: 8.9,
|
|
1595
|
-
normalizedScore: 13,
|
|
1596
|
-
|
|
1358
|
+
"gpt-4o-march-2025-chatgpt-4o-latest": {
|
|
1597
1359
|
// AA specific benchmarks
|
|
1598
|
-
codingIndex:
|
|
1599
|
-
mathIndex:
|
|
1360
|
+
codingIndex: undefined,
|
|
1361
|
+
mathIndex: 25.7,
|
|
1600
1362
|
|
|
1601
1363
|
// Academic benchmarks
|
|
1602
|
-
mmluPro: 0.
|
|
1603
|
-
gpqa: 0.
|
|
1604
|
-
hle: 0.
|
|
1364
|
+
mmluPro: 0.803,
|
|
1365
|
+
gpqa: 0.655,
|
|
1366
|
+
hle: 0.05,
|
|
1605
1367
|
|
|
1606
1368
|
// Capabilities
|
|
1607
1369
|
contextWindow: 8192,
|
|
@@ -1609,21 +1371,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1609
1371
|
supportsVision: false,
|
|
1610
1372
|
|
|
1611
1373
|
// Metadata
|
|
1612
|
-
lastUpdated: "2026-
|
|
1374
|
+
lastUpdated: "2026-06-01",
|
|
1375
|
+
originalModel: "GPT-4o (March 2025, chatgpt-4o-latest)",
|
|
1613
1376
|
},
|
|
1614
|
-
"
|
|
1615
|
-
// AA Intelligence Index (composite score)
|
|
1616
|
-
intelligenceIndex: 6.4,
|
|
1617
|
-
normalizedScore: 9,
|
|
1618
|
-
|
|
1377
|
+
"gpt-4.5-preview": {
|
|
1619
1378
|
// AA specific benchmarks
|
|
1620
|
-
codingIndex:
|
|
1379
|
+
codingIndex: undefined,
|
|
1621
1380
|
mathIndex: undefined,
|
|
1622
1381
|
|
|
1623
1382
|
// Academic benchmarks
|
|
1624
|
-
mmluPro:
|
|
1625
|
-
gpqa:
|
|
1626
|
-
hle:
|
|
1383
|
+
mmluPro: undefined,
|
|
1384
|
+
gpqa: undefined,
|
|
1385
|
+
hle: undefined,
|
|
1627
1386
|
|
|
1628
1387
|
// Capabilities
|
|
1629
1388
|
contextWindow: 8192,
|
|
@@ -1631,21 +1390,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1631
1390
|
supportsVision: false,
|
|
1632
1391
|
|
|
1633
1392
|
// Metadata
|
|
1634
|
-
lastUpdated: "2026-
|
|
1393
|
+
lastUpdated: "2026-06-01",
|
|
1394
|
+
originalModel: "GPT-4.5 (Preview)",
|
|
1635
1395
|
},
|
|
1636
|
-
"
|
|
1637
|
-
// AA Intelligence Index (composite score)
|
|
1638
|
-
intelligenceIndex: 6.3,
|
|
1639
|
-
normalizedScore: 9,
|
|
1640
|
-
|
|
1396
|
+
"gpt-5-codex-high": {
|
|
1641
1397
|
// AA specific benchmarks
|
|
1642
|
-
codingIndex:
|
|
1643
|
-
mathIndex:
|
|
1398
|
+
codingIndex: 38.9,
|
|
1399
|
+
mathIndex: 98.7,
|
|
1644
1400
|
|
|
1645
1401
|
// Academic benchmarks
|
|
1646
|
-
mmluPro: 0.
|
|
1647
|
-
gpqa: 0.
|
|
1648
|
-
hle: 0.
|
|
1402
|
+
mmluPro: 0.865,
|
|
1403
|
+
gpqa: 0.837,
|
|
1404
|
+
hle: 0.256,
|
|
1649
1405
|
|
|
1650
1406
|
// Capabilities
|
|
1651
1407
|
contextWindow: 8192,
|
|
@@ -1653,21 +1409,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1653
1409
|
supportsVision: false,
|
|
1654
1410
|
|
|
1655
1411
|
// Metadata
|
|
1656
|
-
lastUpdated: "2026-
|
|
1412
|
+
lastUpdated: "2026-06-01",
|
|
1413
|
+
originalModel: "GPT-5 Codex (high)",
|
|
1657
1414
|
},
|
|
1658
|
-
"
|
|
1659
|
-
// AA Intelligence Index (composite score)
|
|
1660
|
-
intelligenceIndex: 8.4,
|
|
1661
|
-
normalizedScore: 12,
|
|
1662
|
-
|
|
1415
|
+
"gpt-5-medium": {
|
|
1663
1416
|
// AA specific benchmarks
|
|
1664
|
-
codingIndex:
|
|
1665
|
-
mathIndex:
|
|
1417
|
+
codingIndex: 38.9,
|
|
1418
|
+
mathIndex: 91.7,
|
|
1666
1419
|
|
|
1667
1420
|
// Academic benchmarks
|
|
1668
|
-
mmluPro: 0.
|
|
1669
|
-
gpqa: 0.
|
|
1670
|
-
hle: 0.
|
|
1421
|
+
mmluPro: 0.867,
|
|
1422
|
+
gpqa: 0.842,
|
|
1423
|
+
hle: 0.235,
|
|
1671
1424
|
|
|
1672
1425
|
// Capabilities
|
|
1673
1426
|
contextWindow: 8192,
|
|
@@ -1675,21 +1428,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1675
1428
|
supportsVision: false,
|
|
1676
1429
|
|
|
1677
1430
|
// Metadata
|
|
1678
|
-
lastUpdated: "2026-
|
|
1431
|
+
lastUpdated: "2026-06-01",
|
|
1432
|
+
originalModel: "GPT-5 (medium)",
|
|
1679
1433
|
},
|
|
1680
|
-
"
|
|
1681
|
-
// AA Intelligence Index (composite score)
|
|
1682
|
-
intelligenceIndex: 8.4,
|
|
1683
|
-
normalizedScore: 12,
|
|
1684
|
-
|
|
1434
|
+
"gpt-4": {
|
|
1685
1435
|
// AA specific benchmarks
|
|
1686
|
-
codingIndex:
|
|
1436
|
+
codingIndex: 13.1,
|
|
1687
1437
|
mathIndex: undefined,
|
|
1688
1438
|
|
|
1689
1439
|
// Academic benchmarks
|
|
1690
|
-
mmluPro:
|
|
1691
|
-
gpqa:
|
|
1692
|
-
hle:
|
|
1440
|
+
mmluPro: undefined,
|
|
1441
|
+
gpqa: undefined,
|
|
1442
|
+
hle: undefined,
|
|
1693
1443
|
|
|
1694
1444
|
// Capabilities
|
|
1695
1445
|
contextWindow: 8192,
|
|
@@ -1697,21 +1447,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1697
1447
|
supportsVision: false,
|
|
1698
1448
|
|
|
1699
1449
|
// Metadata
|
|
1700
|
-
lastUpdated: "2026-
|
|
1450
|
+
lastUpdated: "2026-06-01",
|
|
1451
|
+
originalModel: "GPT-4",
|
|
1701
1452
|
},
|
|
1702
|
-
"
|
|
1703
|
-
// AA Intelligence Index (composite score)
|
|
1704
|
-
intelligenceIndex: 9.7,
|
|
1705
|
-
normalizedScore: 14,
|
|
1706
|
-
|
|
1453
|
+
"gpt-5.2-non-reasoning": {
|
|
1707
1454
|
// AA specific benchmarks
|
|
1708
|
-
codingIndex:
|
|
1709
|
-
mathIndex:
|
|
1455
|
+
codingIndex: 34.7,
|
|
1456
|
+
mathIndex: 51,
|
|
1710
1457
|
|
|
1711
1458
|
// Academic benchmarks
|
|
1712
|
-
mmluPro: 0.
|
|
1713
|
-
gpqa: 0.
|
|
1714
|
-
hle: 0.
|
|
1459
|
+
mmluPro: 0.814,
|
|
1460
|
+
gpqa: 0.712,
|
|
1461
|
+
hle: 0.073,
|
|
1715
1462
|
|
|
1716
1463
|
// Capabilities
|
|
1717
1464
|
contextWindow: 8192,
|
|
@@ -1719,21 +1466,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1719
1466
|
supportsVision: false,
|
|
1720
1467
|
|
|
1721
1468
|
// Metadata
|
|
1722
|
-
lastUpdated: "2026-
|
|
1469
|
+
lastUpdated: "2026-06-01",
|
|
1470
|
+
originalModel: "GPT-5.2 (Non-reasoning)",
|
|
1723
1471
|
},
|
|
1724
|
-
"
|
|
1725
|
-
// AA Intelligence Index (composite score)
|
|
1726
|
-
intelligenceIndex: 18.1,
|
|
1727
|
-
normalizedScore: 26,
|
|
1728
|
-
|
|
1472
|
+
"gpt-4.1-nano": {
|
|
1729
1473
|
// AA specific benchmarks
|
|
1730
|
-
codingIndex:
|
|
1731
|
-
mathIndex:
|
|
1474
|
+
codingIndex: 11.2,
|
|
1475
|
+
mathIndex: 24,
|
|
1732
1476
|
|
|
1733
1477
|
// Academic benchmarks
|
|
1734
|
-
mmluPro: 0.
|
|
1735
|
-
gpqa: 0.
|
|
1736
|
-
hle: 0.
|
|
1478
|
+
mmluPro: 0.657,
|
|
1479
|
+
gpqa: 0.512,
|
|
1480
|
+
hle: 0.039,
|
|
1737
1481
|
|
|
1738
1482
|
// Capabilities
|
|
1739
1483
|
contextWindow: 8192,
|
|
@@ -1741,21 +1485,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1741
1485
|
supportsVision: false,
|
|
1742
1486
|
|
|
1743
1487
|
// Metadata
|
|
1744
|
-
lastUpdated: "2026-
|
|
1488
|
+
lastUpdated: "2026-06-01",
|
|
1489
|
+
originalModel: "GPT-4.1 nano",
|
|
1745
1490
|
},
|
|
1746
|
-
"
|
|
1747
|
-
// AA Intelligence Index (composite score)
|
|
1748
|
-
intelligenceIndex: 16.8,
|
|
1749
|
-
normalizedScore: 24,
|
|
1750
|
-
|
|
1491
|
+
"gpt-5-low": {
|
|
1751
1492
|
// AA specific benchmarks
|
|
1752
|
-
codingIndex:
|
|
1753
|
-
mathIndex:
|
|
1493
|
+
codingIndex: 30.7,
|
|
1494
|
+
mathIndex: 83,
|
|
1754
1495
|
|
|
1755
1496
|
// Academic benchmarks
|
|
1756
|
-
mmluPro: 0.
|
|
1757
|
-
gpqa: 0.
|
|
1758
|
-
hle: 0.
|
|
1497
|
+
mmluPro: 0.86,
|
|
1498
|
+
gpqa: 0.808,
|
|
1499
|
+
hle: 0.184,
|
|
1759
1500
|
|
|
1760
1501
|
// Capabilities
|
|
1761
1502
|
contextWindow: 8192,
|
|
@@ -1763,21 +1504,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1763
1504
|
supportsVision: false,
|
|
1764
1505
|
|
|
1765
1506
|
// Metadata
|
|
1766
|
-
lastUpdated: "2026-
|
|
1507
|
+
lastUpdated: "2026-06-01",
|
|
1508
|
+
originalModel: "GPT-5 (low)",
|
|
1767
1509
|
},
|
|
1768
|
-
"
|
|
1769
|
-
// AA Intelligence Index (composite score)
|
|
1770
|
-
intelligenceIndex: 16,
|
|
1771
|
-
normalizedScore: 23,
|
|
1772
|
-
|
|
1510
|
+
"o4-mini-high": {
|
|
1773
1511
|
// AA specific benchmarks
|
|
1774
|
-
codingIndex:
|
|
1775
|
-
mathIndex:
|
|
1512
|
+
codingIndex: 25.6,
|
|
1513
|
+
mathIndex: 90.7,
|
|
1776
1514
|
|
|
1777
1515
|
// Academic benchmarks
|
|
1778
|
-
mmluPro: 0.
|
|
1779
|
-
gpqa: 0.
|
|
1780
|
-
hle: 0.
|
|
1516
|
+
mmluPro: 0.832,
|
|
1517
|
+
gpqa: 0.784,
|
|
1518
|
+
hle: 0.175,
|
|
1781
1519
|
|
|
1782
1520
|
// Capabilities
|
|
1783
1521
|
contextWindow: 8192,
|
|
@@ -1785,21 +1523,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1785
1523
|
supportsVision: false,
|
|
1786
1524
|
|
|
1787
1525
|
// Metadata
|
|
1788
|
-
lastUpdated: "2026-
|
|
1526
|
+
lastUpdated: "2026-06-01",
|
|
1527
|
+
originalModel: "o4-mini (high)",
|
|
1789
1528
|
},
|
|
1790
|
-
"
|
|
1791
|
-
// AA Intelligence Index (composite score)
|
|
1792
|
-
intelligenceIndex: 14.5,
|
|
1793
|
-
normalizedScore: 21,
|
|
1794
|
-
|
|
1529
|
+
"gpt-5.2-medium": {
|
|
1795
1530
|
// AA specific benchmarks
|
|
1796
|
-
codingIndex:
|
|
1797
|
-
mathIndex:
|
|
1531
|
+
codingIndex: 44.2,
|
|
1532
|
+
mathIndex: 96.7,
|
|
1798
1533
|
|
|
1799
1534
|
// Academic benchmarks
|
|
1800
|
-
mmluPro:
|
|
1801
|
-
gpqa: 0.
|
|
1802
|
-
hle: 0.
|
|
1535
|
+
mmluPro: 0.859,
|
|
1536
|
+
gpqa: 0.864,
|
|
1537
|
+
hle: 0.249,
|
|
1803
1538
|
|
|
1804
1539
|
// Capabilities
|
|
1805
1540
|
contextWindow: 8192,
|
|
@@ -1807,21 +1542,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1807
1542
|
supportsVision: false,
|
|
1808
1543
|
|
|
1809
1544
|
// Metadata
|
|
1810
|
-
lastUpdated: "2026-
|
|
1545
|
+
lastUpdated: "2026-06-01",
|
|
1546
|
+
originalModel: "GPT-5.2 (medium)",
|
|
1811
1547
|
},
|
|
1812
|
-
"
|
|
1813
|
-
// AA Intelligence Index (composite score)
|
|
1814
|
-
intelligenceIndex: 18.5,
|
|
1815
|
-
normalizedScore: 26,
|
|
1816
|
-
|
|
1548
|
+
"gpt-5-nano-medium": {
|
|
1817
1549
|
// AA specific benchmarks
|
|
1818
|
-
codingIndex:
|
|
1819
|
-
mathIndex:
|
|
1550
|
+
codingIndex: 22.9,
|
|
1551
|
+
mathIndex: 78.3,
|
|
1820
1552
|
|
|
1821
1553
|
// Academic benchmarks
|
|
1822
|
-
mmluPro: 0.
|
|
1823
|
-
gpqa: 0.
|
|
1824
|
-
hle: 0.
|
|
1554
|
+
mmluPro: 0.772,
|
|
1555
|
+
gpqa: 0.67,
|
|
1556
|
+
hle: 0.076,
|
|
1825
1557
|
|
|
1826
1558
|
// Capabilities
|
|
1827
1559
|
contextWindow: 8192,
|
|
@@ -1829,21 +1561,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1829
1561
|
supportsVision: false,
|
|
1830
1562
|
|
|
1831
1563
|
// Metadata
|
|
1832
|
-
lastUpdated: "2026-
|
|
1564
|
+
lastUpdated: "2026-06-01",
|
|
1565
|
+
originalModel: "GPT-5 nano (medium)",
|
|
1833
1566
|
},
|
|
1834
|
-
"
|
|
1835
|
-
// AA Intelligence Index (composite score)
|
|
1836
|
-
intelligenceIndex: 13.8,
|
|
1837
|
-
normalizedScore: 20,
|
|
1838
|
-
|
|
1567
|
+
"gpt-4.1-mini": {
|
|
1839
1568
|
// AA specific benchmarks
|
|
1840
|
-
codingIndex:
|
|
1841
|
-
mathIndex:
|
|
1569
|
+
codingIndex: 18.5,
|
|
1570
|
+
mathIndex: 46.3,
|
|
1842
1571
|
|
|
1843
1572
|
// Academic benchmarks
|
|
1844
|
-
mmluPro: 0.
|
|
1845
|
-
gpqa: 0.
|
|
1846
|
-
hle: 0.
|
|
1573
|
+
mmluPro: 0.781,
|
|
1574
|
+
gpqa: 0.664,
|
|
1575
|
+
hle: 0.046,
|
|
1847
1576
|
|
|
1848
1577
|
// Capabilities
|
|
1849
1578
|
contextWindow: 8192,
|
|
@@ -1851,21 +1580,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1851
1580
|
supportsVision: false,
|
|
1852
1581
|
|
|
1853
1582
|
// Metadata
|
|
1854
|
-
lastUpdated: "2026-
|
|
1583
|
+
lastUpdated: "2026-06-01",
|
|
1584
|
+
originalModel: "GPT-4.1 mini",
|
|
1855
1585
|
},
|
|
1856
|
-
"
|
|
1857
|
-
// AA Intelligence Index (composite score)
|
|
1858
|
-
intelligenceIndex: 11.1,
|
|
1859
|
-
normalizedScore: 16,
|
|
1860
|
-
|
|
1586
|
+
"gpt-5.4-xhigh": {
|
|
1861
1587
|
// AA specific benchmarks
|
|
1862
|
-
codingIndex:
|
|
1588
|
+
codingIndex: 57.2,
|
|
1863
1589
|
mathIndex: undefined,
|
|
1864
1590
|
|
|
1865
1591
|
// Academic benchmarks
|
|
1866
|
-
mmluPro:
|
|
1867
|
-
gpqa: 0.
|
|
1868
|
-
hle: 0.
|
|
1592
|
+
mmluPro: undefined,
|
|
1593
|
+
gpqa: 0.92,
|
|
1594
|
+
hle: 0.416,
|
|
1869
1595
|
|
|
1870
1596
|
// Capabilities
|
|
1871
1597
|
contextWindow: 8192,
|
|
@@ -1873,21 +1599,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1873
1599
|
supportsVision: false,
|
|
1874
1600
|
|
|
1875
1601
|
// Metadata
|
|
1876
|
-
lastUpdated: "2026-
|
|
1602
|
+
lastUpdated: "2026-06-01",
|
|
1603
|
+
originalModel: "GPT-5.4 (xhigh)",
|
|
1877
1604
|
},
|
|
1878
|
-
"
|
|
1879
|
-
// AA Intelligence Index (composite score)
|
|
1880
|
-
intelligenceIndex: 19.6,
|
|
1881
|
-
normalizedScore: 28,
|
|
1882
|
-
|
|
1605
|
+
"gpt-4o-chatgpt": {
|
|
1883
1606
|
// AA specific benchmarks
|
|
1884
|
-
codingIndex:
|
|
1607
|
+
codingIndex: undefined,
|
|
1885
1608
|
mathIndex: undefined,
|
|
1886
1609
|
|
|
1887
1610
|
// Academic benchmarks
|
|
1888
|
-
mmluPro: 0.
|
|
1889
|
-
gpqa: 0.
|
|
1890
|
-
hle: 0.
|
|
1611
|
+
mmluPro: 0.773,
|
|
1612
|
+
gpqa: 0.511,
|
|
1613
|
+
hle: 0.037,
|
|
1891
1614
|
|
|
1892
1615
|
// Capabilities
|
|
1893
1616
|
contextWindow: 8192,
|
|
@@ -1895,21 +1618,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1895
1618
|
supportsVision: false,
|
|
1896
1619
|
|
|
1897
1620
|
// Metadata
|
|
1898
|
-
lastUpdated: "2026-
|
|
1621
|
+
lastUpdated: "2026-06-01",
|
|
1622
|
+
originalModel: "GPT-4o (ChatGPT)",
|
|
1899
1623
|
},
|
|
1900
|
-
"
|
|
1901
|
-
// AA Intelligence Index (composite score)
|
|
1902
|
-
intelligenceIndex: 8.6,
|
|
1903
|
-
normalizedScore: 12,
|
|
1904
|
-
|
|
1624
|
+
"gpt-5-mini-minimal": {
|
|
1905
1625
|
// AA specific benchmarks
|
|
1906
|
-
codingIndex:
|
|
1907
|
-
mathIndex:
|
|
1626
|
+
codingIndex: 21.9,
|
|
1627
|
+
mathIndex: 46.7,
|
|
1908
1628
|
|
|
1909
1629
|
// Academic benchmarks
|
|
1910
|
-
mmluPro:
|
|
1911
|
-
gpqa:
|
|
1912
|
-
hle:
|
|
1630
|
+
mmluPro: 0.775,
|
|
1631
|
+
gpqa: 0.687,
|
|
1632
|
+
hle: 0.05,
|
|
1913
1633
|
|
|
1914
1634
|
// Capabilities
|
|
1915
1635
|
contextWindow: 8192,
|
|
@@ -1917,21 +1637,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1917
1637
|
supportsVision: false,
|
|
1918
1638
|
|
|
1919
1639
|
// Metadata
|
|
1920
|
-
lastUpdated: "2026-
|
|
1640
|
+
lastUpdated: "2026-06-01",
|
|
1641
|
+
originalModel: "GPT-5 mini (minimal)",
|
|
1921
1642
|
},
|
|
1922
|
-
"
|
|
1923
|
-
// AA Intelligence Index (composite score)
|
|
1924
|
-
intelligenceIndex: 12.7,
|
|
1925
|
-
normalizedScore: 18,
|
|
1926
|
-
|
|
1643
|
+
"gpt-5-mini-medium": {
|
|
1927
1644
|
// AA specific benchmarks
|
|
1928
|
-
codingIndex:
|
|
1929
|
-
mathIndex:
|
|
1645
|
+
codingIndex: 32.8,
|
|
1646
|
+
mathIndex: 85,
|
|
1930
1647
|
|
|
1931
1648
|
// Academic benchmarks
|
|
1932
|
-
mmluPro: 0.
|
|
1933
|
-
gpqa: 0.
|
|
1934
|
-
hle: 0.
|
|
1649
|
+
mmluPro: 0.828,
|
|
1650
|
+
gpqa: 0.803,
|
|
1651
|
+
hle: 0.146,
|
|
1935
1652
|
|
|
1936
1653
|
// Capabilities
|
|
1937
1654
|
contextWindow: 8192,
|
|
@@ -1939,13 +1656,10 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1939
1656
|
supportsVision: false,
|
|
1940
1657
|
|
|
1941
1658
|
// Metadata
|
|
1942
|
-
lastUpdated: "2026-
|
|
1659
|
+
lastUpdated: "2026-06-01",
|
|
1660
|
+
originalModel: "GPT-5 mini (medium)",
|
|
1943
1661
|
},
|
|
1944
|
-
"
|
|
1945
|
-
// AA Intelligence Index (composite score)
|
|
1946
|
-
intelligenceIndex: 12.3,
|
|
1947
|
-
normalizedScore: 18,
|
|
1948
|
-
|
|
1662
|
+
"o1-pro": {
|
|
1949
1663
|
// AA specific benchmarks
|
|
1950
1664
|
codingIndex: undefined,
|
|
1951
1665
|
mathIndex: undefined,
|
|
@@ -1961,21 +1675,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1961
1675
|
supportsVision: false,
|
|
1962
1676
|
|
|
1963
1677
|
// Metadata
|
|
1964
|
-
lastUpdated: "2026-
|
|
1678
|
+
lastUpdated: "2026-06-01",
|
|
1679
|
+
originalModel: "o1-pro",
|
|
1965
1680
|
},
|
|
1966
|
-
"
|
|
1967
|
-
// AA Intelligence Index (composite score)
|
|
1968
|
-
intelligenceIndex: 48.4,
|
|
1969
|
-
normalizedScore: 69,
|
|
1970
|
-
|
|
1681
|
+
"gpt-5-chatgpt": {
|
|
1971
1682
|
// AA specific benchmarks
|
|
1972
|
-
codingIndex:
|
|
1973
|
-
mathIndex:
|
|
1683
|
+
codingIndex: 21.2,
|
|
1684
|
+
mathIndex: 48.3,
|
|
1974
1685
|
|
|
1975
1686
|
// Academic benchmarks
|
|
1976
|
-
mmluPro: 0.
|
|
1977
|
-
gpqa: 0.
|
|
1978
|
-
hle: 0.
|
|
1687
|
+
mmluPro: 0.82,
|
|
1688
|
+
gpqa: 0.686,
|
|
1689
|
+
hle: 0.058,
|
|
1979
1690
|
|
|
1980
1691
|
// Capabilities
|
|
1981
1692
|
contextWindow: 8192,
|
|
@@ -1983,21 +1694,18 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
1983
1694
|
supportsVision: false,
|
|
1984
1695
|
|
|
1985
1696
|
// Metadata
|
|
1986
|
-
lastUpdated: "2026-
|
|
1697
|
+
lastUpdated: "2026-06-01",
|
|
1698
|
+
originalModel: "GPT-5 (ChatGPT)",
|
|
1987
1699
|
},
|
|
1988
|
-
"
|
|
1989
|
-
// AA Intelligence Index (composite score)
|
|
1990
|
-
intelligenceIndex: 8.5,
|
|
1991
|
-
normalizedScore: 12,
|
|
1992
|
-
|
|
1700
|
+
"o3-mini": {
|
|
1993
1701
|
// AA specific benchmarks
|
|
1994
|
-
codingIndex:
|
|
1702
|
+
codingIndex: 17.9,
|
|
1995
1703
|
mathIndex: undefined,
|
|
1996
1704
|
|
|
1997
1705
|
// Academic benchmarks
|
|
1998
|
-
mmluPro: 0.
|
|
1999
|
-
gpqa: 0.
|
|
2000
|
-
hle: 0.
|
|
1706
|
+
mmluPro: 0.791,
|
|
1707
|
+
gpqa: 0.748,
|
|
1708
|
+
hle: 0.087,
|
|
2001
1709
|
|
|
2002
1710
|
// Capabilities
|
|
2003
1711
|
contextWindow: 8192,
|
|
@@ -2005,6 +1713,7 @@ export const BENCHMARKS_CHUNK_2: Record<string, HardcodedBenchmark> = {
|
|
|
2005
1713
|
supportsVision: false,
|
|
2006
1714
|
|
|
2007
1715
|
// Metadata
|
|
2008
|
-
lastUpdated: "2026-
|
|
1716
|
+
lastUpdated: "2026-06-01",
|
|
1717
|
+
originalModel: "o3-mini",
|
|
2009
1718
|
},
|
|
2010
1719
|
};
|