pi-free 1.0.8 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +107 -1
- package/README.md +95 -46
- package/config.ts +165 -120
- package/constants.ts +22 -61
- package/index.ts +186 -0
- package/lib/json-persistence.ts +11 -10
- package/lib/logger.ts +2 -2
- package/lib/model-enhancer.ts +20 -20
- package/lib/open-browser.ts +41 -0
- package/lib/provider-cache.ts +106 -0
- package/lib/registry.ts +144 -0
- package/package.json +67 -82
- package/provider-factory.ts +25 -41
- package/provider-failover/benchmark-lookup.ts +247 -0
- package/provider-failover/benchmarks-chunk-0.ts +2010 -0
- package/provider-failover/benchmarks-chunk-1.ts +1988 -0
- package/provider-failover/benchmarks-chunk-2.ts +2010 -0
- package/provider-failover/benchmarks-chunk-3.ts +2010 -0
- package/provider-failover/benchmarks-chunk-4.ts +1969 -0
- package/provider-failover/hardcoded-benchmarks.ts +22 -10025
- package/provider-helper.ts +38 -37
- package/providers/{cline-auth.ts → cline/cline-auth.ts} +2 -2
- package/providers/cline/cline-models.ts +128 -0
- package/providers/{cline.ts → cline/cline.ts} +300 -257
- package/providers/cloudflare/cloudflare.ts +368 -0
- package/providers/dynamic-built-in/index.ts +513 -0
- package/providers/{kilo-auth.ts → kilo/kilo-auth.ts} +3 -20
- package/providers/{kilo-models.ts → kilo/kilo-models.ts} +2 -2
- package/providers/kilo/kilo.ts +235 -0
- package/providers/{modal.ts → modal/modal.ts} +4 -3
- package/providers/{nvidia.ts → nvidia/nvidia.ts} +152 -113
- package/providers/ollama/ollama.ts +172 -0
- package/providers/opencode-session.ts +34 -34
- package/providers/{qwen-auth.ts → qwen/qwen-auth.ts} +24 -40
- package/providers/{qwen-models.ts → qwen/qwen-models.ts} +101 -95
- package/providers/qwen/qwen.ts +202 -0
- package/provider-failover/auto-switch.ts +0 -350
- package/provider-failover/errors.ts +0 -275
- package/provider-failover/index.ts +0 -238
- package/providers/cline-models.ts +0 -77
- package/providers/factory.ts +0 -125
- package/providers/fireworks.ts +0 -49
- package/providers/go.ts +0 -216
- package/providers/kilo.ts +0 -146
- package/providers/mistral.ts +0 -144
- package/providers/ollama.ts +0 -113
- package/providers/openrouter.ts +0 -175
- package/providers/qwen.ts +0 -127
- package/providers/zen.ts +0 -371
- package/usage/commands.ts +0 -17
- package/usage/cumulative.ts +0 -193
- package/usage/formatters.ts +0 -115
- package/usage/index.ts +0 -46
- package/usage/limits.ts +0 -148
- package/usage/metrics.ts +0 -222
- package/usage/sessions.ts +0 -355
- package/usage/store.ts +0 -99
- package/usage/tracking.ts +0 -329
- package/usage/types.ts +0 -26
- package/usage/widget.ts +0 -90
- package/widget/data.ts +0 -113
- package/widget/format.ts +0 -26
- package/widget/render.ts +0 -117
|
@@ -0,0 +1,2010 @@
|
|
|
1
|
+
// Auto-generated benchmark data chunk 3
|
|
2
|
+
// Models: gemini-2.5-pro-preview-may-25 .. phi-3-mini-instruct-3.8b (90 entries)
|
|
3
|
+
// DO NOT EDIT MANUALLY — generated by scripts/update-benchmarks.ts
|
|
4
|
+
|
|
5
|
+
import type { HardcodedBenchmark } from "./hardcoded-benchmarks.ts";
|
|
6
|
+
|
|
7
|
+
export const BENCHMARKS_CHUNK_3: Record<string, HardcodedBenchmark> = {
|
|
8
|
+
"gemini-2.5-pro-preview-may-25": {
|
|
9
|
+
// AA Intelligence Index (composite score)
|
|
10
|
+
intelligenceIndex: 29.5,
|
|
11
|
+
normalizedScore: 42,
|
|
12
|
+
|
|
13
|
+
// AA specific benchmarks
|
|
14
|
+
codingIndex: undefined,
|
|
15
|
+
mathIndex: undefined,
|
|
16
|
+
|
|
17
|
+
// Academic benchmarks
|
|
18
|
+
mmluPro: 0.837,
|
|
19
|
+
gpqa: 0.822,
|
|
20
|
+
hle: 0.154,
|
|
21
|
+
|
|
22
|
+
// Capabilities
|
|
23
|
+
contextWindow: 8192,
|
|
24
|
+
supportsReasoning: false,
|
|
25
|
+
supportsVision: false,
|
|
26
|
+
|
|
27
|
+
// Metadata
|
|
28
|
+
lastUpdated: "2026-04-06",
|
|
29
|
+
},
|
|
30
|
+
"gemini-2.5-flash-preview-non-reasoning": {
|
|
31
|
+
// AA Intelligence Index (composite score)
|
|
32
|
+
intelligenceIndex: 17.8,
|
|
33
|
+
normalizedScore: 25,
|
|
34
|
+
|
|
35
|
+
// AA specific benchmarks
|
|
36
|
+
codingIndex: undefined,
|
|
37
|
+
mathIndex: undefined,
|
|
38
|
+
|
|
39
|
+
// Academic benchmarks
|
|
40
|
+
mmluPro: 0.783,
|
|
41
|
+
gpqa: 0.594,
|
|
42
|
+
hle: 0.05,
|
|
43
|
+
|
|
44
|
+
// Capabilities
|
|
45
|
+
contextWindow: 8192,
|
|
46
|
+
supportsReasoning: false,
|
|
47
|
+
supportsVision: false,
|
|
48
|
+
|
|
49
|
+
// Metadata
|
|
50
|
+
lastUpdated: "2026-04-06",
|
|
51
|
+
},
|
|
52
|
+
"gemini-1.5-pro-may-24": {
|
|
53
|
+
// AA Intelligence Index (composite score)
|
|
54
|
+
intelligenceIndex: 12.0,
|
|
55
|
+
normalizedScore: 17,
|
|
56
|
+
|
|
57
|
+
// AA specific benchmarks
|
|
58
|
+
codingIndex: 19.8,
|
|
59
|
+
mathIndex: undefined,
|
|
60
|
+
|
|
61
|
+
// Academic benchmarks
|
|
62
|
+
mmluPro: 0.657,
|
|
63
|
+
gpqa: 0.371,
|
|
64
|
+
hle: 0.039,
|
|
65
|
+
|
|
66
|
+
// Capabilities
|
|
67
|
+
contextWindow: 8192,
|
|
68
|
+
supportsReasoning: false,
|
|
69
|
+
supportsVision: false,
|
|
70
|
+
|
|
71
|
+
// Metadata
|
|
72
|
+
lastUpdated: "2026-04-06",
|
|
73
|
+
},
|
|
74
|
+
"gemini-2.5-flash-non-reasoning": {
|
|
75
|
+
// AA Intelligence Index (composite score)
|
|
76
|
+
intelligenceIndex: 20.6,
|
|
77
|
+
normalizedScore: 29,
|
|
78
|
+
|
|
79
|
+
// AA specific benchmarks
|
|
80
|
+
codingIndex: 17.8,
|
|
81
|
+
mathIndex: 60.3,
|
|
82
|
+
|
|
83
|
+
// Academic benchmarks
|
|
84
|
+
mmluPro: 0.809,
|
|
85
|
+
gpqa: 0.683,
|
|
86
|
+
hle: 0.051,
|
|
87
|
+
|
|
88
|
+
// Capabilities
|
|
89
|
+
contextWindow: 8192,
|
|
90
|
+
supportsReasoning: false,
|
|
91
|
+
supportsVision: false,
|
|
92
|
+
|
|
93
|
+
// Metadata
|
|
94
|
+
lastUpdated: "2026-04-06",
|
|
95
|
+
},
|
|
96
|
+
"gemini-2.5-flash-reasoning": {
|
|
97
|
+
// AA Intelligence Index (composite score)
|
|
98
|
+
intelligenceIndex: 27.0,
|
|
99
|
+
normalizedScore: 39,
|
|
100
|
+
|
|
101
|
+
// AA specific benchmarks
|
|
102
|
+
codingIndex: 22.2,
|
|
103
|
+
mathIndex: 73.3,
|
|
104
|
+
|
|
105
|
+
// Academic benchmarks
|
|
106
|
+
mmluPro: 0.832,
|
|
107
|
+
gpqa: 0.79,
|
|
108
|
+
hle: 0.111,
|
|
109
|
+
|
|
110
|
+
// Capabilities
|
|
111
|
+
contextWindow: 8192,
|
|
112
|
+
supportsReasoning: false,
|
|
113
|
+
supportsVision: false,
|
|
114
|
+
|
|
115
|
+
// Metadata
|
|
116
|
+
lastUpdated: "2026-04-06",
|
|
117
|
+
},
|
|
118
|
+
"gemini-2.5-flash-preview-sep-25-non-reasoning": {
|
|
119
|
+
// AA Intelligence Index (composite score)
|
|
120
|
+
intelligenceIndex: 25.7,
|
|
121
|
+
normalizedScore: 37,
|
|
122
|
+
|
|
123
|
+
// AA specific benchmarks
|
|
124
|
+
codingIndex: 22.1,
|
|
125
|
+
mathIndex: 56.7,
|
|
126
|
+
|
|
127
|
+
// Academic benchmarks
|
|
128
|
+
mmluPro: 0.836,
|
|
129
|
+
gpqa: 0.766,
|
|
130
|
+
hle: 0.078,
|
|
131
|
+
|
|
132
|
+
// Capabilities
|
|
133
|
+
contextWindow: 8192,
|
|
134
|
+
supportsReasoning: false,
|
|
135
|
+
supportsVision: false,
|
|
136
|
+
|
|
137
|
+
// Metadata
|
|
138
|
+
lastUpdated: "2026-04-06",
|
|
139
|
+
},
|
|
140
|
+
"gemma-3n-e4b-instruct-preview-may-25": {
|
|
141
|
+
// AA Intelligence Index (composite score)
|
|
142
|
+
intelligenceIndex: 10.1,
|
|
143
|
+
normalizedScore: 14,
|
|
144
|
+
|
|
145
|
+
// AA specific benchmarks
|
|
146
|
+
codingIndex: undefined,
|
|
147
|
+
mathIndex: undefined,
|
|
148
|
+
|
|
149
|
+
// Academic benchmarks
|
|
150
|
+
mmluPro: 0.483,
|
|
151
|
+
gpqa: 0.278,
|
|
152
|
+
hle: 0.049,
|
|
153
|
+
|
|
154
|
+
// Capabilities
|
|
155
|
+
contextWindow: 8192,
|
|
156
|
+
supportsReasoning: false,
|
|
157
|
+
supportsVision: false,
|
|
158
|
+
|
|
159
|
+
// Metadata
|
|
160
|
+
lastUpdated: "2026-04-06",
|
|
161
|
+
},
|
|
162
|
+
"gemini-1.5-flash-may-24": {
|
|
163
|
+
// AA Intelligence Index (composite score)
|
|
164
|
+
intelligenceIndex: 10.5,
|
|
165
|
+
normalizedScore: 15,
|
|
166
|
+
|
|
167
|
+
// AA specific benchmarks
|
|
168
|
+
codingIndex: undefined,
|
|
169
|
+
mathIndex: undefined,
|
|
170
|
+
|
|
171
|
+
// Academic benchmarks
|
|
172
|
+
mmluPro: 0.574,
|
|
173
|
+
gpqa: 0.324,
|
|
174
|
+
hle: 0.042,
|
|
175
|
+
|
|
176
|
+
// Capabilities
|
|
177
|
+
contextWindow: 8192,
|
|
178
|
+
supportsReasoning: false,
|
|
179
|
+
supportsVision: false,
|
|
180
|
+
|
|
181
|
+
// Metadata
|
|
182
|
+
lastUpdated: "2026-04-06",
|
|
183
|
+
},
|
|
184
|
+
"gemini-2.5-flash-lite-reasoning": {
|
|
185
|
+
// AA Intelligence Index (composite score)
|
|
186
|
+
intelligenceIndex: 17.6,
|
|
187
|
+
normalizedScore: 25,
|
|
188
|
+
|
|
189
|
+
// AA specific benchmarks
|
|
190
|
+
codingIndex: 9.5,
|
|
191
|
+
mathIndex: 53.3,
|
|
192
|
+
|
|
193
|
+
// Academic benchmarks
|
|
194
|
+
mmluPro: 0.759,
|
|
195
|
+
gpqa: 0.625,
|
|
196
|
+
hle: 0.064,
|
|
197
|
+
|
|
198
|
+
// Capabilities
|
|
199
|
+
contextWindow: 8192,
|
|
200
|
+
supportsReasoning: false,
|
|
201
|
+
supportsVision: false,
|
|
202
|
+
|
|
203
|
+
// Metadata
|
|
204
|
+
lastUpdated: "2026-04-06",
|
|
205
|
+
},
|
|
206
|
+
"gemini-2.0-flash-lite-feb-25": {
|
|
207
|
+
// AA Intelligence Index (composite score)
|
|
208
|
+
intelligenceIndex: 14.7,
|
|
209
|
+
normalizedScore: 21,
|
|
210
|
+
|
|
211
|
+
// AA specific benchmarks
|
|
212
|
+
codingIndex: undefined,
|
|
213
|
+
mathIndex: undefined,
|
|
214
|
+
|
|
215
|
+
// Academic benchmarks
|
|
216
|
+
mmluPro: 0.724,
|
|
217
|
+
gpqa: 0.535,
|
|
218
|
+
hle: 0.036,
|
|
219
|
+
|
|
220
|
+
// Capabilities
|
|
221
|
+
contextWindow: 8192,
|
|
222
|
+
supportsReasoning: false,
|
|
223
|
+
supportsVision: false,
|
|
224
|
+
|
|
225
|
+
// Metadata
|
|
226
|
+
lastUpdated: "2026-04-06",
|
|
227
|
+
},
|
|
228
|
+
"gemini-2.5-flash-preview-reasoning": {
|
|
229
|
+
// AA Intelligence Index (composite score)
|
|
230
|
+
intelligenceIndex: 24.3,
|
|
231
|
+
normalizedScore: 35,
|
|
232
|
+
|
|
233
|
+
// AA specific benchmarks
|
|
234
|
+
codingIndex: undefined,
|
|
235
|
+
mathIndex: undefined,
|
|
236
|
+
|
|
237
|
+
// Academic benchmarks
|
|
238
|
+
mmluPro: 0.8,
|
|
239
|
+
gpqa: 0.698,
|
|
240
|
+
hle: 0.116,
|
|
241
|
+
|
|
242
|
+
// Capabilities
|
|
243
|
+
contextWindow: 8192,
|
|
244
|
+
supportsReasoning: false,
|
|
245
|
+
supportsVision: false,
|
|
246
|
+
|
|
247
|
+
// Metadata
|
|
248
|
+
lastUpdated: "2026-04-06",
|
|
249
|
+
},
|
|
250
|
+
"gemini-2.5-pro-preview-mar-25": {
|
|
251
|
+
// AA Intelligence Index (composite score)
|
|
252
|
+
intelligenceIndex: 30.3,
|
|
253
|
+
normalizedScore: 43,
|
|
254
|
+
|
|
255
|
+
// AA specific benchmarks
|
|
256
|
+
codingIndex: 46.7,
|
|
257
|
+
mathIndex: undefined,
|
|
258
|
+
|
|
259
|
+
// Academic benchmarks
|
|
260
|
+
mmluPro: 0.858,
|
|
261
|
+
gpqa: 0.836,
|
|
262
|
+
hle: 0.171,
|
|
263
|
+
|
|
264
|
+
// Capabilities
|
|
265
|
+
contextWindow: 8192,
|
|
266
|
+
supportsReasoning: false,
|
|
267
|
+
supportsVision: false,
|
|
268
|
+
|
|
269
|
+
// Metadata
|
|
270
|
+
lastUpdated: "2026-04-06",
|
|
271
|
+
},
|
|
272
|
+
"gemini-1.0-ultra": {
|
|
273
|
+
// AA Intelligence Index (composite score)
|
|
274
|
+
intelligenceIndex: 10.1,
|
|
275
|
+
normalizedScore: 14,
|
|
276
|
+
|
|
277
|
+
// AA specific benchmarks
|
|
278
|
+
codingIndex: 17.6,
|
|
279
|
+
mathIndex: undefined,
|
|
280
|
+
|
|
281
|
+
// Academic benchmarks
|
|
282
|
+
mmluPro: undefined,
|
|
283
|
+
gpqa: undefined,
|
|
284
|
+
hle: undefined,
|
|
285
|
+
|
|
286
|
+
// Capabilities
|
|
287
|
+
contextWindow: 8192,
|
|
288
|
+
supportsReasoning: false,
|
|
289
|
+
supportsVision: false,
|
|
290
|
+
|
|
291
|
+
// Metadata
|
|
292
|
+
lastUpdated: "2026-04-06",
|
|
293
|
+
},
|
|
294
|
+
"gemini-2.5-flash-preview-sep-25-reasoning": {
|
|
295
|
+
// AA Intelligence Index (composite score)
|
|
296
|
+
intelligenceIndex: 31.1,
|
|
297
|
+
normalizedScore: 44,
|
|
298
|
+
|
|
299
|
+
// AA specific benchmarks
|
|
300
|
+
codingIndex: 24.6,
|
|
301
|
+
mathIndex: 78.3,
|
|
302
|
+
|
|
303
|
+
// Academic benchmarks
|
|
304
|
+
mmluPro: 0.842,
|
|
305
|
+
gpqa: 0.793,
|
|
306
|
+
hle: 0.127,
|
|
307
|
+
|
|
308
|
+
// Capabilities
|
|
309
|
+
contextWindow: 8192,
|
|
310
|
+
supportsReasoning: false,
|
|
311
|
+
supportsVision: false,
|
|
312
|
+
|
|
313
|
+
// Metadata
|
|
314
|
+
lastUpdated: "2026-04-06",
|
|
315
|
+
},
|
|
316
|
+
"claude-3.5-sonnet-oct-24": {
|
|
317
|
+
// AA Intelligence Index (composite score)
|
|
318
|
+
intelligenceIndex: 15.9,
|
|
319
|
+
normalizedScore: 23,
|
|
320
|
+
|
|
321
|
+
// AA specific benchmarks
|
|
322
|
+
codingIndex: 30.2,
|
|
323
|
+
mathIndex: undefined,
|
|
324
|
+
|
|
325
|
+
// Academic benchmarks
|
|
326
|
+
mmluPro: 0.772,
|
|
327
|
+
gpqa: 0.599,
|
|
328
|
+
hle: 0.039,
|
|
329
|
+
|
|
330
|
+
// Capabilities
|
|
331
|
+
contextWindow: 8192,
|
|
332
|
+
supportsReasoning: false,
|
|
333
|
+
supportsVision: false,
|
|
334
|
+
|
|
335
|
+
// Metadata
|
|
336
|
+
lastUpdated: "2026-04-06",
|
|
337
|
+
},
|
|
338
|
+
"claude-3.5-sonnet-june-24": {
|
|
339
|
+
// AA Intelligence Index (composite score)
|
|
340
|
+
intelligenceIndex: 14.2,
|
|
341
|
+
normalizedScore: 20,
|
|
342
|
+
|
|
343
|
+
// AA specific benchmarks
|
|
344
|
+
codingIndex: 26.0,
|
|
345
|
+
mathIndex: undefined,
|
|
346
|
+
|
|
347
|
+
// Academic benchmarks
|
|
348
|
+
mmluPro: 0.751,
|
|
349
|
+
gpqa: 0.56,
|
|
350
|
+
hle: 0.037,
|
|
351
|
+
|
|
352
|
+
// Capabilities
|
|
353
|
+
contextWindow: 8192,
|
|
354
|
+
supportsReasoning: false,
|
|
355
|
+
supportsVision: false,
|
|
356
|
+
|
|
357
|
+
// Metadata
|
|
358
|
+
lastUpdated: "2026-04-06",
|
|
359
|
+
},
|
|
360
|
+
"claude-3-opus": {
|
|
361
|
+
// AA Intelligence Index (composite score)
|
|
362
|
+
intelligenceIndex: 18.0,
|
|
363
|
+
normalizedScore: 26,
|
|
364
|
+
|
|
365
|
+
// AA specific benchmarks
|
|
366
|
+
codingIndex: 19.5,
|
|
367
|
+
mathIndex: undefined,
|
|
368
|
+
|
|
369
|
+
// Academic benchmarks
|
|
370
|
+
mmluPro: 0.696,
|
|
371
|
+
gpqa: 0.489,
|
|
372
|
+
hle: 0.031,
|
|
373
|
+
|
|
374
|
+
// Capabilities
|
|
375
|
+
contextWindow: 8192,
|
|
376
|
+
supportsReasoning: false,
|
|
377
|
+
supportsVision: false,
|
|
378
|
+
|
|
379
|
+
// Metadata
|
|
380
|
+
lastUpdated: "2026-04-06",
|
|
381
|
+
},
|
|
382
|
+
"claude-3.5-haiku": {
|
|
383
|
+
// AA Intelligence Index (composite score)
|
|
384
|
+
intelligenceIndex: 18.7,
|
|
385
|
+
normalizedScore: 27,
|
|
386
|
+
|
|
387
|
+
// AA specific benchmarks
|
|
388
|
+
codingIndex: 10.7,
|
|
389
|
+
mathIndex: undefined,
|
|
390
|
+
|
|
391
|
+
// Academic benchmarks
|
|
392
|
+
mmluPro: 0.634,
|
|
393
|
+
gpqa: 0.408,
|
|
394
|
+
hle: 0.035,
|
|
395
|
+
|
|
396
|
+
// Capabilities
|
|
397
|
+
contextWindow: 8192,
|
|
398
|
+
supportsReasoning: false,
|
|
399
|
+
supportsVision: false,
|
|
400
|
+
|
|
401
|
+
// Metadata
|
|
402
|
+
lastUpdated: "2026-04-06",
|
|
403
|
+
},
|
|
404
|
+
"claude-3-sonnet": {
|
|
405
|
+
// AA Intelligence Index (composite score)
|
|
406
|
+
intelligenceIndex: 10.3,
|
|
407
|
+
normalizedScore: 15,
|
|
408
|
+
|
|
409
|
+
// AA specific benchmarks
|
|
410
|
+
codingIndex: undefined,
|
|
411
|
+
mathIndex: undefined,
|
|
412
|
+
|
|
413
|
+
// Academic benchmarks
|
|
414
|
+
mmluPro: 0.579,
|
|
415
|
+
gpqa: 0.4,
|
|
416
|
+
hle: 0.038,
|
|
417
|
+
|
|
418
|
+
// Capabilities
|
|
419
|
+
contextWindow: 8192,
|
|
420
|
+
supportsReasoning: false,
|
|
421
|
+
supportsVision: false,
|
|
422
|
+
|
|
423
|
+
// Metadata
|
|
424
|
+
lastUpdated: "2026-04-06",
|
|
425
|
+
},
|
|
426
|
+
"claude-3-haiku": {
|
|
427
|
+
// AA Intelligence Index (composite score)
|
|
428
|
+
intelligenceIndex: 12.3,
|
|
429
|
+
normalizedScore: 18,
|
|
430
|
+
|
|
431
|
+
// AA specific benchmarks
|
|
432
|
+
codingIndex: 6.7,
|
|
433
|
+
mathIndex: undefined,
|
|
434
|
+
|
|
435
|
+
// Academic benchmarks
|
|
436
|
+
mmluPro: undefined,
|
|
437
|
+
gpqa: 0.374,
|
|
438
|
+
hle: 0.039,
|
|
439
|
+
|
|
440
|
+
// Capabilities
|
|
441
|
+
contextWindow: 8192,
|
|
442
|
+
supportsReasoning: false,
|
|
443
|
+
supportsVision: false,
|
|
444
|
+
|
|
445
|
+
// Metadata
|
|
446
|
+
lastUpdated: "2026-04-06",
|
|
447
|
+
},
|
|
448
|
+
"claude-instant": {
|
|
449
|
+
// AA Intelligence Index (composite score)
|
|
450
|
+
intelligenceIndex: 7.4,
|
|
451
|
+
normalizedScore: 11,
|
|
452
|
+
|
|
453
|
+
// AA specific benchmarks
|
|
454
|
+
codingIndex: 7.8,
|
|
455
|
+
mathIndex: undefined,
|
|
456
|
+
|
|
457
|
+
// Academic benchmarks
|
|
458
|
+
mmluPro: 0.434,
|
|
459
|
+
gpqa: 0.33,
|
|
460
|
+
hle: 0.038,
|
|
461
|
+
|
|
462
|
+
// Capabilities
|
|
463
|
+
contextWindow: 8192,
|
|
464
|
+
supportsReasoning: false,
|
|
465
|
+
supportsVision: false,
|
|
466
|
+
|
|
467
|
+
// Metadata
|
|
468
|
+
lastUpdated: "2026-04-06",
|
|
469
|
+
},
|
|
470
|
+
"claude-3.7-sonnet-non-reasoning": {
|
|
471
|
+
// AA Intelligence Index (composite score)
|
|
472
|
+
intelligenceIndex: 30.8,
|
|
473
|
+
normalizedScore: 44,
|
|
474
|
+
|
|
475
|
+
// AA specific benchmarks
|
|
476
|
+
codingIndex: 26.7,
|
|
477
|
+
mathIndex: 21.0,
|
|
478
|
+
|
|
479
|
+
// Academic benchmarks
|
|
480
|
+
mmluPro: 0.803,
|
|
481
|
+
gpqa: 0.656,
|
|
482
|
+
hle: 0.048,
|
|
483
|
+
|
|
484
|
+
// Capabilities
|
|
485
|
+
contextWindow: 8192,
|
|
486
|
+
supportsReasoning: false,
|
|
487
|
+
supportsVision: false,
|
|
488
|
+
|
|
489
|
+
// Metadata
|
|
490
|
+
lastUpdated: "2026-04-06",
|
|
491
|
+
},
|
|
492
|
+
"claude-2.1": {
|
|
493
|
+
// AA Intelligence Index (composite score)
|
|
494
|
+
intelligenceIndex: 9.3,
|
|
495
|
+
normalizedScore: 13,
|
|
496
|
+
|
|
497
|
+
// AA specific benchmarks
|
|
498
|
+
codingIndex: 14.0,
|
|
499
|
+
mathIndex: undefined,
|
|
500
|
+
|
|
501
|
+
// Academic benchmarks
|
|
502
|
+
mmluPro: 0.495,
|
|
503
|
+
gpqa: 0.319,
|
|
504
|
+
hle: 0.042,
|
|
505
|
+
|
|
506
|
+
// Capabilities
|
|
507
|
+
contextWindow: 8192,
|
|
508
|
+
supportsReasoning: false,
|
|
509
|
+
supportsVision: false,
|
|
510
|
+
|
|
511
|
+
// Metadata
|
|
512
|
+
lastUpdated: "2026-04-06",
|
|
513
|
+
},
|
|
514
|
+
"claude-3.7-sonnet-reasoning": {
|
|
515
|
+
// AA Intelligence Index (composite score)
|
|
516
|
+
intelligenceIndex: 34.7,
|
|
517
|
+
normalizedScore: 50,
|
|
518
|
+
|
|
519
|
+
// AA specific benchmarks
|
|
520
|
+
codingIndex: 27.6,
|
|
521
|
+
mathIndex: 56.3,
|
|
522
|
+
|
|
523
|
+
// Academic benchmarks
|
|
524
|
+
mmluPro: 0.837,
|
|
525
|
+
gpqa: 0.772,
|
|
526
|
+
hle: 0.103,
|
|
527
|
+
|
|
528
|
+
// Capabilities
|
|
529
|
+
contextWindow: 8192,
|
|
530
|
+
supportsReasoning: false,
|
|
531
|
+
supportsVision: false,
|
|
532
|
+
|
|
533
|
+
// Metadata
|
|
534
|
+
lastUpdated: "2026-04-06",
|
|
535
|
+
},
|
|
536
|
+
"claude-4.1-opus-non-reasoning": {
|
|
537
|
+
// AA Intelligence Index (composite score)
|
|
538
|
+
intelligenceIndex: 36.0,
|
|
539
|
+
normalizedScore: 51,
|
|
540
|
+
|
|
541
|
+
// AA specific benchmarks
|
|
542
|
+
codingIndex: undefined,
|
|
543
|
+
mathIndex: undefined,
|
|
544
|
+
|
|
545
|
+
// Academic benchmarks
|
|
546
|
+
mmluPro: undefined,
|
|
547
|
+
gpqa: undefined,
|
|
548
|
+
hle: undefined,
|
|
549
|
+
|
|
550
|
+
// Capabilities
|
|
551
|
+
contextWindow: 8192,
|
|
552
|
+
supportsReasoning: false,
|
|
553
|
+
supportsVision: false,
|
|
554
|
+
|
|
555
|
+
// Metadata
|
|
556
|
+
lastUpdated: "2026-04-06",
|
|
557
|
+
},
|
|
558
|
+
"claude-4.1-opus-reasoning": {
|
|
559
|
+
// AA Intelligence Index (composite score)
|
|
560
|
+
intelligenceIndex: 42.0,
|
|
561
|
+
normalizedScore: 60,
|
|
562
|
+
|
|
563
|
+
// AA specific benchmarks
|
|
564
|
+
codingIndex: 36.5,
|
|
565
|
+
mathIndex: 80.3,
|
|
566
|
+
|
|
567
|
+
// Academic benchmarks
|
|
568
|
+
mmluPro: 0.88,
|
|
569
|
+
gpqa: 0.809,
|
|
570
|
+
hle: 0.119,
|
|
571
|
+
|
|
572
|
+
// Capabilities
|
|
573
|
+
contextWindow: 8192,
|
|
574
|
+
supportsReasoning: false,
|
|
575
|
+
supportsVision: false,
|
|
576
|
+
|
|
577
|
+
// Metadata
|
|
578
|
+
lastUpdated: "2026-04-06",
|
|
579
|
+
},
|
|
580
|
+
"claude-4-sonnet-non-reasoning": {
|
|
581
|
+
// AA Intelligence Index (composite score)
|
|
582
|
+
intelligenceIndex: 33.0,
|
|
583
|
+
normalizedScore: 47,
|
|
584
|
+
|
|
585
|
+
// AA specific benchmarks
|
|
586
|
+
codingIndex: 30.6,
|
|
587
|
+
mathIndex: 38.0,
|
|
588
|
+
|
|
589
|
+
// Academic benchmarks
|
|
590
|
+
mmluPro: 0.837,
|
|
591
|
+
gpqa: 0.683,
|
|
592
|
+
hle: 0.04,
|
|
593
|
+
|
|
594
|
+
// Capabilities
|
|
595
|
+
contextWindow: 8192,
|
|
596
|
+
supportsReasoning: false,
|
|
597
|
+
supportsVision: false,
|
|
598
|
+
|
|
599
|
+
// Metadata
|
|
600
|
+
lastUpdated: "2026-04-06",
|
|
601
|
+
},
|
|
602
|
+
"claude-4-opus-non-reasoning": {
|
|
603
|
+
// AA Intelligence Index (composite score)
|
|
604
|
+
intelligenceIndex: 33.0,
|
|
605
|
+
normalizedScore: 47,
|
|
606
|
+
|
|
607
|
+
// AA specific benchmarks
|
|
608
|
+
codingIndex: undefined,
|
|
609
|
+
mathIndex: 36.3,
|
|
610
|
+
|
|
611
|
+
// Academic benchmarks
|
|
612
|
+
mmluPro: 0.86,
|
|
613
|
+
gpqa: 0.701,
|
|
614
|
+
hle: 0.059,
|
|
615
|
+
|
|
616
|
+
// Capabilities
|
|
617
|
+
contextWindow: 8192,
|
|
618
|
+
supportsReasoning: false,
|
|
619
|
+
supportsVision: false,
|
|
620
|
+
|
|
621
|
+
// Metadata
|
|
622
|
+
lastUpdated: "2026-04-06",
|
|
623
|
+
},
|
|
624
|
+
"claude-4-sonnet-reasoning": {
|
|
625
|
+
// AA Intelligence Index (composite score)
|
|
626
|
+
intelligenceIndex: 38.7,
|
|
627
|
+
normalizedScore: 55,
|
|
628
|
+
|
|
629
|
+
// AA specific benchmarks
|
|
630
|
+
codingIndex: 34.1,
|
|
631
|
+
mathIndex: 74.3,
|
|
632
|
+
|
|
633
|
+
// Academic benchmarks
|
|
634
|
+
mmluPro: 0.842,
|
|
635
|
+
gpqa: 0.777,
|
|
636
|
+
hle: 0.096,
|
|
637
|
+
|
|
638
|
+
// Capabilities
|
|
639
|
+
contextWindow: 8192,
|
|
640
|
+
supportsReasoning: false,
|
|
641
|
+
supportsVision: false,
|
|
642
|
+
|
|
643
|
+
// Metadata
|
|
644
|
+
lastUpdated: "2026-04-06",
|
|
645
|
+
},
|
|
646
|
+
"claude-opus-4.5-non-reasoning": {
|
|
647
|
+
// AA Intelligence Index (composite score)
|
|
648
|
+
intelligenceIndex: 43.1,
|
|
649
|
+
normalizedScore: 62,
|
|
650
|
+
|
|
651
|
+
// AA specific benchmarks
|
|
652
|
+
codingIndex: 42.9,
|
|
653
|
+
mathIndex: 62.7,
|
|
654
|
+
|
|
655
|
+
// Academic benchmarks
|
|
656
|
+
mmluPro: 0.889,
|
|
657
|
+
gpqa: 0.81,
|
|
658
|
+
hle: 0.129,
|
|
659
|
+
|
|
660
|
+
// Capabilities
|
|
661
|
+
contextWindow: 8192,
|
|
662
|
+
supportsReasoning: false,
|
|
663
|
+
supportsVision: false,
|
|
664
|
+
|
|
665
|
+
// Metadata
|
|
666
|
+
lastUpdated: "2026-04-06",
|
|
667
|
+
},
|
|
668
|
+
"claude-opus-4.5-reasoning": {
|
|
669
|
+
// AA Intelligence Index (composite score)
|
|
670
|
+
intelligenceIndex: 49.7,
|
|
671
|
+
normalizedScore: 71,
|
|
672
|
+
|
|
673
|
+
// AA specific benchmarks
|
|
674
|
+
codingIndex: 47.8,
|
|
675
|
+
mathIndex: 91.3,
|
|
676
|
+
|
|
677
|
+
// Academic benchmarks
|
|
678
|
+
mmluPro: 0.895,
|
|
679
|
+
gpqa: 0.866,
|
|
680
|
+
hle: 0.284,
|
|
681
|
+
|
|
682
|
+
// Capabilities
|
|
683
|
+
contextWindow: 8192,
|
|
684
|
+
supportsReasoning: false,
|
|
685
|
+
supportsVision: false,
|
|
686
|
+
|
|
687
|
+
// Metadata
|
|
688
|
+
lastUpdated: "2026-04-06",
|
|
689
|
+
},
|
|
690
|
+
"claude-4-opus-reasoning": {
|
|
691
|
+
// AA Intelligence Index (composite score)
|
|
692
|
+
intelligenceIndex: 39.0,
|
|
693
|
+
normalizedScore: 56,
|
|
694
|
+
|
|
695
|
+
// AA specific benchmarks
|
|
696
|
+
codingIndex: 34.0,
|
|
697
|
+
mathIndex: 73.3,
|
|
698
|
+
|
|
699
|
+
// Academic benchmarks
|
|
700
|
+
mmluPro: 0.873,
|
|
701
|
+
gpqa: 0.796,
|
|
702
|
+
hle: 0.117,
|
|
703
|
+
|
|
704
|
+
// Capabilities
|
|
705
|
+
contextWindow: 8192,
|
|
706
|
+
supportsReasoning: false,
|
|
707
|
+
supportsVision: false,
|
|
708
|
+
|
|
709
|
+
// Metadata
|
|
710
|
+
lastUpdated: "2026-04-06",
|
|
711
|
+
},
|
|
712
|
+
"claude-4.5-sonnet-non-reasoning": {
|
|
713
|
+
// AA Intelligence Index (composite score)
|
|
714
|
+
intelligenceIndex: 37.1,
|
|
715
|
+
normalizedScore: 53,
|
|
716
|
+
|
|
717
|
+
// AA specific benchmarks
|
|
718
|
+
codingIndex: 33.5,
|
|
719
|
+
mathIndex: 37.0,
|
|
720
|
+
|
|
721
|
+
// Academic benchmarks
|
|
722
|
+
mmluPro: 0.86,
|
|
723
|
+
gpqa: 0.727,
|
|
724
|
+
hle: 0.071,
|
|
725
|
+
|
|
726
|
+
// Capabilities
|
|
727
|
+
contextWindow: 8192,
|
|
728
|
+
supportsReasoning: false,
|
|
729
|
+
supportsVision: false,
|
|
730
|
+
|
|
731
|
+
// Metadata
|
|
732
|
+
lastUpdated: "2026-04-06",
|
|
733
|
+
},
|
|
734
|
+
"claude-4.5-sonnet-reasoning": {
|
|
735
|
+
// AA Intelligence Index (composite score)
|
|
736
|
+
intelligenceIndex: 43.0,
|
|
737
|
+
normalizedScore: 61,
|
|
738
|
+
|
|
739
|
+
// AA specific benchmarks
|
|
740
|
+
codingIndex: 38.6,
|
|
741
|
+
mathIndex: 88.0,
|
|
742
|
+
|
|
743
|
+
// Academic benchmarks
|
|
744
|
+
mmluPro: 0.875,
|
|
745
|
+
gpqa: 0.834,
|
|
746
|
+
hle: 0.173,
|
|
747
|
+
|
|
748
|
+
// Capabilities
|
|
749
|
+
contextWindow: 8192,
|
|
750
|
+
supportsReasoning: false,
|
|
751
|
+
supportsVision: false,
|
|
752
|
+
|
|
753
|
+
// Metadata
|
|
754
|
+
lastUpdated: "2026-04-06",
|
|
755
|
+
},
|
|
756
|
+
"claude-2.0": {
|
|
757
|
+
// AA Intelligence Index (composite score)
|
|
758
|
+
intelligenceIndex: 9.1,
|
|
759
|
+
normalizedScore: 13,
|
|
760
|
+
|
|
761
|
+
// AA specific benchmarks
|
|
762
|
+
codingIndex: 12.9,
|
|
763
|
+
mathIndex: undefined,
|
|
764
|
+
|
|
765
|
+
// Academic benchmarks
|
|
766
|
+
mmluPro: 0.486,
|
|
767
|
+
gpqa: 0.344,
|
|
768
|
+
hle: undefined,
|
|
769
|
+
|
|
770
|
+
// Capabilities
|
|
771
|
+
contextWindow: 8192,
|
|
772
|
+
supportsReasoning: false,
|
|
773
|
+
supportsVision: false,
|
|
774
|
+
|
|
775
|
+
// Metadata
|
|
776
|
+
lastUpdated: "2026-04-06",
|
|
777
|
+
},
|
|
778
|
+
"mistral-large-2-nov-24": {
|
|
779
|
+
// AA Intelligence Index (composite score)
|
|
780
|
+
intelligenceIndex: 15.1,
|
|
781
|
+
normalizedScore: 22,
|
|
782
|
+
|
|
783
|
+
// AA specific benchmarks
|
|
784
|
+
codingIndex: 13.8,
|
|
785
|
+
mathIndex: 14.0,
|
|
786
|
+
|
|
787
|
+
// Academic benchmarks
|
|
788
|
+
mmluPro: 0.697,
|
|
789
|
+
gpqa: 0.486,
|
|
790
|
+
hle: 0.04,
|
|
791
|
+
|
|
792
|
+
// Capabilities
|
|
793
|
+
contextWindow: 8192,
|
|
794
|
+
supportsReasoning: false,
|
|
795
|
+
supportsVision: false,
|
|
796
|
+
|
|
797
|
+
// Metadata
|
|
798
|
+
lastUpdated: "2026-04-06",
|
|
799
|
+
},
|
|
800
|
+
"mistral-large-2-jul-24": {
|
|
801
|
+
// AA Intelligence Index (composite score)
|
|
802
|
+
intelligenceIndex: 13.0,
|
|
803
|
+
normalizedScore: 19,
|
|
804
|
+
|
|
805
|
+
// AA specific benchmarks
|
|
806
|
+
codingIndex: undefined,
|
|
807
|
+
mathIndex: 0.0,
|
|
808
|
+
|
|
809
|
+
// Academic benchmarks
|
|
810
|
+
mmluPro: 0.683,
|
|
811
|
+
gpqa: 0.472,
|
|
812
|
+
hle: 0.032,
|
|
813
|
+
|
|
814
|
+
// Capabilities
|
|
815
|
+
contextWindow: 8192,
|
|
816
|
+
supportsReasoning: false,
|
|
817
|
+
supportsVision: false,
|
|
818
|
+
|
|
819
|
+
// Metadata
|
|
820
|
+
lastUpdated: "2026-04-06",
|
|
821
|
+
},
|
|
822
|
+
"pixtral-large": {
|
|
823
|
+
// AA Intelligence Index (composite score)
|
|
824
|
+
intelligenceIndex: 14.0,
|
|
825
|
+
normalizedScore: 20,
|
|
826
|
+
|
|
827
|
+
// AA specific benchmarks
|
|
828
|
+
codingIndex: undefined,
|
|
829
|
+
mathIndex: 2.3,
|
|
830
|
+
|
|
831
|
+
// Academic benchmarks
|
|
832
|
+
mmluPro: 0.701,
|
|
833
|
+
gpqa: 0.505,
|
|
834
|
+
hle: 0.036,
|
|
835
|
+
|
|
836
|
+
// Capabilities
|
|
837
|
+
contextWindow: 8192,
|
|
838
|
+
supportsReasoning: false,
|
|
839
|
+
supportsVision: false,
|
|
840
|
+
|
|
841
|
+
// Metadata
|
|
842
|
+
lastUpdated: "2026-04-06",
|
|
843
|
+
},
|
|
844
|
+
"mistral-small-3": {
|
|
845
|
+
// AA Intelligence Index (composite score)
|
|
846
|
+
intelligenceIndex: 12.7,
|
|
847
|
+
normalizedScore: 18,
|
|
848
|
+
|
|
849
|
+
// AA specific benchmarks
|
|
850
|
+
codingIndex: undefined,
|
|
851
|
+
mathIndex: 4.3,
|
|
852
|
+
|
|
853
|
+
// Academic benchmarks
|
|
854
|
+
mmluPro: 0.652,
|
|
855
|
+
gpqa: 0.462,
|
|
856
|
+
hle: 0.041,
|
|
857
|
+
|
|
858
|
+
// Capabilities
|
|
859
|
+
contextWindow: 8192,
|
|
860
|
+
supportsReasoning: false,
|
|
861
|
+
supportsVision: false,
|
|
862
|
+
|
|
863
|
+
// Metadata
|
|
864
|
+
lastUpdated: "2026-04-06",
|
|
865
|
+
},
|
|
866
|
+
"mistral-small-sep-24": {
|
|
867
|
+
// AA Intelligence Index (composite score)
|
|
868
|
+
intelligenceIndex: 10.2,
|
|
869
|
+
normalizedScore: 15,
|
|
870
|
+
|
|
871
|
+
// AA specific benchmarks
|
|
872
|
+
codingIndex: undefined,
|
|
873
|
+
mathIndex: undefined,
|
|
874
|
+
|
|
875
|
+
// Academic benchmarks
|
|
876
|
+
mmluPro: 0.529,
|
|
877
|
+
gpqa: 0.381,
|
|
878
|
+
hle: 0.043,
|
|
879
|
+
|
|
880
|
+
// Capabilities
|
|
881
|
+
contextWindow: 8192,
|
|
882
|
+
supportsReasoning: false,
|
|
883
|
+
supportsVision: false,
|
|
884
|
+
|
|
885
|
+
// Metadata
|
|
886
|
+
lastUpdated: "2026-04-06",
|
|
887
|
+
},
|
|
888
|
+
"mixtral-8x22b-instruct": {
|
|
889
|
+
// AA Intelligence Index (composite score)
|
|
890
|
+
intelligenceIndex: 9.8,
|
|
891
|
+
normalizedScore: 14,
|
|
892
|
+
|
|
893
|
+
// AA specific benchmarks
|
|
894
|
+
codingIndex: undefined,
|
|
895
|
+
mathIndex: undefined,
|
|
896
|
+
|
|
897
|
+
// Academic benchmarks
|
|
898
|
+
mmluPro: 0.537,
|
|
899
|
+
gpqa: 0.332,
|
|
900
|
+
hle: 0.041,
|
|
901
|
+
|
|
902
|
+
// Capabilities
|
|
903
|
+
contextWindow: 8192,
|
|
904
|
+
supportsReasoning: false,
|
|
905
|
+
supportsVision: false,
|
|
906
|
+
|
|
907
|
+
// Metadata
|
|
908
|
+
lastUpdated: "2026-04-06",
|
|
909
|
+
},
|
|
910
|
+
"mistral-small-feb-24": {
|
|
911
|
+
// AA Intelligence Index (composite score)
|
|
912
|
+
intelligenceIndex: 9.0,
|
|
913
|
+
normalizedScore: 13,
|
|
914
|
+
|
|
915
|
+
// AA specific benchmarks
|
|
916
|
+
codingIndex: undefined,
|
|
917
|
+
mathIndex: undefined,
|
|
918
|
+
|
|
919
|
+
// Academic benchmarks
|
|
920
|
+
mmluPro: 0.419,
|
|
921
|
+
gpqa: 0.302,
|
|
922
|
+
hle: 0.044,
|
|
923
|
+
|
|
924
|
+
// Capabilities
|
|
925
|
+
contextWindow: 8192,
|
|
926
|
+
supportsReasoning: false,
|
|
927
|
+
supportsVision: false,
|
|
928
|
+
|
|
929
|
+
// Metadata
|
|
930
|
+
lastUpdated: "2026-04-06",
|
|
931
|
+
},
|
|
932
|
+
"mistral-large-feb-24": {
|
|
933
|
+
// AA Intelligence Index (composite score)
|
|
934
|
+
intelligenceIndex: 9.9,
|
|
935
|
+
normalizedScore: 14,
|
|
936
|
+
|
|
937
|
+
// AA specific benchmarks
|
|
938
|
+
codingIndex: undefined,
|
|
939
|
+
mathIndex: undefined,
|
|
940
|
+
|
|
941
|
+
// Academic benchmarks
|
|
942
|
+
mmluPro: 0.515,
|
|
943
|
+
gpqa: 0.351,
|
|
944
|
+
hle: 0.034,
|
|
945
|
+
|
|
946
|
+
// Capabilities
|
|
947
|
+
contextWindow: 8192,
|
|
948
|
+
supportsReasoning: false,
|
|
949
|
+
supportsVision: false,
|
|
950
|
+
|
|
951
|
+
// Metadata
|
|
952
|
+
lastUpdated: "2026-04-06",
|
|
953
|
+
},
|
|
954
|
+
"mixtral-8x7b-instruct": {
|
|
955
|
+
// AA Intelligence Index (composite score)
|
|
956
|
+
intelligenceIndex: 7.7,
|
|
957
|
+
normalizedScore: 11,
|
|
958
|
+
|
|
959
|
+
// AA specific benchmarks
|
|
960
|
+
codingIndex: undefined,
|
|
961
|
+
mathIndex: undefined,
|
|
962
|
+
|
|
963
|
+
// Academic benchmarks
|
|
964
|
+
mmluPro: 0.387,
|
|
965
|
+
gpqa: 0.292,
|
|
966
|
+
hle: 0.045,
|
|
967
|
+
|
|
968
|
+
// Capabilities
|
|
969
|
+
contextWindow: 8192,
|
|
970
|
+
supportsReasoning: false,
|
|
971
|
+
supportsVision: false,
|
|
972
|
+
|
|
973
|
+
// Metadata
|
|
974
|
+
lastUpdated: "2026-04-06",
|
|
975
|
+
},
|
|
976
|
+
"mistral-7b-instruct": {
|
|
977
|
+
// AA Intelligence Index (composite score)
|
|
978
|
+
intelligenceIndex: 7.4,
|
|
979
|
+
normalizedScore: 11,
|
|
980
|
+
|
|
981
|
+
// AA specific benchmarks
|
|
982
|
+
codingIndex: undefined,
|
|
983
|
+
mathIndex: undefined,
|
|
984
|
+
|
|
985
|
+
// Academic benchmarks
|
|
986
|
+
mmluPro: 0.245,
|
|
987
|
+
gpqa: 0.177,
|
|
988
|
+
hle: 0.043,
|
|
989
|
+
|
|
990
|
+
// Capabilities
|
|
991
|
+
contextWindow: 8192,
|
|
992
|
+
supportsReasoning: false,
|
|
993
|
+
supportsVision: false,
|
|
994
|
+
|
|
995
|
+
// Metadata
|
|
996
|
+
lastUpdated: "2026-04-06",
|
|
997
|
+
},
|
|
998
|
+
"mistral-small-3.1": {
|
|
999
|
+
// AA Intelligence Index (composite score)
|
|
1000
|
+
intelligenceIndex: 14.5,
|
|
1001
|
+
normalizedScore: 21,
|
|
1002
|
+
|
|
1003
|
+
// AA specific benchmarks
|
|
1004
|
+
codingIndex: 13.9,
|
|
1005
|
+
mathIndex: 3.7,
|
|
1006
|
+
|
|
1007
|
+
// Academic benchmarks
|
|
1008
|
+
mmluPro: 0.659,
|
|
1009
|
+
gpqa: 0.454,
|
|
1010
|
+
hle: 0.048,
|
|
1011
|
+
|
|
1012
|
+
// Capabilities
|
|
1013
|
+
contextWindow: 8192,
|
|
1014
|
+
supportsReasoning: false,
|
|
1015
|
+
supportsVision: false,
|
|
1016
|
+
|
|
1017
|
+
// Metadata
|
|
1018
|
+
lastUpdated: "2026-04-06",
|
|
1019
|
+
},
|
|
1020
|
+
"mistral-medium-3": {
|
|
1021
|
+
// AA Intelligence Index (composite score)
|
|
1022
|
+
intelligenceIndex: 18.8,
|
|
1023
|
+
normalizedScore: 27,
|
|
1024
|
+
|
|
1025
|
+
// AA specific benchmarks
|
|
1026
|
+
codingIndex: 13.6,
|
|
1027
|
+
mathIndex: 30.3,
|
|
1028
|
+
|
|
1029
|
+
// Academic benchmarks
|
|
1030
|
+
mmluPro: 0.76,
|
|
1031
|
+
gpqa: 0.578,
|
|
1032
|
+
hle: 0.043,
|
|
1033
|
+
|
|
1034
|
+
// Capabilities
|
|
1035
|
+
contextWindow: 8192,
|
|
1036
|
+
supportsReasoning: false,
|
|
1037
|
+
supportsVision: false,
|
|
1038
|
+
|
|
1039
|
+
// Metadata
|
|
1040
|
+
lastUpdated: "2026-04-06",
|
|
1041
|
+
},
|
|
1042
|
+
"mistral-saba": {
|
|
1043
|
+
// AA Intelligence Index (composite score)
|
|
1044
|
+
intelligenceIndex: 12.1,
|
|
1045
|
+
normalizedScore: 17,
|
|
1046
|
+
|
|
1047
|
+
// AA specific benchmarks
|
|
1048
|
+
codingIndex: undefined,
|
|
1049
|
+
mathIndex: undefined,
|
|
1050
|
+
|
|
1051
|
+
// Academic benchmarks
|
|
1052
|
+
mmluPro: 0.611,
|
|
1053
|
+
gpqa: 0.424,
|
|
1054
|
+
hle: 0.041,
|
|
1055
|
+
|
|
1056
|
+
// Capabilities
|
|
1057
|
+
contextWindow: 8192,
|
|
1058
|
+
supportsReasoning: false,
|
|
1059
|
+
supportsVision: false,
|
|
1060
|
+
|
|
1061
|
+
// Metadata
|
|
1062
|
+
lastUpdated: "2026-04-06",
|
|
1063
|
+
},
|
|
1064
|
+
"mistral-small-3.2": {
|
|
1065
|
+
// AA Intelligence Index (composite score)
|
|
1066
|
+
intelligenceIndex: 15.1,
|
|
1067
|
+
normalizedScore: 22,
|
|
1068
|
+
|
|
1069
|
+
// AA specific benchmarks
|
|
1070
|
+
codingIndex: 13.3,
|
|
1071
|
+
mathIndex: 27.0,
|
|
1072
|
+
|
|
1073
|
+
// Academic benchmarks
|
|
1074
|
+
mmluPro: 0.681,
|
|
1075
|
+
gpqa: 0.505,
|
|
1076
|
+
hle: 0.043,
|
|
1077
|
+
|
|
1078
|
+
// Capabilities
|
|
1079
|
+
contextWindow: 8192,
|
|
1080
|
+
supportsReasoning: false,
|
|
1081
|
+
supportsVision: false,
|
|
1082
|
+
|
|
1083
|
+
// Metadata
|
|
1084
|
+
lastUpdated: "2026-04-06",
|
|
1085
|
+
},
|
|
1086
|
+
"magistral-medium-1": {
|
|
1087
|
+
// AA Intelligence Index (composite score)
|
|
1088
|
+
intelligenceIndex: 18.8,
|
|
1089
|
+
normalizedScore: 27,
|
|
1090
|
+
|
|
1091
|
+
// AA specific benchmarks
|
|
1092
|
+
codingIndex: 16.0,
|
|
1093
|
+
mathIndex: 40.3,
|
|
1094
|
+
|
|
1095
|
+
// Academic benchmarks
|
|
1096
|
+
mmluPro: 0.753,
|
|
1097
|
+
gpqa: 0.679,
|
|
1098
|
+
hle: 0.095,
|
|
1099
|
+
|
|
1100
|
+
// Capabilities
|
|
1101
|
+
contextWindow: 8192,
|
|
1102
|
+
supportsReasoning: false,
|
|
1103
|
+
supportsVision: false,
|
|
1104
|
+
|
|
1105
|
+
// Metadata
|
|
1106
|
+
lastUpdated: "2026-04-06",
|
|
1107
|
+
},
|
|
1108
|
+
"devstral-medium": {
|
|
1109
|
+
// AA Intelligence Index (composite score)
|
|
1110
|
+
intelligenceIndex: 18.7,
|
|
1111
|
+
normalizedScore: 27,
|
|
1112
|
+
|
|
1113
|
+
// AA specific benchmarks
|
|
1114
|
+
codingIndex: 15.9,
|
|
1115
|
+
mathIndex: 4.7,
|
|
1116
|
+
|
|
1117
|
+
// Academic benchmarks
|
|
1118
|
+
mmluPro: 0.708,
|
|
1119
|
+
gpqa: 0.492,
|
|
1120
|
+
hle: 0.038,
|
|
1121
|
+
|
|
1122
|
+
// Capabilities
|
|
1123
|
+
contextWindow: 8192,
|
|
1124
|
+
supportsReasoning: false,
|
|
1125
|
+
supportsVision: false,
|
|
1126
|
+
|
|
1127
|
+
// Metadata
|
|
1128
|
+
lastUpdated: "2026-04-06",
|
|
1129
|
+
},
|
|
1130
|
+
"magistral-small-1": {
|
|
1131
|
+
// AA Intelligence Index (composite score)
|
|
1132
|
+
intelligenceIndex: 16.8,
|
|
1133
|
+
normalizedScore: 24,
|
|
1134
|
+
|
|
1135
|
+
// AA specific benchmarks
|
|
1136
|
+
codingIndex: 11.1,
|
|
1137
|
+
mathIndex: 41.3,
|
|
1138
|
+
|
|
1139
|
+
// Academic benchmarks
|
|
1140
|
+
mmluPro: 0.746,
|
|
1141
|
+
gpqa: 0.641,
|
|
1142
|
+
hle: 0.072,
|
|
1143
|
+
|
|
1144
|
+
// Capabilities
|
|
1145
|
+
contextWindow: 8192,
|
|
1146
|
+
supportsReasoning: false,
|
|
1147
|
+
supportsVision: false,
|
|
1148
|
+
|
|
1149
|
+
// Metadata
|
|
1150
|
+
lastUpdated: "2026-04-06",
|
|
1151
|
+
},
|
|
1152
|
+
"mistral-medium": {
|
|
1153
|
+
// AA Intelligence Index (composite score)
|
|
1154
|
+
intelligenceIndex: 9.0,
|
|
1155
|
+
normalizedScore: 13,
|
|
1156
|
+
|
|
1157
|
+
// AA specific benchmarks
|
|
1158
|
+
codingIndex: undefined,
|
|
1159
|
+
mathIndex: undefined,
|
|
1160
|
+
|
|
1161
|
+
// Academic benchmarks
|
|
1162
|
+
mmluPro: 0.491,
|
|
1163
|
+
gpqa: 0.349,
|
|
1164
|
+
hle: 0.034,
|
|
1165
|
+
|
|
1166
|
+
// Capabilities
|
|
1167
|
+
contextWindow: 8192,
|
|
1168
|
+
supportsReasoning: false,
|
|
1169
|
+
supportsVision: false,
|
|
1170
|
+
|
|
1171
|
+
// Metadata
|
|
1172
|
+
lastUpdated: "2026-04-06",
|
|
1173
|
+
},
|
|
1174
|
+
"devstral-small-jul-25": {
|
|
1175
|
+
// AA Intelligence Index (composite score)
|
|
1176
|
+
intelligenceIndex: 15.2,
|
|
1177
|
+
normalizedScore: 22,
|
|
1178
|
+
|
|
1179
|
+
// AA specific benchmarks
|
|
1180
|
+
codingIndex: 12.1,
|
|
1181
|
+
mathIndex: 29.3,
|
|
1182
|
+
|
|
1183
|
+
// Academic benchmarks
|
|
1184
|
+
mmluPro: 0.622,
|
|
1185
|
+
gpqa: 0.414,
|
|
1186
|
+
hle: 0.037,
|
|
1187
|
+
|
|
1188
|
+
// Capabilities
|
|
1189
|
+
contextWindow: 8192,
|
|
1190
|
+
supportsReasoning: false,
|
|
1191
|
+
supportsVision: false,
|
|
1192
|
+
|
|
1193
|
+
// Metadata
|
|
1194
|
+
lastUpdated: "2026-04-06",
|
|
1195
|
+
},
|
|
1196
|
+
"devstral-small-may-25": {
|
|
1197
|
+
// AA Intelligence Index (composite score)
|
|
1198
|
+
intelligenceIndex: 18.0,
|
|
1199
|
+
normalizedScore: 26,
|
|
1200
|
+
|
|
1201
|
+
// AA specific benchmarks
|
|
1202
|
+
codingIndex: 12.2,
|
|
1203
|
+
mathIndex: undefined,
|
|
1204
|
+
|
|
1205
|
+
// Academic benchmarks
|
|
1206
|
+
mmluPro: 0.632,
|
|
1207
|
+
gpqa: 0.434,
|
|
1208
|
+
hle: 0.04,
|
|
1209
|
+
|
|
1210
|
+
// Capabilities
|
|
1211
|
+
contextWindow: 8192,
|
|
1212
|
+
supportsReasoning: false,
|
|
1213
|
+
supportsVision: false,
|
|
1214
|
+
|
|
1215
|
+
// Metadata
|
|
1216
|
+
lastUpdated: "2026-04-06",
|
|
1217
|
+
},
|
|
1218
|
+
"deepseek-r1-distill-qwen-32b": {
|
|
1219
|
+
// AA Intelligence Index (composite score)
|
|
1220
|
+
intelligenceIndex: 17.2,
|
|
1221
|
+
normalizedScore: 25,
|
|
1222
|
+
|
|
1223
|
+
// AA specific benchmarks
|
|
1224
|
+
codingIndex: undefined,
|
|
1225
|
+
mathIndex: 63.0,
|
|
1226
|
+
|
|
1227
|
+
// Academic benchmarks
|
|
1228
|
+
mmluPro: 0.739,
|
|
1229
|
+
gpqa: 0.615,
|
|
1230
|
+
hle: 0.055,
|
|
1231
|
+
|
|
1232
|
+
// Capabilities
|
|
1233
|
+
contextWindow: 8192,
|
|
1234
|
+
supportsReasoning: false,
|
|
1235
|
+
supportsVision: false,
|
|
1236
|
+
|
|
1237
|
+
// Metadata
|
|
1238
|
+
lastUpdated: "2026-04-06",
|
|
1239
|
+
},
|
|
1240
|
+
"deepseek-v3-dec-24": {
|
|
1241
|
+
// AA Intelligence Index (composite score)
|
|
1242
|
+
intelligenceIndex: 16.5,
|
|
1243
|
+
normalizedScore: 24,
|
|
1244
|
+
|
|
1245
|
+
// AA specific benchmarks
|
|
1246
|
+
codingIndex: 16.4,
|
|
1247
|
+
mathIndex: 26.0,
|
|
1248
|
+
|
|
1249
|
+
// Academic benchmarks
|
|
1250
|
+
mmluPro: 0.752,
|
|
1251
|
+
gpqa: 0.557,
|
|
1252
|
+
hle: 0.036,
|
|
1253
|
+
|
|
1254
|
+
// Capabilities
|
|
1255
|
+
contextWindow: 8192,
|
|
1256
|
+
supportsReasoning: false,
|
|
1257
|
+
supportsVision: false,
|
|
1258
|
+
|
|
1259
|
+
// Metadata
|
|
1260
|
+
lastUpdated: "2026-04-06",
|
|
1261
|
+
},
|
|
1262
|
+
"deepseek-r1-distill-qwen-14b": {
|
|
1263
|
+
// AA Intelligence Index (composite score)
|
|
1264
|
+
intelligenceIndex: 15.8,
|
|
1265
|
+
normalizedScore: 23,
|
|
1266
|
+
|
|
1267
|
+
// AA specific benchmarks
|
|
1268
|
+
codingIndex: undefined,
|
|
1269
|
+
mathIndex: 55.7,
|
|
1270
|
+
|
|
1271
|
+
// Academic benchmarks
|
|
1272
|
+
mmluPro: 0.74,
|
|
1273
|
+
gpqa: 0.484,
|
|
1274
|
+
hle: 0.044,
|
|
1275
|
+
|
|
1276
|
+
// Capabilities
|
|
1277
|
+
contextWindow: 8192,
|
|
1278
|
+
supportsReasoning: false,
|
|
1279
|
+
supportsVision: false,
|
|
1280
|
+
|
|
1281
|
+
// Metadata
|
|
1282
|
+
lastUpdated: "2026-04-06",
|
|
1283
|
+
},
|
|
1284
|
+
"deepseek-v2.5-dec-24": {
|
|
1285
|
+
// AA Intelligence Index (composite score)
|
|
1286
|
+
intelligenceIndex: 12.5,
|
|
1287
|
+
normalizedScore: 18,
|
|
1288
|
+
|
|
1289
|
+
// AA specific benchmarks
|
|
1290
|
+
codingIndex: undefined,
|
|
1291
|
+
mathIndex: undefined,
|
|
1292
|
+
|
|
1293
|
+
// Academic benchmarks
|
|
1294
|
+
mmluPro: undefined,
|
|
1295
|
+
gpqa: undefined,
|
|
1296
|
+
hle: undefined,
|
|
1297
|
+
|
|
1298
|
+
// Capabilities
|
|
1299
|
+
contextWindow: 8192,
|
|
1300
|
+
supportsReasoning: false,
|
|
1301
|
+
supportsVision: false,
|
|
1302
|
+
|
|
1303
|
+
// Metadata
|
|
1304
|
+
lastUpdated: "2026-04-06",
|
|
1305
|
+
},
|
|
1306
|
+
"deepseek-coder-v2": {
|
|
1307
|
+
// AA Intelligence Index (composite score)
|
|
1308
|
+
intelligenceIndex: 10.6,
|
|
1309
|
+
normalizedScore: 15,
|
|
1310
|
+
|
|
1311
|
+
// AA specific benchmarks
|
|
1312
|
+
codingIndex: undefined,
|
|
1313
|
+
mathIndex: undefined,
|
|
1314
|
+
|
|
1315
|
+
// Academic benchmarks
|
|
1316
|
+
mmluPro: undefined,
|
|
1317
|
+
gpqa: undefined,
|
|
1318
|
+
hle: undefined,
|
|
1319
|
+
|
|
1320
|
+
// Capabilities
|
|
1321
|
+
contextWindow: 8192,
|
|
1322
|
+
supportsReasoning: false,
|
|
1323
|
+
supportsVision: false,
|
|
1324
|
+
|
|
1325
|
+
// Metadata
|
|
1326
|
+
lastUpdated: "2026-04-06",
|
|
1327
|
+
},
|
|
1328
|
+
"deepseek-r1-distill-llama-8b": {
|
|
1329
|
+
// AA Intelligence Index (composite score)
|
|
1330
|
+
intelligenceIndex: 12.1,
|
|
1331
|
+
normalizedScore: 17,
|
|
1332
|
+
|
|
1333
|
+
// AA specific benchmarks
|
|
1334
|
+
codingIndex: undefined,
|
|
1335
|
+
mathIndex: 41.3,
|
|
1336
|
+
|
|
1337
|
+
// Academic benchmarks
|
|
1338
|
+
mmluPro: 0.543,
|
|
1339
|
+
gpqa: 0.302,
|
|
1340
|
+
hle: 0.042,
|
|
1341
|
+
|
|
1342
|
+
// Capabilities
|
|
1343
|
+
contextWindow: 8192,
|
|
1344
|
+
supportsReasoning: false,
|
|
1345
|
+
supportsVision: false,
|
|
1346
|
+
|
|
1347
|
+
// Metadata
|
|
1348
|
+
lastUpdated: "2026-04-06",
|
|
1349
|
+
},
|
|
1350
|
+
"deepseek-llm-67b-chat-v1": {
|
|
1351
|
+
// AA Intelligence Index (composite score)
|
|
1352
|
+
intelligenceIndex: 8.4,
|
|
1353
|
+
normalizedScore: 12,
|
|
1354
|
+
|
|
1355
|
+
// AA specific benchmarks
|
|
1356
|
+
codingIndex: undefined,
|
|
1357
|
+
mathIndex: undefined,
|
|
1358
|
+
|
|
1359
|
+
// Academic benchmarks
|
|
1360
|
+
mmluPro: undefined,
|
|
1361
|
+
gpqa: undefined,
|
|
1362
|
+
hle: undefined,
|
|
1363
|
+
|
|
1364
|
+
// Capabilities
|
|
1365
|
+
contextWindow: 8192,
|
|
1366
|
+
supportsReasoning: false,
|
|
1367
|
+
supportsVision: false,
|
|
1368
|
+
|
|
1369
|
+
// Metadata
|
|
1370
|
+
lastUpdated: "2026-04-06",
|
|
1371
|
+
},
|
|
1372
|
+
"deepseek-r1-distill-qwen-1.5b": {
|
|
1373
|
+
// AA Intelligence Index (composite score)
|
|
1374
|
+
intelligenceIndex: 9.1,
|
|
1375
|
+
normalizedScore: 13,
|
|
1376
|
+
|
|
1377
|
+
// AA specific benchmarks
|
|
1378
|
+
codingIndex: undefined,
|
|
1379
|
+
mathIndex: 22.0,
|
|
1380
|
+
|
|
1381
|
+
// Academic benchmarks
|
|
1382
|
+
mmluPro: 0.269,
|
|
1383
|
+
gpqa: 0.098,
|
|
1384
|
+
hle: 0.033,
|
|
1385
|
+
|
|
1386
|
+
// Capabilities
|
|
1387
|
+
contextWindow: 8192,
|
|
1388
|
+
supportsReasoning: false,
|
|
1389
|
+
supportsVision: false,
|
|
1390
|
+
|
|
1391
|
+
// Metadata
|
|
1392
|
+
lastUpdated: "2026-04-06",
|
|
1393
|
+
},
|
|
1394
|
+
"deepseek-v3.1-terminus-non-reasoning": {
|
|
1395
|
+
// AA Intelligence Index (composite score)
|
|
1396
|
+
intelligenceIndex: 28.5,
|
|
1397
|
+
normalizedScore: 41,
|
|
1398
|
+
|
|
1399
|
+
// AA specific benchmarks
|
|
1400
|
+
codingIndex: 31.9,
|
|
1401
|
+
mathIndex: 53.7,
|
|
1402
|
+
|
|
1403
|
+
// Academic benchmarks
|
|
1404
|
+
mmluPro: 0.836,
|
|
1405
|
+
gpqa: 0.751,
|
|
1406
|
+
hle: 0.084,
|
|
1407
|
+
|
|
1408
|
+
// Capabilities
|
|
1409
|
+
contextWindow: 8192,
|
|
1410
|
+
supportsReasoning: false,
|
|
1411
|
+
supportsVision: false,
|
|
1412
|
+
|
|
1413
|
+
// Metadata
|
|
1414
|
+
lastUpdated: "2026-04-06",
|
|
1415
|
+
},
|
|
1416
|
+
"deepseek-v3.2-exp-reasoning": {
|
|
1417
|
+
// AA Intelligence Index (composite score)
|
|
1418
|
+
intelligenceIndex: 32.9,
|
|
1419
|
+
normalizedScore: 47,
|
|
1420
|
+
|
|
1421
|
+
// AA specific benchmarks
|
|
1422
|
+
codingIndex: 33.3,
|
|
1423
|
+
mathIndex: 87.7,
|
|
1424
|
+
|
|
1425
|
+
// Academic benchmarks
|
|
1426
|
+
mmluPro: 0.85,
|
|
1427
|
+
gpqa: 0.797,
|
|
1428
|
+
hle: 0.138,
|
|
1429
|
+
|
|
1430
|
+
// Capabilities
|
|
1431
|
+
contextWindow: 8192,
|
|
1432
|
+
supportsReasoning: false,
|
|
1433
|
+
supportsVision: false,
|
|
1434
|
+
|
|
1435
|
+
// Metadata
|
|
1436
|
+
lastUpdated: "2026-04-06",
|
|
1437
|
+
},
|
|
1438
|
+
"deepseek-v3.1-reasoning": {
|
|
1439
|
+
// AA Intelligence Index (composite score)
|
|
1440
|
+
intelligenceIndex: 27.7,
|
|
1441
|
+
normalizedScore: 40,
|
|
1442
|
+
|
|
1443
|
+
// AA specific benchmarks
|
|
1444
|
+
codingIndex: 29.7,
|
|
1445
|
+
mathIndex: 89.7,
|
|
1446
|
+
|
|
1447
|
+
// Academic benchmarks
|
|
1448
|
+
mmluPro: 0.851,
|
|
1449
|
+
gpqa: 0.779,
|
|
1450
|
+
hle: 0.13,
|
|
1451
|
+
|
|
1452
|
+
// Capabilities
|
|
1453
|
+
contextWindow: 8192,
|
|
1454
|
+
supportsReasoning: false,
|
|
1455
|
+
supportsVision: false,
|
|
1456
|
+
|
|
1457
|
+
// Metadata
|
|
1458
|
+
lastUpdated: "2026-04-06",
|
|
1459
|
+
},
|
|
1460
|
+
"deepseek-v3.2-exp-non-reasoning": {
|
|
1461
|
+
// AA Intelligence Index (composite score)
|
|
1462
|
+
intelligenceIndex: 28.4,
|
|
1463
|
+
normalizedScore: 41,
|
|
1464
|
+
|
|
1465
|
+
// AA specific benchmarks
|
|
1466
|
+
codingIndex: 30.0,
|
|
1467
|
+
mathIndex: 57.7,
|
|
1468
|
+
|
|
1469
|
+
// Academic benchmarks
|
|
1470
|
+
mmluPro: 0.836,
|
|
1471
|
+
gpqa: 0.738,
|
|
1472
|
+
hle: 0.086,
|
|
1473
|
+
|
|
1474
|
+
// Capabilities
|
|
1475
|
+
contextWindow: 8192,
|
|
1476
|
+
supportsReasoning: false,
|
|
1477
|
+
supportsVision: false,
|
|
1478
|
+
|
|
1479
|
+
// Metadata
|
|
1480
|
+
lastUpdated: "2026-04-06",
|
|
1481
|
+
},
|
|
1482
|
+
"deepseek-v3.1-terminus-reasoning": {
|
|
1483
|
+
// AA Intelligence Index (composite score)
|
|
1484
|
+
intelligenceIndex: 33.9,
|
|
1485
|
+
normalizedScore: 48,
|
|
1486
|
+
|
|
1487
|
+
// AA specific benchmarks
|
|
1488
|
+
codingIndex: 33.7,
|
|
1489
|
+
mathIndex: 89.7,
|
|
1490
|
+
|
|
1491
|
+
// Academic benchmarks
|
|
1492
|
+
mmluPro: 0.851,
|
|
1493
|
+
gpqa: 0.792,
|
|
1494
|
+
hle: 0.152,
|
|
1495
|
+
|
|
1496
|
+
// Capabilities
|
|
1497
|
+
contextWindow: 8192,
|
|
1498
|
+
supportsReasoning: false,
|
|
1499
|
+
supportsVision: false,
|
|
1500
|
+
|
|
1501
|
+
// Metadata
|
|
1502
|
+
lastUpdated: "2026-04-06",
|
|
1503
|
+
},
|
|
1504
|
+
"deepseek-v3-0324": {
|
|
1505
|
+
// AA Intelligence Index (composite score)
|
|
1506
|
+
intelligenceIndex: 22.3,
|
|
1507
|
+
normalizedScore: 32,
|
|
1508
|
+
|
|
1509
|
+
// AA specific benchmarks
|
|
1510
|
+
codingIndex: 22.0,
|
|
1511
|
+
mathIndex: 41.0,
|
|
1512
|
+
|
|
1513
|
+
// Academic benchmarks
|
|
1514
|
+
mmluPro: 0.819,
|
|
1515
|
+
gpqa: 0.655,
|
|
1516
|
+
hle: 0.052,
|
|
1517
|
+
|
|
1518
|
+
// Capabilities
|
|
1519
|
+
contextWindow: 8192,
|
|
1520
|
+
supportsReasoning: false,
|
|
1521
|
+
supportsVision: false,
|
|
1522
|
+
|
|
1523
|
+
// Metadata
|
|
1524
|
+
lastUpdated: "2026-04-06",
|
|
1525
|
+
},
|
|
1526
|
+
"deepseek-r1-jan-25": {
|
|
1527
|
+
// AA Intelligence Index (composite score)
|
|
1528
|
+
intelligenceIndex: 18.8,
|
|
1529
|
+
normalizedScore: 27,
|
|
1530
|
+
|
|
1531
|
+
// AA specific benchmarks
|
|
1532
|
+
codingIndex: 15.9,
|
|
1533
|
+
mathIndex: 68.0,
|
|
1534
|
+
|
|
1535
|
+
// Academic benchmarks
|
|
1536
|
+
mmluPro: 0.844,
|
|
1537
|
+
gpqa: 0.708,
|
|
1538
|
+
hle: 0.093,
|
|
1539
|
+
|
|
1540
|
+
// Capabilities
|
|
1541
|
+
contextWindow: 8192,
|
|
1542
|
+
supportsReasoning: false,
|
|
1543
|
+
supportsVision: false,
|
|
1544
|
+
|
|
1545
|
+
// Metadata
|
|
1546
|
+
lastUpdated: "2026-04-06",
|
|
1547
|
+
},
|
|
1548
|
+
"deepseek-v3.1-non-reasoning": {
|
|
1549
|
+
// AA Intelligence Index (composite score)
|
|
1550
|
+
intelligenceIndex: 28.1,
|
|
1551
|
+
normalizedScore: 40,
|
|
1552
|
+
|
|
1553
|
+
// AA specific benchmarks
|
|
1554
|
+
codingIndex: 28.4,
|
|
1555
|
+
mathIndex: 49.7,
|
|
1556
|
+
|
|
1557
|
+
// Academic benchmarks
|
|
1558
|
+
mmluPro: 0.833,
|
|
1559
|
+
gpqa: 0.735,
|
|
1560
|
+
hle: 0.063,
|
|
1561
|
+
|
|
1562
|
+
// Capabilities
|
|
1563
|
+
contextWindow: 8192,
|
|
1564
|
+
supportsReasoning: false,
|
|
1565
|
+
supportsVision: false,
|
|
1566
|
+
|
|
1567
|
+
// Metadata
|
|
1568
|
+
lastUpdated: "2026-04-06",
|
|
1569
|
+
},
|
|
1570
|
+
"deepseek-v2.5": {
|
|
1571
|
+
// AA Intelligence Index (composite score)
|
|
1572
|
+
intelligenceIndex: 12.3,
|
|
1573
|
+
normalizedScore: 18,
|
|
1574
|
+
|
|
1575
|
+
// AA specific benchmarks
|
|
1576
|
+
codingIndex: undefined,
|
|
1577
|
+
mathIndex: undefined,
|
|
1578
|
+
|
|
1579
|
+
// Academic benchmarks
|
|
1580
|
+
mmluPro: undefined,
|
|
1581
|
+
gpqa: undefined,
|
|
1582
|
+
hle: undefined,
|
|
1583
|
+
|
|
1584
|
+
// Capabilities
|
|
1585
|
+
contextWindow: 8192,
|
|
1586
|
+
supportsReasoning: false,
|
|
1587
|
+
supportsVision: false,
|
|
1588
|
+
|
|
1589
|
+
// Metadata
|
|
1590
|
+
lastUpdated: "2026-04-06",
|
|
1591
|
+
},
|
|
1592
|
+
"deepseek-v2-chat": {
|
|
1593
|
+
// AA Intelligence Index (composite score)
|
|
1594
|
+
intelligenceIndex: 9.1,
|
|
1595
|
+
normalizedScore: 13,
|
|
1596
|
+
|
|
1597
|
+
// AA specific benchmarks
|
|
1598
|
+
codingIndex: undefined,
|
|
1599
|
+
mathIndex: undefined,
|
|
1600
|
+
|
|
1601
|
+
// Academic benchmarks
|
|
1602
|
+
mmluPro: undefined,
|
|
1603
|
+
gpqa: undefined,
|
|
1604
|
+
hle: undefined,
|
|
1605
|
+
|
|
1606
|
+
// Capabilities
|
|
1607
|
+
contextWindow: 8192,
|
|
1608
|
+
supportsReasoning: false,
|
|
1609
|
+
supportsVision: false,
|
|
1610
|
+
|
|
1611
|
+
// Metadata
|
|
1612
|
+
lastUpdated: "2026-04-06",
|
|
1613
|
+
},
|
|
1614
|
+
"deepseek-coder-v2-lite-instruct": {
|
|
1615
|
+
// AA Intelligence Index (composite score)
|
|
1616
|
+
intelligenceIndex: 8.5,
|
|
1617
|
+
normalizedScore: 12,
|
|
1618
|
+
|
|
1619
|
+
// AA specific benchmarks
|
|
1620
|
+
codingIndex: undefined,
|
|
1621
|
+
mathIndex: undefined,
|
|
1622
|
+
|
|
1623
|
+
// Academic benchmarks
|
|
1624
|
+
mmluPro: 0.429,
|
|
1625
|
+
gpqa: 0.319,
|
|
1626
|
+
hle: 0.053,
|
|
1627
|
+
|
|
1628
|
+
// Capabilities
|
|
1629
|
+
contextWindow: 8192,
|
|
1630
|
+
supportsReasoning: false,
|
|
1631
|
+
supportsVision: false,
|
|
1632
|
+
|
|
1633
|
+
// Metadata
|
|
1634
|
+
lastUpdated: "2026-04-06",
|
|
1635
|
+
},
|
|
1636
|
+
sonar: {
|
|
1637
|
+
// AA Intelligence Index (composite score)
|
|
1638
|
+
intelligenceIndex: 15.5,
|
|
1639
|
+
normalizedScore: 22,
|
|
1640
|
+
|
|
1641
|
+
// AA specific benchmarks
|
|
1642
|
+
codingIndex: undefined,
|
|
1643
|
+
mathIndex: undefined,
|
|
1644
|
+
|
|
1645
|
+
// Academic benchmarks
|
|
1646
|
+
mmluPro: 0.689,
|
|
1647
|
+
gpqa: 0.471,
|
|
1648
|
+
hle: 0.073,
|
|
1649
|
+
|
|
1650
|
+
// Capabilities
|
|
1651
|
+
contextWindow: 8192,
|
|
1652
|
+
supportsReasoning: false,
|
|
1653
|
+
supportsVision: false,
|
|
1654
|
+
|
|
1655
|
+
// Metadata
|
|
1656
|
+
lastUpdated: "2026-04-06",
|
|
1657
|
+
},
|
|
1658
|
+
"sonar-reasoning-pro": {
|
|
1659
|
+
// AA Intelligence Index (composite score)
|
|
1660
|
+
intelligenceIndex: 24.6,
|
|
1661
|
+
normalizedScore: 35,
|
|
1662
|
+
|
|
1663
|
+
// AA specific benchmarks
|
|
1664
|
+
codingIndex: undefined,
|
|
1665
|
+
mathIndex: undefined,
|
|
1666
|
+
|
|
1667
|
+
// Academic benchmarks
|
|
1668
|
+
mmluPro: undefined,
|
|
1669
|
+
gpqa: undefined,
|
|
1670
|
+
hle: undefined,
|
|
1671
|
+
|
|
1672
|
+
// Capabilities
|
|
1673
|
+
contextWindow: 8192,
|
|
1674
|
+
supportsReasoning: false,
|
|
1675
|
+
supportsVision: false,
|
|
1676
|
+
|
|
1677
|
+
// Metadata
|
|
1678
|
+
lastUpdated: "2026-04-06",
|
|
1679
|
+
},
|
|
1680
|
+
"sonar-pro": {
|
|
1681
|
+
// AA Intelligence Index (composite score)
|
|
1682
|
+
intelligenceIndex: 15.2,
|
|
1683
|
+
normalizedScore: 22,
|
|
1684
|
+
|
|
1685
|
+
// AA specific benchmarks
|
|
1686
|
+
codingIndex: undefined,
|
|
1687
|
+
mathIndex: undefined,
|
|
1688
|
+
|
|
1689
|
+
// Academic benchmarks
|
|
1690
|
+
mmluPro: 0.755,
|
|
1691
|
+
gpqa: 0.578,
|
|
1692
|
+
hle: 0.079,
|
|
1693
|
+
|
|
1694
|
+
// Capabilities
|
|
1695
|
+
contextWindow: 8192,
|
|
1696
|
+
supportsReasoning: false,
|
|
1697
|
+
supportsVision: false,
|
|
1698
|
+
|
|
1699
|
+
// Metadata
|
|
1700
|
+
lastUpdated: "2026-04-06",
|
|
1701
|
+
},
|
|
1702
|
+
"sonar-reasoning": {
|
|
1703
|
+
// AA Intelligence Index (composite score)
|
|
1704
|
+
intelligenceIndex: 17.9,
|
|
1705
|
+
normalizedScore: 26,
|
|
1706
|
+
|
|
1707
|
+
// AA specific benchmarks
|
|
1708
|
+
codingIndex: undefined,
|
|
1709
|
+
mathIndex: undefined,
|
|
1710
|
+
|
|
1711
|
+
// Academic benchmarks
|
|
1712
|
+
mmluPro: undefined,
|
|
1713
|
+
gpqa: 0.623,
|
|
1714
|
+
hle: undefined,
|
|
1715
|
+
|
|
1716
|
+
// Capabilities
|
|
1717
|
+
contextWindow: 8192,
|
|
1718
|
+
supportsReasoning: false,
|
|
1719
|
+
supportsVision: false,
|
|
1720
|
+
|
|
1721
|
+
// Metadata
|
|
1722
|
+
lastUpdated: "2026-04-06",
|
|
1723
|
+
},
|
|
1724
|
+
"grok-beta": {
|
|
1725
|
+
// AA Intelligence Index (composite score)
|
|
1726
|
+
intelligenceIndex: 13.3,
|
|
1727
|
+
normalizedScore: 19,
|
|
1728
|
+
|
|
1729
|
+
// AA specific benchmarks
|
|
1730
|
+
codingIndex: undefined,
|
|
1731
|
+
mathIndex: undefined,
|
|
1732
|
+
|
|
1733
|
+
// Academic benchmarks
|
|
1734
|
+
mmluPro: 0.703,
|
|
1735
|
+
gpqa: 0.471,
|
|
1736
|
+
hle: 0.047,
|
|
1737
|
+
|
|
1738
|
+
// Capabilities
|
|
1739
|
+
contextWindow: 8192,
|
|
1740
|
+
supportsReasoning: false,
|
|
1741
|
+
supportsVision: false,
|
|
1742
|
+
|
|
1743
|
+
// Metadata
|
|
1744
|
+
lastUpdated: "2026-04-06",
|
|
1745
|
+
},
|
|
1746
|
+
"grok-4-fast-reasoning": {
|
|
1747
|
+
// AA Intelligence Index (composite score)
|
|
1748
|
+
intelligenceIndex: 35.1,
|
|
1749
|
+
normalizedScore: 50,
|
|
1750
|
+
|
|
1751
|
+
// AA specific benchmarks
|
|
1752
|
+
codingIndex: 27.4,
|
|
1753
|
+
mathIndex: 89.7,
|
|
1754
|
+
|
|
1755
|
+
// Academic benchmarks
|
|
1756
|
+
mmluPro: 0.85,
|
|
1757
|
+
gpqa: 0.847,
|
|
1758
|
+
hle: 0.17,
|
|
1759
|
+
|
|
1760
|
+
// Capabilities
|
|
1761
|
+
contextWindow: 8192,
|
|
1762
|
+
supportsReasoning: false,
|
|
1763
|
+
supportsVision: false,
|
|
1764
|
+
|
|
1765
|
+
// Metadata
|
|
1766
|
+
lastUpdated: "2026-04-06",
|
|
1767
|
+
},
|
|
1768
|
+
"grok-3-reasoning-beta": {
|
|
1769
|
+
// AA Intelligence Index (composite score)
|
|
1770
|
+
intelligenceIndex: 21.6,
|
|
1771
|
+
normalizedScore: 31,
|
|
1772
|
+
|
|
1773
|
+
// AA specific benchmarks
|
|
1774
|
+
codingIndex: undefined,
|
|
1775
|
+
mathIndex: undefined,
|
|
1776
|
+
|
|
1777
|
+
// Academic benchmarks
|
|
1778
|
+
mmluPro: undefined,
|
|
1779
|
+
gpqa: undefined,
|
|
1780
|
+
hle: undefined,
|
|
1781
|
+
|
|
1782
|
+
// Capabilities
|
|
1783
|
+
contextWindow: 8192,
|
|
1784
|
+
supportsReasoning: false,
|
|
1785
|
+
supportsVision: false,
|
|
1786
|
+
|
|
1787
|
+
// Metadata
|
|
1788
|
+
lastUpdated: "2026-04-06",
|
|
1789
|
+
},
|
|
1790
|
+
"grok-3": {
|
|
1791
|
+
// AA Intelligence Index (composite score)
|
|
1792
|
+
intelligenceIndex: 25.2,
|
|
1793
|
+
normalizedScore: 36,
|
|
1794
|
+
|
|
1795
|
+
// AA specific benchmarks
|
|
1796
|
+
codingIndex: 19.8,
|
|
1797
|
+
mathIndex: 58.0,
|
|
1798
|
+
|
|
1799
|
+
// Academic benchmarks
|
|
1800
|
+
mmluPro: 0.799,
|
|
1801
|
+
gpqa: 0.693,
|
|
1802
|
+
hle: 0.051,
|
|
1803
|
+
|
|
1804
|
+
// Capabilities
|
|
1805
|
+
contextWindow: 8192,
|
|
1806
|
+
supportsReasoning: false,
|
|
1807
|
+
supportsVision: false,
|
|
1808
|
+
|
|
1809
|
+
// Metadata
|
|
1810
|
+
lastUpdated: "2026-04-06",
|
|
1811
|
+
},
|
|
1812
|
+
"grok-4": {
|
|
1813
|
+
// AA Intelligence Index (composite score)
|
|
1814
|
+
intelligenceIndex: 41.5,
|
|
1815
|
+
normalizedScore: 59,
|
|
1816
|
+
|
|
1817
|
+
// AA specific benchmarks
|
|
1818
|
+
codingIndex: 40.5,
|
|
1819
|
+
mathIndex: 92.7,
|
|
1820
|
+
|
|
1821
|
+
// Academic benchmarks
|
|
1822
|
+
mmluPro: 0.866,
|
|
1823
|
+
gpqa: 0.877,
|
|
1824
|
+
hle: 0.239,
|
|
1825
|
+
|
|
1826
|
+
// Capabilities
|
|
1827
|
+
contextWindow: 8192,
|
|
1828
|
+
supportsReasoning: false,
|
|
1829
|
+
supportsVision: false,
|
|
1830
|
+
|
|
1831
|
+
// Metadata
|
|
1832
|
+
lastUpdated: "2026-04-06",
|
|
1833
|
+
},
|
|
1834
|
+
"grok-4.1-fast-non-reasoning": {
|
|
1835
|
+
// AA Intelligence Index (composite score)
|
|
1836
|
+
intelligenceIndex: 23.6,
|
|
1837
|
+
normalizedScore: 34,
|
|
1838
|
+
|
|
1839
|
+
// AA specific benchmarks
|
|
1840
|
+
codingIndex: 19.5,
|
|
1841
|
+
mathIndex: 34.3,
|
|
1842
|
+
|
|
1843
|
+
// Academic benchmarks
|
|
1844
|
+
mmluPro: 0.743,
|
|
1845
|
+
gpqa: 0.637,
|
|
1846
|
+
hle: 0.05,
|
|
1847
|
+
|
|
1848
|
+
// Capabilities
|
|
1849
|
+
contextWindow: 8192,
|
|
1850
|
+
supportsReasoning: false,
|
|
1851
|
+
supportsVision: false,
|
|
1852
|
+
|
|
1853
|
+
// Metadata
|
|
1854
|
+
lastUpdated: "2026-04-06",
|
|
1855
|
+
},
|
|
1856
|
+
"grok-4.1-fast-reasoning": {
|
|
1857
|
+
// AA Intelligence Index (composite score)
|
|
1858
|
+
intelligenceIndex: 38.6,
|
|
1859
|
+
normalizedScore: 55,
|
|
1860
|
+
|
|
1861
|
+
// AA specific benchmarks
|
|
1862
|
+
codingIndex: 30.9,
|
|
1863
|
+
mathIndex: 89.3,
|
|
1864
|
+
|
|
1865
|
+
// Academic benchmarks
|
|
1866
|
+
mmluPro: 0.854,
|
|
1867
|
+
gpqa: 0.853,
|
|
1868
|
+
hle: 0.176,
|
|
1869
|
+
|
|
1870
|
+
// Capabilities
|
|
1871
|
+
contextWindow: 8192,
|
|
1872
|
+
supportsReasoning: false,
|
|
1873
|
+
supportsVision: false,
|
|
1874
|
+
|
|
1875
|
+
// Metadata
|
|
1876
|
+
lastUpdated: "2026-04-06",
|
|
1877
|
+
},
|
|
1878
|
+
"grok-2-dec-24": {
|
|
1879
|
+
// AA Intelligence Index (composite score)
|
|
1880
|
+
intelligenceIndex: 13.9,
|
|
1881
|
+
normalizedScore: 20,
|
|
1882
|
+
|
|
1883
|
+
// AA specific benchmarks
|
|
1884
|
+
codingIndex: undefined,
|
|
1885
|
+
mathIndex: undefined,
|
|
1886
|
+
|
|
1887
|
+
// Academic benchmarks
|
|
1888
|
+
mmluPro: 0.709,
|
|
1889
|
+
gpqa: 0.51,
|
|
1890
|
+
hle: 0.038,
|
|
1891
|
+
|
|
1892
|
+
// Capabilities
|
|
1893
|
+
contextWindow: 8192,
|
|
1894
|
+
supportsReasoning: false,
|
|
1895
|
+
supportsVision: false,
|
|
1896
|
+
|
|
1897
|
+
// Metadata
|
|
1898
|
+
lastUpdated: "2026-04-06",
|
|
1899
|
+
},
|
|
1900
|
+
"grok-4-fast-non-reasoning": {
|
|
1901
|
+
// AA Intelligence Index (composite score)
|
|
1902
|
+
intelligenceIndex: 23.1,
|
|
1903
|
+
normalizedScore: 33,
|
|
1904
|
+
|
|
1905
|
+
// AA specific benchmarks
|
|
1906
|
+
codingIndex: 19.0,
|
|
1907
|
+
mathIndex: 41.3,
|
|
1908
|
+
|
|
1909
|
+
// Academic benchmarks
|
|
1910
|
+
mmluPro: 0.73,
|
|
1911
|
+
gpqa: 0.606,
|
|
1912
|
+
hle: 0.05,
|
|
1913
|
+
|
|
1914
|
+
// Capabilities
|
|
1915
|
+
contextWindow: 8192,
|
|
1916
|
+
supportsReasoning: false,
|
|
1917
|
+
supportsVision: false,
|
|
1918
|
+
|
|
1919
|
+
// Metadata
|
|
1920
|
+
lastUpdated: "2026-04-06",
|
|
1921
|
+
},
|
|
1922
|
+
"openchat-3.5-1210": {
|
|
1923
|
+
// AA Intelligence Index (composite score)
|
|
1924
|
+
intelligenceIndex: 8.3,
|
|
1925
|
+
normalizedScore: 12,
|
|
1926
|
+
|
|
1927
|
+
// AA specific benchmarks
|
|
1928
|
+
codingIndex: undefined,
|
|
1929
|
+
mathIndex: undefined,
|
|
1930
|
+
|
|
1931
|
+
// Academic benchmarks
|
|
1932
|
+
mmluPro: 0.31,
|
|
1933
|
+
gpqa: 0.23,
|
|
1934
|
+
hle: 0.048,
|
|
1935
|
+
|
|
1936
|
+
// Capabilities
|
|
1937
|
+
contextWindow: 8192,
|
|
1938
|
+
supportsReasoning: false,
|
|
1939
|
+
supportsVision: false,
|
|
1940
|
+
|
|
1941
|
+
// Metadata
|
|
1942
|
+
lastUpdated: "2026-04-06",
|
|
1943
|
+
},
|
|
1944
|
+
"nova-pro": {
|
|
1945
|
+
// AA Intelligence Index (composite score)
|
|
1946
|
+
intelligenceIndex: 13.5,
|
|
1947
|
+
normalizedScore: 19,
|
|
1948
|
+
|
|
1949
|
+
// AA specific benchmarks
|
|
1950
|
+
codingIndex: 11.0,
|
|
1951
|
+
mathIndex: 7.0,
|
|
1952
|
+
|
|
1953
|
+
// Academic benchmarks
|
|
1954
|
+
mmluPro: 0.691,
|
|
1955
|
+
gpqa: 0.499,
|
|
1956
|
+
hle: 0.034,
|
|
1957
|
+
|
|
1958
|
+
// Capabilities
|
|
1959
|
+
contextWindow: 8192,
|
|
1960
|
+
supportsReasoning: false,
|
|
1961
|
+
supportsVision: false,
|
|
1962
|
+
|
|
1963
|
+
// Metadata
|
|
1964
|
+
lastUpdated: "2026-04-06",
|
|
1965
|
+
},
|
|
1966
|
+
"nova-lite": {
|
|
1967
|
+
// AA Intelligence Index (composite score)
|
|
1968
|
+
intelligenceIndex: 12.7,
|
|
1969
|
+
normalizedScore: 18,
|
|
1970
|
+
|
|
1971
|
+
// AA specific benchmarks
|
|
1972
|
+
codingIndex: 5.1,
|
|
1973
|
+
mathIndex: 7.0,
|
|
1974
|
+
|
|
1975
|
+
// Academic benchmarks
|
|
1976
|
+
mmluPro: 0.59,
|
|
1977
|
+
gpqa: 0.433,
|
|
1978
|
+
hle: 0.046,
|
|
1979
|
+
|
|
1980
|
+
// Capabilities
|
|
1981
|
+
contextWindow: 8192,
|
|
1982
|
+
supportsReasoning: false,
|
|
1983
|
+
supportsVision: false,
|
|
1984
|
+
|
|
1985
|
+
// Metadata
|
|
1986
|
+
lastUpdated: "2026-04-06",
|
|
1987
|
+
},
|
|
1988
|
+
"phi-3-mini-instruct-3.8b": {
|
|
1989
|
+
// AA Intelligence Index (composite score)
|
|
1990
|
+
intelligenceIndex: 10.1,
|
|
1991
|
+
normalizedScore: 14,
|
|
1992
|
+
|
|
1993
|
+
// AA specific benchmarks
|
|
1994
|
+
codingIndex: 3.0,
|
|
1995
|
+
mathIndex: 0.3,
|
|
1996
|
+
|
|
1997
|
+
// Academic benchmarks
|
|
1998
|
+
mmluPro: 0.435,
|
|
1999
|
+
gpqa: 0.319,
|
|
2000
|
+
hle: 0.044,
|
|
2001
|
+
|
|
2002
|
+
// Capabilities
|
|
2003
|
+
contextWindow: 8192,
|
|
2004
|
+
supportsReasoning: false,
|
|
2005
|
+
supportsVision: false,
|
|
2006
|
+
|
|
2007
|
+
// Metadata
|
|
2008
|
+
lastUpdated: "2026-04-06",
|
|
2009
|
+
},
|
|
2010
|
+
};
|