free-coding-models 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/free-coding-models.js +38 -20
- package/lib/utils.js +17 -4
- package/package.json +1 -1
- package/sources.js +56 -56
|
@@ -393,6 +393,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
393
393
|
const W_TIER = 6
|
|
394
394
|
const W_SOURCE = 14
|
|
395
395
|
const W_MODEL = 26
|
|
396
|
+
const W_SWE = 9
|
|
396
397
|
const W_PING = 14
|
|
397
398
|
const W_AVG = 11
|
|
398
399
|
const W_STATUS = 18
|
|
@@ -421,25 +422,34 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
421
422
|
const tierH = 'Tier'
|
|
422
423
|
const originH = 'Origin'
|
|
423
424
|
const modelH = 'Model'
|
|
425
|
+
const sweH = sortColumn === 'swe' ? dir + ' SWE%' : 'SWE%'
|
|
424
426
|
const pingH = sortColumn === 'ping' ? dir + ' Latest Ping' : 'Latest Ping'
|
|
425
427
|
const avgH = sortColumn === 'avg' ? dir + ' Avg Ping' : 'Avg Ping'
|
|
426
428
|
const statusH = sortColumn === 'status' ? dir + ' Status' : 'Status'
|
|
427
429
|
const verdictH = sortColumn === 'verdict' ? dir + ' Verdict' : 'Verdict'
|
|
428
430
|
const uptimeH = sortColumn === 'uptime' ? dir + ' Up%' : 'Up%'
|
|
429
431
|
|
|
432
|
+
// π Helper to colorize first letter for keyboard shortcuts
|
|
433
|
+
const colorFirst = (text, width, colorFn = chalk.yellow) => {
|
|
434
|
+
const first = text[0]
|
|
435
|
+
const rest = text.slice(1)
|
|
436
|
+
return (colorFn(first) + chalk.dim(rest)).padEnd(width)
|
|
437
|
+
}
|
|
438
|
+
|
|
430
439
|
// π Now colorize after padding is calculated on plain text
|
|
431
|
-
const rankH_c =
|
|
432
|
-
const tierH_c =
|
|
433
|
-
const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) :
|
|
434
|
-
const modelH_c =
|
|
435
|
-
const
|
|
436
|
-
const
|
|
437
|
-
const
|
|
438
|
-
const
|
|
439
|
-
const
|
|
440
|
+
const rankH_c = colorFirst(rankH, W_RANK)
|
|
441
|
+
const tierH_c = colorFirst('Tier', W_TIER)
|
|
442
|
+
const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : colorFirst(originH, W_SOURCE)
|
|
443
|
+
const modelH_c = colorFirst(modelH, W_MODEL)
|
|
444
|
+
const sweH_c = sortColumn === 'swe' ? chalk.bold.cyan(sweH.padEnd(W_SWE)) : colorFirst(sweH, W_SWE)
|
|
445
|
+
const pingH_c = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : colorFirst(pingH.replace('Latest ', ''), W_PING)
|
|
446
|
+
const avgH_c = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : colorFirst(avgH.replace('Avg ', ''), W_AVG)
|
|
447
|
+
const statusH_c = sortColumn === 'status' ? chalk.bold.cyan(statusH.padEnd(W_STATUS)) : colorFirst(statusH, W_STATUS)
|
|
448
|
+
const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : colorFirst(verdictH, W_VERDICT)
|
|
449
|
+
const uptimeH_c = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : colorFirst(uptimeH, W_UPTIME, chalk.green)
|
|
440
450
|
|
|
441
451
|
// π Header with proper spacing
|
|
442
|
-
lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + originH_c + ' ' + modelH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + statusH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
|
|
452
|
+
lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + originH_c + ' ' + modelH_c + ' ' + sweH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + statusH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
|
|
443
453
|
|
|
444
454
|
// π Separator line
|
|
445
455
|
lines.push(
|
|
@@ -448,6 +458,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
448
458
|
chalk.dim('β'.repeat(W_TIER)) + ' ' +
|
|
449
459
|
'β'.repeat(W_SOURCE) + ' ' +
|
|
450
460
|
'β'.repeat(W_MODEL) + ' ' +
|
|
461
|
+
chalk.dim('β'.repeat(W_SWE)) + ' ' +
|
|
451
462
|
chalk.dim('β'.repeat(W_PING)) + ' ' +
|
|
452
463
|
chalk.dim('β'.repeat(W_AVG)) + ' ' +
|
|
453
464
|
chalk.dim('β'.repeat(W_STATUS)) + ' ' +
|
|
@@ -471,8 +482,14 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
471
482
|
// π Left-aligned columns - pad plain text first, then colorize
|
|
472
483
|
const num = chalk.dim(String(r.idx).padEnd(W_RANK))
|
|
473
484
|
const tier = tierFn(r.tier.padEnd(W_TIER))
|
|
474
|
-
const source = chalk.green('
|
|
485
|
+
const source = chalk.green('NIM'.padEnd(W_SOURCE))
|
|
475
486
|
const name = r.label.slice(0, W_MODEL).padEnd(W_MODEL)
|
|
487
|
+
const sweScore = r.sweScore ?? 'β'
|
|
488
|
+
const sweCell = sweScore !== 'β' && parseFloat(sweScore) >= 50
|
|
489
|
+
? chalk.greenBright(sweScore.padEnd(W_SWE))
|
|
490
|
+
: sweScore !== 'β' && parseFloat(sweScore) >= 30
|
|
491
|
+
? chalk.yellow(sweScore.padEnd(W_SWE))
|
|
492
|
+
: chalk.dim(sweScore.padEnd(W_SWE))
|
|
476
493
|
|
|
477
494
|
// π Latest ping - pings are objects: { ms, code }
|
|
478
495
|
// 📖 Only show response time for successful pings, "—" for errors (error code is in Status column)
|
|
@@ -579,7 +596,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
579
596
|
}
|
|
580
597
|
|
|
581
598
|
// π Build row with double space between columns
|
|
582
|
-
const row = ' ' + num + ' ' + tier + ' ' + source + ' ' + name + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
|
|
599
|
+
const row = ' ' + num + ' ' + tier + ' ' + source + ' ' + name + ' ' + sweCell + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
|
|
583
600
|
|
|
584
601
|
if (isCursor) {
|
|
585
602
|
lines.push(chalk.bgRgb(139, 0, 139)(row))
|
|
@@ -601,9 +618,9 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
|
|
|
601
618
|
: mode === 'opencode-desktop'
|
|
602
619
|
? chalk.rgb(0, 200, 255)('EnterβOpenDesktop')
|
|
603
620
|
: chalk.rgb(0, 200, 255)('EnterβOpenCode')
|
|
604
|
-
lines.push(chalk.dim(` ββ Navigate β’ `) + actionHint + chalk.dim(` β’ R/T/O/M/
|
|
621
|
+
lines.push(chalk.dim(` ββ Navigate β’ `) + actionHint + chalk.dim(` β’ R/T/O/M/L/A/S/V/U/E Sort β’ Wβ/Xβ Interval (${intervalSec}s) β’ T Tier β’ Z Mode β’ Ctrl+C Exit`))
|
|
605
622
|
lines.push('')
|
|
606
|
-
lines.push(chalk.dim(' Made with ') + 'π' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β’ ') + 'π¬ ' + '\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join our Discord
|
|
623
|
+
lines.push(chalk.dim(' Made with ') + 'π' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β’ ') + 'π¬ ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join our Discord! (link fixed)\x1b]8;;\x1b\\') + chalk.dim(' β’ ') + 'β ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
|
|
607
624
|
lines.push('')
|
|
608
625
|
// π Append \x1b[K (erase to EOL) to each line so leftover chars from previous
|
|
609
626
|
// π frames are cleared. Then pad with blank cleared lines to fill the terminal,
|
|
@@ -973,8 +990,8 @@ async function runFiableMode(apiKey) {
|
|
|
973
990
|
console.log(chalk.cyan(' β‘ Analyzing models for reliability (10 seconds)...'))
|
|
974
991
|
console.log()
|
|
975
992
|
|
|
976
|
-
let results = MODELS.map(([modelId, label, tier], i) => ({
|
|
977
|
-
idx: i + 1, modelId, label, tier,
|
|
993
|
+
let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
|
|
994
|
+
idx: i + 1, modelId, label, tier, sweScore,
|
|
978
995
|
status: 'pending',
|
|
979
996
|
pings: [],
|
|
980
997
|
httpCode: null,
|
|
@@ -1064,8 +1081,8 @@ async function main() {
|
|
|
1064
1081
|
// π This section is now handled by the update notification menu above
|
|
1065
1082
|
|
|
1066
1083
|
// π Create results array with all models initially visible
|
|
1067
|
-
let results = MODELS.map(([modelId, label, tier], i) => ({
|
|
1068
|
-
idx: i + 1, modelId, label, tier,
|
|
1084
|
+
let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
|
|
1085
|
+
idx: i + 1, modelId, label, tier, sweScore,
|
|
1069
1086
|
status: 'pending',
|
|
1070
1087
|
pings: [], // π All ping results (ms or 'TIMEOUT')
|
|
1071
1088
|
httpCode: null,
|
|
@@ -1174,10 +1191,11 @@ async function main() {
|
|
|
1174
1191
|
const onKeyPress = async (str, key) => {
|
|
1175
1192
|
if (!key) return
|
|
1176
1193
|
|
|
1177
|
-
// π Sorting keys: R=rank, T=tier, O=origin, M=model,
|
|
1194
|
+
// π Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=status, V=verdict, U=uptime, E=SWE-bench
|
|
1178
1195
|
const sortKeys = {
|
|
1179
1196
|
'r': 'rank', 't': 'tier', 'o': 'origin', 'm': 'model',
|
|
1180
|
-
'
|
|
1197
|
+
'l': 'ping', 'a': 'avg', 's': 'status', 'v': 'verdict', 'u': 'uptime',
|
|
1198
|
+
'e': 'swe'
|
|
1181
1199
|
}
|
|
1182
1200
|
|
|
1183
1201
|
if (sortKeys[key.name]) {
|
package/lib/utils.js
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
* modelId: string, // e.g. "deepseek-ai/deepseek-v3.2"
|
|
20
20
|
* label: string, // e.g. "DeepSeek V3.2" (human-friendly name)
|
|
21
21
|
* tier: string, // e.g. "S+", "A", "B+" — from sources.js
|
|
22
|
+
* sweScore: string, // e.g. "49.2%", "73.1%" — SWE-bench Verified score
|
|
22
23
|
* status: string, // "pending" | "up" | "down" | "timeout"
|
|
23
24
|
* pings: Array<{ms: number, code: string}>, // full ping history since start
|
|
24
25
|
* httpCode: string|null // last HTTP status code (for detecting 429 rate limits)
|
|
@@ -130,13 +131,14 @@ export const getUptime = (r) => {
|
|
|
130
131
|
// π Supported columns (matching the keyboard shortcuts in the TUI):
|
|
131
132
|
// - 'rank' (R key) — original index from sources.js
|
|
132
133
|
// - 'tier' (T key) — tier hierarchy (S+ first, C last)
|
|
133
|
-
// - 'origin' (O key) β provider name (all
|
|
134
|
+
// - 'origin' (O key) — provider name (all NIM for now, future-proofed)
|
|
134
135
|
// - 'model' (M key) — alphabetical by display label
|
|
135
|
-
// - 'ping' (
|
|
136
|
+
// - 'ping' (L key) — last ping latency (only successful ones count)
|
|
136
137
|
// - 'avg' (A key) — average latency across all successful pings
|
|
137
138
|
// - 'status' (S key) — alphabetical status string
|
|
138
139
|
// - 'verdict' (V key) — verdict order (Perfect → Pending)
|
|
139
140
|
// - 'uptime' (U key) — uptime percentage
|
|
141
|
+
// - 'swe' (E key) — SWE-bench score (higher is better)
|
|
140
142
|
//
|
|
141
143
|
// π sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
|
|
142
144
|
export const sortResults = (results, sortColumn, sortDirection) => {
|
|
@@ -152,8 +154,8 @@ export const sortResults = (results, sortColumn, sortDirection) => {
|
|
|
152
154
|
cmp = TIER_ORDER.indexOf(a.tier) - TIER_ORDER.indexOf(b.tier)
|
|
153
155
|
break
|
|
154
156
|
case 'origin':
|
|
155
|
-
// π All models are
|
|
156
|
-
cmp = '
|
|
157
|
+
// 📖 All models are NIM for now — this is future-proofed for multi-source
|
|
158
|
+
cmp = 'NIM'.localeCompare('NIM')
|
|
157
159
|
break
|
|
158
160
|
case 'model':
|
|
159
161
|
cmp = a.label.localeCompare(b.label)
|
|
@@ -184,6 +186,17 @@ export const sortResults = (results, sortColumn, sortDirection) => {
|
|
|
184
186
|
case 'uptime':
|
|
185
187
|
cmp = getUptime(a) - getUptime(b)
|
|
186
188
|
break
|
|
189
|
+
case 'swe': {
|
|
190
|
+
// 📖 Sort by SWE-bench score — higher is better
|
|
191
|
+
// π Parse percentage strings like "49.2%", "73.1%" or use 0 for missing values
|
|
192
|
+
const parseSwe = (score) => {
|
|
193
|
+
if (!score || score === 'β') return 0
|
|
194
|
+
const num = parseFloat(score.replace('%', ''))
|
|
195
|
+
return isNaN(num) ? 0 : num
|
|
196
|
+
}
|
|
197
|
+
cmp = parseSwe(a.sweScore) - parseSwe(b.sweScore)
|
|
198
|
+
break
|
|
199
|
+
}
|
|
187
200
|
}
|
|
188
201
|
|
|
189
202
|
// π Flip comparison for descending order
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.31",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds β ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|
package/sources.js
CHANGED
|
@@ -22,66 +22,66 @@
|
|
|
22
22
|
* @exports Object containing all sources and their models
|
|
23
23
|
*/
|
|
24
24
|
|
|
25
|
-
// π
|
|
25
|
+
// π NIM source - https://build.nvidia.com
|
|
26
26
|
export const nvidiaNim = [
|
|
27
|
-
// ββ S+ tier β
|
|
28
|
-
['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+'], //
|
|
29
|
-
['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+'], // same base
|
|
30
|
-
['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+'], //
|
|
31
|
-
['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+'], //
|
|
32
|
-
['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+'], //
|
|
33
|
-
['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+'], //
|
|
34
|
-
['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+'], //
|
|
35
|
-
// ββ S tier β
|
|
36
|
-
['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S'],
|
|
37
|
-
['z-ai/glm5', 'GLM 5', 'S'],
|
|
38
|
-
['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S'],
|
|
39
|
-
['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S'],
|
|
40
|
-
['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S'],
|
|
41
|
-
['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S'],
|
|
42
|
-
['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S'],
|
|
43
|
-
// ββ A+ tier β
|
|
44
|
-
['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+'], //
|
|
45
|
-
['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+'], //
|
|
46
|
-
['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+'], //
|
|
47
|
-
['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+'], //
|
|
48
|
-
['z-ai/glm4.7', 'GLM 4.7', 'A+'], //
|
|
49
|
-
['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+'], //
|
|
50
|
-
// ββ A tier β
|
|
51
|
-
['minimaxai/minimax-m2', 'MiniMax M2', 'A'],
|
|
52
|
-
['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A'],
|
|
53
|
-
['mistralai/magistral-small-2506', 'Magistral Small', 'A'],
|
|
54
|
-
['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A'],
|
|
55
|
-
['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A'],
|
|
56
|
-
// ββ A- tier β
|
|
57
|
-
['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-'], //
|
|
58
|
-
['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-'], //
|
|
59
|
-
['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-'], //
|
|
60
|
-
['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-'], //
|
|
61
|
-
['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-'], //
|
|
62
|
-
// ββ B+ tier β
|
|
63
|
-
['qwen/qwq-32b', 'QwQ 32B', 'B+'], //
|
|
64
|
-
['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+'], //
|
|
65
|
-
['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+'], //
|
|
66
|
-
['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+'], //
|
|
67
|
-
['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+'], //
|
|
68
|
-
// ββ B tier β
|
|
69
|
-
['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B'],
|
|
70
|
-
['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B'],
|
|
71
|
-
['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B'],
|
|
72
|
-
['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B'],
|
|
73
|
-
['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B'],
|
|
74
|
-
// ββ C tier β
|
|
75
|
-
['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C'
|
|
76
|
-
['google/gemma-2-9b-it', 'Gemma 2 9B', 'C'],
|
|
77
|
-
['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C'],
|
|
78
|
-
['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C'],
|
|
27
|
+
// ── S+ tier — SWE-bench Verified ≥70% ──
|
|
28
|
+
['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%'], // 49.2% SWE-bench Verified
|
|
29
|
+
['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%'], // same base V3.1
|
|
30
|
+
['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%'], // 73.1% SWE-bench Verified
|
|
31
|
+
['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%'], // 76.8% SWE-bench Verified
|
|
32
|
+
['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%'], // 62.0% SWE-bench (est.)
|
|
33
|
+
['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%'], // 56.0% SWE-bench (est.)
|
|
34
|
+
['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%'], // 58.0% SWE-bench (est.)
|
|
35
|
+
// ── S tier — SWE-bench Verified 50–70% ──
|
|
36
|
+
['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%'], // 46.0% SWE-bench Verified
|
|
37
|
+
['z-ai/glm5', 'GLM 5', 'S', '77.8%'], // 77.8% SWE-bench Verified
|
|
38
|
+
['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%'], // 68.0% SWE-bench (est.)
|
|
39
|
+
['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%'], // 72.0% SWE-bench (est.)
|
|
40
|
+
['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%'], // 68.0% SWE-bench (est.)
|
|
41
|
+
['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%'], // 44.0% SWE-bench (est.)
|
|
42
|
+
['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%'], // 70.0% SWE-bench (est.)
|
|
43
|
+
// ── A+ tier — SWE-bench Verified 60–70% ──
|
|
44
|
+
['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%'], // 67.0% SWE-bench (est.)
|
|
45
|
+
['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%'], // 65.8% SWE-bench Verified
|
|
46
|
+
['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%'], // 70.0% SWE-bench (est.)
|
|
47
|
+
['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%'], // 39.5% SWE-bench (est.)
|
|
48
|
+
['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%'], // 73.8% SWE-bench Verified
|
|
49
|
+
['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%'], // 65.0% SWE-bench (est.)
|
|
50
|
+
// ── A tier — SWE-bench Verified 45–60% ──
|
|
51
|
+
['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%'], // 56.5% SWE-bench (est.)
|
|
52
|
+
['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%'], // 48.0% SWE-bench (est.)
|
|
53
|
+
['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%'], // 45.0% SWE-bench (est.)
|
|
54
|
+
['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%'], // 43.0% SWE-bench (est.)
|
|
55
|
+
['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%'], // 43.9% SWE-bench Verified
|
|
56
|
+
// ── A- tier — SWE-bench Verified 35–45% ──
|
|
57
|
+
['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%'], // 60.0% SWE-bench (est.)
|
|
58
|
+
['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%'], // 49.0% SWE-bench (est.)
|
|
59
|
+
['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%'], // 44.0% SWE-bench (est.)
|
|
60
|
+
['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%'], // 37.7% SWE-bench (est.)
|
|
61
|
+
['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%'], // 52.0% SWE-bench (est.)
|
|
62
|
+
// ── B+ tier — SWE-bench Verified 30–40% ──
|
|
63
|
+
['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%'], // 50.0% SWE-bench (est.)
|
|
64
|
+
['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%'], // 42.0% SWE-bench (est.)
|
|
65
|
+
['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%'], // 36.0% SWE-bench (est.)
|
|
66
|
+
['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%'], // 38.0% SWE-bench (est.)
|
|
67
|
+
['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%'], // 74.4% SWE-bench Verified
|
|
68
|
+
// ── B tier — SWE-bench Verified 20–35% ──
|
|
69
|
+
['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%'], // 62.0% SWE-bench (est.)
|
|
70
|
+
['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%'], // 32.0% SWE-bench (est.)
|
|
71
|
+
['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%'], // 34.0% SWE-bench (est.)
|
|
72
|
+
['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%'], // 30.0% SWE-bench (est.)
|
|
73
|
+
['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%'], // 28.2% SWE-bench (est.)
|
|
74
|
+
// ── C tier — SWE-bench Verified <25% or lightweight edge models ──
|
|
75
|
+
['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%'], // 22.6% SWE-bench (est.)
|
|
76
|
+
['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%'], // 18.0% SWE-bench (est.)
|
|
77
|
+
['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%'], // 12.0% SWE-bench (est.)
|
|
78
|
+
['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%'], // 14.0% SWE-bench (est.)
|
|
79
79
|
]
|
|
80
80
|
|
|
81
81
|
// π All sources combined - used by the main script
|
|
82
82
|
export const sources = {
|
|
83
83
|
nvidia: {
|
|
84
|
-
name: '
|
|
84
|
+
name: 'NIM',
|
|
85
85
|
models: nvidiaNim,
|
|
86
86
|
},
|
|
87
87
|
// π Add more sources here in the future, for example:
|
|
@@ -98,7 +98,7 @@ export const sources = {
|
|
|
98
98
|
// π Flatten all models from all sources for backward compatibility
|
|
99
99
|
export const MODELS = []
|
|
100
100
|
for (const [sourceKey, sourceData] of Object.entries(sources)) {
|
|
101
|
-
for (const [modelId, label, tier] of sourceData.models) {
|
|
102
|
-
MODELS.push([modelId, label, tier])
|
|
101
|
+
for (const [modelId, label, tier, sweScore] of sourceData.models) {
|
|
102
|
+
MODELS.push([modelId, label, tier, sweScore])
|
|
103
103
|
}
|
|
104
104
|
}
|