free-coding-models 0.1.29 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -393,6 +393,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
393
393
  const W_TIER = 6
394
394
  const W_SOURCE = 14
395
395
  const W_MODEL = 26
396
+ const W_SWE = 9
396
397
  const W_PING = 14
397
398
  const W_AVG = 11
398
399
  const W_STATUS = 18
@@ -421,25 +422,34 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
421
422
  const tierH = 'Tier'
422
423
  const originH = 'Origin'
423
424
  const modelH = 'Model'
425
+ const sweH = sortColumn === 'swe' ? dir + ' SWE%' : 'SWE%'
424
426
  const pingH = sortColumn === 'ping' ? dir + ' Latest Ping' : 'Latest Ping'
425
427
  const avgH = sortColumn === 'avg' ? dir + ' Avg Ping' : 'Avg Ping'
426
- const statusH = sortColumn === 'status' ? dir + ' Status' : 'Status'
428
+ const conditionH = sortColumn === 'condition' ? dir + ' Condition' : 'Condition'
427
429
  const verdictH = sortColumn === 'verdict' ? dir + ' Verdict' : 'Verdict'
428
430
  const uptimeH = sortColumn === 'uptime' ? dir + ' Up%' : 'Up%'
429
431
 
432
// 📖 Helper to colorize the first letter of a header label (the keyboard shortcut)
// 📖 and dim the remainder, left-aligned inside a fixed-width column.
// 📖
// 📖 @param {string} text - plain (unstyled) header label, e.g. 'Tier'
// 📖 @param {number} width - visible column width in characters
// 📖 @param {Function} [colorFn=chalk.yellow] - styler applied to the first letter
// 📖 @returns {string} styled label followed by plain-space padding
// 📖
// 📖 Padding is measured on the PLAIN label, then appended after styling. Calling
// 📖 padEnd() on the styled string would count the ANSI escape bytes as visible
// 📖 characters and leave the column too narrow whenever colors are enabled.
const colorFirst = (text, width, colorFn = chalk.yellow) => {
  const label = String(text ?? '')
  const padding = ' '.repeat(Math.max(0, width - label.length))

  // 📖 Nothing to highlight: return a blank cell instead of styling `undefined`
  if (label.length === 0) return padding

  return colorFn(label[0]) + chalk.dim(label.slice(1)) + padding
}
438
+
430
439
  // πŸ“– Now colorize after padding is calculated on plain text
431
- const rankH_c = chalk.dim(rankH.padEnd(W_RANK))
432
- const tierH_c = chalk.dim(tierH.padEnd(W_TIER))
433
- const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : chalk.dim(originH.padEnd(W_SOURCE))
434
- const modelH_c = chalk.dim(modelH.padEnd(W_MODEL))
435
- const pingH_c = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : chalk.dim(pingH.padEnd(W_PING))
436
- const avgH_c = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : chalk.dim(avgH.padEnd(W_AVG))
437
- const statusH_c = sortColumn === 'status' ? chalk.bold.cyan(statusH.padEnd(W_STATUS)) : chalk.dim(statusH.padEnd(W_STATUS))
438
- const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : chalk.dim(verdictH.padEnd(W_VERDICT))
439
- const uptimeH_c = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : chalk.dim(uptimeH.padStart(W_UPTIME))
440
+ const rankH_c = colorFirst(rankH, W_RANK)
441
+ const tierH_c = colorFirst('Tier', W_TIER)
442
+ const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : colorFirst(originH, W_SOURCE)
443
+ const modelH_c = colorFirst(modelH, W_MODEL)
444
+ const sweH_c = sortColumn === 'swe' ? chalk.bold.cyan(sweH.padEnd(W_SWE)) : colorFirst('SWE%', W_SWE)
445
+ const pingH_c = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : colorFirst('Latest Ping', W_PING)
446
+ const avgH_c = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : colorFirst('Avg Ping', W_AVG)
447
+ const conditionH_c = sortColumn === 'condition' ? chalk.bold.cyan(conditionH.padEnd(W_STATUS)) : colorFirst('Condition', W_STATUS)
448
+ const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : colorFirst(verdictH, W_VERDICT)
449
+ const uptimeH_c = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : colorFirst(uptimeH, W_UPTIME, chalk.green)
440
450
 
441
451
  // πŸ“– Header with proper spacing
442
- lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + originH_c + ' ' + modelH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + statusH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
452
+ lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + originH_c + ' ' + modelH_c + ' ' + sweH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + conditionH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
443
453
 
444
454
  // πŸ“– Separator line
445
455
  lines.push(
@@ -448,6 +458,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
448
458
  chalk.dim('─'.repeat(W_TIER)) + ' ' +
449
459
  '─'.repeat(W_SOURCE) + ' ' +
450
460
  '─'.repeat(W_MODEL) + ' ' +
461
+ chalk.dim('─'.repeat(W_SWE)) + ' ' +
451
462
  chalk.dim('─'.repeat(W_PING)) + ' ' +
452
463
  chalk.dim('─'.repeat(W_AVG)) + ' ' +
453
464
  chalk.dim('─'.repeat(W_STATUS)) + ' ' +
@@ -471,8 +482,14 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
471
482
  // πŸ“– Left-aligned columns - pad plain text first, then colorize
472
483
  const num = chalk.dim(String(r.idx).padEnd(W_RANK))
473
484
  const tier = tierFn(r.tier.padEnd(W_TIER))
474
- const source = chalk.green('NVIDIA NIM'.padEnd(W_SOURCE))
485
+ const source = chalk.green('NIM'.padEnd(W_SOURCE))
475
486
  const name = r.label.slice(0, W_MODEL).padEnd(W_MODEL)
487
+ const sweScore = r.sweScore ?? 'β€”'
488
+ const sweCell = sweScore !== 'β€”' && parseFloat(sweScore) >= 50
489
+ ? chalk.greenBright(sweScore.padEnd(W_SWE))
490
+ : sweScore !== 'β€”' && parseFloat(sweScore) >= 30
491
+ ? chalk.yellow(sweScore.padEnd(W_SWE))
492
+ : chalk.dim(sweScore.padEnd(W_SWE))
476
493
 
477
494
  // πŸ“– Latest ping - pings are objects: { ms, code }
478
495
  // πŸ“– Only show response time for successful pings, "β€”" for errors (error code is in Status column)
@@ -579,7 +596,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
579
596
  }
580
597
 
581
598
  // πŸ“– Build row with double space between columns
582
- const row = ' ' + num + ' ' + tier + ' ' + source + ' ' + name + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
599
+ const row = ' ' + num + ' ' + tier + ' ' + source + ' ' + name + ' ' + sweCell + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
583
600
 
584
601
  if (isCursor) {
585
602
  lines.push(chalk.bgRgb(139, 0, 139)(row))
@@ -601,9 +618,9 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
601
618
  : mode === 'opencode-desktop'
602
619
  ? chalk.rgb(0, 200, 255)('Enterβ†’OpenDesktop')
603
620
  : chalk.rgb(0, 200, 255)('Enterβ†’OpenCode')
604
- lines.push(chalk.dim(` ↑↓ Navigate β€’ `) + actionHint + chalk.dim(` β€’ R/T/O/M/P/A/S/V/U Sort β€’ W↓/X↑ Interval (${intervalSec}s) β€’ T Tier β€’ Z Mode β€’ Ctrl+C Exit`))
621
+ lines.push(chalk.dim(` ↑↓ Navigate β€’ `) + actionHint + chalk.dim(` β€’ R/T/O/M/L/A/S/C/V/U Sort β€’ W↓/X↑ Interval (${intervalSec}s) β€’ T Tier β€’ Z Mode β€’ Ctrl+C Exit`))
605
622
  lines.push('')
606
- lines.push(chalk.dim(' Made with ') + 'πŸ’–' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + 'πŸ’¬ ' + '\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join our Discord!\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
623
+ lines.push(chalk.dim(' Made with ') + 'πŸ’– & β˜•' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + 'πŸ’¬ ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join Free-Coding-Models Discord!\x1b]8;;\x1b\\') + chalk.dim(' β€’ ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
607
624
  lines.push('')
608
625
  // πŸ“– Append \x1b[K (erase to EOL) to each line so leftover chars from previous
609
626
  // πŸ“– frames are cleared. Then pad with blank cleared lines to fill the terminal,
@@ -973,8 +990,8 @@ async function runFiableMode(apiKey) {
973
990
  console.log(chalk.cyan(' ⚑ Analyzing models for reliability (10 seconds)...'))
974
991
  console.log()
975
992
 
976
- let results = MODELS.map(([modelId, label, tier], i) => ({
977
- idx: i + 1, modelId, label, tier,
993
+ let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
994
+ idx: i + 1, modelId, label, tier, sweScore,
978
995
  status: 'pending',
979
996
  pings: [],
980
997
  httpCode: null,
@@ -1064,8 +1081,8 @@ async function main() {
1064
1081
  // πŸ“– This section is now handled by the update notification menu above
1065
1082
 
1066
1083
  // πŸ“– Create results array with all models initially visible
1067
- let results = MODELS.map(([modelId, label, tier], i) => ({
1068
- idx: i + 1, modelId, label, tier,
1084
+ let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
1085
+ idx: i + 1, modelId, label, tier, sweScore,
1069
1086
  status: 'pending',
1070
1087
  pings: [], // πŸ“– All ping results (ms or 'TIMEOUT')
1071
1088
  httpCode: null,
@@ -1174,10 +1191,10 @@ async function main() {
1174
1191
  const onKeyPress = async (str, key) => {
1175
1192
  if (!key) return
1176
1193
 
1177
- // πŸ“– Sorting keys: R=rank, T=tier, O=origin, M=model, P=ping, A=avg, S=status, V=verdict, U=uptime
1194
+ // πŸ“– Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, C=condition, V=verdict, U=uptime
1178
1195
  const sortKeys = {
1179
1196
  'r': 'rank', 't': 'tier', 'o': 'origin', 'm': 'model',
1180
- 'p': 'ping', 'a': 'avg', 's': 'status', 'v': 'verdict', 'u': 'uptime'
1197
+ 'l': 'ping', 'a': 'avg', 's': 'swe', 'c': 'condition', 'v': 'verdict', 'u': 'uptime'
1181
1198
  }
1182
1199
 
1183
1200
  if (sortKeys[key.name]) {
package/lib/utils.js CHANGED
@@ -19,6 +19,7 @@
19
19
  * modelId: string, // e.g. "deepseek-ai/deepseek-v3.2"
20
20
  * label: string, // e.g. "DeepSeek V3.2" (human-friendly name)
21
21
  * tier: string, // e.g. "S+", "A", "B+" β€” from sources.js
22
+ * sweScore: string, // e.g. "49.2%", "73.1%" β€” SWE-bench Verified score
22
23
  * status: string, // "pending" | "up" | "down" | "timeout"
23
24
  * pings: Array<{ms: number, code: string}>, // full ping history since start
24
25
  * httpCode: string|null // last HTTP status code (for detecting 429 rate limits)
@@ -128,15 +129,16 @@ export const getUptime = (r) => {
128
129
  // πŸ“– Returns a NEW array β€” never mutates the original (important for React-style re-renders).
129
130
  //
130
131
  // πŸ“– Supported columns (matching the keyboard shortcuts in the TUI):
131
- // - 'rank' (R key) β€” original index from sources.js
132
- // - 'tier' (T key) β€” tier hierarchy (S+ first, C last)
133
- // - 'origin' (O key) β€” provider name (all NVIDIA NIM for now, future-proofed)
134
- // - 'model' (M key) β€” alphabetical by display label
135
- // - 'ping' (P key) β€” last ping latency (only successful ones count)
136
- // - 'avg' (A key) β€” average latency across all successful pings
137
- // - 'status' (S key) β€” alphabetical status string
138
- // - 'verdict' (V key) β€” verdict order (Perfect β†’ Pending)
139
- // - 'uptime' (U key) β€” uptime percentage
132
+ // - 'rank' (R key) β€” original index from sources.js
133
+ // - 'tier' (T key) β€” tier hierarchy (S+ first, C last)
134
+ // - 'origin' (O key) β€” provider name (all NIM for now, future-proofed)
135
+ // - 'model' (M key) β€” alphabetical by display label
136
+ // - 'ping' (L key) β€” last ping latency (only successful ones count)
137
+ // - 'avg' (A key) β€” average latency across all successful pings
138
+ // - 'swe' (S key) β€” SWE-bench score (higher is better)
139
+ // - 'condition'(C key) β€” alphabetical condition string
140
+ // - 'verdict' (V key) β€” verdict order (Perfect β†’ Pending)
141
+ // - 'uptime' (U key) β€” uptime percentage
140
142
  //
141
143
  // πŸ“– sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
142
144
  export const sortResults = (results, sortColumn, sortDirection) => {
@@ -152,8 +154,8 @@ export const sortResults = (results, sortColumn, sortDirection) => {
152
154
  cmp = TIER_ORDER.indexOf(a.tier) - TIER_ORDER.indexOf(b.tier)
153
155
  break
154
156
  case 'origin':
155
- // πŸ“– All models are NVIDIA NIM for now β€” this is future-proofed for multi-source
156
- cmp = 'NVIDIA NIM'.localeCompare('NVIDIA NIM')
157
+ // πŸ“– All models are NIM for now β€” this is future-proofed for multi-source
158
+ cmp = 'NIM'.localeCompare('NIM')
157
159
  break
158
160
  case 'model':
159
161
  cmp = a.label.localeCompare(b.label)
@@ -171,7 +173,18 @@ export const sortResults = (results, sortColumn, sortDirection) => {
171
173
  case 'avg':
172
174
  cmp = getAvg(a) - getAvg(b)
173
175
  break
174
- case 'status':
176
+ case 'swe': {
177
+ // πŸ“– Sort by SWE-bench score β€” higher is better
178
+ // πŸ“– Parse percentage strings like "49.2%", "73.1%" or use 0 for missing values
179
+ const parseSwe = (score) => {
180
+ if (!score || score === 'β€”') return 0
181
+ const num = parseFloat(score.replace('%', ''))
182
+ return isNaN(num) ? 0 : num
183
+ }
184
+ cmp = parseSwe(a.sweScore) - parseSwe(b.sweScore)
185
+ break
186
+ }
187
+ case 'condition':
175
188
  cmp = a.status.localeCompare(b.status)
176
189
  break
177
190
  case 'verdict': {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.29",
3
+ "version": "0.1.32",
4
4
  "description": "Find the fastest coding LLM models in seconds β€” ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -22,66 +22,66 @@
22
22
  * @exports Object containing all sources and their models
23
23
  */
24
24
 
25
- // πŸ“– NVIDIA NIM source - https://build.nvidia.com
25
+ // πŸ“– NIM source - https://build.nvidia.com
26
26
  export const nvidiaNim = [
27
- // ── S+ tier β€” Aider polyglot β‰₯75% or equivalent frontier coding performance ──
28
- ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+'], // ~76.1% Aider polyglot (thinking mode)
29
- ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+'], // same base, terminus variant
30
- ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+'], // 74.2% Aider polyglot (reasoner)
31
- ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+'], // newer than K2 (59%), estimated S+
32
- ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+'], // coding-focused 123B, estimated S+
33
- ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+'], // 253B NVIDIA flagship, estimated S+
34
- ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+'], // 675B frontier, estimated S+
35
- // ── S tier β€” Aider polyglot 62–74% ─────────────────────────────────────────
36
- ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S'], // 71.4% Aider edit (best confirmed small coder)
37
- ['z-ai/glm5', 'GLM 5', 'S'], // GLM flagship, estimated S
38
- ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S'], // 400B VLM, estimated S
39
- ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S'], // 61.8% Aider polyglot
40
- ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S'], // 80B thinking, estimated S
41
- ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S'], // 66.2% Aider edit benchmark
42
- ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S'], // M2.1 flagship, estimated S
43
- // ── A+ tier β€” Aider polyglot 54–62% ────────────────────────────────────────
44
- ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+'], // thinking variant of K2 (59.1%)
45
- ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+'], // 59.1% Aider polyglot (confirmed)
46
- ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+'], // 59.6% Aider polyglot (confirmed)
47
- ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+'], // 59.4% Aider edit benchmark
48
- ['z-ai/glm4.7', 'GLM 4.7', 'A+'], // GLM 4.7, estimated A+
49
- ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+'], // 80B instruct, estimated A+
50
- // ── A tier β€” Aider polyglot 44–54% ─────────────────────────────────────────
51
- ['minimaxai/minimax-m2', 'MiniMax M2', 'A'], // MiniMax M2, estimated A
52
- ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A'], // medium model, estimated A
53
- ['mistralai/magistral-small-2506', 'Magistral Small', 'A'], // reasoning variant, estimated A
54
- ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A'], // 30B NVIDIA, estimated A
55
- ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A'], // 32B R1 distill, estimated A
56
- // ── A- tier β€” Aider polyglot 36–44% ────────────────────────────────────────
57
- ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-'], // 41.8% Aider polyglot (confirmed)
58
- ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-'], // 49B NVIDIA, estimated A-
59
- ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-'], // Scout 17B, estimated A-
60
- ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-'], // 14B R1 distill, estimated A-
61
- ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-'], // 355B MoE, estimated A-
62
- // ── B+ tier β€” Aider polyglot 25–36% ────────────────────────────────────────
63
- ['qwen/qwq-32b', 'QwQ 32B', 'B+'], // 20.9% Aider (format penalty β€” actually stronger)
64
- ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+'], // smaller OSS variant, estimated B+
65
- ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+'], // JP-specialized 100B, estimated B+
66
- ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+'], // ByteDance 36B, estimated B+
67
- ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+'], // flash model, estimated B+
68
- // ── B tier β€” Aider polyglot 14–25% ─────────────────────────────────────────
69
- ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B'], // 15.6% Aider polyglot (confirmed)
70
- ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B'], // older MoE, estimated B
71
- ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B'], // 14B, estimated B
72
- ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B'], // IBM code model, estimated B
73
- ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B'], // 8B R1 distill, estimated B
74
- // ── C tier β€” Aider polyglot <14% or lightweight edge models ─────────────────
75
- ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C'], // 7B, too small for complex coding
76
- ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C'], // 9B, lightweight
77
- ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C'], // mini, edge-focused
78
- ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C'], // mini, edge-focused
27
+ // ── S+ tier β€” SWE-bench Verified β‰₯70% ──
28
+ ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%'], // 49.2% SWE-bench Verified
29
+ ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%'], // same base V3.1
30
+ ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%'], // 73.1% SWE-bench Verified
31
+ ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%'], // 76.8% SWE-bench Verified
32
+ ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%'], // 62.0% SWE-bench (est.)
33
+ ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%'], // 56.0% SWE-bench (est.)
34
+ ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%'], // 58.0% SWE-bench (est.)
35
+ // ── S tier β€” SWE-bench Verified 50–70% ──
36
+ ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%'], // 46.0% SWE-bench Verified
37
+ ['z-ai/glm5', 'GLM 5', 'S', '77.8%'], // 77.8% SWE-bench Verified
38
+ ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%'], // 68.0% SWE-bench (est.)
39
+ ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%'], // 72.0% SWE-bench (est.)
40
+ ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%'], // 68.0% SWE-bench (est.)
41
+ ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%'], // 44.0% SWE-bench (est.)
42
+ ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%'], // 70.0% SWE-bench (est.)
43
+ // ── A+ tier β€” SWE-bench Verified 60–70% ──
44
+ ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%'], // 67.0% SWE-bench (est.)
45
+ ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%'], // 65.8% SWE-bench Verified
46
+ ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%'], // 70.0% SWE-bench (est.)
47
+ ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%'], // 39.5% SWE-bench (est.)
48
+ ['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%'], // 73.8% SWE-bench Verified
49
+ ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%'], // 65.0% SWE-bench (est.)
50
+ // ── A tier β€” SWE-bench Verified 45–60% ──
51
+ ['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%'], // 56.5% SWE-bench (est.)
52
+ ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%'], // 48.0% SWE-bench (est.)
53
+ ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%'], // 45.0% SWE-bench (est.)
54
+ ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%'], // 43.0% SWE-bench (est.)
55
+ ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%'], // 43.9% SWE-bench Verified
56
+ // ── A- tier β€” SWE-bench Verified 35–45% ──
57
+ ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%'], // 60.0% SWE-bench (est.)
58
+ ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%'], // 49.0% SWE-bench (est.)
59
+ ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%'], // 44.0% SWE-bench (est.)
60
+ ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%'], // 37.7% SWE-bench (est.)
61
+ ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%'], // 52.0% SWE-bench (est.)
62
+ // ── B+ tier β€” SWE-bench Verified 30–40% ──
63
+ ['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%'], // 50.0% SWE-bench (est.)
64
+ ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%'], // 42.0% SWE-bench (est.)
65
+ ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%'], // 36.0% SWE-bench (est.)
66
+ ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%'], // 38.0% SWE-bench (est.)
67
+ ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%'], // 74.4% SWE-bench Verified
68
+ // ── B tier β€” SWE-bench Verified 20–35% ──
69
+ ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%'], // 62.0% SWE-bench (est.)
70
+ ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%'], // 32.0% SWE-bench (est.)
71
+ ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%'], // 34.0% SWE-bench (est.)
72
+ ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%'], // 30.0% SWE-bench (est.)
73
+ ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%'], // 28.2% SWE-bench (est.)
74
+ // ── C tier β€” SWE-bench Verified <25% or lightweight edge models ──
75
+ ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%'], // 22.6% SWE-bench (est.)
76
+ ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%'], // 18.0% SWE-bench (est.)
77
+ ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%'], // 12.0% SWE-bench (est.)
78
+ ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%'], // 14.0% SWE-bench (est.)
79
79
  ]
80
80
 
81
81
  // πŸ“– All sources combined - used by the main script
82
82
  export const sources = {
83
83
  nvidia: {
84
- name: 'NVIDIA NIM',
84
+ name: 'NIM',
85
85
  models: nvidiaNim,
86
86
  },
87
87
  // πŸ“– Add more sources here in the future, for example:
@@ -98,7 +98,7 @@ export const sources = {
98
98
  // πŸ“– Flatten all models from all sources for backward compatibility
99
99
  export const MODELS = []
100
100
  for (const [sourceKey, sourceData] of Object.entries(sources)) {
101
- for (const [modelId, label, tier] of sourceData.models) {
102
- MODELS.push([modelId, label, tier])
101
+ for (const [modelId, label, tier, sweScore] of sourceData.models) {
102
+ MODELS.push([modelId, label, tier, sweScore])
103
103
  }
104
104
  }