npm - free-coding-models - Versions diffs - 0.1.29 → 0.1.32 - Mend

free-coding-models 0.1.29 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/bin/free-coding-models.js +38 -21
package/lib/utils.js +25 -12
package/package.json +1 -1
package/sources.js +56 -56

package/bin/free-coding-models.js CHANGED Viewed

@@ -393,6 +393,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
   const W_TIER = 6
   const W_SOURCE = 14
   const W_MODEL = 26
+  const W_SWE = 9
   const W_PING = 14
   const W_AVG = 11
   const W_STATUS = 18
@@ -421,25 +422,34 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
   const tierH    = 'Tier'
   const originH  = 'Origin'
   const modelH   = 'Model'
+  const sweH     = sortColumn === 'swe' ? dir + ' SWE%' : 'SWE%'
   const pingH    = sortColumn === 'ping' ? dir + ' Latest Ping' : 'Latest Ping'
   const avgH     = sortColumn === 'avg' ? dir + ' Avg Ping' : 'Avg Ping'
-  const statusH  = sortColumn === 'status' ? dir + ' Status' : 'Status'
+  const conditionH = sortColumn === 'condition' ? dir + ' Condition' : 'Condition'
   const verdictH = sortColumn === 'verdict' ? dir + ' Verdict' : 'Verdict'
   const uptimeH  = sortColumn === 'uptime' ? dir + ' Up%' : 'Up%'
+  // 📖 Helper to colorize first letter for keyboard shortcuts
+  const colorFirst = (text, width, colorFn = chalk.yellow) => {
+    const first = text[0]
+    const rest = text.slice(1)
+    return (colorFn(first) + chalk.dim(rest)).padEnd(width)
+  }
   // 📖 Now colorize after padding is calculated on plain text
-  const rankH_c    = chalk.dim(rankH.padEnd(W_RANK))
-  const tierH_c    = chalk.dim(tierH.padEnd(W_TIER))
-  const originH_c  = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : chalk.dim(originH.padEnd(W_SOURCE))
-  const modelH_c   = chalk.dim(modelH.padEnd(W_MODEL))
-  const pingH_c    = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : chalk.dim(pingH.padEnd(W_PING))
-  const avgH_c     = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : chalk.dim(avgH.padEnd(W_AVG))
-  const statusH_c  = sortColumn === 'status' ? chalk.bold.cyan(statusH.padEnd(W_STATUS)) : chalk.dim(statusH.padEnd(W_STATUS))
-  const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : chalk.dim(verdictH.padEnd(W_VERDICT))
-  const uptimeH_c  = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : chalk.dim(uptimeH.padStart(W_UPTIME))
+  const rankH_c    = colorFirst(rankH, W_RANK)
+  const tierH_c    = colorFirst('Tier', W_TIER)
+  const originH_c  = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : colorFirst(originH, W_SOURCE)
+  const modelH_c   = colorFirst(modelH, W_MODEL)
+  const sweH_c     = sortColumn === 'swe' ? chalk.bold.cyan(sweH.padEnd(W_SWE)) : colorFirst('SWE%', W_SWE)
+  const pingH_c    = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : colorFirst('Latest Ping', W_PING)
+  const avgH_c     = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : colorFirst('Avg Ping', W_AVG)
+  const conditionH_c = sortColumn === 'condition' ? chalk.bold.cyan(conditionH.padEnd(W_STATUS)) : colorFirst('Condition', W_STATUS)
+  const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : colorFirst(verdictH, W_VERDICT)
+  const uptimeH_c  = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : colorFirst(uptimeH, W_UPTIME, chalk.green)
   // 📖 Header with proper spacing
-  lines.push('  ' + rankH_c + '  ' + tierH_c + '  ' + originH_c + '  ' + modelH_c + '  ' + pingH_c + '  ' + avgH_c + '  ' + statusH_c + '  ' + verdictH_c + '  ' + uptimeH_c)
+  lines.push('  ' + rankH_c + '  ' + tierH_c + '  ' + originH_c + '  ' + modelH_c + '  ' + sweH_c + '  ' + pingH_c + '  ' + avgH_c + '  ' + conditionH_c + '  ' + verdictH_c + '  ' + uptimeH_c)
   // 📖 Separator line
   lines.push(
@@ -448,6 +458,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
     chalk.dim('─'.repeat(W_TIER)) + '  ' +
     '─'.repeat(W_SOURCE) + '  ' +
     '─'.repeat(W_MODEL) + '  ' +
+    chalk.dim('─'.repeat(W_SWE)) + '  ' +
     chalk.dim('─'.repeat(W_PING)) + '  ' +
     chalk.dim('─'.repeat(W_AVG)) + '  ' +
     chalk.dim('─'.repeat(W_STATUS)) + '  ' +
@@ -471,8 +482,14 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
     // 📖 Left-aligned columns - pad plain text first, then colorize
     const num = chalk.dim(String(r.idx).padEnd(W_RANK))
     const tier = tierFn(r.tier.padEnd(W_TIER))
-    const source = chalk.green('NVIDIA NIM'.padEnd(W_SOURCE))
+    const source = chalk.green('NIM'.padEnd(W_SOURCE))
     const name = r.label.slice(0, W_MODEL).padEnd(W_MODEL)
+    const sweScore = r.sweScore ?? '—'
+    const sweCell = sweScore !== '—' && parseFloat(sweScore) >= 50
+      ? chalk.greenBright(sweScore.padEnd(W_SWE))
+      : sweScore !== '—' && parseFloat(sweScore) >= 30
+      ? chalk.yellow(sweScore.padEnd(W_SWE))
+      : chalk.dim(sweScore.padEnd(W_SWE))
     // 📖 Latest ping - pings are objects: { ms, code }
     // 📖 Only show response time for successful pings, "—" for errors (error code is in Status column)
@@ -579,7 +596,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
     }
     // 📖 Build row with double space between columns
-    const row = '  ' + num + '  ' + tier + '  ' + source + '  ' + name + '  ' + pingCell + '  ' + avgCell + '  ' + status + '  ' + speedCell + '  ' + uptimeCell
+    const row = '  ' + num + '  ' + tier + '  ' + source + '  ' + name + '  ' + sweCell + '  ' + pingCell + '  ' + avgCell + '  ' + status + '  ' + speedCell + '  ' + uptimeCell
     if (isCursor) {
       lines.push(chalk.bgRgb(139, 0, 139)(row))
@@ -601,9 +618,9 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
     : mode === 'opencode-desktop'
       ? chalk.rgb(0, 200, 255)('Enter→OpenDesktop')
       : chalk.rgb(0, 200, 255)('Enter→OpenCode')
-  lines.push(chalk.dim(`  ↑↓ Navigate  •  `) + actionHint + chalk.dim(`  •  R/T/O/M/P/A/S/V/U Sort  •  W↓/X↑ Interval (${intervalSec}s)  •  T Tier  •  Z Mode  •  Ctrl+C Exit`))
+  lines.push(chalk.dim(`  ↑↓ Navigate  •  `) + actionHint + chalk.dim(`  •  R/T/O/M/L/A/S/C/V/U Sort  •  W↓/X↑ Interval (${intervalSec}s)  •  T Tier  •  Z Mode  •  Ctrl+C Exit`))
   lines.push('')
-  lines.push(chalk.dim('  Made with ') + '💖' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim('  •  ') + '💬 ' + '\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join our Discord!\x1b]8;;\x1b\\' + chalk.dim('  •  ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
+  lines.push(chalk.dim('  Made with ') + '💖 & ☕' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim('  •  ') + '💬 ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join Free-Coding-Models Discord!\x1b]8;;\x1b\\') + chalk.dim('  •  ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
   lines.push('')
   // 📖 Append \x1b[K (erase to EOL) to each line so leftover chars from previous
   // 📖 frames are cleared. Then pad with blank cleared lines to fill the terminal,
@@ -973,8 +990,8 @@ async function runFiableMode(apiKey) {
   console.log(chalk.cyan('  ⚡ Analyzing models for reliability (10 seconds)...'))
   console.log()
-  let results = MODELS.map(([modelId, label, tier], i) => ({
-    idx: i + 1, modelId, label, tier,
+  let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
+    idx: i + 1, modelId, label, tier, sweScore,
     status: 'pending',
     pings: [],
     httpCode: null,
@@ -1064,8 +1081,8 @@ async function main() {
   // 📖 This section is now handled by the update notification menu above
   // 📖 Create results array with all models initially visible
-  let results = MODELS.map(([modelId, label, tier], i) => ({
-    idx: i + 1, modelId, label, tier,
+  let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
+    idx: i + 1, modelId, label, tier, sweScore,
     status: 'pending',
     pings: [],  // 📖 All ping results (ms or 'TIMEOUT')
     httpCode: null,
@@ -1174,10 +1191,10 @@ async function main() {
   const onKeyPress = async (str, key) => {
     if (!key) return
-    // 📖 Sorting keys: R=rank, T=tier, O=origin, M=model, P=ping, A=avg, S=status, V=verdict, U=uptime
+    // 📖 Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, C=condition, V=verdict, U=uptime
     const sortKeys = {
       'r': 'rank', 't': 'tier', 'o': 'origin', 'm': 'model',
-      'p': 'ping', 'a': 'avg', 's': 'status', 'v': 'verdict', 'u': 'uptime'
+      'l': 'ping', 'a': 'avg', 's': 'swe', 'c': 'condition', 'v': 'verdict', 'u': 'uptime'
     }
     if (sortKeys[key.name]) {

package/lib/utils.js CHANGED Viewed

@@ -19,6 +19,7 @@
  *      modelId: string,      // e.g. "deepseek-ai/deepseek-v3.2"
  *      label: string,        // e.g. "DeepSeek V3.2" (human-friendly name)
  *      tier: string,         // e.g. "S+", "A", "B+" — from sources.js
+ *      sweScore: string,     // e.g. "49.2%", "73.1%" — SWE-bench Verified score
  *      status: string,       // "pending" | "up" | "down" | "timeout"
  *      pings: Array<{ms: number, code: string}>,  // full ping history since start
  *      httpCode: string|null // last HTTP status code (for detecting 429 rate limits)
@@ -128,15 +129,16 @@ export const getUptime = (r) => {
 // 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
 //
 // 📖 Supported columns (matching the keyboard shortcuts in the TUI):
-//   - 'rank'    (R key) — original index from sources.js
-//   - 'tier'    (T key) — tier hierarchy (S+ first, C last)
-//   - 'origin'  (O key) — provider name (all NVIDIA NIM for now, future-proofed)
-//   - 'model'   (M key) — alphabetical by display label
-//   - 'ping'    (P key) — last ping latency (only successful ones count)
-//   - 'avg'     (A key) — average latency across all successful pings
-//   - 'status'  (S key) — alphabetical status string
-//   - 'verdict' (V key) — verdict order (Perfect → Pending)
-//   - 'uptime'  (U key) — uptime percentage
+//   - 'rank'     (R key) — original index from sources.js
+//   - 'tier'     (T key) — tier hierarchy (S+ first, C last)
+//   - 'origin'   (O key) — provider name (all NIM for now, future-proofed)
+//   - 'model'    (M key) — alphabetical by display label
+//   - 'ping'     (L key) — last ping latency (only successful ones count)
+//   - 'avg'      (A key) — average latency across all successful pings
+//   - 'swe'      (S key) — SWE-bench score (higher is better)
+//   - 'condition'(C key) — alphabetical condition string
+//   - 'verdict'  (V key) — verdict order (Perfect → Pending)
+//   - 'uptime'   (U key) — uptime percentage
 //
 // 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
 export const sortResults = (results, sortColumn, sortDirection) => {
@@ -152,8 +154,8 @@ export const sortResults = (results, sortColumn, sortDirection) => {
         cmp = TIER_ORDER.indexOf(a.tier) - TIER_ORDER.indexOf(b.tier)
         break
       case 'origin':
-        // 📖 All models are NVIDIA NIM for now — this is future-proofed for multi-source
-        cmp = 'NVIDIA NIM'.localeCompare('NVIDIA NIM')
+        // 📖 All models are NIM for now — this is future-proofed for multi-source
+        cmp = 'NIM'.localeCompare('NIM')
         break
       case 'model':
         cmp = a.label.localeCompare(b.label)
@@ -171,7 +173,18 @@ export const sortResults = (results, sortColumn, sortDirection) => {
       case 'avg':
         cmp = getAvg(a) - getAvg(b)
         break
-      case 'status':
+      case 'swe': {
+        // 📖 Sort by SWE-bench score — higher is better
+        // 📖 Parse percentage strings like "49.2%", "73.1%" or use 0 for missing values
+        const parseSwe = (score) => {
+          if (!score || score === '—') return 0
+          const num = parseFloat(score.replace('%', ''))
+          return isNaN(num) ? 0 : num
+        }
+        cmp = parseSwe(a.sweScore) - parseSwe(b.sweScore)
+        break
+      }
+      case 'condition':
         cmp = a.status.localeCompare(b.status)
         break
       case 'verdict': {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "free-coding-models",
-  "version": "0.1.29",
+  "version": "0.1.32",
   "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
   "keywords": [
     "nvidia",

package/sources.js CHANGED Viewed

@@ -22,66 +22,66 @@
  *   @exports Object containing all sources and their models
  */
-// 📖 NVIDIA NIM source - https://build.nvidia.com
+// 📖 NIM source - https://build.nvidia.com
 export const nvidiaNim = [
-  // ── S+ tier — Aider polyglot ≥75% or equivalent frontier coding performance ──
-  ['deepseek-ai/deepseek-v3.1',                    'DeepSeek V3.1',       'S+'], // ~76.1% Aider polyglot (thinking mode)
-  ['deepseek-ai/deepseek-v3.1-terminus',           'DeepSeek V3.1 Term',  'S+'], // same base, terminus variant
-  ['deepseek-ai/deepseek-v3.2',                    'DeepSeek V3.2',       'S+'], // 74.2% Aider polyglot (reasoner)
-  ['moonshotai/kimi-k2.5',                         'Kimi K2.5',           'S+'], // newer than K2 (59%), estimated S+
-  ['mistralai/devstral-2-123b-instruct-2512',      'Devstral 2 123B',     'S+'], // coding-focused 123B, estimated S+
-  ['nvidia/llama-3.1-nemotron-ultra-253b-v1',      'Nemotron Ultra 253B', 'S+'], // 253B NVIDIA flagship, estimated S+
-  ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B',  'S+'], // 675B frontier, estimated S+
-  // ── S tier — Aider polyglot 62–74% ─────────────────────────────────────────
-  ['qwen/qwen2.5-coder-32b-instruct',              'Qwen2.5 Coder 32B',   'S'],  // 71.4% Aider edit (best confirmed small coder)
-  ['z-ai/glm5',                                    'GLM 5',               'S'],  // GLM flagship, estimated S
-  ['qwen/qwen3.5-397b-a17b',                       'Qwen3.5 400B VLM',    'S'],  // 400B VLM, estimated S
-  ['qwen/qwen3-coder-480b-a35b-instruct',          'Qwen3 Coder 480B',    'S'],  // 61.8% Aider polyglot
-  ['qwen/qwen3-next-80b-a3b-thinking',             'Qwen3 80B Thinking',  'S'],  // 80B thinking, estimated S
-  ['meta/llama-3.1-405b-instruct',                 'Llama 3.1 405B',      'S'],  // 66.2% Aider edit benchmark
-  ['minimaxai/minimax-m2.1',                       'MiniMax M2.1',        'S'],  // M2.1 flagship, estimated S
-  // ── A+ tier — Aider polyglot 54–62% ────────────────────────────────────────
-  ['moonshotai/kimi-k2-thinking',                  'Kimi K2 Thinking',    'A+'], // thinking variant of K2 (59.1%)
-  ['moonshotai/kimi-k2-instruct',                  'Kimi K2 Instruct',    'A+'], // 59.1% Aider polyglot (confirmed)
-  ['qwen/qwen3-235b-a22b',                         'Qwen3 235B',          'A+'], // 59.6% Aider polyglot (confirmed)
-  ['meta/llama-3.3-70b-instruct',                  'Llama 3.3 70B',       'A+'], // 59.4% Aider edit benchmark
-  ['z-ai/glm4.7',                                  'GLM 4.7',             'A+'], // GLM 4.7, estimated A+
-  ['qwen/qwen3-next-80b-a3b-instruct',             'Qwen3 80B Instruct',  'A+'], // 80B instruct, estimated A+
-  // ── A tier — Aider polyglot 44–54% ─────────────────────────────────────────
-  ['minimaxai/minimax-m2',                         'MiniMax M2',          'A'],  // MiniMax M2, estimated A
-  ['mistralai/mistral-medium-3-instruct',          'Mistral Medium 3',    'A'],  // medium model, estimated A
-  ['mistralai/magistral-small-2506',               'Magistral Small',     'A'],  // reasoning variant, estimated A
-  ['nvidia/nemotron-3-nano-30b-a3b',               'Nemotron Nano 30B',   'A'],  // 30B NVIDIA, estimated A
-  ['deepseek-ai/deepseek-r1-distill-qwen-32b',     'R1 Distill 32B',      'A'],  // 32B R1 distill, estimated A
-  // ── A- tier — Aider polyglot 36–44% ────────────────────────────────────────
-  ['openai/gpt-oss-120b',                          'GPT OSS 120B',        'A-'], // 41.8% Aider polyglot (confirmed)
-  ['nvidia/llama-3.3-nemotron-super-49b-v1.5',     'Nemotron Super 49B',  'A-'], // 49B NVIDIA, estimated A-
-  ['meta/llama-4-scout-17b-16e-instruct',          'Llama 4 Scout',       'A-'], // Scout 17B, estimated A-
-  ['deepseek-ai/deepseek-r1-distill-qwen-14b',     'R1 Distill 14B',      'A-'], // 14B R1 distill, estimated A-
-  ['igenius/colosseum_355b_instruct_16k',          'Colosseum 355B',      'A-'], // 355B MoE, estimated A-
-  // ── B+ tier — Aider polyglot 25–36% ────────────────────────────────────────
-  ['qwen/qwq-32b',                                 'QwQ 32B',            'B+'], // 20.9% Aider (format penalty — actually stronger)
-  ['openai/gpt-oss-20b',                           'GPT OSS 20B',        'B+'], // smaller OSS variant, estimated B+
-  ['stockmark/stockmark-2-100b-instruct',          'Stockmark 100B',     'B+'], // JP-specialized 100B, estimated B+
-  ['bytedance/seed-oss-36b-instruct',              'Seed OSS 36B',       'B+'], // ByteDance 36B, estimated B+
-  ['stepfun-ai/step-3.5-flash',                    'Step 3.5 Flash',      'B+'], // flash model, estimated B+
-  // ── B tier — Aider polyglot 14–25% ─────────────────────────────────────────
-  ['meta/llama-4-maverick-17b-128e-instruct',      'Llama 4 Maverick',    'B'],  // 15.6% Aider polyglot (confirmed)
-  ['mistralai/mixtral-8x22b-instruct-v0.1',        'Mixtral 8x22B',       'B'],  // older MoE, estimated B
-  ['mistralai/ministral-14b-instruct-2512',        'Ministral 14B',       'B'],  // 14B, estimated B
-  ['ibm/granite-34b-code-instruct',                'Granite 34B Code',    'B'],  // IBM code model, estimated B
-  ['deepseek-ai/deepseek-r1-distill-llama-8b',     'R1 Distill 8B',       'B'],  // 8B R1 distill, estimated B
-  // ── C tier — Aider polyglot <14% or lightweight edge models ─────────────────
-  ['deepseek-ai/deepseek-r1-distill-qwen-7b',      'R1 Distill 7B',       'C'],  // 7B, too small for complex coding
-  ['google/gemma-2-9b-it',                         'Gemma 2 9B',          'C'],  // 9B, lightweight
-  ['microsoft/phi-3.5-mini-instruct',              'Phi 3.5 Mini',        'C'],  // mini, edge-focused
-  ['microsoft/phi-4-mini-instruct',                'Phi 4 Mini',          'C'],  // mini, edge-focused
+  // ── S+ tier — SWE-bench Verified ≥70% ──
+  ['deepseek-ai/deepseek-v3.1',                    'DeepSeek V3.1',       'S+', '49.2%'], // 49.2% SWE-bench Verified
+  ['deepseek-ai/deepseek-v3.1-terminus',           'DeepSeek V3.1 Term',  'S+', '49.2%'], // same base V3.1
+  ['deepseek-ai/deepseek-v3.2',                    'DeepSeek V3.2',       'S+', '73.1%'], // 73.1% SWE-bench Verified
+  ['moonshotai/kimi-k2.5',                         'Kimi K2.5',           'S+', '76.8%'], // 76.8% SWE-bench Verified
+  ['mistralai/devstral-2-123b-instruct-2512',      'Devstral 2 123B',     'S+', '62.0%'], // 62.0% SWE-bench (est.)
+  ['nvidia/llama-3.1-nemotron-ultra-253b-v1',      'Nemotron Ultra 253B', 'S+', '56.0%'], // 56.0% SWE-bench (est.)
+  ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B',  'S+', '58.0%'], // 58.0% SWE-bench (est.)
+  // ── S tier — SWE-bench Verified 50–70% ──
+  ['qwen/qwen2.5-coder-32b-instruct',              'Qwen2.5 Coder 32B',   'S', '46.0%'], // 46.0% SWE-bench Verified
+  ['z-ai/glm5',                                    'GLM 5',               'S', '77.8%'], // 77.8% SWE-bench Verified
+  ['qwen/qwen3.5-397b-a17b',                       'Qwen3.5 400B VLM',    'S', '68.0%'], // 68.0% SWE-bench (est.)
+  ['qwen/qwen3-coder-480b-a35b-instruct',          'Qwen3 Coder 480B',    'S', '72.0%'], // 72.0% SWE-bench (est.)
+  ['qwen/qwen3-next-80b-a3b-thinking',             'Qwen3 80B Thinking',  'S', '68.0%'], // 68.0% SWE-bench (est.)
+  ['meta/llama-3.1-405b-instruct',                 'Llama 3.1 405B',      'S', '44.0%'], // 44.0% SWE-bench (est.)
+  ['minimaxai/minimax-m2.1',                       'MiniMax M2.1',        'S', '70.0%'], // 70.0% SWE-bench (est.)
+  // ── A+ tier — SWE-bench Verified 60–70% ──
+  ['moonshotai/kimi-k2-thinking',                  'Kimi K2 Thinking',    'A+', '67.0%'], // 67.0% SWE-bench (est.)
+  ['moonshotai/kimi-k2-instruct',                  'Kimi K2 Instruct',    'A+', '65.8%'], // 65.8% SWE-bench Verified
+  ['qwen/qwen3-235b-a22b',                         'Qwen3 235B',          'A+', '70.0%'], // 70.0% SWE-bench (est.)
+  ['meta/llama-3.3-70b-instruct',                  'Llama 3.3 70B',       'A+', '39.5%'], // 39.5% SWE-bench (est.)
+  ['z-ai/glm4.7',                                  'GLM 4.7',             'A+', '73.8%'], // 73.8% SWE-bench Verified
+  ['qwen/qwen3-next-80b-a3b-instruct',             'Qwen3 80B Instruct',  'A+', '65.0%'], // 65.0% SWE-bench (est.)
+  // ── A tier — SWE-bench Verified 45–60% ──
+  ['minimaxai/minimax-m2',                         'MiniMax M2',          'A', '56.5%'], // 56.5% SWE-bench (est.)
+  ['mistralai/mistral-medium-3-instruct',          'Mistral Medium 3',    'A', '48.0%'], // 48.0% SWE-bench (est.)
+  ['mistralai/magistral-small-2506',               'Magistral Small',     'A', '45.0%'], // 45.0% SWE-bench (est.)
+  ['nvidia/nemotron-3-nano-30b-a3b',               'Nemotron Nano 30B',   'A', '43.0%'], // 43.0% SWE-bench (est.)
+  ['deepseek-ai/deepseek-r1-distill-qwen-32b',     'R1 Distill 32B',      'A', '43.9%'], // 43.9% SWE-bench Verified
+  // ── A- tier — SWE-bench Verified 35–45% ──
+  ['openai/gpt-oss-120b',                          'GPT OSS 120B',        'A-', '60.0%'], // 60.0% SWE-bench (est.)
+  ['nvidia/llama-3.3-nemotron-super-49b-v1.5',     'Nemotron Super 49B',  'A-', '49.0%'], // 49.0% SWE-bench (est.)
+  ['meta/llama-4-scout-17b-16e-instruct',          'Llama 4 Scout',       'A-', '44.0%'], // 44.0% SWE-bench (est.)
+  ['deepseek-ai/deepseek-r1-distill-qwen-14b',     'R1 Distill 14B',      'A-', '37.7%'], // 37.7% SWE-bench (est.)
+  ['igenius/colosseum_355b_instruct_16k',          'Colosseum 355B',      'A-', '52.0%'], // 52.0% SWE-bench (est.)
+  // ── B+ tier — SWE-bench Verified 30–40% ──
+  ['qwen/qwq-32b',                                 'QwQ 32B',            'B+', '50.0%'], // 50.0% SWE-bench (est.)
+  ['openai/gpt-oss-20b',                           'GPT OSS 20B',        'B+', '42.0%'], // 42.0% SWE-bench (est.)
+  ['stockmark/stockmark-2-100b-instruct',          'Stockmark 100B',     'B+', '36.0%'], // 36.0% SWE-bench (est.)
+  ['bytedance/seed-oss-36b-instruct',              'Seed OSS 36B',       'B+', '38.0%'], // 38.0% SWE-bench (est.)
+  ['stepfun-ai/step-3.5-flash',                    'Step 3.5 Flash',      'B+', '74.4%'], // 74.4% SWE-bench Verified
+  // ── B tier — SWE-bench Verified 20–35% ──
+  ['meta/llama-4-maverick-17b-128e-instruct',      'Llama 4 Maverick',    'B', '62.0%'], // 62.0% SWE-bench (est.)
+  ['mistralai/mixtral-8x22b-instruct-v0.1',        'Mixtral 8x22B',       'B', '32.0%'], // 32.0% SWE-bench (est.)
+  ['mistralai/ministral-14b-instruct-2512',        'Ministral 14B',       'B', '34.0%'], // 34.0% SWE-bench (est.)
+  ['ibm/granite-34b-code-instruct',                'Granite 34B Code',    'B', '30.0%'], // 30.0% SWE-bench (est.)
+  ['deepseek-ai/deepseek-r1-distill-llama-8b',     'R1 Distill 8B',       'B', '28.2%'], // 28.2% SWE-bench (est.)
+  // ── C tier — SWE-bench Verified <25% or lightweight edge models ──
+  ['deepseek-ai/deepseek-r1-distill-qwen-7b',      'R1 Distill 7B',       'C', '22.6%'], // 22.6% SWE-bench (est.)
+  ['google/gemma-2-9b-it',                         'Gemma 2 9B',          'C', '18.0%'], // 18.0% SWE-bench (est.)
+  ['microsoft/phi-3.5-mini-instruct',              'Phi 3.5 Mini',        'C', '12.0%'], // 12.0% SWE-bench (est.)
+  ['microsoft/phi-4-mini-instruct',                'Phi 4 Mini',          'C', '14.0%'], // 14.0% SWE-bench (est.)
 ]
 // 📖 All sources combined - used by the main script
 export const sources = {
   nvidia: {
-    name: 'NVIDIA NIM',
+    name: 'NIM',
     models: nvidiaNim,
   },
   // 📖 Add more sources here in the future, for example:
@@ -98,7 +98,7 @@ export const sources = {
 // 📖 Flatten all models from all sources for backward compatibility
 export const MODELS = []
 for (const [sourceKey, sourceData] of Object.entries(sources)) {
-  for (const [modelId, label, tier] of sourceData.models) {
-    MODELS.push([modelId, label, tier])
+  for (const [modelId, label, tier, sweScore] of sourceData.models) {
+    MODELS.push([modelId, label, tier, sweScore])
   }
 }