free-coding-models 0.1.39 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -199,23 +199,23 @@ free-coding-models
199
199
 
200
200
  ## 🤖 Coding Models
201
201
 
202
- **44 coding models** across 8 tiers, ranked by [Aider Polyglot benchmark](https://aider.chat/docs/leaderboards) (225 coding exercises across C++/Go/Java/JS/Python/Rust). Models without a confirmed Aider score are estimated from model family, size, and published release benchmarks.
203
-
204
- | Tier | Score | Count | Models |
205
- |------|-------|-------|--------|
206
- | **S+** | 75%+ | 7 | DeepSeek V3.1/Terminus, DeepSeek V3.2, Kimi K2.5, Devstral 2, Nemotron Ultra 253B, Mistral Large 675B |
207
- | **S** | 62–74% | 7 | Qwen2.5 Coder 32B, GLM 5, Qwen3.5 400B VLM, Qwen3 Coder 480B, Qwen3 80B Thinking, Llama 3.1 405B, MiniMax M2.1 |
208
- | **A+** | 54–62% | 6 | Kimi K2 Thinking/Instruct, Qwen3 235B, Llama 3.3 70B, GLM 4.7, Qwen3 80B Instruct |
209
- | **A** | 44–54% | 5 | MiniMax M2, Mistral Medium 3, Magistral Small, Nemotron Nano 30B, R1 Distill 32B |
210
- | **A-** | 36–44% | 5 | GPT OSS 120B, Nemotron Super 49B, Llama 4 Scout, R1 Distill 14B, Colosseum 355B |
211
- | **B+** | 25–36% | 5 | QwQ 32B, GPT OSS 20B, Stockmark 100B, Seed OSS 36B, Step 3.5 Flash |
212
- | **B** | 14–25% | 5 | Llama 4 Maverick, Mixtral 8x22B, Ministral 14B, Granite 34B Code, R1 Distill 8B |
213
- | **C** | <14% | 4 | R1 Distill 7B, Gemma 2 9B, Phi 3.5 Mini, Phi 4 Mini |
202
+ **44 coding models** across 8 tiers, ranked by [SWE-bench Verified](https://www.swebench.com), the industry-standard benchmark measuring real GitHub issue resolution. Scores are self-reported by providers unless noted.
203
+
204
+ | Tier | SWE-bench | Models |
205
+ |------|-----------|--------|
206
+ | **S+** ≥70% | GLM 5 (77.8%), Kimi K2.5 (76.8%), Step 3.5 Flash (74.4%), MiniMax M2.1 (74.0%), GLM 4.7 (73.8%), DeepSeek V3.2 (73.1%), Devstral 2 (72.2%), Kimi K2 Thinking (71.3%), Qwen3 Coder 480B (70.6%), Qwen3 235B (70.0%) |
207
+ | **S** 60–70% | MiniMax M2 (69.4%), DeepSeek V3.1 Terminus (68.4%), Qwen3 80B Thinking (68.0%), Qwen3.5 400B (68.0%), Kimi K2 Instruct (65.8%), Qwen3 80B Instruct (65.0%), DeepSeek V3.1 (62.0%), Llama 4 Maverick (62.0%), GPT OSS 120B (60.0%) |
208
+ | **A+** 50–60% | Mistral Large 675B (58.0%), Nemotron Ultra 253B (56.0%), Colosseum 355B (52.0%), QwQ 32B (50.0%) |
209
+ | **A** 40–50% | Nemotron Super 49B (49.0%), Mistral Medium 3 (48.0%), Qwen2.5 Coder 32B (46.0%), Magistral Small (45.0%), Llama 4 Scout (44.0%), Llama 3.1 405B (44.0%), Nemotron Nano 30B (43.0%), R1 Distill 32B (43.9%), GPT OSS 20B (42.0%) |
210
+ | **A-** 35–40% | Llama 3.3 70B (39.5%), Seed OSS 36B (38.0%), R1 Distill 14B (37.7%), Stockmark 100B (36.0%) |
211
+ | **B+** 30–35% | Ministral 14B (34.0%), Mixtral 8x22B (32.0%), Granite 34B Code (30.0%) |
212
+ | **B** 20–30% | R1 Distill 8B (28.2%), R1 Distill 7B (22.6%) |
213
+ | **C** <20% | Gemma 2 9B (18.0%), Phi 4 Mini (14.0%), Phi 3.5 Mini (12.0%) |
214
214
 
215
215
  ### Tier scale
216
216
 
217
- - **S+/S** — Frontier coders, top Aider polyglot scores, best for complex refactors
218
- - **A+/A** — Excellent alternatives, strong at most coding tasks
217
+ - **S+/S** — Elite frontier coders (≥60% SWE-bench), best for complex real-world tasks and refactors
218
+ - **A+/A** — Great alternatives, strong at most coding tasks
219
219
  - **A-/B+** — Solid performers, good for targeted programming tasks
220
220
  - **B/C** — Lightweight or older models, good for code completion on constrained infra
221
221
 
@@ -421,7 +421,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
421
421
  // 📖 Column widths (generous spacing with margins)
422
422
  const W_RANK = 6
423
423
  const W_TIER = 6
424
- const W_CTW = 6
424
+ const W_CTX = 6
425
425
  const W_SOURCE = 14
426
426
  const W_MODEL = 26
427
427
  const W_SWE = 9
@@ -454,7 +454,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
454
454
  const originH = 'Origin'
455
455
  const modelH = 'Model'
456
456
  const sweH = sortColumn === 'swe' ? dir + ' SWE%' : 'SWE%'
457
- const ctwH = sortColumn === 'ctw' ? dir + ' CTW' : 'CTW'
457
+ const ctxH = sortColumn === 'ctx' ? dir + ' CTX' : 'CTX'
458
458
  const pingH = sortColumn === 'ping' ? dir + ' Latest Ping' : 'Latest Ping'
459
459
  const avgH = sortColumn === 'avg' ? dir + ' Avg Ping' : 'Avg Ping'
460
460
  const healthH = sortColumn === 'condition' ? dir + ' Health' : 'Health'
@@ -477,15 +477,15 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
477
477
  const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : colorFirst(originH, W_SOURCE)
478
478
  const modelH_c = colorFirst(modelH, W_MODEL)
479
479
  const sweH_c = sortColumn === 'swe' ? chalk.bold.cyan(sweH.padEnd(W_SWE)) : colorFirst(sweH, W_SWE)
480
- const ctwH_c = sortColumn === 'ctw' ? chalk.bold.cyan(ctwH.padEnd(W_CTW)) : colorFirst(ctwH, W_CTW)
480
+ const ctxH_c = sortColumn === 'ctx' ? chalk.bold.cyan(ctxH.padEnd(W_CTX)) : colorFirst(ctxH, W_CTX)
481
481
  const pingH_c = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : colorFirst('Latest Ping', W_PING)
482
482
  const avgH_c = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : colorFirst('Avg Ping', W_AVG)
483
483
  const healthH_c = sortColumn === 'condition' ? chalk.bold.cyan(healthH.padEnd(W_STATUS)) : colorFirst('Health', W_STATUS)
484
484
  const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : colorFirst(verdictH, W_VERDICT)
485
485
  const uptimeH_c = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : colorFirst(uptimeH, W_UPTIME, chalk.green)
486
486
 
487
- // 📖 Header with proper spacing (column order: Rank, Tier, SWE%, CTW, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
488
- lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + sweH_c + ' ' + ctwH_c + ' ' + modelH_c + ' ' + originH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + healthH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
487
+ // 📖 Header with proper spacing (column order: Rank, Tier, SWE%, CTX, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
488
+ lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + sweH_c + ' ' + ctxH_c + ' ' + modelH_c + ' ' + originH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + healthH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
489
489
 
490
490
  // 📖 Separator line
491
491
  lines.push(
@@ -493,7 +493,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
493
493
  chalk.dim('─'.repeat(W_RANK)) + ' ' +
494
494
  chalk.dim('─'.repeat(W_TIER)) + ' ' +
495
495
  chalk.dim('─'.repeat(W_SWE)) + ' ' +
496
- chalk.dim('─'.repeat(W_CTW)) + ' ' +
496
+ chalk.dim('─'.repeat(W_CTX)) + ' ' +
497
497
  '─'.repeat(W_MODEL) + ' ' +
498
498
  '─'.repeat(W_SOURCE) + ' ' +
499
499
  chalk.dim('─'.repeat(W_PING)) + ' ' +
@@ -529,12 +529,12 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
529
529
  : chalk.dim(sweScore.padEnd(W_SWE))
530
530
 
531
531
  // 📖 Context window column - colorized by size (larger = better)
532
- const ctwRaw = r.ctw ?? '—'
533
- const ctwCell = ctwRaw !== '—' && (ctwRaw.includes('128k') || ctwRaw.includes('200k') || ctwRaw.includes('1m'))
534
- ? chalk.greenBright(ctwRaw.padEnd(W_CTW))
535
- : ctwRaw !== '—' && (ctwRaw.includes('32k') || ctwRaw.includes('64k'))
536
- ? chalk.cyan(ctwRaw.padEnd(W_CTW))
537
- : chalk.dim(ctwRaw.padEnd(W_CTW))
532
+ const ctxRaw = r.ctx ?? '—'
533
+ const ctxCell = ctxRaw !== '—' && (ctxRaw.includes('128k') || ctxRaw.includes('200k') || ctxRaw.includes('1m'))
534
+ ? chalk.greenBright(ctxRaw.padEnd(W_CTX))
535
+ : ctxRaw !== '—' && (ctxRaw.includes('32k') || ctxRaw.includes('64k'))
536
+ ? chalk.cyan(ctxRaw.padEnd(W_CTX))
537
+ : chalk.dim(ctxRaw.padEnd(W_CTX))
538
538
 
539
539
  // 📖 Latest ping - pings are objects: { ms, code }
540
540
  // 📖 Only show response time for successful pings, "—" for errors (error code is in Status column)
@@ -640,8 +640,8 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
640
640
  uptimeCell = chalk.red(uptimeStr.padStart(W_UPTIME))
641
641
  }
642
642
 
643
- // 📖 Build row with double space between columns (order: Rank, Tier, SWE%, CTW, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
644
- const row = ' ' + num + ' ' + tier + ' ' + sweCell + ' ' + ctwCell + ' ' + name + ' ' + source + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
643
+ // 📖 Build row with double space between columns (order: Rank, Tier, SWE%, CTX, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
644
+ const row = ' ' + num + ' ' + tier + ' ' + sweCell + ' ' + ctxCell + ' ' + name + ' ' + source + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
645
645
 
646
646
  if (isCursor) {
647
647
  lines.push(chalk.bgRgb(139, 0, 139)(row))
@@ -1105,8 +1105,8 @@ async function runFiableMode(apiKey) {
1105
1105
  console.log(chalk.cyan(' ⚡ Analyzing models for reliability (10 seconds)...'))
1106
1106
  console.log()
1107
1107
 
1108
- let results = MODELS.map(([modelId, label, tier, sweScore, ctw], i) => ({
1109
- idx: i + 1, modelId, label, tier, sweScore, ctw,
1108
+ let results = MODELS.map(([modelId, label, tier, sweScore, ctx], i) => ({
1109
+ idx: i + 1, modelId, label, tier, sweScore, ctx,
1110
1110
  status: 'pending',
1111
1111
  pings: [],
1112
1112
  httpCode: null,
@@ -1183,21 +1183,47 @@ async function main() {
1183
1183
  }
1184
1184
  }
1185
1185
 
1186
- // 📖 Skip update check during development to avoid blocking menus
1187
- // 📖 In production, this will work correctly when versions are published
1188
- const latestVersion = null // Skip update check for now
1186
+ // 📖 Check for updates in the background
1187
+ let latestVersion = null
1188
+ try {
1189
+ latestVersion = await checkForUpdate()
1190
+ } catch {
1191
+ // Silently fail - don't block the app if npm registry is unreachable
1192
+ }
1189
1193
 
1190
1194
  // 📖 Default mode: OpenCode CLI
1191
1195
  let mode = 'opencode'
1192
1196
 
1193
- // 📖 AUTO-UPDATE: Disabled during development
1194
- // 📖 Will be re-enabled when versions are properly published
1195
-
1196
- // 📖 This section is now handled by the update notification menu above
1197
+ // 📖 Show update notification menu if a new version is available
1198
+ if (latestVersion) {
1199
+ const action = await promptUpdateNotification(latestVersion)
1200
+ if (action === 'update') {
1201
+ runUpdate(latestVersion)
1202
+ return // runUpdate will restart the process
1203
+ } else if (action === 'changelogs') {
1204
+ console.log()
1205
+ console.log(chalk.cyan(' Opening changelog in browser...'))
1206
+ console.log()
1207
+ const { execSync } = require('child_process')
1208
+ const changelogUrl = 'https://github.com/vava-nessa/free-coding-models/releases'
1209
+ try {
1210
+ if (isMac) {
1211
+ execSync(`open "${changelogUrl}"`, { stdio: 'ignore' })
1212
+ } else if (isWindows) {
1213
+ execSync(`start "" "${changelogUrl}"`, { stdio: 'ignore' })
1214
+ } else {
1215
+ execSync(`xdg-open "${changelogUrl}"`, { stdio: 'ignore' })
1216
+ }
1217
+ } catch {
1218
+ console.log(chalk.dim(` Could not open browser. Visit: ${changelogUrl}`))
1219
+ }
1220
+ }
1221
+ // If action is null (Continue without update) or changelogs, proceed to main app
1222
+ }
1197
1223
 
1198
1224
  // 📖 Create results array with all models initially visible
1199
- let results = MODELS.map(([modelId, label, tier, sweScore, ctw], i) => ({
1200
- idx: i + 1, modelId, label, tier, sweScore, ctw,
1225
+ let results = MODELS.map(([modelId, label, tier, sweScore, ctx], i) => ({
1226
+ idx: i + 1, modelId, label, tier, sweScore, ctx,
1201
1227
  status: 'pending',
1202
1228
  pings: [], // 📖 All ping results (ms or 'TIMEOUT')
1203
1229
  httpCode: null,
@@ -1306,10 +1332,10 @@ async function main() {
1306
1332
  const onKeyPress = async (str, key) => {
1307
1333
  if (!key) return
1308
1334
 
1309
- // 📖 Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, C=context window, H=health, V=verdict, U=uptime
1335
+ // 📖 Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, N=context, H=health, V=verdict, U=uptime
1310
1336
  const sortKeys = {
1311
1337
  'r': 'rank', 't': 'tier', 'o': 'origin', 'm': 'model',
1312
- 'l': 'ping', 'a': 'avg', 's': 'swe', 'c': 'ctw', 'h': 'condition', 'v': 'verdict', 'u': 'uptime'
1338
+ 'l': 'ping', 'a': 'avg', 's': 'swe', 'n': 'ctx', 'h': 'condition', 'v': 'verdict', 'u': 'uptime'
1313
1339
  }
1314
1340
 
1315
1341
  if (sortKeys[key.name]) {
package/lib/utils.js CHANGED
@@ -136,7 +136,7 @@ export const getUptime = (r) => {
136
136
  // - 'ping' (L key) — last ping latency (only successful ones count)
137
137
  // - 'avg' (A key) — average latency across all successful pings
138
138
  // - 'swe' (S key) — SWE-bench score (higher is better)
139
- // - 'ctw' (C key) — context window size (larger is better)
139
+ // - 'ctx' (N key) — context window size (larger is better)
140
140
  // - 'condition' (H key) — health status (alphabetical)
141
141
  // - 'verdict' (V key) — verdict order (Perfect → Pending)
142
142
  // - 'uptime' (U key) — uptime percentage
@@ -185,12 +185,12 @@ export const sortResults = (results, sortColumn, sortDirection) => {
185
185
  cmp = parseSwe(a.sweScore) - parseSwe(b.sweScore)
186
186
  break
187
187
  }
188
- case 'ctw': {
188
+ case 'ctx': {
189
189
  // 📖 Sort by context window size — larger is better
190
190
  // 📖 Parse strings like "128k", "32k", "1m" into numeric tokens
191
- const parseCtw = (ctw) => {
192
- if (!ctw || ctw === '—') return 0
193
- const str = ctw.toLowerCase()
191
+ const parseCtx = (ctx) => {
192
+ if (!ctx || ctx === '—') return 0
193
+ const str = ctx.toLowerCase()
194
194
  // 📖 Handle millions (1m = 1000k)
195
195
  if (str.includes('m')) {
196
196
  const num = parseFloat(str.replace('m', ''))
@@ -203,7 +203,7 @@ export const sortResults = (results, sortColumn, sortDirection) => {
203
203
  }
204
204
  return 0
205
205
  }
206
- cmp = parseCtw(a.ctw) - parseCtw(b.ctw)
206
+ cmp = parseCtx(a.ctx) - parseCtx(b.ctx)
207
207
  break
208
208
  }
209
209
  case 'condition':
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.39",
3
+ "version": "0.1.41",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -4,13 +4,13 @@
4
4
  *
5
5
  * @details
6
6
  * This file contains all model definitions organized by provider/source.
7
- * Each source has its own models array with [model_id, display_label, tier, swe_score, ctw].
7
+ * Each source has its own models array with [model_id, display_label, tier, swe_score, ctx].
8
8
  * - model_id: The model identifier for API calls
9
9
  * - display_label: Human-friendly name for display
10
10
  * - tier: Performance tier (S+, S, A+, A, A-, B+, B, C)
11
- * - swe_score: SWE-bench Verified score percentage
12
- * - ctw: Context window size in tokens (e.g., "128k", "32k")
13
- *
11
+ * - swe_score: SWE-bench Verified score percentage (self-reported by model provider)
12
+ * - ctx: Context window size in tokens (e.g., "128k", "32k")
13
+ *
14
14
  * Add new sources here to support additional providers beyond NIM.
15
15
  *
16
16
  * 🎯 Tier scale (based on SWE-bench Verified):
@@ -18,12 +18,14 @@
18
18
  * - S: 60-70% (excellent)
19
19
  * - A+: 50-60% (great)
20
20
  * - A: 40-50% (good)
21
- * - A-: 35-45% (decent)
22
- * - B+: 30-40% (average)
21
+ * - A-: 35-40% (decent)
22
+ * - B+: 30-35% (average)
23
23
  * - B: 20-30% (below average)
24
24
  * - C: <20% (lightweight/edge)
25
25
  *
26
- * 📖 Source: https://www.swebench.com
26
+ * 📖 Source: https://www.swebench.com — scores are self-reported unless noted
27
+ * 📖 Secondary: https://swe-rebench.com (independent evals, scores are lower)
28
+ * 📖 Leaderboard tracker: https://www.marc0.dev/en/leaderboard
27
29
  *
28
30
  * @exports Object containing all sources and their models
29
31
  */
@@ -31,57 +33,57 @@
31
33
  // 📖 NIM source - https://build.nvidia.com
32
34
  export const nvidiaNim = [
33
35
  // ── S+ tier — SWE-bench Verified ≥70% ──
34
- ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%', '128k'],
35
- ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%', '128k'],
36
36
  ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%', '128k'],
37
37
  ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%', '128k'],
38
- ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%', '128k'],
39
- ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%', '128k'],
40
- ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%', '128k'],
41
- // ── S tier — SWE-bench Verified 50–70% ──
42
- ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%', '32k'],
43
- ['z-ai/glm5', 'GLM 5', 'S', '77.8%', '128k'],
44
- ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%', '128k'],
45
- ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%', '128k'],
46
- ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%', '128k'],
47
- ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%', '128k'],
48
- ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%', '128k'],
49
- // ── A+ tier — SWE-bench Verified 60–70% ──
50
- ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%', '128k'],
51
- ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%', '128k'],
52
- ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%', '128k'],
53
- ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%', '128k'],
54
- ['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%', '128k'],
55
- ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%', '128k'],
56
- // ── A tier — SWE-bench Verified 45–60% ──
57
- ['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%', '128k'],
58
- ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%', '128k'],
59
- ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%', '32k'],
60
- ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%', '128k'],
61
- ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
62
- // ── A- tier — SWE-bench Verified 35–45% ──
63
- ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%', '128k'],
64
- ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%', '128k'],
65
- ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%', '128k'],
38
+ ['z-ai/glm5', 'GLM 5', 'S+', '77.8%', '128k'],
39
+ ['z-ai/glm4.7', 'GLM 4.7', 'S+', '73.8%', '200k'],
40
+ ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'S+', '71.3%', '256k'],
41
+ ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S+', '74.0%', '200k'],
42
+ ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'S+', '74.4%', '256k'],
43
+ ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
44
+ ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'S+', '70.0%', '128k'],
45
+ ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '72.2%', '256k'],
46
+ // ── S tier — SWE-bench Verified 60–70% ──
47
+ ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S', '68.4%', '128k'],
48
+ ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'S', '65.8%', '128k'],
49
+ ['minimaxai/minimax-m2', 'MiniMax M2', 'S', '69.4%', '128k'],
50
+ ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%', '128k'],
51
+ ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'S', '65.0%', '128k'],
52
+ ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%', '128k'],
53
+ ['openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
54
+ ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'S', '62.0%', '1M'],
55
+ ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S', '62.0%', '128k'],
56
+ // ── A+ tier — SWE-bench Verified 50–60% ──
57
+ ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'A+', '56.0%', '128k'],
58
+ ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'A+', '58.0%', '256k'],
59
+ ['qwen/qwq-32b', 'QwQ 32B', 'A+', '50.0%', '131k'],
60
+ ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A+', '52.0%', '16k'],
61
+ // ── A tier — SWE-bench Verified 40–50% ──
62
+ ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%', '128k'],
63
+ ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%', '32k'],
64
+ ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A', '49.0%', '128k'],
65
+ ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A', '44.0%', '10M'],
66
+ ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%', '128k'],
67
+ ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
68
+ ['openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
69
+ ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
70
+ ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'A', '44.0%', '128k'],
71
+ // ── A- tier — SWE-bench Verified 35–40% ──
72
+ ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
66
73
  ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%', '64k'],
67
- ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%', '16k'],
68
- // ── B+ tier — SWE-bench Verified 30–40% ──
69
- ['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%', '32k'],
70
- ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%', '32k'],
71
- ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%', '32k'],
72
- ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%', '32k'],
73
- ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%', '32k'],
74
- // ── B tier — SWE-bench Verified 20–35% ──
75
- ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%', '128k'],
76
- ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%', '64k'],
77
- ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%', '32k'],
78
- ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%', '32k'],
79
- ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%', '32k'],
80
- // ── C tier — SWE-bench Verified <25% or lightweight edge models ──
81
- ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%', '32k'],
82
- ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%', '8k'],
83
- ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%', '128k'],
84
- ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%', '128k'],
74
+ ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'A-', '38.0%', '32k'],
75
+ ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'A-', '36.0%', '32k'],
76
+ // ── B+ tier — SWE-bench Verified 30–35% ──
77
+ ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B+', '32.0%', '64k'],
78
+ ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B+', '34.0%', '32k'],
79
+ ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B+', '30.0%', '32k'],
80
+ // ── B tier — SWE-bench Verified 20–30% ──
81
+ ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%', '32k'],
82
+ ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'B', '22.6%', '32k'],
83
+ // ── C tier — SWE-bench Verified <20% or lightweight edge models ──
84
+ ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%', '8k'],
85
+ ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%', '128k'],
86
+ ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%', '128k'],
85
87
  ]
86
88
 
87
89
  // 📖 All sources combined - used by the main script
@@ -95,7 +97,7 @@ export const sources = {
95
97
  // 📖 Flatten all models from all sources for backward compatibility
96
98
  export const MODELS = []
97
99
  for (const [sourceKey, sourceData] of Object.entries(sources)) {
98
- for (const [modelId, label, tier, sweScore, ctw] of sourceData.models) {
99
- MODELS.push([modelId, label, tier, sweScore, ctw])
100
+ for (const [modelId, label, tier, sweScore, ctx] of sourceData.models) {
101
+ MODELS.push([modelId, label, tier, sweScore, ctx])
100
102
  }
101
103
  }