free-coding-models 0.1.40 → 0.1.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -199,23 +199,23 @@ free-coding-models
199
199
 
200
200
  ## 🤖 Coding Models
201
201
 
202
- **44 coding models** across 8 tiers, ranked by [Aider Polyglot benchmark](https://aider.chat/docs/leaderboards) (225 coding exercises across C++/Go/Java/JS/Python/Rust). Models without a confirmed Aider score are estimated from model family, size, and published release benchmarks.
203
-
204
- | Tier | Score | Count | Models |
205
- |------|-------|-------|--------|
206
- | **S+** | 75%+ | 7 | DeepSeek V3.1/Terminus, DeepSeek V3.2, Kimi K2.5, Devstral 2, Nemotron Ultra 253B, Mistral Large 675B |
207
- | **S** | 62–74% | 7 | Qwen2.5 Coder 32B, GLM 5, Qwen3.5 400B VLM, Qwen3 Coder 480B, Qwen3 80B Thinking, Llama 3.1 405B, MiniMax M2.1 |
208
- | **A+** | 54–62% | 6 | Kimi K2 Thinking/Instruct, Qwen3 235B, Llama 3.3 70B, GLM 4.7, Qwen3 80B Instruct |
209
- | **A** | 44–54% | 5 | MiniMax M2, Mistral Medium 3, Magistral Small, Nemotron Nano 30B, R1 Distill 32B |
210
- | **A-** | 36–44% | 5 | GPT OSS 120B, Nemotron Super 49B, Llama 4 Scout, R1 Distill 14B, Colosseum 355B |
211
- | **B+** | 25–36% | 5 | QwQ 32B, GPT OSS 20B, Stockmark 100B, Seed OSS 36B, Step 3.5 Flash |
212
- | **B** | 14–25% | 5 | Llama 4 Maverick, Mixtral 8x22B, Ministral 14B, Granite 34B Code, R1 Distill 8B |
213
- | **C** | <14% | 4 | R1 Distill 7B, Gemma 2 9B, Phi 3.5 Mini, Phi 4 Mini |
202
+ **44 coding models** across 8 tiers, ranked by [SWE-bench Verified](https://www.swebench.com) — the industry-standard benchmark measuring real GitHub issue resolution. Scores are self-reported by providers unless noted.
203
+
204
+ | Tier | SWE-bench | Models |
205
+ |------|-----------|--------|
206
+ | **S+** ≥70% | GLM 5 (77.8%), Kimi K2.5 (76.8%), Step 3.5 Flash (74.4%), MiniMax M2.1 (74.0%), GLM 4.7 (73.8%), DeepSeek V3.2 (73.1%), Devstral 2 (72.2%), Kimi K2 Thinking (71.3%), Qwen3 Coder 480B (70.6%), Qwen3 235B (70.0%) |
207
+ | **S** 60–70% | MiniMax M2 (69.4%), DeepSeek V3.1 Terminus (68.4%), Qwen3 80B Thinking (68.0%), Qwen3.5 400B (68.0%), Kimi K2 Instruct (65.8%), Qwen3 80B Instruct (65.0%), DeepSeek V3.1 (62.0%), Llama 4 Maverick (62.0%), GPT OSS 120B (60.0%) |
208
+ | **A+** 50–60% | Mistral Large 675B (58.0%), Nemotron Ultra 253B (56.0%), Colosseum 355B (52.0%), QwQ 32B (50.0%) |
209
+ | **A** 40–50% | Nemotron Super 49B (49.0%), Mistral Medium 3 (48.0%), Qwen2.5 Coder 32B (46.0%), Magistral Small (45.0%), Llama 4 Scout (44.0%), Llama 3.1 405B (44.0%), Nemotron Nano 30B (43.0%), R1 Distill 32B (43.9%), GPT OSS 20B (42.0%) |
210
+ | **A-** 35–40% | Llama 3.3 70B (39.5%), Seed OSS 36B (38.0%), R1 Distill 14B (37.7%), Stockmark 100B (36.0%) |
211
+ | **B+** 30–35% | Ministral 14B (34.0%), Mixtral 8x22B (32.0%), Granite 34B Code (30.0%) |
212
+ | **B** 20–30% | R1 Distill 8B (28.2%), R1 Distill 7B (22.6%) |
213
+ | **C** <20% | Gemma 2 9B (18.0%), Phi 4 Mini (14.0%), Phi 3.5 Mini (12.0%) |
214
214
 
215
215
  ### Tier scale
216
216
 
217
- - **S+/S** — Frontier coders, top Aider polyglot scores, best for complex refactors
218
- - **A+/A** — Excellent alternatives, strong at most coding tasks
217
+ - **S+/S** — Elite frontier coders (≥60% SWE-bench), best for complex real-world tasks and refactors
218
+ - **A+/A** — Great alternatives, strong at most coding tasks
219
219
  - **A-/B+** — Solid performers, good for targeted programming tasks
220
220
  - **B/C** — Lightweight or older models, good for code completion on constrained infra
221
221
 
@@ -411,11 +411,11 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
411
411
  // πŸ“– Add mode toggle hint
412
412
  const modeHint = chalk.dim.yellow(' (Z to toggle)')
413
413
 
414
- // πŸ“– Tier filter badge shown when filtering is active
414
+ // πŸ“– Tier filter badge shown when filtering is active (shows exact tier name)
415
+ const TIER_CYCLE_NAMES = [null, 'S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
415
416
  let tierBadge = ''
416
417
  if (tierFilterMode > 0) {
417
- const tierNames = ['All', 'S+/S', 'A+/A/A-', 'B+/B', 'C']
418
- tierBadge = chalk.bold.rgb(255, 200, 0)(` [${tierNames[tierFilterMode]}]`)
418
+ tierBadge = chalk.bold.rgb(255, 200, 0)(` [${TIER_CYCLE_NAMES[tierFilterMode]}]`)
419
419
  }
420
420
 
421
421
  // πŸ“– Column widths (generous spacing with margins)
@@ -665,7 +665,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
665
665
  : chalk.rgb(0, 200, 255)('Enterβ†’OpenCode')
666
666
  lines.push(chalk.dim(` ↑↓ Navigate β€’ `) + actionHint + chalk.dim(` β€’ R/T/O/M/L/A/S/C/H/V/U Sort β€’ W↓/X↑ Interval (${intervalSec}s) β€’ T Tier β€’ Z Mode β€’ Ctrl+C Exit`))
667
667
  lines.push('')
668
- lines.push(chalk.dim(' Made with ') + 'πŸ’– & β˜•' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + 'πŸ’¬ ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join Free-Coding-Models Discord!\x1b]8;;\x1b\\') + chalk.dim(' β€’ ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
668
+ lines.push(chalk.dim(' Made with ') + 'πŸ’– & β˜•' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + 'πŸ«‚ ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join our Discord!\x1b]8;;\x1b\\') + chalk.dim(' β€’ ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
669
669
  lines.push('')
670
670
  // πŸ“– Append \x1b[K (erase to EOL) to each line so leftover chars from previous
671
671
  // πŸ“– frames are cleared. Then pad with blank cleared lines to fill the terminal,
@@ -1296,29 +1296,15 @@ async function main() {
1296
1296
  process.on('SIGINT', () => exit(0))
1297
1297
  process.on('SIGTERM', () => exit(0))
1298
1298
 
1299
- // πŸ“– Tier filtering system - cycles through filter modes
1300
- let tierFilterMode = 0 // 0=all, 1=S+/S, 2=A+/A/A-, 3=B+/B, 4=C
1299
+ // πŸ“– Tier filtering system - cycles through each individual tier one by one
1300
+ // πŸ“– 0=All, 1=S+, 2=S, 3=A+, 4=A, 5=A-, 6=B+, 7=B, 8=C
1301
+ const TIER_CYCLE = [null, 'S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
1302
+ let tierFilterMode = 0
1301
1303
  function applyTierFilter() {
1304
+ const activeTier = TIER_CYCLE[tierFilterMode]
1302
1305
  state.results.forEach(r => {
1303
- switch (tierFilterMode) {
1304
- case 0: // All tiers visible
1305
- r.hidden = false
1306
- break
1307
- case 1: // S+ and S only
1308
- r.hidden = !(r.tier === 'S+' || r.tier === 'S')
1309
- break
1310
- case 2: // A+, A, A- only
1311
- r.hidden = !(r.tier === 'A+' || r.tier === 'A' || r.tier === 'A-')
1312
- break
1313
- case 3: // B+ and B only
1314
- r.hidden = !(r.tier === 'B+' || r.tier === 'B')
1315
- break
1316
- case 4: // C only
1317
- r.hidden = r.tier !== 'C'
1318
- break
1319
- }
1306
+ r.hidden = activeTier !== null && r.tier !== activeTier
1320
1307
  })
1321
-
1322
1308
  return state.results
1323
1309
  }
1324
1310
 
@@ -1359,9 +1345,9 @@ async function main() {
1359
1345
  state.pingInterval = Math.min(60000, state.pingInterval + 1000)
1360
1346
  }
1361
1347
 
1362
- // πŸ“– Tier toggle key: T = cycle through tier filters (all β†’ S+/S β†’ A+/A/A- β†’ B+/B β†’ C β†’ all)
1348
+ // πŸ“– Tier toggle key: T = cycle through each individual tier (All β†’ S+ β†’ S β†’ A+ β†’ A β†’ A- β†’ B+ β†’ B β†’ C β†’ All)
1363
1349
  if (key.name === 't') {
1364
- tierFilterMode = (tierFilterMode + 1) % 5
1350
+ tierFilterMode = (tierFilterMode + 1) % TIER_CYCLE.length
1365
1351
  applyTierFilter()
1366
1352
  adjustScrollOffset(state)
1367
1353
  return
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.40",
3
+ "version": "0.1.42",
4
4
  "description": "Find the fastest coding LLM models in seconds β€” ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -8,9 +8,9 @@
8
8
  * - model_id: The model identifier for API calls
9
9
  * - display_label: Human-friendly name for display
10
10
  * - tier: Performance tier (S+, S, A+, A, A-, B+, B, C)
11
- * - swe_score: SWE-bench Verified score percentage
11
+ * - swe_score: SWE-bench Verified score percentage (self-reported by model provider)
12
12
  * - ctx: Context window size in tokens (e.g., "128k", "32k")
13
- *
13
+ *
14
14
  * Add new sources here to support additional providers beyond NIM.
15
15
  *
16
16
  * 🎯 Tier scale (based on SWE-bench Verified):
@@ -18,12 +18,14 @@
18
18
  * - S: 60-70% (excellent)
19
19
  * - A+: 50-60% (great)
20
20
  * - A: 40-50% (good)
21
- * - A-: 35-45% (decent)
22
- * - B+: 30-40% (average)
21
+ * - A-: 35-40% (decent)
22
+ * - B+: 30-35% (average)
23
23
  * - B: 20-30% (below average)
24
24
  * - C: <20% (lightweight/edge)
25
25
  *
26
- * πŸ“– Source: https://www.swebench.com
26
+ * πŸ“– Source: https://www.swebench.com β€” scores are self-reported unless noted
27
+ * πŸ“– Secondary: https://swe-rebench.com (independent evals, scores are lower)
28
+ * πŸ“– Leaderboard tracker: https://www.marc0.dev/en/leaderboard
27
29
  *
28
30
  * @exports Object containing all sources and their models
29
31
  */
@@ -31,57 +33,57 @@
31
33
  // πŸ“– NIM source - https://build.nvidia.com
32
34
  export const nvidiaNim = [
33
35
  // ── S+ tier β€” SWE-bench Verified β‰₯70% ──
34
- ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%', '128k'],
35
- ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%', '128k'],
36
36
  ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%', '128k'],
37
37
  ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%', '128k'],
38
- ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%', '128k'],
39
- ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%', '128k'],
40
- ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%', '128k'],
41
- // ── S tier β€” SWE-bench Verified 50–70% ──
42
- ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%', '32k'],
43
- ['z-ai/glm5', 'GLM 5', 'S', '77.8%', '128k'],
44
- ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%', '128k'],
45
- ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%', '128k'],
46
- ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%', '128k'],
47
- ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%', '128k'],
48
- ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%', '128k'],
49
- // ── A+ tier β€” SWE-bench Verified 60–70% ──
50
- ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%', '128k'],
51
- ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%', '128k'],
52
- ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%', '128k'],
53
- ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%', '128k'],
54
- ['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%', '128k'],
55
- ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%', '128k'],
56
- // ── A tier β€” SWE-bench Verified 45–60% ──
57
- ['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%', '128k'],
58
- ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%', '128k'],
59
- ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%', '32k'],
60
- ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%', '128k'],
61
- ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
62
- // ── A- tier β€” SWE-bench Verified 35–45% ──
63
- ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%', '128k'],
64
- ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%', '128k'],
65
- ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%', '128k'],
38
+ ['z-ai/glm5', 'GLM 5', 'S+', '77.8%', '128k'],
39
+ ['z-ai/glm4.7', 'GLM 4.7', 'S+', '73.8%', '200k'],
40
+ ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'S+', '71.3%', '256k'],
41
+ ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S+', '74.0%', '200k'],
42
+ ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'S+', '74.4%', '256k'],
43
+ ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
44
+ ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'S+', '70.0%', '128k'],
45
+ ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '72.2%', '256k'],
46
+ // ── S tier β€” SWE-bench Verified 60–70% ──
47
+ ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S', '68.4%', '128k'],
48
+ ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'S', '65.8%', '128k'],
49
+ ['minimaxai/minimax-m2', 'MiniMax M2', 'S', '69.4%', '128k'],
50
+ ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%', '128k'],
51
+ ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'S', '65.0%', '128k'],
52
+ ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%', '128k'],
53
+ ['openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
54
+ ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'S', '62.0%', '1M'],
55
+ ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S', '62.0%', '128k'],
56
+ // ── A+ tier β€” SWE-bench Verified 50–60% ──
57
+ ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'A+', '56.0%', '128k'],
58
+ ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'A+', '58.0%', '256k'],
59
+ ['qwen/qwq-32b', 'QwQ 32B', 'A+', '50.0%', '131k'],
60
+ ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A+', '52.0%', '16k'],
61
+ // ── A tier β€” SWE-bench Verified 40–50% ──
62
+ ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%', '128k'],
63
+ ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%', '32k'],
64
+ ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A', '49.0%', '128k'],
65
+ ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A', '44.0%', '10M'],
66
+ ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%', '128k'],
67
+ ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
68
+ ['openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
69
+ ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
70
+ ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'A', '44.0%', '128k'],
71
+ // ── A- tier β€” SWE-bench Verified 35–40% ──
72
+ ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
66
73
  ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%', '64k'],
67
- ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%', '16k'],
68
- // ── B+ tier β€” SWE-bench Verified 30–40% ──
69
- ['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%', '32k'],
70
- ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%', '32k'],
71
- ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%', '32k'],
72
- ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%', '32k'],
73
- ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%', '32k'],
74
- // ── B tier β€” SWE-bench Verified 20–35% ──
75
- ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%', '128k'],
76
- ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%', '64k'],
77
- ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%', '32k'],
78
- ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%', '32k'],
79
- ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%', '32k'],
80
- // ── C tier β€” SWE-bench Verified <25% or lightweight edge models ──
81
- ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%', '32k'],
82
- ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%', '8k'],
83
- ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%', '128k'],
84
- ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%', '128k'],
74
+ ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'A-', '38.0%', '32k'],
75
+ ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'A-', '36.0%', '32k'],
76
+ // ── B+ tier β€” SWE-bench Verified 30–35% ──
77
+ ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B+', '32.0%', '64k'],
78
+ ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B+', '34.0%', '32k'],
79
+ ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B+', '30.0%', '32k'],
80
+ // ── B tier β€” SWE-bench Verified 20–30% ──
81
+ ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%', '32k'],
82
+ ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'B', '22.6%', '32k'],
83
+ // ── C tier β€” SWE-bench Verified <20% or lightweight edge models ──
84
+ ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%', '8k'],
85
+ ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%', '128k'],
86
+ ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%', '128k'],
85
87
  ]
86
88
 
87
89
  // πŸ“– All sources combined - used by the main script