free-coding-models 0.1.38 β†’ 0.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -421,6 +421,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
421
421
  // πŸ“– Column widths (generous spacing with margins)
422
422
  const W_RANK = 6
423
423
  const W_TIER = 6
424
+ const W_CTX = 6
424
425
  const W_SOURCE = 14
425
426
  const W_MODEL = 26
426
427
  const W_SWE = 9
@@ -453,6 +454,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
453
454
  const originH = 'Origin'
454
455
  const modelH = 'Model'
455
456
  const sweH = sortColumn === 'swe' ? dir + ' SWE%' : 'SWE%'
457
+ const ctxH = sortColumn === 'ctx' ? dir + ' CTX' : 'CTX'
456
458
  const pingH = sortColumn === 'ping' ? dir + ' Latest Ping' : 'Latest Ping'
457
459
  const avgH = sortColumn === 'avg' ? dir + ' Avg Ping' : 'Avg Ping'
458
460
  const healthH = sortColumn === 'condition' ? dir + ' Health' : 'Health'
@@ -475,14 +477,15 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
475
477
  const originH_c = sortColumn === 'origin' ? chalk.bold.cyan(originH.padEnd(W_SOURCE)) : colorFirst(originH, W_SOURCE)
476
478
  const modelH_c = colorFirst(modelH, W_MODEL)
477
479
  const sweH_c = sortColumn === 'swe' ? chalk.bold.cyan(sweH.padEnd(W_SWE)) : colorFirst(sweH, W_SWE)
480
+ const ctxH_c = sortColumn === 'ctx' ? chalk.bold.cyan(ctxH.padEnd(W_CTX)) : colorFirst(ctxH, W_CTX)
478
481
  const pingH_c = sortColumn === 'ping' ? chalk.bold.cyan(pingH.padEnd(W_PING)) : colorFirst('Latest Ping', W_PING)
479
482
  const avgH_c = sortColumn === 'avg' ? chalk.bold.cyan(avgH.padEnd(W_AVG)) : colorFirst('Avg Ping', W_AVG)
480
483
  const healthH_c = sortColumn === 'condition' ? chalk.bold.cyan(healthH.padEnd(W_STATUS)) : colorFirst('Health', W_STATUS)
481
484
  const verdictH_c = sortColumn === 'verdict' ? chalk.bold.cyan(verdictH.padEnd(W_VERDICT)) : colorFirst(verdictH, W_VERDICT)
482
485
  const uptimeH_c = sortColumn === 'uptime' ? chalk.bold.cyan(uptimeH.padStart(W_UPTIME)) : colorFirst(uptimeH, W_UPTIME, chalk.green)
483
486
 
484
- // πŸ“– Header with proper spacing
485
- lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + sweH_c + ' ' + modelH_c + ' ' + originH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + healthH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
487
+ // πŸ“– Header with proper spacing (column order: Rank, Tier, SWE%, CTX, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
488
+ lines.push(' ' + rankH_c + ' ' + tierH_c + ' ' + sweH_c + ' ' + ctxH_c + ' ' + modelH_c + ' ' + originH_c + ' ' + pingH_c + ' ' + avgH_c + ' ' + healthH_c + ' ' + verdictH_c + ' ' + uptimeH_c)
486
489
 
487
490
  // πŸ“– Separator line
488
491
  lines.push(
@@ -490,6 +493,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
490
493
  chalk.dim('─'.repeat(W_RANK)) + ' ' +
491
494
  chalk.dim('─'.repeat(W_TIER)) + ' ' +
492
495
  chalk.dim('─'.repeat(W_SWE)) + ' ' +
496
+ chalk.dim('─'.repeat(W_CTX)) + ' ' +
493
497
  '─'.repeat(W_MODEL) + ' ' +
494
498
  '─'.repeat(W_SOURCE) + ' ' +
495
499
  chalk.dim('─'.repeat(W_PING)) + ' ' +
@@ -523,6 +527,14 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
523
527
  : sweScore !== 'β€”' && parseFloat(sweScore) >= 30
524
528
  ? chalk.yellow(sweScore.padEnd(W_SWE))
525
529
  : chalk.dim(sweScore.padEnd(W_SWE))
530
+
531
+ // πŸ“– Context window column - colorized by size (larger = better)
532
+ const ctxRaw = r.ctx ?? 'β€”'
533
+ const ctxCell = ctxRaw !== 'β€”' && (ctxRaw.includes('128k') || ctxRaw.includes('200k') || ctxRaw.includes('1m'))
534
+ ? chalk.greenBright(ctxRaw.padEnd(W_CTX))
535
+ : ctxRaw !== 'β€”' && (ctxRaw.includes('32k') || ctxRaw.includes('64k'))
536
+ ? chalk.cyan(ctxRaw.padEnd(W_CTX))
537
+ : chalk.dim(ctxRaw.padEnd(W_CTX))
526
538
 
527
539
  // πŸ“– Latest ping - pings are objects: { ms, code }
528
540
  // πŸ“– Only show response time for successful pings, "β€”" for errors (error code is in Status column)
@@ -628,8 +640,8 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
628
640
  uptimeCell = chalk.red(uptimeStr.padStart(W_UPTIME))
629
641
  }
630
642
 
631
- // πŸ“– Build row with double space between columns
632
- const row = ' ' + num + ' ' + tier + ' ' + sweCell + ' ' + name + ' ' + source + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
643
+ // πŸ“– Build row with double space between columns (order: Rank, Tier, SWE%, CTX, Model, Origin, Latest Ping, Avg Ping, Health, Verdict, Up%)
644
+ const row = ' ' + num + ' ' + tier + ' ' + sweCell + ' ' + ctxCell + ' ' + name + ' ' + source + ' ' + pingCell + ' ' + avgCell + ' ' + status + ' ' + speedCell + ' ' + uptimeCell
633
645
 
634
646
  if (isCursor) {
635
647
  lines.push(chalk.bgRgb(139, 0, 139)(row))
@@ -651,7 +663,7 @@ function renderTable(results, pendingPings, frame, cursor = null, sortColumn = '
651
663
  : mode === 'opencode-desktop'
652
664
  ? chalk.rgb(0, 200, 255)('Enterβ†’OpenDesktop')
653
665
  : chalk.rgb(0, 200, 255)('Enterβ†’OpenCode')
654
- lines.push(chalk.dim(` ↑↓ Navigate β€’ `) + actionHint + chalk.dim(` β€’ R/T/O/M/L/A/S/H/V/U Sort β€’ W↓/X↑ Interval (${intervalSec}s) β€’ T Tier β€’ Z Mode β€’ Ctrl+C Exit`))
666
+ lines.push(chalk.dim(` ↑↓ Navigate β€’ `) + actionHint + chalk.dim(` β€’ R/T/O/M/L/A/S/N/H/V/U Sort β€’ W↓/X↑ Interval (${intervalSec}s) β€’ T Tier β€’ Z Mode β€’ Ctrl+C Exit`))
655
667
  lines.push('')
656
668
  lines.push(chalk.dim(' Made with ') + 'πŸ’– & β˜•' + chalk.dim(' by ') + '\x1b]8;;https://github.com/vava-nessa\x1b\\vava-nessa\x1b]8;;\x1b\\' + chalk.dim(' β€’ ') + 'πŸ’¬ ' + chalk.cyanBright('\x1b]8;;https://discord.gg/WKA3TwYVuZ\x1b\\Join Free-Coding-Models Discord!\x1b]8;;\x1b\\') + chalk.dim(' β€’ ') + '⭐ ' + '\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\Read the docs on GitHub\x1b]8;;\x1b\\')
657
669
  lines.push('')
@@ -783,14 +795,25 @@ async function startOpenCode(model) {
783
795
 
784
796
  saveOpenCodeConfig(config)
785
797
 
786
- console.log(chalk.green(` βœ“ Default model set to: nvidia/${model.modelId}`))
798
+ // πŸ“– Verify config was saved correctly
799
+ const savedConfig = loadOpenCodeConfig()
800
+ console.log(chalk.dim(` πŸ“ Config saved to: ${getOpenCodeConfigPath()}`))
801
+ console.log(chalk.dim(` πŸ“ Default model in config: ${savedConfig.model || 'NOT SET'}`))
802
+ console.log()
803
+
804
+ if (savedConfig.model === config.model) {
805
+ console.log(chalk.green(` βœ“ Default model set to: nvidia/${model.modelId}`))
806
+ } else {
807
+ console.log(chalk.yellow(` ⚠ Config might not have been saved correctly`))
808
+ }
787
809
  console.log()
788
810
  console.log(chalk.dim(' Starting OpenCode…'))
789
811
  console.log()
790
812
 
791
813
  // πŸ“– Launch OpenCode and wait for it
814
+ // πŸ“– Use --model flag to ensure the model is selected
792
815
  const { spawn } = await import('child_process')
793
- const child = spawn('opencode', [], {
816
+ const child = spawn('opencode', ['--model', `nvidia/${model.modelId}`], {
794
817
  stdio: 'inherit',
795
818
  shell: true,
796
819
  detached: false
@@ -895,7 +918,17 @@ async function startOpenCodeDesktop(model) {
895
918
 
896
919
  saveOpenCodeConfig(config)
897
920
 
898
- console.log(chalk.green(` βœ“ Default model set to: nvidia/${model.modelId}`))
921
+ // πŸ“– Verify config was saved correctly
922
+ const savedConfig = loadOpenCodeConfig()
923
+ console.log(chalk.dim(` πŸ“ Config saved to: ${getOpenCodeConfigPath()}`))
924
+ console.log(chalk.dim(` πŸ“ Default model in config: ${savedConfig.model || 'NOT SET'}`))
925
+ console.log()
926
+
927
+ if (savedConfig.model === config.model) {
928
+ console.log(chalk.green(` βœ“ Default model set to: nvidia/${model.modelId}`))
929
+ } else {
930
+ console.log(chalk.yellow(` ⚠ Config might not have been saved correctly`))
931
+ }
899
932
  console.log()
900
933
  console.log(chalk.dim(' Opening OpenCode Desktop…'))
901
934
  console.log()
@@ -912,9 +945,9 @@ async function startOpenCodeDesktop(model) {
912
945
  // πŸ“– System installation: C:\Program Files\OpenCode\OpenCode.exe
913
946
  command = 'start "" "%LOCALAPPDATA%\\Programs\\OpenCode\\OpenCode.exe" 2>nul || start "" "%PROGRAMFILES%\\OpenCode\\OpenCode.exe" 2>nul || start OpenCode'
914
947
  } else if (isLinux) {
915
- // πŸ“– On Linux, try different methods
948
+ // πŸ“– On Linux, try different methods with model flag
916
949
  // πŸ“– Check if opencode-desktop exists, otherwise try xdg-open
917
- command = 'opencode-desktop 2>/dev/null || xdg-open /usr/share/applications/opencode.desktop 2>/dev/null || flatpak run ai.opencode.OpenCode 2>/dev/null || snap run opencode 2>/dev/null || echo "OpenCode not found"'
950
+ command = `opencode-desktop --model nvidia/${model.modelId} 2>/dev/null || flatpak run ai.opencode.OpenCode --model nvidia/${model.modelId} 2>/dev/null || snap run opencode --model nvidia/${model.modelId} 2>/dev/null || xdg-open /usr/share/applications/opencode.desktop 2>/dev/null || echo "OpenCode not found"`
918
951
  }
919
952
 
920
953
  exec(command, (err, stdout, stderr) => {
@@ -1072,8 +1105,8 @@ async function runFiableMode(apiKey) {
1072
1105
  console.log(chalk.cyan(' ⚑ Analyzing models for reliability (10 seconds)...'))
1073
1106
  console.log()
1074
1107
 
1075
- let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
1076
- idx: i + 1, modelId, label, tier, sweScore,
1108
+ let results = MODELS.map(([modelId, label, tier, sweScore, ctx], i) => ({
1109
+ idx: i + 1, modelId, label, tier, sweScore, ctx,
1077
1110
  status: 'pending',
1078
1111
  pings: [],
1079
1112
  httpCode: null,
@@ -1150,21 +1183,47 @@ async function main() {
1150
1183
  }
1151
1184
  }
1152
1185
 
1153
- // πŸ“– Skip update check during development to avoid blocking menus
1154
- // πŸ“– In production, this will work correctly when versions are published
1155
- const latestVersion = null // Skip update check for now
1186
+ // πŸ“– Check for updates in the background
1187
+ let latestVersion = null
1188
+ try {
1189
+ latestVersion = await checkForUpdate()
1190
+ } catch {
1191
+ // Silently fail - don't block the app if npm registry is unreachable
1192
+ }
1156
1193
 
1157
1194
  // πŸ“– Default mode: OpenCode CLI
1158
1195
  let mode = 'opencode'
1159
1196
 
1160
- // πŸ“– AUTO-UPDATE: Disabled during development
1161
- // πŸ“– Will be re-enabled when versions are properly published
1162
-
1163
- // πŸ“– This section is now handled by the update notification menu above
1197
+ // πŸ“– Show update notification menu if a new version is available
1198
+ if (latestVersion) {
1199
+ const action = await promptUpdateNotification(latestVersion)
1200
+ if (action === 'update') {
1201
+ runUpdate(latestVersion)
1202
+ return // runUpdate will restart the process
1203
+ } else if (action === 'changelogs') {
1204
+ console.log()
1205
+ console.log(chalk.cyan(' Opening changelog in browser...'))
1206
+ console.log()
1207
+ const { execSync } = require('child_process')
1208
+ const changelogUrl = 'https://github.com/vava-nessa/free-coding-models/releases'
1209
+ try {
1210
+ if (isMac) {
1211
+ execSync(`open "${changelogUrl}"`, { stdio: 'ignore' })
1212
+ } else if (isWindows) {
1213
+ execSync(`start "" "${changelogUrl}"`, { stdio: 'ignore' })
1214
+ } else {
1215
+ execSync(`xdg-open "${changelogUrl}"`, { stdio: 'ignore' })
1216
+ }
1217
+ } catch {
1218
+ console.log(chalk.dim(` Could not open browser. Visit: ${changelogUrl}`))
1219
+ }
1220
+ }
1221
+ // If action is null (Continue without update) or changelogs, proceed to main app
1222
+ }
1164
1223
 
1165
1224
  // πŸ“– Create results array with all models initially visible
1166
- let results = MODELS.map(([modelId, label, tier, sweScore], i) => ({
1167
- idx: i + 1, modelId, label, tier, sweScore,
1225
+ let results = MODELS.map(([modelId, label, tier, sweScore, ctx], i) => ({
1226
+ idx: i + 1, modelId, label, tier, sweScore, ctx,
1168
1227
  status: 'pending',
1169
1228
  pings: [], // πŸ“– All ping results (ms or 'TIMEOUT')
1170
1229
  httpCode: null,
@@ -1273,10 +1332,10 @@ async function main() {
1273
1332
  const onKeyPress = async (str, key) => {
1274
1333
  if (!key) return
1275
1334
 
1276
- // πŸ“– Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, H=health, V=verdict, U=uptime
1335
+ // πŸ“– Sorting keys: R=rank, T=tier, O=origin, M=model, L=latest ping, A=avg ping, S=SWE-bench, N=context, H=health, V=verdict, U=uptime
1277
1336
  const sortKeys = {
1278
1337
  'r': 'rank', 't': 'tier', 'o': 'origin', 'm': 'model',
1279
- 'l': 'ping', 'a': 'avg', 's': 'swe', 'h': 'condition', 'v': 'verdict', 'u': 'uptime'
1338
+ 'l': 'ping', 'a': 'avg', 's': 'swe', 'n': 'ctx', 'h': 'condition', 'v': 'verdict', 'u': 'uptime'
1280
1339
  }
1281
1340
 
1282
1341
  if (sortKeys[key.name]) {
package/lib/utils.js CHANGED
@@ -136,6 +136,7 @@ export const getUptime = (r) => {
136
136
  // - 'ping' (L key) β€” last ping latency (only successful ones count)
137
137
  // - 'avg' (A key) β€” average latency across all successful pings
138
138
  // - 'swe' (S key) β€” SWE-bench score (higher is better)
139
+ // - 'ctx' (N key) β€” context window size (larger is better)
139
140
  // - 'condition' (H key) β€” health status (alphabetical)
140
141
  // - 'verdict' (V key) β€” verdict order (Perfect β†’ Pending)
141
142
  // - 'uptime' (U key) β€” uptime percentage
@@ -184,6 +185,27 @@ export const sortResults = (results, sortColumn, sortDirection) => {
184
185
  cmp = parseSwe(a.sweScore) - parseSwe(b.sweScore)
185
186
  break
186
187
  }
188
+ case 'ctx': {
189
+ // πŸ“– Sort by context window size β€” larger is better
190
+ // πŸ“– Parse strings like "128k", "32k", "1m" into numeric tokens
191
+ const parseCtx = (ctx) => {
192
+ if (!ctx || ctx === 'β€”') return 0
193
+ const str = ctx.toLowerCase()
194
+ // πŸ“– Handle millions (1m = 1000k)
195
+ if (str.includes('m')) {
196
+ const num = parseFloat(str.replace('m', ''))
197
+ return num * 1000
198
+ }
199
+ // πŸ“– Handle thousands (128k)
200
+ if (str.includes('k')) {
201
+ const num = parseFloat(str.replace('k', ''))
202
+ return num
203
+ }
204
+ return 0
205
+ }
206
+ cmp = parseCtx(a.ctx) - parseCtx(b.ctx)
207
+ break
208
+ }
187
209
  case 'condition':
188
210
  cmp = a.status.localeCompare(b.status)
189
211
  break
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.38",
3
+ "version": "0.1.40",
4
4
  "description": "Find the fastest coding LLM models in seconds β€” ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -4,20 +4,26 @@
4
4
  *
5
5
  * @details
6
6
  * This file contains all model definitions organized by provider/source.
7
- * Each source has its own models array with [model_id, display_label, tier].
8
- * Add new sources here to support additional providers beyond NVIDIA NIM.
7
+ * Each source has its own models array with [model_id, display_label, tier, swe_score, ctx].
8
+ * - model_id: The model identifier for API calls
9
+ * - display_label: Human-friendly name for display
10
+ * - tier: Performance tier (S+, S, A+, A, A-, B+, B, C)
11
+ * - swe_score: SWE-bench Verified score percentage
12
+ * - ctx: Context window size in tokens (e.g., "128k", "32k")
13
+ *
14
+ * Add new sources here to support additional providers beyond NIM.
9
15
  *
10
- * 🎯 Tier scale (based on Aider Polyglot benchmark):
11
- * - S+: 75%+ (elite frontier coders)
12
- * - S: 62-74% (excellent)
13
- * - A+: 54-62% (great)
14
- * - A: 44-54% (good)
15
- * - A-: 36-44% (decent)
16
- * - B+: 25-36% (average)
17
- * - B: 14-25% (below average)
18
- * - C: <14% (lightweight/edge)
16
+ * 🎯 Tier scale (based on SWE-bench Verified):
17
+ * - S+: 70%+ (elite frontier coders)
18
+ * - S: 60-70% (excellent)
19
+ * - A+: 50-60% (great)
20
+ * - A: 40-50% (good)
21
+ * - A-: 35-40% (decent)
22
+ * - B+: 30-35% (average)
23
+ * - B: 20-30% (below average)
24
+ * - C: <20% (lightweight/edge)
19
25
  *
20
- * πŸ“– Source: https://aider.chat/docs/leaderboards (Polyglot = 225 exercises, 6 languages)
26
+ * πŸ“– Source: https://www.swebench.com
21
27
  *
22
28
  * @exports Object containing all sources and their models
23
29
  */
@@ -25,57 +31,57 @@
25
31
  // πŸ“– NIM source - https://build.nvidia.com
26
32
  export const nvidiaNim = [
27
33
  // ── S+ tier β€” SWE-bench Verified β‰₯70% ──
28
- ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%'], // 49.2% SWE-bench Verified
29
- ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%'], // same base V3.1
30
- ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%'], // 73.1% SWE-bench Verified
31
- ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%'], // 76.8% SWE-bench Verified
32
- ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%'], // 62.0% SWE-bench (est.)
33
- ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%'], // 56.0% SWE-bench (est.)
34
- ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%'], // 58.0% SWE-bench (est.)
34
+ ['deepseek-ai/deepseek-v3.1', 'DeepSeek V3.1', 'S+', '49.2%', '128k'],
35
+ ['deepseek-ai/deepseek-v3.1-terminus', 'DeepSeek V3.1 Term', 'S+', '49.2%', '128k'],
36
+ ['deepseek-ai/deepseek-v3.2', 'DeepSeek V3.2', 'S+', '73.1%', '128k'],
37
+ ['moonshotai/kimi-k2.5', 'Kimi K2.5', 'S+', '76.8%', '128k'],
38
+ ['mistralai/devstral-2-123b-instruct-2512', 'Devstral 2 123B', 'S+', '62.0%', '128k'],
39
+ ['nvidia/llama-3.1-nemotron-ultra-253b-v1', 'Nemotron Ultra 253B', 'S+', '56.0%', '128k'],
40
+ ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B', 'S+', '58.0%', '128k'],
35
41
  // ── S tier β€” SWE-bench Verified 50–70% ──
36
- ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%'], // 46.0% SWE-bench Verified
37
- ['z-ai/glm5', 'GLM 5', 'S', '77.8%'], // 77.8% SWE-bench Verified
38
- ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%'], // 68.0% SWE-bench (est.)
39
- ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%'], // 72.0% SWE-bench (est.)
40
- ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%'], // 68.0% SWE-bench (est.)
41
- ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%'], // 44.0% SWE-bench (est.)
42
- ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%'], // 70.0% SWE-bench (est.)
42
+ ['qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'S', '46.0%', '32k'],
43
+ ['z-ai/glm5', 'GLM 5', 'S', '77.8%', '128k'],
44
+ ['qwen/qwen3.5-397b-a17b', 'Qwen3.5 400B VLM', 'S', '68.0%', '128k'],
45
+ ['qwen/qwen3-coder-480b-a35b-instruct', 'Qwen3 Coder 480B', 'S', '72.0%', '128k'],
46
+ ['qwen/qwen3-next-80b-a3b-thinking', 'Qwen3 80B Thinking', 'S', '68.0%', '128k'],
47
+ ['meta/llama-3.1-405b-instruct', 'Llama 3.1 405B', 'S', '44.0%', '128k'],
48
+ ['minimaxai/minimax-m2.1', 'MiniMax M2.1', 'S', '70.0%', '128k'],
43
49
  // ── A+ tier β€” SWE-bench Verified 60–70% ──
44
- ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%'], // 67.0% SWE-bench (est.)
45
- ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%'], // 65.8% SWE-bench Verified
46
- ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%'], // 70.0% SWE-bench (est.)
47
- ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%'], // 39.5% SWE-bench (est.)
48
- ['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%'], // 73.8% SWE-bench Verified
49
- ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%'], // 65.0% SWE-bench (est.)
50
+ ['moonshotai/kimi-k2-thinking', 'Kimi K2 Thinking', 'A+', '67.0%', '128k'],
51
+ ['moonshotai/kimi-k2-instruct', 'Kimi K2 Instruct', 'A+', '65.8%', '128k'],
52
+ ['qwen/qwen3-235b-a22b', 'Qwen3 235B', 'A+', '70.0%', '128k'],
53
+ ['meta/llama-3.3-70b-instruct', 'Llama 3.3 70B', 'A+', '39.5%', '128k'],
54
+ ['z-ai/glm4.7', 'GLM 4.7', 'A+', '73.8%', '128k'],
55
+ ['qwen/qwen3-next-80b-a3b-instruct', 'Qwen3 80B Instruct', 'A+', '65.0%', '128k'],
50
56
  // ── A tier β€” SWE-bench Verified 45–60% ──
51
- ['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%'], // 56.5% SWE-bench (est.)
52
- ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%'], // 48.0% SWE-bench (est.)
53
- ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%'], // 45.0% SWE-bench (est.)
54
- ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%'], // 43.0% SWE-bench (est.)
55
- ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%'], // 43.9% SWE-bench Verified
57
+ ['minimaxai/minimax-m2', 'MiniMax M2', 'A', '56.5%', '128k'],
58
+ ['mistralai/mistral-medium-3-instruct', 'Mistral Medium 3', 'A', '48.0%', '128k'],
59
+ ['mistralai/magistral-small-2506', 'Magistral Small', 'A', '45.0%', '32k'],
60
+ ['nvidia/nemotron-3-nano-30b-a3b', 'Nemotron Nano 30B', 'A', '43.0%', '128k'],
61
+ ['deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
56
62
  // ── A- tier β€” SWE-bench Verified 35–45% ──
57
- ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%'], // 60.0% SWE-bench (est.)
58
- ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%'], // 49.0% SWE-bench (est.)
59
- ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%'], // 44.0% SWE-bench (est.)
60
- ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%'], // 37.7% SWE-bench (est.)
61
- ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%'], // 52.0% SWE-bench (est.)
63
+ ['openai/gpt-oss-120b', 'GPT OSS 120B', 'A-', '60.0%', '128k'],
64
+ ['nvidia/llama-3.3-nemotron-super-49b-v1.5', 'Nemotron Super 49B', 'A-', '49.0%', '128k'],
65
+ ['meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 'A-', '44.0%', '128k'],
66
+ ['deepseek-ai/deepseek-r1-distill-qwen-14b', 'R1 Distill 14B', 'A-', '37.7%', '64k'],
67
+ ['igenius/colosseum_355b_instruct_16k', 'Colosseum 355B', 'A-', '52.0%', '16k'],
62
68
  // ── B+ tier β€” SWE-bench Verified 30–40% ──
63
- ['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%'], // 50.0% SWE-bench (est.)
64
- ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%'], // 42.0% SWE-bench (est.)
65
- ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%'], // 36.0% SWE-bench (est.)
66
- ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%'], // 38.0% SWE-bench (est.)
67
- ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%'], // 74.4% SWE-bench Verified
69
+ ['qwen/qwq-32b', 'QwQ 32B', 'B+', '50.0%', '32k'],
70
+ ['openai/gpt-oss-20b', 'GPT OSS 20B', 'B+', '42.0%', '32k'],
71
+ ['stockmark/stockmark-2-100b-instruct', 'Stockmark 100B', 'B+', '36.0%', '32k'],
72
+ ['bytedance/seed-oss-36b-instruct', 'Seed OSS 36B', 'B+', '38.0%', '32k'],
73
+ ['stepfun-ai/step-3.5-flash', 'Step 3.5 Flash', 'B+', '74.4%', '32k'],
68
74
  // ── B tier β€” SWE-bench Verified 20–35% ──
69
- ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%'], // 62.0% SWE-bench (est.)
70
- ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%'], // 32.0% SWE-bench (est.)
71
- ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%'], // 34.0% SWE-bench (est.)
72
- ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%'], // 30.0% SWE-bench (est.)
73
- ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%'], // 28.2% SWE-bench (est.)
75
+ ['meta/llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 'B', '62.0%', '128k'],
76
+ ['mistralai/mixtral-8x22b-instruct-v0.1', 'Mixtral 8x22B', 'B', '32.0%', '64k'],
77
+ ['mistralai/ministral-14b-instruct-2512', 'Ministral 14B', 'B', '34.0%', '32k'],
78
+ ['ibm/granite-34b-code-instruct', 'Granite 34B Code', 'B', '30.0%', '32k'],
79
+ ['deepseek-ai/deepseek-r1-distill-llama-8b', 'R1 Distill 8B', 'B', '28.2%', '32k'],
74
80
  // ── C tier β€” SWE-bench Verified <25% or lightweight edge models ──
75
- ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%'], // 22.6% SWE-bench (est.)
76
- ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%'], // 18.0% SWE-bench (est.)
77
- ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%'], // 12.0% SWE-bench (est.)
78
- ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%'], // 14.0% SWE-bench (est.)
81
+ ['deepseek-ai/deepseek-r1-distill-qwen-7b', 'R1 Distill 7B', 'C', '22.6%', '32k'],
82
+ ['google/gemma-2-9b-it', 'Gemma 2 9B', 'C', '18.0%', '8k'],
83
+ ['microsoft/phi-3.5-mini-instruct', 'Phi 3.5 Mini', 'C', '12.0%', '128k'],
84
+ ['microsoft/phi-4-mini-instruct', 'Phi 4 Mini', 'C', '14.0%', '128k'],
79
85
  ]
80
86
 
81
87
  // πŸ“– All sources combined - used by the main script
@@ -84,21 +90,12 @@ export const sources = {
84
90
  name: 'NIM',
85
91
  models: nvidiaNim,
86
92
  },
87
- // πŸ“– Add more sources here in the future, for example:
88
- // openai: {
89
- // name: 'OpenAI',
90
- // models: [...],
91
- // },
92
- // anthropic: {
93
- // name: 'Anthropic',
94
- // models: [...],
95
- // },
96
93
  }
97
94
 
98
95
  // πŸ“– Flatten all models from all sources for backward compatibility
99
96
  export const MODELS = []
100
97
  for (const [sourceKey, sourceData] of Object.entries(sources)) {
101
- for (const [modelId, label, tier, sweScore] of sourceData.models) {
102
- MODELS.push([modelId, label, tier, sweScore])
98
+ for (const [modelId, label, tier, sweScore, ctx] of sourceData.models) {
99
+ MODELS.push([modelId, label, tier, sweScore, ctx])
103
100
  }
104
101
  }