npm - free-coding-models - Versions diffs - 0.1.40 → 0.1.41 - Mend

free-coding-models 0.1.40 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -199,23 +199,23 @@ free-coding-models
 ## 🤖 Coding Models
-**44 coding models** across 8 tiers, ranked by [Aider Polyglot benchmark](https://aider.chat/docs/leaderboards) (225 coding exercises across C++/Go/Java/JS/Python/Rust). Models without a confirmed Aider score are estimated from model family, size, and published release benchmarks.
-| Tier | Score | Count | Models |
-|------|-------|-------|--------|
-| **S+** | 75%+ | 7 | DeepSeek V3.1/Terminus, DeepSeek V3.2, Kimi K2.5, Devstral 2, Nemotron Ultra 253B, Mistral Large 675B |
-| **S**  | 62–74% | 7 | Qwen2.5 Coder 32B, GLM 5, Qwen3.5 400B VLM, Qwen3 Coder 480B, Qwen3 80B Thinking, Llama 3.1 405B, MiniMax M2.1 |
-| **A+** | 54–62% | 6 | Kimi K2 Thinking/Instruct, Qwen3 235B, Llama 3.3 70B, GLM 4.7, Qwen3 80B Instruct |
-| **A**  | 44–54% | 5 | MiniMax M2, Mistral Medium 3, Magistral Small, Nemotron Nano 30B, R1 Distill 32B |
-| **A-** | 36–44% | 5 | GPT OSS 120B, Nemotron Super 49B, Llama 4 Scout, R1 Distill 14B, Colosseum 355B |
-| **B+** | 25–36% | 5 | QwQ 32B, GPT OSS 20B, Stockmark 100B, Seed OSS 36B, Step 3.5 Flash |
-| **B**  | 14–25% | 5 | Llama 4 Maverick, Mixtral 8x22B, Ministral 14B, Granite 34B Code, R1 Distill 8B |
-| **C**  | <14%  | 4 | R1 Distill 7B, Gemma 2 9B, Phi 3.5 Mini, Phi 4 Mini |
+**44 coding models** across 8 tiers, ranked by [SWE-bench Verified](https://www.swebench.com) — the industry-standard benchmark measuring real GitHub issue resolution. Scores are self-reported by providers unless noted.
+| Tier | SWE-bench | Models |
+|------|-----------|--------|
+| **S+** | ≥70% | GLM 5 (77.8%), Kimi K2.5 (76.8%), Step 3.5 Flash (74.4%), MiniMax M2.1 (74.0%), GLM 4.7 (73.8%), DeepSeek V3.2 (73.1%), Devstral 2 (72.2%), Kimi K2 Thinking (71.3%), Qwen3 Coder 480B (70.6%), Qwen3 235B (70.0%) |
+| **S** | 60–70% | MiniMax M2 (69.4%), DeepSeek V3.1 Terminus (68.4%), Qwen3 80B Thinking (68.0%), Qwen3.5 400B (68.0%), Kimi K2 Instruct (65.8%), Qwen3 80B Instruct (65.0%), DeepSeek V3.1 (62.0%), Llama 4 Maverick (62.0%), GPT OSS 120B (60.0%) |
+| **A+** | 50–60% | Mistral Large 675B (58.0%), Nemotron Ultra 253B (56.0%), Colosseum 355B (52.0%), QwQ 32B (50.0%) |
+| **A** | 40–50% | Nemotron Super 49B (49.0%), Mistral Medium 3 (48.0%), Qwen2.5 Coder 32B (46.0%), Magistral Small (45.0%), Llama 4 Scout (44.0%), Llama 3.1 405B (44.0%), Nemotron Nano 30B (43.0%), R1 Distill 32B (43.9%), GPT OSS 20B (42.0%) |
+| **A-** | 35–40% | Llama 3.3 70B (39.5%), Seed OSS 36B (38.0%), R1 Distill 14B (37.7%), Stockmark 100B (36.0%) |
+| **B+** | 30–35% | Ministral 14B (34.0%), Mixtral 8x22B (32.0%), Granite 34B Code (30.0%) |
+| **B** | 20–30% | R1 Distill 8B (28.2%), R1 Distill 7B (22.6%) |
+| **C** | <20% | Gemma 2 9B (18.0%), Phi 4 Mini (14.0%), Phi 3.5 Mini (12.0%) |
 ### Tier scale
-- **S+/S** — Frontier coders, top Aider polyglot scores, best for complex refactors
-- **A+/A** — Excellent alternatives, strong at most coding tasks
+- **S+/S** — Elite frontier coders (≥60% SWE-bench), best for complex real-world tasks and refactors
+- **A+/A** — Great alternatives, strong at most coding tasks
 - **A-/B+** — Solid performers, good for targeted programming tasks
 - **B/C** — Lightweight or older models, good for code completion on constrained infra

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "free-coding-models",
-  "version": "0.1.40",
+  "version": "0.1.41",
   "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
   "keywords": [
     "nvidia",

package/sources.js CHANGED Viewed

@@ -8,9 +8,9 @@
  *   - model_id: The model identifier for API calls
  *   - display_label: Human-friendly name for display
  *   - tier: Performance tier (S+, S, A+, A, A-, B+, B, C)
- *   - swe_score: SWE-bench Verified score percentage
+ *   - swe_score: SWE-bench Verified score percentage (self-reported by model provider)
  *   - ctx: Context window size in tokens (e.g., "128k", "32k")
- *
+ *
  *   Add new sources here to support additional providers beyond NIM.
  *
  *   🎯 Tier scale (based on SWE-bench Verified):
@@ -18,12 +18,14 @@
  *   - S:  60-70% (excellent)
  *   - A+: 50-60% (great)
  *   - A:  40-50% (good)
- *   - A-: 35-45% (decent)
- *   - B+: 30-40% (average)
+ *   - A-: 35-40% (decent)
+ *   - B+: 30-35% (average)
  *   - B:  20-30% (below average)
  *   - C:  <20% (lightweight/edge)
  *
- *   📖 Source: https://www.swebench.com
+ *   📖 Source: https://www.swebench.com — scores are self-reported unless noted
+ *   📖 Secondary: https://swe-rebench.com (independent evals, scores are lower)
+ *   📖 Leaderboard tracker: https://www.marc0.dev/en/leaderboard
  *
  *   @exports Object containing all sources and their models
  */
@@ -31,57 +33,57 @@
 // 📖 NIM source - https://build.nvidia.com
 export const nvidiaNim = [
   // ── S+ tier — SWE-bench Verified ≥70% ──
-  ['deepseek-ai/deepseek-v3.1',                    'DeepSeek V3.1',       'S+', '49.2%', '128k'],
-  ['deepseek-ai/deepseek-v3.1-terminus',           'DeepSeek V3.1 Term',  'S+', '49.2%', '128k'],
   ['deepseek-ai/deepseek-v3.2',                    'DeepSeek V3.2',       'S+', '73.1%', '128k'],
   ['moonshotai/kimi-k2.5',                         'Kimi K2.5',           'S+', '76.8%', '128k'],
-  ['mistralai/devstral-2-123b-instruct-2512',      'Devstral 2 123B',     'S+', '62.0%', '128k'],
-  ['nvidia/llama-3.1-nemotron-ultra-253b-v1',      'Nemotron Ultra 253B', 'S+', '56.0%', '128k'],
-  ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B',  'S+', '58.0%', '128k'],
-  // ── S tier — SWE-bench Verified 50–70% ──
-  ['qwen/qwen2.5-coder-32b-instruct',              'Qwen2.5 Coder 32B',   'S', '46.0%', '32k'],
-  ['z-ai/glm5',                                    'GLM 5',               'S', '77.8%', '128k'],
-  ['qwen/qwen3.5-397b-a17b',                       'Qwen3.5 400B VLM',    'S', '68.0%', '128k'],
-  ['qwen/qwen3-coder-480b-a35b-instruct',          'Qwen3 Coder 480B',    'S', '72.0%', '128k'],
-  ['qwen/qwen3-next-80b-a3b-thinking',             'Qwen3 80B Thinking',  'S', '68.0%', '128k'],
-  ['meta/llama-3.1-405b-instruct',                 'Llama 3.1 405B',      'S', '44.0%', '128k'],
-  ['minimaxai/minimax-m2.1',                       'MiniMax M2.1',        'S', '70.0%', '128k'],
-  // ── A+ tier — SWE-bench Verified 60–70% ──
-  ['moonshotai/kimi-k2-thinking',                  'Kimi K2 Thinking',    'A+', '67.0%', '128k'],
-  ['moonshotai/kimi-k2-instruct',                  'Kimi K2 Instruct',    'A+', '65.8%', '128k'],
-  ['qwen/qwen3-235b-a22b',                         'Qwen3 235B',          'A+', '70.0%', '128k'],
-  ['meta/llama-3.3-70b-instruct',                  'Llama 3.3 70B',       'A+', '39.5%', '128k'],
-  ['z-ai/glm4.7',                                  'GLM 4.7',             'A+', '73.8%', '128k'],
-  ['qwen/qwen3-next-80b-a3b-instruct',             'Qwen3 80B Instruct',  'A+', '65.0%', '128k'],
-  // ── A tier — SWE-bench Verified 45–60% ──
-  ['minimaxai/minimax-m2',                         'MiniMax M2',          'A', '56.5%', '128k'],
-  ['mistralai/mistral-medium-3-instruct',          'Mistral Medium 3',    'A', '48.0%', '128k'],
-  ['mistralai/magistral-small-2506',               'Magistral Small',     'A', '45.0%', '32k'],
-  ['nvidia/nemotron-3-nano-30b-a3b',               'Nemotron Nano 30B',   'A', '43.0%', '128k'],
-  ['deepseek-ai/deepseek-r1-distill-qwen-32b',     'R1 Distill 32B',      'A', '43.9%', '128k'],
-  // ── A- tier — SWE-bench Verified 35–45% ──
-  ['openai/gpt-oss-120b',                          'GPT OSS 120B',        'A-', '60.0%', '128k'],
-  ['nvidia/llama-3.3-nemotron-super-49b-v1.5',     'Nemotron Super 49B',  'A-', '49.0%', '128k'],
-  ['meta/llama-4-scout-17b-16e-instruct',          'Llama 4 Scout',       'A-', '44.0%', '128k'],
+  ['z-ai/glm5',                                    'GLM 5',               'S+', '77.8%', '128k'],
+  ['z-ai/glm4.7',                                  'GLM 4.7',             'S+', '73.8%', '200k'],
+  ['moonshotai/kimi-k2-thinking',                  'Kimi K2 Thinking',    'S+', '71.3%', '256k'],
+  ['minimaxai/minimax-m2.1',                       'MiniMax M2.1',        'S+', '74.0%', '200k'],
+  ['stepfun-ai/step-3.5-flash',                    'Step 3.5 Flash',      'S+', '74.4%', '256k'],
+  ['qwen/qwen3-coder-480b-a35b-instruct',          'Qwen3 Coder 480B',    'S+', '70.6%', '256k'],
+  ['qwen/qwen3-235b-a22b',                         'Qwen3 235B',          'S+', '70.0%', '128k'],
+  ['mistralai/devstral-2-123b-instruct-2512',      'Devstral 2 123B',     'S+', '72.2%', '256k'],
+  // ── S tier — SWE-bench Verified 60–70% ──
+  ['deepseek-ai/deepseek-v3.1-terminus',           'DeepSeek V3.1 Term',  'S',  '68.4%', '128k'],
+  ['moonshotai/kimi-k2-instruct',                  'Kimi K2 Instruct',    'S',  '65.8%', '128k'],
+  ['minimaxai/minimax-m2',                         'MiniMax M2',          'S',  '69.4%', '128k'],
+  ['qwen/qwen3-next-80b-a3b-thinking',             'Qwen3 80B Thinking',  'S',  '68.0%', '128k'],
+  ['qwen/qwen3-next-80b-a3b-instruct',             'Qwen3 80B Instruct',  'S',  '65.0%', '128k'],
+  ['qwen/qwen3.5-397b-a17b',                       'Qwen3.5 400B VLM',    'S',  '68.0%', '128k'],
+  ['openai/gpt-oss-120b',                          'GPT OSS 120B',        'S',  '60.0%', '128k'],
+  ['meta/llama-4-maverick-17b-128e-instruct',      'Llama 4 Maverick',    'S',  '62.0%', '1M'],
+  ['deepseek-ai/deepseek-v3.1',                    'DeepSeek V3.1',       'S',  '62.0%', '128k'],
+  // ── A+ tier — SWE-bench Verified 50–60% ──
+  ['nvidia/llama-3.1-nemotron-ultra-253b-v1',      'Nemotron Ultra 253B', 'A+', '56.0%', '128k'],
+  ['mistralai/mistral-large-3-675b-instruct-2512', 'Mistral Large 675B',  'A+', '58.0%', '256k'],
+  ['qwen/qwq-32b',                                 'QwQ 32B',             'A+', '50.0%', '131k'],
+  ['igenius/colosseum_355b_instruct_16k',          'Colosseum 355B',      'A+', '52.0%', '16k'],
+  // ── A tier — SWE-bench Verified 40–50% ──
+  ['mistralai/mistral-medium-3-instruct',          'Mistral Medium 3',    'A',  '48.0%', '128k'],
+  ['mistralai/magistral-small-2506',               'Magistral Small',     'A',  '45.0%', '32k'],
+  ['nvidia/llama-3.3-nemotron-super-49b-v1.5',     'Nemotron Super 49B',  'A',  '49.0%', '128k'],
+  ['meta/llama-4-scout-17b-16e-instruct',          'Llama 4 Scout',       'A',  '44.0%', '10M'],
+  ['nvidia/nemotron-3-nano-30b-a3b',               'Nemotron Nano 30B',   'A',  '43.0%', '128k'],
+  ['deepseek-ai/deepseek-r1-distill-qwen-32b',     'R1 Distill 32B',      'A',  '43.9%', '128k'],
+  ['openai/gpt-oss-20b',                           'GPT OSS 20B',         'A',  '42.0%', '128k'],
+  ['qwen/qwen2.5-coder-32b-instruct',              'Qwen2.5 Coder 32B',   'A',  '46.0%', '32k'],
+  ['meta/llama-3.1-405b-instruct',                 'Llama 3.1 405B',      'A',  '44.0%', '128k'],
+  // ── A- tier — SWE-bench Verified 35–40% ──
+  ['meta/llama-3.3-70b-instruct',                  'Llama 3.3 70B',       'A-', '39.5%', '128k'],
   ['deepseek-ai/deepseek-r1-distill-qwen-14b',     'R1 Distill 14B',      'A-', '37.7%', '64k'],
-  ['igenius/colosseum_355b_instruct_16k',          'Colosseum 355B',      'A-', '52.0%', '16k'],
-  // ── B+ tier — SWE-bench Verified 30–40% ──
-  ['qwen/qwq-32b',                                 'QwQ 32B',            'B+', '50.0%', '32k'],
-  ['openai/gpt-oss-20b',                           'GPT OSS 20B',        'B+', '42.0%', '32k'],
-  ['stockmark/stockmark-2-100b-instruct',          'Stockmark 100B',     'B+', '36.0%', '32k'],
-  ['bytedance/seed-oss-36b-instruct',              'Seed OSS 36B',       'B+', '38.0%', '32k'],
-  ['stepfun-ai/step-3.5-flash',                    'Step 3.5 Flash',      'B+', '74.4%', '32k'],
-  // ── B tier — SWE-bench Verified 20–35% ──
-  ['meta/llama-4-maverick-17b-128e-instruct',      'Llama 4 Maverick',    'B', '62.0%', '128k'],
-  ['mistralai/mixtral-8x22b-instruct-v0.1',        'Mixtral 8x22B',       'B', '32.0%', '64k'],
-  ['mistralai/ministral-14b-instruct-2512',        'Ministral 14B',       'B', '34.0%', '32k'],
-  ['ibm/granite-34b-code-instruct',                'Granite 34B Code',    'B', '30.0%', '32k'],
-  ['deepseek-ai/deepseek-r1-distill-llama-8b',     'R1 Distill 8B',       'B', '28.2%', '32k'],
-  // ── C tier — SWE-bench Verified <25% or lightweight edge models ──
-  ['deepseek-ai/deepseek-r1-distill-qwen-7b',      'R1 Distill 7B',       'C', '22.6%', '32k'],
-  ['google/gemma-2-9b-it',                         'Gemma 2 9B',          'C', '18.0%', '8k'],
-  ['microsoft/phi-3.5-mini-instruct',              'Phi 3.5 Mini',        'C', '12.0%', '128k'],
-  ['microsoft/phi-4-mini-instruct',                'Phi 4 Mini',          'C', '14.0%', '128k'],
+  ['bytedance/seed-oss-36b-instruct',              'Seed OSS 36B',        'A-', '38.0%', '32k'],
+  ['stockmark/stockmark-2-100b-instruct',          'Stockmark 100B',      'A-', '36.0%', '32k'],
+  // ── B+ tier — SWE-bench Verified 30–35% ──
+  ['mistralai/mixtral-8x22b-instruct-v0.1',        'Mixtral 8x22B',       'B+', '32.0%', '64k'],
+  ['mistralai/ministral-14b-instruct-2512',        'Ministral 14B',       'B+', '34.0%', '32k'],
+  ['ibm/granite-34b-code-instruct',                'Granite 34B Code',    'B+', '30.0%', '32k'],
+  // ── B tier — SWE-bench Verified 20–30% ──
+  ['deepseek-ai/deepseek-r1-distill-llama-8b',     'R1 Distill 8B',       'B',  '28.2%', '32k'],
+  ['deepseek-ai/deepseek-r1-distill-qwen-7b',      'R1 Distill 7B',       'B',  '22.6%', '32k'],
+  // ── C tier — SWE-bench Verified <20% or lightweight edge models ──
+  ['google/gemma-2-9b-it',                         'Gemma 2 9B',          'C',  '18.0%', '8k'],
+  ['microsoft/phi-3.5-mini-instruct',              'Phi 3.5 Mini',        'C',  '12.0%', '128k'],
+  ['microsoft/phi-4-mini-instruct',                'Phi 4 Mini',          'C',  '14.0%', '128k'],
 ]
 // 📖 All sources combined - used by the main script