npm - free-coding-models - Versions diffs - 0.1.65 → 0.1.67 - Mend

free-coding-models 0.1.65 → 0.1.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +172 -31
package/bin/free-coding-models.js +457 -94
package/lib/config.js +14 -2
package/lib/utils.js +121 -25
package/package.json +1 -1
package/sources.js +70 -2

package/lib/config.js CHANGED Viewed

@@ -24,7 +24,11 @@
  *       "codestral":  "csk-xxx",
  *       "hyperbolic": "eyJ...",
  *       "scaleway":   "scw-xxx",
- *       "googleai":   "AIza..."
+ *       "googleai":   "AIza...",
+ *       "siliconflow":"sk-xxx",
+ *       "together":   "together-xxx",
+ *       "cloudflare": "cf-xxx",
+ *       "perplexity": "pplx-xxx"
  *     },
  *     "providers": {
  *       "nvidia":     { "enabled": true },
@@ -39,7 +43,11 @@
  *       "codestral":  { "enabled": true },
  *       "hyperbolic": { "enabled": true },
  *       "scaleway":   { "enabled": true },
- *       "googleai":   { "enabled": true }
+ *       "googleai":   { "enabled": true },
+ *       "siliconflow":{ "enabled": true },
+ *       "together":   { "enabled": true },
+ *       "cloudflare": { "enabled": true },
+ *       "perplexity": { "enabled": true }
  *     },
  *     "favorites": [
  *       "nvidia/deepseek-ai/deepseek-v3.2"
@@ -94,6 +102,10 @@ const ENV_VARS = {
   hyperbolic: 'HYPERBOLIC_API_KEY',
   scaleway:   'SCALEWAY_API_KEY',
   googleai:   'GOOGLE_API_KEY',
+  siliconflow:'SILICONFLOW_API_KEY',
+  together:   'TOGETHER_API_KEY',
+  cloudflare: ['CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_KEY'],
+  perplexity: ['PERPLEXITY_API_KEY', 'PPLX_API_KEY'],
 }
 /**

package/lib/utils.js CHANGED Viewed

@@ -27,14 +27,18 @@
  *
  * @functions
  *   → getAvg(result) — Calculate average latency from successful pings only
- *   → getVerdict(result) — Determine model health verdict based on avg latency and status
+ *   → getVerdict(result) — Determine model health verdict based on avg latency and stability
  *   → getUptime(result) — Calculate uptime percentage (successful / total pings)
+ *   → getP95(result) — Calculate 95th percentile latency from successful pings
+ *   → getJitter(result) — Calculate latency standard deviation (jitter)
+ *   → getStabilityScore(result) — Composite 0–100 stability score (p95 + jitter + spikes + uptime)
  *   → sortResults(results, sortColumn, sortDirection) — Sort model results by any column
  *   → filterByTier(results, tierLetter) — Filter results by tier letter (S/A/B/C)
- *   → findBestModel(results) — Pick the best model by status → avg → uptime priority
+ *   → findBestModel(results) — Pick the best model by status → avg → stability → uptime priority
  *   → parseArgs(argv) — Parse CLI arguments into structured flags and values
  *
- * @exports getAvg, getVerdict, getUptime, sortResults, filterByTier, findBestModel, parseArgs
+ * @exports getAvg, getVerdict, getUptime, getP95, getJitter, getStabilityScore
+ * @exports sortResults, filterByTier, findBestModel, parseArgs
  * @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP
  *
  * @see bin/free-coding-models.js — main CLI that imports these utils
@@ -54,7 +58,7 @@ export const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
 // 📖 Used by sortResults when sorting by the "verdict" column.
 // 📖 "Perfect" means < 400ms avg, "Pending" means no data yet.
 // 📖 The order matters — it determines sort rank in the TUI table.
-export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
+export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Spiky', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
 // 📖 Maps a CLI tier letter (--tier S/A/B/C) to the full tier strings it includes.
 // 📖 Example: --tier A matches A+, A, and A- models (all "A-family" tiers).
@@ -91,11 +95,17 @@ export const getAvg = (r) => {
 //   2. Timeout/down BUT was previously up → "Unstable" (it worked before, now it doesn't)
 //   3. Timeout/down and never worked → "Not Active" (model might be offline)
 //   4. No successful pings yet → "Pending" (still waiting for first response)
-//   5. Avg < 400ms → "Perfect"
-//   6. Avg < 1000ms → "Normal"
-//   7. Avg < 3000ms → "Slow"
-//   8. Avg < 5000ms → "Very Slow"
-//   9. Avg >= 5000ms → "Unstable"
+//   5. Stability-aware speed tiers (avg + p95/jitter penalty):
+//      - Avg < 400ms + stable → "Perfect"
+//      - Avg < 400ms but p95 > 3000ms (with ≥3 successful pings) → "Spiky" (fast on average, but tail latency hurts)
+//      - Avg < 1000ms → "Normal", or "Spiky" when p95 > 5000ms (with ≥3 successful pings)
+//      - Avg < 3000ms → "Slow"
+//      - Avg < 5000ms → "Very Slow"
+//      - Avg >= 5000ms → "Unstable"
+//
+// 📖 The "Spiky" verdict catches models that look fast on paper (low avg) but randomly
+//    stall your IDE/agent with tail-latency spikes. A model with avg 250ms but p95 6000ms
+//    gets downgraded from "Perfect" to "Spiky" — because consistency matters more than speed.
 //
 // 📖 The "wasUpBefore" check is key — it distinguishes between a model that's
 //    temporarily flaky vs one that was never reachable in the first place.
@@ -107,8 +117,20 @@ export const getVerdict = (r) => {
   if ((r.status === 'timeout' || r.status === 'down') && wasUpBefore) return 'Unstable'
   if (r.status === 'timeout' || r.status === 'down') return 'Not Active'
   if (avg === Infinity) return 'Pending'
-  if (avg < 400) return 'Perfect'
-  if (avg < 1000) return 'Normal'
+  // 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
+  const successfulPings = (r.pings || []).filter(p => p.code === '200')
+  const p95 = getP95(r)
+  if (avg < 400) {
+    // 📖 Only flag as "Spiky" when we have enough data (≥3 pings) to judge stability
+    if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
+    return 'Perfect'
+  }
+  if (avg < 1000) {
+    if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
+    return 'Normal'
+  }
   if (avg < 3000) return 'Slow'
   if (avg < 5000) return 'Very Slow'
   if (avg < 10000) return 'Unstable'
@@ -125,21 +147,84 @@ export const getUptime = (r) => {
   return Math.round((successful / r.pings.length) * 100)
 }
+// 📖 getP95: Calculate the 95th percentile latency from successful pings (code === '200').
+// 📖 The p95 answers: "at least 95% of successful pings were this fast or faster."
+// 📖 A low p95 means consistently fast responses — a high p95 signals tail-latency spikes.
+// 📖 Returns Infinity when no successful pings exist (a missing r.pings counts as empty).
+// 📖 Algorithm (nearest-rank): sort latencies ascending, pick the value at ceil(N * 0.95) - 1.
+// 📖 With fewer than 20 samples that index is N - 1, so the p95 is simply the slowest successful ping.
+// 📖 Example: [100, 200, 300, 400, 5000] → p95 index = ceil(5 * 0.95) - 1 = 4 → 5000ms
+export const getP95 = (r) => {
+  const successfulPings = (r.pings || []).filter(p => p.code === '200')  // code is compared as the string '200'
+  if (successfulPings.length === 0) return Infinity
+  const sorted = successfulPings.map(p => p.ms).sort((a, b) => a - b)  // map() builds a new array, so the in-place sort leaves r.pings untouched
+  const idx = Math.ceil(sorted.length * 0.95) - 1
+  return sorted[Math.max(0, idx)]  // clamp is defensive: idx is already ≥ 0 once length ≥ 1
+}
+// 📖 getJitter: Calculate latency standard deviation (σ) from successful pings (code === '200').
+// 📖 Low jitter = predictable response times. High jitter = erratic, spiky latency.
+// 📖 Returns 0 when fewer than 2 successful pings (can't compute variance from 1 point).
+// 📖 Uses population σ (divides by N, not N-1) since we have ALL the data, not a sample; the result is rounded to a whole number.
+export const getJitter = (r) => {
+  const successfulPings = (r.pings || []).filter(p => p.code === '200')
+  if (successfulPings.length < 2) return 0
+  const mean = successfulPings.reduce((a, b) => a + b.ms, 0) / successfulPings.length  // a = running sum, b = current ping
+  const variance = successfulPings.reduce((sum, p) => sum + (p.ms - mean) ** 2, 0) / successfulPings.length  // mean of squared deviations
+  return Math.round(Math.sqrt(variance))
+}
+// 📖 getStabilityScore: Composite 0–100 score that rewards consistency and reliability.
+// 📖 Combines four signals into a single number:
+//   - p95 latency (30%) — penalizes tail-latency spikes (scores 0 once p95 ≥ 5000ms)
+//   - Jitter / σ (30%) — penalizes erratic response times (scores 0 once σ ≥ 2000ms)
+//   - Spike rate (20%) — fraction of successful pings slower than 3000ms
+//   - Uptime / reliability (20%) — the getUptime(r) percentage, used as-is
+//
+// 📖 Each component is normalized to 0–100, then weighted, summed and rounded.
+// 📖 Returns -1 when no successful pings exist (not enough data yet).
+//
+// 📖 Example (illustrative, rough numbers):
+//   Model A: avg 250ms, p95 6000ms (tons of spikes) → score ~30
+//   Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
+//   In real usage, Model B FEELS faster because it doesn't randomly stall.
+export const getStabilityScore = (r) => {
+  const successfulPings = (r.pings || []).filter(p => p.code === '200')
+  if (successfulPings.length === 0) return -1
+  const p95 = getP95(r)
+  const jitter = getJitter(r)
+  const uptime = getUptime(r)
+  const spikeCount = successfulPings.filter(p => p.ms > 3000).length  // strictly greater than 3000 counts as a spike
+  const spikeRate = spikeCount / successfulPings.length
+  // 📖 Normalize each component to 0–100 (higher = better)
+  const p95Score = Math.max(0, Math.min(100, 100 * (1 - p95 / 5000)))  // linear: 100 at 0 → 0 at 5000 and beyond
+  const jitterScore = Math.max(0, Math.min(100, 100 * (1 - jitter / 2000)))  // linear: 100 at σ = 0 → 0 at σ ≥ 2000
+  const spikeScore = Math.max(0, 100 * (1 - spikeRate))
+  const reliabilityScore = uptime
+  // 📖 Weighted composite: 30% p95, 30% jitter, 20% spikes, 20% reliability
+  const score = 0.3 * p95Score + 0.3 * jitterScore + 0.2 * spikeScore + 0.2 * reliabilityScore
+  return Math.round(score)
+}
 // 📖 sortResults: Sort the results array by any column the user can click/press in the TUI.
 // 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
 //
 // 📖 Supported columns (matching the keyboard shortcuts in the TUI):
-//   - 'rank'    (R key) — original index from sources.js
-//   - 'tier'    (T key) — tier hierarchy (S+ first, C last)
-//   - 'origin'  (O key) — provider name (all NIM for now, future-proofed)
-//   - 'model'   (M key) — alphabetical by display label
-//   - 'ping'    (L key) — last ping latency (only successful ones count)
-//   - 'avg'     (A key) — average latency across all successful pings
-//   - 'swe'     (S key) — SWE-bench score (higher is better)
-//   - 'ctx'     (N key) — context window size (larger is better)
-//   - 'condition' (H key) — health status (alphabetical)
-//   - 'verdict' (V key) — verdict order (Perfect → Pending)
-//   - 'uptime'  (U key) — uptime percentage
+//   - 'rank'      (R key) — original index from sources.js
+//   - 'tier'      (T key) — tier hierarchy (S+ first, C last)
+//   - 'origin'    (O key) — provider name (all NIM for now, future-proofed)
+//   - 'model'     (M key) — alphabetical by display label
+//   - 'ping'      (L key) — last ping latency (only successful ones count)
+//   - 'avg'       (A key) — average latency across all successful pings
+//   - 'swe'       (S key) — SWE-bench score (higher is better)
+//   - 'ctx'       (N key) — context window size (larger is better)
+//   - 'condition' (H key) — health status (alphabetical)
+//   - 'verdict'   (V key) — verdict order (Perfect → Pending)
+//   - 'uptime'    (U key) — uptime percentage
+//   - 'stability' (B key) — stability score (0–100, higher = more stable)
 //
 // 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
 export const sortResults = (results, sortColumn, sortDirection) => {
@@ -219,6 +304,11 @@ export const sortResults = (results, sortColumn, sortDirection) => {
       case 'uptime':
         cmp = getUptime(a) - getUptime(b)
         break
+      case 'stability':
+        // 📖 Sort by stability score — higher = more stable = better
+        // 📖 Models with no data score -1, the lowest value: they sort first ascending, last descending
+        cmp = getStabilityScore(a) - getStabilityScore(b)
+        break
     }
     // 📖 Flip comparison for descending order
@@ -242,16 +332,19 @@ export function filterByTier(results, tierLetter) {
 // 📖 findBestModel: Pick the single best model from a results array.
 // 📖 Used by --fiable mode to output the most reliable model after 10s of analysis.
 //
-// 📖 Selection priority (tri-key sort):
+// 📖 Selection priority (quad-key sort):
 //   1. Status: "up" models always beat non-up models
 //   2. Average latency: faster average wins (lower is better)
-//   3. Uptime %: higher uptime wins as tiebreaker
+//   3. Stability score: higher stability wins (more consistent = better)
+//   4. Uptime %: higher uptime wins as final tiebreaker
 //
 // 📖 Returns null if the array is empty.
 export function findBestModel(results) {
   const sorted = [...results].sort((a, b) => {
     const avgA = getAvg(a)
     const avgB = getAvg(b)
+    const stabilityA = getStabilityScore(a)
+    const stabilityB = getStabilityScore(b)
     const uptimeA = getUptime(a)
     const uptimeB = getUptime(b)
@@ -262,7 +355,10 @@ export function findBestModel(results) {
     // 📖 Priority 2: Lower average latency = faster = better
     if (avgA !== avgB) return avgA - avgB
-    // 📖 Priority 3: Higher uptime = more reliable = better (tiebreaker)
+    // 📖 Priority 3: Higher stability = more consistent = better
+    if (stabilityA !== stabilityB) return stabilityB - stabilityA
+    // 📖 Priority 4: Higher uptime = more reliable = better (final tiebreaker)
     return uptimeB - uptimeA
   })

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "free-coding-models",
-  "version": "0.1.65",
+  "version": "0.1.67",
   "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
   "keywords": [
     "nvidia",

package/sources.js CHANGED Viewed

@@ -27,8 +27,8 @@
  *   📖 Secondary: https://swe-rebench.com (independent evals, scores are lower)
  *   📖 Leaderboard tracker: https://www.marc0.dev/en/leaderboard
  *
- *   @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai — model arrays per provider
- *   @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai } each with { name, url, models }
+ *   @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity — model arrays per provider
+ *   @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity } each with { name, url, models }
  *   @exports MODELS — flat array of [modelId, label, tier, sweScore, ctx, providerKey]
  *
  *   📖 MODELS now includes providerKey as 6th element so ping() knows which
@@ -230,6 +230,54 @@ export const googleai = [
   ['gemma-3-4b-it',                            'Gemma 3 4B',         'C',  '10.0%', '128k'],
 ]
+// 📖 SiliconFlow source - https://cloud.siliconflow.cn
+// 📖 OpenAI-compatible endpoint: https://api.siliconflow.com/v1/chat/completions
+// 📖 Free model quotas vary by model and can change over time, so re-check them before relying on this list.
+export const siliconflow = [  // each row: [modelId, label, tier, sweScore, ctx]
+  ['Qwen/Qwen3-Coder-480B-A35B-Instruct',      'Qwen3 Coder 480B',   'S+', '70.6%', '256k'],
+  ['deepseek-ai/DeepSeek-V3.2',                'DeepSeek V3.2',      'S+', '73.1%', '128k'],
+  ['Qwen/Qwen3-235B-A22B',                     'Qwen3 235B',         'S+', '70.0%', '128k'],
+  ['deepseek-ai/DeepSeek-R1',                  'DeepSeek R1',        'S',  '61.0%', '128k'],
+  ['Qwen/Qwen3-Coder-30B-A3B-Instruct',        'Qwen3 Coder 30B',    'A+', '55.0%', '32k'],
+  ['Qwen/Qwen2.5-Coder-32B-Instruct',          'Qwen2.5 Coder 32B',  'A',  '46.0%', '32k'],
+]
+// 📖 Together AI source - https://api.together.ai
+// 📖 OpenAI-compatible endpoint: https://api.together.xyz/v1/chat/completions
+// 📖 Credits/promotions vary by account and region; verify the current quota in the Together console.
+export const together = [  // each row: [modelId, label, tier, sweScore, ctx]
+  ['moonshotai/Kimi-K2.5',                     'Kimi K2.5',          'S+', '76.8%', '128k'],
+  ['Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8',  'Qwen3 Coder 480B',   'S+', '70.6%', '256k'],
+  ['deepseek-ai/DeepSeek-V3.1',                'DeepSeek V3.1',      'S',  '62.0%', '128k'],
+  ['deepseek-ai/DeepSeek-R1',                  'DeepSeek R1',        'S',  '61.0%', '128k'],
+  ['openai/gpt-oss-120b',                      'GPT OSS 120B',       'S',  '60.0%', '128k'],
+  ['openai/gpt-oss-20b',                       'GPT OSS 20B',        'A',  '42.0%', '128k'],
+  ['meta-llama/Llama-3.3-70B-Instruct-Turbo',  'Llama 3.3 70B',      'A-', '39.5%', '128k'],
+]
+// 📖 Cloudflare Workers AI source - https://developers.cloudflare.com/workers-ai
+// 📖 OpenAI-compatible endpoint requires an account id in the URL path ({account_id} is a placeholder):
+// 📖 https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions
+// 📖 Free plan includes daily neuron quota and provider-level request limits.
+export const cloudflare = [  // each row: [modelId, label, tier, sweScore, ctx]
+  ['@cf/openai/gpt-oss-120b',                  'GPT OSS 120B',       'S',  '60.0%', '128k'],
+  ['@cf/qwen/qwen2.5-coder-32b-instruct',      'Qwen2.5 Coder 32B',  'A',  '46.0%', '32k'],
+  ['@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A',  '43.9%', '128k'],
+  ['@cf/openai/gpt-oss-20b',                   'GPT OSS 20B',        'A',  '42.0%', '128k'],
+  ['@cf/meta/llama-3.3-70b-instruct-fp8-fast', 'Llama 3.3 70B',      'A-', '39.5%', '128k'],
+  ['@cf/meta/llama-3.1-8b-instruct',           'Llama 3.1 8B',       'B',  '28.8%', '128k'],
+]
+// 📖 Perplexity source - https://docs.perplexity.ai
+// 📖 Chat Completions endpoint: https://api.perplexity.ai/chat/completions
+// 📖 Sonar models focus on search/reasoning and have tiered API rate limits.
+export const perplexity = [  // each row: [modelId, label, tier, sweScore, ctx]
+  ['sonar-reasoning-pro',                      'Sonar Reasoning Pro', 'A+', '50.0%', '128k'],
+  ['sonar-reasoning',                          'Sonar Reasoning',     'A',  '45.0%', '128k'],
+  ['sonar-pro',                                'Sonar Pro',           'B+', '32.0%', '128k'],
+  ['sonar',                                    'Sonar',               'B',  '25.0%', '128k'],
+]
 // 📖 All sources combined - used by the main script
 // 📖 Each source has: name (display), url (API endpoint), models (array of model tuples)
 export const sources = {
@@ -298,6 +346,26 @@ export const sources = {
     url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
     models: googleai,
   },
+  siliconflow: {
+    name: 'SiliconFlow',
+    url: 'https://api.siliconflow.com/v1/chat/completions',
+    models: siliconflow,
+  },
+  together: {
+    name: 'Together AI',
+    url: 'https://api.together.xyz/v1/chat/completions',
+    models: together,
+  },
+  cloudflare: {
+    name: 'Cloudflare AI',
+    url: 'https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions',
+    models: cloudflare,
+  },
+  perplexity: {
+    name: 'Perplexity',
+    url: 'https://api.perplexity.ai/chat/completions',
+    models: perplexity,
+  },
 }
 // 📖 Flatten all models from all sources — each entry includes providerKey as 6th element