free-coding-models 0.1.65 → 0.1.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.js CHANGED
@@ -24,7 +24,11 @@
24
24
  * "codestral": "csk-xxx",
25
25
  * "hyperbolic": "eyJ...",
26
26
  * "scaleway": "scw-xxx",
27
- * "googleai": "AIza..."
27
+ * "googleai": "AIza...",
28
+ * "siliconflow":"sk-xxx",
29
+ * "together": "together-xxx",
30
+ * "cloudflare": "cf-xxx",
31
+ * "perplexity": "pplx-xxx"
28
32
  * },
29
33
  * "providers": {
30
34
  * "nvidia": { "enabled": true },
@@ -39,7 +43,11 @@
39
43
  * "codestral": { "enabled": true },
40
44
  * "hyperbolic": { "enabled": true },
41
45
  * "scaleway": { "enabled": true },
42
- * "googleai": { "enabled": true }
46
+ * "googleai": { "enabled": true },
47
+ * "siliconflow":{ "enabled": true },
48
+ * "together": { "enabled": true },
49
+ * "cloudflare": { "enabled": true },
50
+ * "perplexity": { "enabled": true }
43
51
  * },
44
52
  * "favorites": [
45
53
  * "nvidia/deepseek-ai/deepseek-v3.2"
@@ -94,6 +102,10 @@ const ENV_VARS = {
94
102
  hyperbolic: 'HYPERBOLIC_API_KEY',
95
103
  scaleway: 'SCALEWAY_API_KEY',
96
104
  googleai: 'GOOGLE_API_KEY',
105
+ siliconflow:'SILICONFLOW_API_KEY',
106
+ together: 'TOGETHER_API_KEY',
107
+ cloudflare: ['CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_KEY'],
108
+ perplexity: ['PERPLEXITY_API_KEY', 'PPLX_API_KEY'],
97
109
  }
98
110
 
99
111
  /**
package/lib/utils.js CHANGED
@@ -27,14 +27,18 @@
27
27
  *
28
28
  * @functions
29
29
  * → getAvg(result) — Calculate average latency from successful pings only
30
- * → getVerdict(result) — Determine model health verdict based on avg latency and status
30
+ * → getVerdict(result) — Determine model health verdict based on avg latency and stability
31
31
  * → getUptime(result) — Calculate uptime percentage (successful / total pings)
32
+ * → getP95(result) — Calculate 95th percentile latency from successful pings
33
+ * → getJitter(result) — Calculate latency standard deviation (jitter)
34
+ * → getStabilityScore(result) — Composite 0–100 stability score (p95 + jitter + spikes + uptime)
32
35
  * → sortResults(results, sortColumn, sortDirection) — Sort model results by any column
33
36
  * → filterByTier(results, tierLetter) — Filter results by tier letter (S/A/B/C)
34
- * → findBestModel(results) — Pick the best model by status → avg → uptime priority
37
+ * → findBestModel(results) — Pick the best model by status → avg → stability → uptime priority
35
38
  * → parseArgs(argv) — Parse CLI arguments into structured flags and values
36
39
  *
37
- * @exports getAvg, getVerdict, getUptime, sortResults, filterByTier, findBestModel, parseArgs
40
+ * @exports getAvg, getVerdict, getUptime, getP95, getJitter, getStabilityScore
41
+ * @exports sortResults, filterByTier, findBestModel, parseArgs
38
42
  * @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP
39
43
  *
40
44
  * @see bin/free-coding-models.js — main CLI that imports these utils
@@ -54,7 +58,7 @@ export const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
54
58
  // 📖 Used by sortResults when sorting by the "verdict" column.
55
59
  // 📖 "Perfect" means < 400ms avg, "Pending" means no data yet.
56
60
  // 📖 The order matters — it determines sort rank in the TUI table.
57
- export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
61
+ export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Spiky', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
58
62
 
59
63
  // 📖 Maps a CLI tier letter (--tier S/A/B/C) to the full tier strings it includes.
60
64
  // 📖 Example: --tier A matches A+, A, and A- models (all "A-family" tiers).
@@ -91,11 +95,17 @@ export const getAvg = (r) => {
91
95
  // 2. Timeout/down BUT was previously up → "Unstable" (it worked before, now it doesn't)
92
96
  // 3. Timeout/down and never worked → "Not Active" (model might be offline)
93
97
  // 4. No successful pings yet → "Pending" (still waiting for first response)
94
- // 5. Avg < 400ms "Perfect"
95
- // 6. Avg < 1000ms → "Normal"
96
- // 7. Avg < 3000ms → "Slow"
97
- // 8. Avg < 5000ms → "Very Slow"
98
- // 9. Avg >= 5000ms → "Unstable"
98
+ // 5. Stability-aware speed tiers (avg + p95/jitter penalty):
99
+ // - Avg < 400ms + stable → "Perfect"
100
+ // - Avg < 400ms but spiky p95 → "Spiky" (fast on average, but tail latency hurts)
101
+ // - Avg < 1000ms → "Normal" (or "Spiky" when p95 > 5000ms with ≥3 successful pings)
102
+ // - Avg < 3000ms → "Slow"
103
+ // - Avg < 5000ms → "Very Slow"
104
+ // - Avg >= 5000ms → "Unstable"
105
+ //
106
+ // 📖 The "Spiky" verdict catches models that look fast on paper (low avg) but randomly
107
+ // stall your IDE/agent with tail-latency spikes. A model with avg 250ms but p95 6000ms
108
+ // gets downgraded from "Perfect" to "Spiky" — because consistency matters more than speed.
99
109
  //
100
110
  // 📖 The "wasUpBefore" check is key — it distinguishes between a model that's
101
111
  // temporarily flaky vs one that was never reachable in the first place.
@@ -107,8 +117,20 @@ export const getVerdict = (r) => {
107
117
  if ((r.status === 'timeout' || r.status === 'down') && wasUpBefore) return 'Unstable'
108
118
  if (r.status === 'timeout' || r.status === 'down') return 'Not Active'
109
119
  if (avg === Infinity) return 'Pending'
110
- if (avg < 400) return 'Perfect'
111
- if (avg < 1000) return 'Normal'
120
+
121
+ // 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
122
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
123
+ const p95 = getP95(r)
124
+
125
+ if (avg < 400) {
126
+ // 📖 Only flag as "Spiky" when we have enough data (≥3 pings) to judge stability
127
+ if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
128
+ return 'Perfect'
129
+ }
130
+ if (avg < 1000) {
131
+ if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
132
+ return 'Normal'
133
+ }
112
134
  if (avg < 3000) return 'Slow'
113
135
  if (avg < 5000) return 'Very Slow'
114
136
  if (avg < 10000) return 'Unstable'
@@ -125,21 +147,84 @@ export const getUptime = (r) => {
125
147
  return Math.round((successful / r.pings.length) * 100)
126
148
  }
127
149
 
150
+ // 📖 getP95: Calculate the 95th percentile latency from successful pings (HTTP 200).
151
+ // 📖 The p95 answers: "about 95% of successful requests complete at or below this value" (nearest-rank method).
152
+ // 📖 A low p95 means consistently fast responses — a high p95 signals tail-latency spikes.
153
+ // 📖 Returns Infinity when no successful pings exist.
154
+ //
155
+ // 📖 Algorithm: sort latencies ascending, pick the value at ceil(N * 0.95) - 1.
156
+ // 📖 Example: [100, 200, 300, 400, 5000] → p95 index = ceil(5 * 0.95) - 1 = 4 → 5000ms
157
+ export const getP95 = (r) => {
158
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
159
+ if (successfulPings.length === 0) return Infinity
160
+ const sorted = successfulPings.map(p => p.ms).sort((a, b) => a - b)
161
+ const idx = Math.ceil(sorted.length * 0.95) - 1
162
+ return sorted[Math.max(0, idx)]
163
+ }
164
+
165
+ // 📖 getJitter: Calculate latency standard deviation (σ) from successful pings.
166
+ // 📖 Low jitter = predictable response times. High jitter = erratic, spiky latency.
167
+ // 📖 Returns 0 when fewer than 2 successful pings (can't compute variance from 1 point).
168
+ // 📖 Uses population σ (divides by N, not N-1) since we have ALL the data, not a sample.
169
+ export const getJitter = (r) => {
170
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
171
+ if (successfulPings.length < 2) return 0
172
+ const mean = successfulPings.reduce((a, b) => a + b.ms, 0) / successfulPings.length
173
+ const variance = successfulPings.reduce((sum, p) => sum + (p.ms - mean) ** 2, 0) / successfulPings.length
174
+ return Math.round(Math.sqrt(variance))
175
+ }
176
+
177
+ // 📖 getStabilityScore: Composite 0–100 score that rewards consistency and reliability.
178
+ // 📖 Combines four signals into a single number:
179
+ // - p95 latency (30%) — penalizes tail-latency spikes
180
+ // - Jitter / σ (30%) — penalizes erratic response times
181
+ // - Spike rate (20%) — fraction of successful pings slower than the 3000ms threshold
182
+ // - Uptime / reliability (20%) — fraction of successful pings
183
+ //
184
+ // 📖 Each component is normalized to 0–100, then weighted and combined.
185
+ // 📖 Returns -1 when no successful pings exist (not enough data yet).
186
+ //
187
+ // 📖 Example:
188
+ // Model A: avg 250ms, p95 6000ms (tons of spikes) → score ~30
189
+ // Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
190
+ // In real usage, Model B FEELS faster because it doesn't randomly stall.
191
+ export const getStabilityScore = (r) => {
192
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
193
+ if (successfulPings.length === 0) return -1
194
+
195
+ const p95 = getP95(r)
196
+ const jitter = getJitter(r)
197
+ const uptime = getUptime(r)
198
+ const spikeCount = successfulPings.filter(p => p.ms > 3000).length
199
+ const spikeRate = spikeCount / successfulPings.length
200
+
201
+ // 📖 Normalize each component to 0–100 (higher = better)
202
+ const p95Score = Math.max(0, Math.min(100, 100 * (1 - p95 / 5000)))
203
+ const jitterScore = Math.max(0, Math.min(100, 100 * (1 - jitter / 2000)))
204
+ const spikeScore = Math.max(0, 100 * (1 - spikeRate))
205
+ const reliabilityScore = uptime
206
+
207
+ // 📖 Weighted composite: 30% p95, 30% jitter, 20% spikes, 20% reliability
208
+ const score = 0.3 * p95Score + 0.3 * jitterScore + 0.2 * spikeScore + 0.2 * reliabilityScore
209
+ return Math.round(score)
210
+ }
211
+
128
212
  // 📖 sortResults: Sort the results array by any column the user can click/press in the TUI.
129
213
  // 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
130
214
  //
131
215
  // 📖 Supported columns (matching the keyboard shortcuts in the TUI):
132
- // - 'rank' (R key) — original index from sources.js
133
- // - 'tier' (T key) — tier hierarchy (S+ first, C last)
134
- // - 'origin' (O key) — provider name (all NIM for now, future-proofed)
135
- // - 'model' (M key) — alphabetical by display label
136
- // - 'ping' (L key) — last ping latency (only successful ones count)
137
- // - 'avg' (A key) — average latency across all successful pings
138
- // - 'swe' (S key) — SWE-bench score (higher is better)
139
- // - 'ctx' (N key) — context window size (larger is better)
140
- // - 'condition' (H key) — health status (alphabetical)
141
- // - 'verdict' (V key) — verdict order (Perfect → Pending)
142
- // - 'uptime' (U key) — uptime percentage
216
+ // - 'rank' (R key) — original index from sources.js
217
+ // - 'tier' (T key) — tier hierarchy (S+ first, C last)
218
+ // - 'origin' (O key) — provider name
219
+ // - 'model' (M key) — alphabetical by display label
220
+ // - 'ping' (L key) — last ping latency (only successful ones count)
221
+ // - 'avg' (A key) — average latency across all successful pings
222
+ // - 'swe' (S key) — SWE-bench score (higher is better)
223
+ // - 'ctx' (N key) — context window size (larger is better)
224
+ // - 'condition' (H key) — health status (alphabetical)
225
+ // - 'verdict' (V key) — verdict order (Perfect → Pending)
226
+ // - 'uptime' (U key) — uptime percentage
227
+ // - 'stability' (B key) — stability score (0–100, higher = more stable)
143
228
  //
144
229
  // 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
145
230
  export const sortResults = (results, sortColumn, sortDirection) => {
@@ -219,6 +304,11 @@ export const sortResults = (results, sortColumn, sortDirection) => {
219
304
  case 'uptime':
220
305
  cmp = getUptime(a) - getUptime(b)
221
306
  break
307
+ case 'stability':
308
+ // 📖 Sort by stability score — higher = more stable = better
309
+ // 📖 Models with no data (-1) have the lowest score: first when ascending, last when descending
310
+ cmp = getStabilityScore(a) - getStabilityScore(b)
311
+ break
222
312
  }
223
313
 
224
314
  // 📖 Flip comparison for descending order
@@ -242,16 +332,19 @@ export function filterByTier(results, tierLetter) {
242
332
  // 📖 findBestModel: Pick the single best model from a results array.
243
333
  // 📖 Used by --fiable mode to output the most reliable model after 10s of analysis.
244
334
  //
245
- // 📖 Selection priority (tri-key sort):
335
+ // 📖 Selection priority (quad-key sort):
246
336
  // 1. Status: "up" models always beat non-up models
247
337
  // 2. Average latency: faster average wins (lower is better)
248
- // 3. Uptime %: higher uptime wins as tiebreaker
338
+ // 3. Stability score: higher stability wins (more consistent = better)
339
+ // 4. Uptime %: higher uptime wins as final tiebreaker
249
340
  //
250
341
  // 📖 Returns null if the array is empty.
251
342
  export function findBestModel(results) {
252
343
  const sorted = [...results].sort((a, b) => {
253
344
  const avgA = getAvg(a)
254
345
  const avgB = getAvg(b)
346
+ const stabilityA = getStabilityScore(a)
347
+ const stabilityB = getStabilityScore(b)
255
348
  const uptimeA = getUptime(a)
256
349
  const uptimeB = getUptime(b)
257
350
 
@@ -262,7 +355,10 @@ export function findBestModel(results) {
262
355
  // 📖 Priority 2: Lower average latency = faster = better
263
356
  if (avgA !== avgB) return avgA - avgB
264
357
 
265
- // 📖 Priority 3: Higher uptime = more reliable = better (tiebreaker)
358
+ // 📖 Priority 3: Higher stability = more consistent = better
359
+ if (stabilityA !== stabilityB) return stabilityB - stabilityA
360
+
361
+ // 📖 Priority 4: Higher uptime = more reliable = better (final tiebreaker)
266
362
  return uptimeB - uptimeA
267
363
  })
268
364
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.65",
3
+ "version": "0.1.67",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -27,8 +27,8 @@
27
27
  * 📖 Secondary: https://swe-rebench.com (independent evals, scores are lower)
28
28
  * 📖 Leaderboard tracker: https://www.marc0.dev/en/leaderboard
29
29
  *
30
- * @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai — model arrays per provider
31
- * @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai } each with { name, url, models }
30
+ * @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity — model arrays per provider
31
+ * @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity } each with { name, url, models }
32
32
  * @exports MODELS — flat array of [modelId, label, tier, sweScore, ctx, providerKey]
33
33
  *
34
34
  * 📖 MODELS now includes providerKey as 6th element so ping() knows which
@@ -230,6 +230,54 @@ export const googleai = [
230
230
  ['gemma-3-4b-it', 'Gemma 3 4B', 'C', '10.0%', '128k'],
231
231
  ]
232
232
 
233
+ // 📖 SiliconFlow source - https://cloud.siliconflow.cn
234
+ // 📖 OpenAI-compatible endpoint: https://api.siliconflow.com/v1/chat/completions
235
+ // 📖 Free model quotas vary by model and can change over time.
236
+ export const siliconflow = [
237
+ ['Qwen/Qwen3-Coder-480B-A35B-Instruct', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
238
+ ['deepseek-ai/DeepSeek-V3.2', 'DeepSeek V3.2', 'S+', '73.1%', '128k'],
239
+ ['Qwen/Qwen3-235B-A22B', 'Qwen3 235B', 'S+', '70.0%', '128k'],
240
+ ['deepseek-ai/DeepSeek-R1', 'DeepSeek R1', 'S', '61.0%', '128k'],
241
+ ['Qwen/Qwen3-Coder-30B-A3B-Instruct', 'Qwen3 Coder 30B', 'A+', '55.0%', '32k'],
242
+ ['Qwen/Qwen2.5-Coder-32B-Instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
243
+ ]
244
+
245
+ // 📖 Together AI source - https://api.together.ai
246
+ // 📖 OpenAI-compatible endpoint: https://api.together.xyz/v1/chat/completions
247
+ // 📖 Credits/promotions vary by account and region; verify current quota in console.
248
+ export const together = [
249
+ ['moonshotai/Kimi-K2.5', 'Kimi K2.5', 'S+', '76.8%', '128k'],
250
+ ['Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
251
+ ['deepseek-ai/DeepSeek-V3.1', 'DeepSeek V3.1', 'S', '62.0%', '128k'],
252
+ ['deepseek-ai/DeepSeek-R1', 'DeepSeek R1', 'S', '61.0%', '128k'],
253
+ ['openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
254
+ ['openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
255
+ ['meta-llama/Llama-3.3-70B-Instruct-Turbo', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
256
+ ]
257
+
258
+ // 📖 Cloudflare Workers AI source - https://developers.cloudflare.com/workers-ai
259
+ // 📖 OpenAI-compatible endpoint requires account id:
260
+ // 📖 https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions
261
+ // 📖 Free plan includes daily neuron quota and provider-level request limits.
262
+ export const cloudflare = [
263
+ ['@cf/openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
264
+ ['@cf/qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
265
+ ['@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
266
+ ['@cf/openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
267
+ ['@cf/meta/llama-3.3-70b-instruct-fp8-fast', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
268
+ ['@cf/meta/llama-3.1-8b-instruct', 'Llama 3.1 8B', 'B', '28.8%', '128k'],
269
+ ]
270
+
271
+ // 📖 Perplexity source - https://docs.perplexity.ai
272
+ // 📖 Chat Completions endpoint: https://api.perplexity.ai/chat/completions
273
+ // 📖 Sonar models focus on search/reasoning and have tiered API rate limits.
274
+ export const perplexity = [
275
+ ['sonar-reasoning-pro', 'Sonar Reasoning Pro', 'A+', '50.0%', '128k'],
276
+ ['sonar-reasoning', 'Sonar Reasoning', 'A', '45.0%', '128k'],
277
+ ['sonar-pro', 'Sonar Pro', 'B+', '32.0%', '128k'],
278
+ ['sonar', 'Sonar', 'B', '25.0%', '128k'],
279
+ ]
280
+
233
281
  // 📖 All sources combined - used by the main script
234
282
  // 📖 Each source has: name (display), url (API endpoint), models (array of model tuples)
235
283
  export const sources = {
@@ -298,6 +346,26 @@ export const sources = {
298
346
  url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
299
347
  models: googleai,
300
348
  },
349
+ siliconflow: {
350
+ name: 'SiliconFlow',
351
+ url: 'https://api.siliconflow.com/v1/chat/completions',
352
+ models: siliconflow,
353
+ },
354
+ together: {
355
+ name: 'Together AI',
356
+ url: 'https://api.together.xyz/v1/chat/completions',
357
+ models: together,
358
+ },
359
+ cloudflare: {
360
+ name: 'Cloudflare AI',
361
+ url: 'https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions',
362
+ models: cloudflare,
363
+ },
364
+ perplexity: {
365
+ name: 'Perplexity',
366
+ url: 'https://api.perplexity.ai/chat/completions',
367
+ models: perplexity,
368
+ },
301
369
  }
302
370
 
303
371
  // 📖 Flatten all models from all sources — each entry includes providerKey as 6th element