free-coding-models 0.1.65 → 0.1.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -31
- package/bin/free-coding-models.js +457 -94
- package/lib/config.js +14 -2
- package/lib/utils.js +121 -25
- package/package.json +1 -1
- package/sources.js +70 -2
package/lib/config.js
CHANGED
|
@@ -24,7 +24,11 @@
|
|
|
24
24
|
* "codestral": "csk-xxx",
|
|
25
25
|
* "hyperbolic": "eyJ...",
|
|
26
26
|
* "scaleway": "scw-xxx",
|
|
27
|
-
* "googleai": "AIza..."
|
|
27
|
+
* "googleai": "AIza...",
|
|
28
|
+
* "siliconflow":"sk-xxx",
|
|
29
|
+
* "together": "together-xxx",
|
|
30
|
+
* "cloudflare": "cf-xxx",
|
|
31
|
+
* "perplexity": "pplx-xxx"
|
|
28
32
|
* },
|
|
29
33
|
* "providers": {
|
|
30
34
|
* "nvidia": { "enabled": true },
|
|
@@ -39,7 +43,11 @@
|
|
|
39
43
|
* "codestral": { "enabled": true },
|
|
40
44
|
* "hyperbolic": { "enabled": true },
|
|
41
45
|
* "scaleway": { "enabled": true },
|
|
42
|
-
* "googleai": { "enabled": true }
|
|
46
|
+
* "googleai": { "enabled": true },
|
|
47
|
+
* "siliconflow":{ "enabled": true },
|
|
48
|
+
* "together": { "enabled": true },
|
|
49
|
+
* "cloudflare": { "enabled": true },
|
|
50
|
+
* "perplexity": { "enabled": true }
|
|
43
51
|
* },
|
|
44
52
|
* "favorites": [
|
|
45
53
|
* "nvidia/deepseek-ai/deepseek-v3.2"
|
|
@@ -94,6 +102,10 @@ const ENV_VARS = {
|
|
|
94
102
|
hyperbolic: 'HYPERBOLIC_API_KEY',
|
|
95
103
|
scaleway: 'SCALEWAY_API_KEY',
|
|
96
104
|
googleai: 'GOOGLE_API_KEY',
|
|
105
|
+
siliconflow:'SILICONFLOW_API_KEY',
|
|
106
|
+
together: 'TOGETHER_API_KEY',
|
|
107
|
+
cloudflare: ['CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_KEY'],
|
|
108
|
+
perplexity: ['PERPLEXITY_API_KEY', 'PPLX_API_KEY'],
|
|
97
109
|
}
|
|
98
110
|
|
|
99
111
|
/**
|
package/lib/utils.js
CHANGED
|
@@ -27,14 +27,18 @@
|
|
|
27
27
|
*
|
|
28
28
|
* @functions
|
|
29
29
|
* → getAvg(result) — Calculate average latency from successful pings only
|
|
30
|
-
* → getVerdict(result) — Determine model health verdict based on avg latency and
|
|
30
|
+
* → getVerdict(result) — Determine model health verdict based on avg latency and stability
|
|
31
31
|
* → getUptime(result) — Calculate uptime percentage (successful / total pings)
|
|
32
|
+
* → getP95(result) — Calculate 95th percentile latency from successful pings
|
|
33
|
+
* → getJitter(result) — Calculate latency standard deviation (jitter)
|
|
34
|
+
* → getStabilityScore(result) — Composite 0–100 stability score (p95 + jitter + spikes + uptime)
|
|
32
35
|
* → sortResults(results, sortColumn, sortDirection) — Sort model results by any column
|
|
33
36
|
* → filterByTier(results, tierLetter) — Filter results by tier letter (S/A/B/C)
|
|
34
|
-
* → findBestModel(results) — Pick the best model by status → avg → uptime priority
|
|
37
|
+
* → findBestModel(results) — Pick the best model by status → avg → stability → uptime priority
|
|
35
38
|
* → parseArgs(argv) — Parse CLI arguments into structured flags and values
|
|
36
39
|
*
|
|
37
|
-
* @exports getAvg, getVerdict, getUptime,
|
|
40
|
+
* @exports getAvg, getVerdict, getUptime, getP95, getJitter, getStabilityScore
|
|
41
|
+
* @exports sortResults, filterByTier, findBestModel, parseArgs
|
|
38
42
|
* @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP
|
|
39
43
|
*
|
|
40
44
|
* @see bin/free-coding-models.js — main CLI that imports these utils
|
|
@@ -54,7 +58,7 @@ export const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
|
|
|
54
58
|
// 📖 Used by sortResults when sorting by the "verdict" column.
|
|
55
59
|
// 📖 "Perfect" means < 400ms avg, "Pending" means no data yet.
|
|
56
60
|
// 📖 The order matters — it determines sort rank in the TUI table.
|
|
57
|
-
export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
|
|
61
|
+
export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Spiky', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
|
|
58
62
|
|
|
59
63
|
// 📖 Maps a CLI tier letter (--tier S/A/B/C) to the full tier strings it includes.
|
|
60
64
|
// 📖 Example: --tier A matches A+, A, and A- models (all "A-family" tiers).
|
|
@@ -91,11 +95,17 @@ export const getAvg = (r) => {
|
|
|
91
95
|
// 2. Timeout/down BUT was previously up → "Unstable" (it worked before, now it doesn't)
|
|
92
96
|
// 3. Timeout/down and never worked → "Not Active" (model might be offline)
|
|
93
97
|
// 4. No successful pings yet → "Pending" (still waiting for first response)
|
|
94
|
-
// 5.
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
//
|
|
98
|
+
// 5. Stability-aware speed tiers (avg + p95/jitter penalty):
|
|
99
|
+
// - Avg < 400ms + stable → "Perfect"
|
|
100
|
+
// - Avg < 400ms but p95 > 3000ms over ≥3 successful pings → "Spiky" (fast on average, but tail latency hurts)
|
|
101
|
+
// - Avg < 1000ms → "Normal" (or "Spiky" when p95 > 5000ms over ≥3 successful pings)
|
|
102
|
+
// - Avg < 3000ms → "Slow"
|
|
103
|
+
// - Avg < 5000ms → "Very Slow"
|
|
104
|
+
// - 5000ms ≤ Avg < 10000ms → "Unstable"
|
|
105
|
+
//
|
|
106
|
+
// 📖 The "Spiky" verdict catches models that look fast on paper (low avg) but randomly
|
|
107
|
+
// stall your IDE/agent with tail-latency spikes. A model with avg 250ms but p95 6000ms
|
|
108
|
+
// gets downgraded from "Perfect" to "Spiky" — because consistency matters more than speed.
|
|
99
109
|
//
|
|
100
110
|
// 📖 The "wasUpBefore" check is key — it distinguishes between a model that's
|
|
101
111
|
// temporarily flaky vs one that was never reachable in the first place.
|
|
@@ -107,8 +117,20 @@ export const getVerdict = (r) => {
|
|
|
107
117
|
if ((r.status === 'timeout' || r.status === 'down') && wasUpBefore) return 'Unstable'
|
|
108
118
|
if (r.status === 'timeout' || r.status === 'down') return 'Not Active'
|
|
109
119
|
if (avg === Infinity) return 'Pending'
|
|
110
|
-
|
|
111
|
-
|
|
120
|
+
|
|
121
|
+
// 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
|
|
122
|
+
const successfulPings = (r.pings || []).filter(p => p.code === '200')
|
|
123
|
+
const p95 = getP95(r)
|
|
124
|
+
|
|
125
|
+
if (avg < 400) {
|
|
126
|
+
// 📖 Only flag as "Spiky" when we have enough data (≥3 successful pings) to judge stability
|
|
127
|
+
if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
|
|
128
|
+
return 'Perfect'
|
|
129
|
+
}
|
|
130
|
+
if (avg < 1000) {
|
|
131
|
+
if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
|
|
132
|
+
return 'Normal'
|
|
133
|
+
}
|
|
112
134
|
if (avg < 3000) return 'Slow'
|
|
113
135
|
if (avg < 5000) return 'Very Slow'
|
|
114
136
|
if (avg < 10000) return 'Unstable'
|
|
@@ -125,21 +147,84 @@ export const getUptime = (r) => {
|
|
|
125
147
|
return Math.round((successful / r.pings.length) * 100)
|
|
126
148
|
}
|
|
127
149
|
|
|
150
|
+
// 📖 getP95: Calculate the 95th percentile latency from successful pings (HTTP 200).
|
|
151
|
+
// 📖 The p95 answers: "95% of successful requests are at least as fast as this value."
|
|
152
|
+
// 📖 A low p95 means consistently fast responses — a high p95 signals tail-latency spikes.
|
|
153
|
+
// 📖 Returns Infinity when no successful pings exist.
|
|
154
|
+
//
|
|
155
|
+
// 📖 Algorithm: sort latencies ascending, pick the value at ceil(N * 0.95) - 1.
|
|
156
|
+
// 📖 Example: [100, 200, 300, 400, 5000] → p95 index = ceil(5 * 0.95) - 1 = 4 → 5000ms
|
|
157
|
+
export const getP95 = (r) => {
|
|
158
|
+
const successfulPings = (r.pings || []).filter(p => p.code === '200')
|
|
159
|
+
if (successfulPings.length === 0) return Infinity
|
|
160
|
+
const sorted = successfulPings.map(p => p.ms).sort((a, b) => a - b)
|
|
161
|
+
const idx = Math.ceil(sorted.length * 0.95) - 1
|
|
162
|
+
return sorted[Math.max(0, idx)]
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// 📖 getJitter: Calculate latency standard deviation (σ) from successful pings.
|
|
166
|
+
// 📖 Low jitter = predictable response times. High jitter = erratic, spiky latency.
|
|
167
|
+
// 📖 Returns 0 when fewer than 2 successful pings (can't compute variance from 1 point).
|
|
168
|
+
// 📖 Uses population σ (divides by N, not N-1) since we have ALL the data, not a sample.
|
|
169
|
+
export const getJitter = (r) => {
|
|
170
|
+
const successfulPings = (r.pings || []).filter(p => p.code === '200')
|
|
171
|
+
if (successfulPings.length < 2) return 0
|
|
172
|
+
const mean = successfulPings.reduce((a, b) => a + b.ms, 0) / successfulPings.length
|
|
173
|
+
const variance = successfulPings.reduce((sum, p) => sum + (p.ms - mean) ** 2, 0) / successfulPings.length
|
|
174
|
+
return Math.round(Math.sqrt(variance))
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// 📖 getStabilityScore: Composite 0–100 score that rewards consistency and reliability.
|
|
178
|
+
// 📖 Combines four signals into a single number:
|
|
179
|
+
// - p95 latency (30%) — penalizes tail-latency spikes
|
|
180
|
+
// - Jitter / σ (30%) — penalizes erratic response times
|
|
181
|
+
// - Spike rate (20%) — fraction of successful pings slower than the 3000ms threshold
|
|
182
|
+
// - Uptime / reliability (20%) — fraction of successful pings
|
|
183
|
+
//
|
|
184
|
+
// 📖 Each component is normalized to 0–100, then weighted and combined.
|
|
185
|
+
// 📖 Returns -1 when no successful pings exist (not enough data yet).
|
|
186
|
+
//
|
|
187
|
+
// 📖 Example:
|
|
188
|
+
// Model A: avg 250ms, p95 6000ms (tons of spikes) → score ~30
|
|
189
|
+
// Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
|
|
190
|
+
// In real usage, Model B FEELS faster because it doesn't randomly stall.
|
|
191
|
+
export const getStabilityScore = (r) => {
|
|
192
|
+
const successfulPings = (r.pings || []).filter(p => p.code === '200')
|
|
193
|
+
if (successfulPings.length === 0) return -1
|
|
194
|
+
|
|
195
|
+
const p95 = getP95(r)
|
|
196
|
+
const jitter = getJitter(r)
|
|
197
|
+
const uptime = getUptime(r)
|
|
198
|
+
const spikeCount = successfulPings.filter(p => p.ms > 3000).length
|
|
199
|
+
const spikeRate = spikeCount / successfulPings.length
|
|
200
|
+
|
|
201
|
+
// 📖 Normalize each component to 0–100 (higher = better)
|
|
202
|
+
const p95Score = Math.max(0, Math.min(100, 100 * (1 - p95 / 5000)))
|
|
203
|
+
const jitterScore = Math.max(0, Math.min(100, 100 * (1 - jitter / 2000)))
|
|
204
|
+
const spikeScore = Math.max(0, 100 * (1 - spikeRate))
|
|
205
|
+
const reliabilityScore = uptime
|
|
206
|
+
|
|
207
|
+
// 📖 Weighted composite: 30% p95, 30% jitter, 20% spikes, 20% reliability
|
|
208
|
+
const score = 0.3 * p95Score + 0.3 * jitterScore + 0.2 * spikeScore + 0.2 * reliabilityScore
|
|
209
|
+
return Math.round(score)
|
|
210
|
+
}
|
|
211
|
+
|
|
128
212
|
// 📖 sortResults: Sort the results array by any column the user can click/press in the TUI.
|
|
129
213
|
// 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
|
|
130
214
|
//
|
|
131
215
|
// 📖 Supported columns (matching the keyboard shortcuts in the TUI):
|
|
132
|
-
// - 'rank'
|
|
133
|
-
// - 'tier'
|
|
134
|
-
// - 'origin'
|
|
135
|
-
// - 'model'
|
|
136
|
-
// - 'ping'
|
|
137
|
-
// - 'avg'
|
|
138
|
-
// - 'swe'
|
|
139
|
-
// - 'ctx'
|
|
140
|
-
// - 'condition'
|
|
141
|
-
// - 'verdict'
|
|
142
|
-
// - 'uptime'
|
|
216
|
+
// - 'rank' (R key) — original index from sources.js
|
|
217
|
+
// - 'tier' (T key) — tier hierarchy (S+ first, C last)
|
|
218
|
+
// - 'origin' (O key) — provider name (sources.js defines many providers, not only NIM)
|
|
219
|
+
// - 'model' (M key) — alphabetical by display label
|
|
220
|
+
// - 'ping' (L key) — last ping latency (only successful ones count)
|
|
221
|
+
// - 'avg' (A key) — average latency across all successful pings
|
|
222
|
+
// - 'swe' (S key) — SWE-bench score (higher is better)
|
|
223
|
+
// - 'ctx' (N key) — context window size (larger is better)
|
|
224
|
+
// - 'condition' (H key) — health status (alphabetical)
|
|
225
|
+
// - 'verdict' (V key) — verdict order (Perfect → Pending)
|
|
226
|
+
// - 'uptime' (U key) — uptime percentage
|
|
227
|
+
// - 'stability' (B key) — stability score (0–100, higher = more stable)
|
|
143
228
|
//
|
|
144
229
|
// 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
|
|
145
230
|
export const sortResults = (results, sortColumn, sortDirection) => {
|
|
@@ -219,6 +304,11 @@ export const sortResults = (results, sortColumn, sortDirection) => {
|
|
|
219
304
|
case 'uptime':
|
|
220
305
|
cmp = getUptime(a) - getUptime(b)
|
|
221
306
|
break
|
|
307
|
+
case 'stability':
|
|
308
|
+
// 📖 Sort by stability score — higher = more stable = better
|
|
309
|
+
// 📖 Models with no data score -1, so they land below every scored model in descending order (and first in ascending order)
|
|
310
|
+
cmp = getStabilityScore(a) - getStabilityScore(b)
|
|
311
|
+
break
|
|
222
312
|
}
|
|
223
313
|
|
|
224
314
|
// 📖 Flip comparison for descending order
|
|
@@ -242,16 +332,19 @@ export function filterByTier(results, tierLetter) {
|
|
|
242
332
|
// 📖 findBestModel: Pick the single best model from a results array.
|
|
243
333
|
// 📖 Used by --fiable mode to output the most reliable model after 10s of analysis.
|
|
244
334
|
//
|
|
245
|
-
// 📖 Selection priority (
|
|
335
|
+
// 📖 Selection priority (quad-key sort):
|
|
246
336
|
// 1. Status: "up" models always beat non-up models
|
|
247
337
|
// 2. Average latency: faster average wins (lower is better)
|
|
248
|
-
// 3.
|
|
338
|
+
// 3. Stability score: higher stability wins (more consistent = better)
|
|
339
|
+
// 4. Uptime %: higher uptime wins as final tiebreaker
|
|
249
340
|
//
|
|
250
341
|
// 📖 Returns null if the array is empty.
|
|
251
342
|
export function findBestModel(results) {
|
|
252
343
|
const sorted = [...results].sort((a, b) => {
|
|
253
344
|
const avgA = getAvg(a)
|
|
254
345
|
const avgB = getAvg(b)
|
|
346
|
+
const stabilityA = getStabilityScore(a)
|
|
347
|
+
const stabilityB = getStabilityScore(b)
|
|
255
348
|
const uptimeA = getUptime(a)
|
|
256
349
|
const uptimeB = getUptime(b)
|
|
257
350
|
|
|
@@ -262,7 +355,10 @@ export function findBestModel(results) {
|
|
|
262
355
|
// 📖 Priority 2: Lower average latency = faster = better
|
|
263
356
|
if (avgA !== avgB) return avgA - avgB
|
|
264
357
|
|
|
265
|
-
// 📖 Priority 3: Higher
|
|
358
|
+
// 📖 Priority 3: Higher stability = more consistent = better
|
|
359
|
+
if (stabilityA !== stabilityB) return stabilityB - stabilityA
|
|
360
|
+
|
|
361
|
+
// 📖 Priority 4: Higher uptime = more reliable = better (final tiebreaker)
|
|
266
362
|
return uptimeB - uptimeA
|
|
267
363
|
})
|
|
268
364
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.67",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|
package/sources.js
CHANGED
|
@@ -27,8 +27,8 @@
|
|
|
27
27
|
* 📖 Secondary: https://swe-rebench.com (independent evals, scores are lower)
|
|
28
28
|
* 📖 Leaderboard tracker: https://www.marc0.dev/en/leaderboard
|
|
29
29
|
*
|
|
30
|
-
* @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai — model arrays per provider
|
|
31
|
-
* @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai } each with { name, url, models }
|
|
30
|
+
* @exports nvidiaNim, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity — model arrays per provider
|
|
31
|
+
* @exports sources — map of { nvidia, groq, cerebras, sambanova, openrouter, huggingface, replicate, deepinfra, fireworks, codestral, hyperbolic, scaleway, googleai, siliconflow, together, cloudflare, perplexity } each with { name, url, models }
|
|
32
32
|
* @exports MODELS — flat array of [modelId, label, tier, sweScore, ctx, providerKey]
|
|
33
33
|
*
|
|
34
34
|
* 📖 MODELS now includes providerKey as 6th element so ping() knows which
|
|
@@ -230,6 +230,54 @@ export const googleai = [
|
|
|
230
230
|
['gemma-3-4b-it', 'Gemma 3 4B', 'C', '10.0%', '128k'],
|
|
231
231
|
]
|
|
232
232
|
|
|
233
|
+
// 📖 SiliconFlow source - https://cloud.siliconflow.cn
|
|
234
|
+
// 📖 OpenAI-compatible endpoint: https://api.siliconflow.com/v1/chat/completions
|
|
235
|
+
// 📖 Free model quotas vary by model and can change over time.
|
|
236
|
+
export const siliconflow = [
|
|
237
|
+
['Qwen/Qwen3-Coder-480B-A35B-Instruct', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
|
|
238
|
+
['deepseek-ai/DeepSeek-V3.2', 'DeepSeek V3.2', 'S+', '73.1%', '128k'],
|
|
239
|
+
['Qwen/Qwen3-235B-A22B', 'Qwen3 235B', 'S+', '70.0%', '128k'],
|
|
240
|
+
['deepseek-ai/DeepSeek-R1', 'DeepSeek R1', 'S', '61.0%', '128k'],
|
|
241
|
+
['Qwen/Qwen3-Coder-30B-A3B-Instruct', 'Qwen3 Coder 30B', 'A+', '55.0%', '32k'],
|
|
242
|
+
['Qwen/Qwen2.5-Coder-32B-Instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
|
|
243
|
+
]
|
|
244
|
+
|
|
245
|
+
// 📖 Together AI source - https://api.together.ai
|
|
246
|
+
// 📖 OpenAI-compatible endpoint: https://api.together.xyz/v1/chat/completions
|
|
247
|
+
// 📖 Credits/promotions vary by account and region; verify current quota in console.
|
|
248
|
+
export const together = [
|
|
249
|
+
['moonshotai/Kimi-K2.5', 'Kimi K2.5', 'S+', '76.8%', '128k'],
|
|
250
|
+
['Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8', 'Qwen3 Coder 480B', 'S+', '70.6%', '256k'],
|
|
251
|
+
['deepseek-ai/DeepSeek-V3.1', 'DeepSeek V3.1', 'S', '62.0%', '128k'],
|
|
252
|
+
['deepseek-ai/DeepSeek-R1', 'DeepSeek R1', 'S', '61.0%', '128k'],
|
|
253
|
+
['openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
|
|
254
|
+
['openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
|
|
255
|
+
['meta-llama/Llama-3.3-70B-Instruct-Turbo', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
|
|
256
|
+
]
|
|
257
|
+
|
|
258
|
+
// 📖 Cloudflare Workers AI source - https://developers.cloudflare.com/workers-ai
|
|
259
|
+
// 📖 OpenAI-compatible endpoint requires account id:
|
|
260
|
+
// 📖 https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions
|
|
261
|
+
// 📖 Free plan includes daily neuron quota and provider-level request limits.
|
|
262
|
+
export const cloudflare = [
|
|
263
|
+
['@cf/openai/gpt-oss-120b', 'GPT OSS 120B', 'S', '60.0%', '128k'],
|
|
264
|
+
['@cf/qwen/qwen2.5-coder-32b-instruct', 'Qwen2.5 Coder 32B', 'A', '46.0%', '32k'],
|
|
265
|
+
['@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', 'R1 Distill 32B', 'A', '43.9%', '128k'],
|
|
266
|
+
['@cf/openai/gpt-oss-20b', 'GPT OSS 20B', 'A', '42.0%', '128k'],
|
|
267
|
+
['@cf/meta/llama-3.3-70b-instruct-fp8-fast', 'Llama 3.3 70B', 'A-', '39.5%', '128k'],
|
|
268
|
+
['@cf/meta/llama-3.1-8b-instruct', 'Llama 3.1 8B', 'B', '28.8%', '128k'],
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
// 📖 Perplexity source - https://docs.perplexity.ai
|
|
272
|
+
// 📖 Chat Completions endpoint: https://api.perplexity.ai/chat/completions
|
|
273
|
+
// 📖 Sonar models focus on search/reasoning and have tiered API rate limits.
|
|
274
|
+
export const perplexity = [
|
|
275
|
+
['sonar-reasoning-pro', 'Sonar Reasoning Pro', 'A+', '50.0%', '128k'],
|
|
276
|
+
['sonar-reasoning', 'Sonar Reasoning', 'A', '45.0%', '128k'],
|
|
277
|
+
['sonar-pro', 'Sonar Pro', 'B+', '32.0%', '128k'],
|
|
278
|
+
['sonar', 'Sonar', 'B', '25.0%', '128k'],
|
|
279
|
+
]
|
|
280
|
+
|
|
233
281
|
// 📖 All sources combined - used by the main script
|
|
234
282
|
// 📖 Each source has: name (display), url (API endpoint), models (array of model tuples)
|
|
235
283
|
export const sources = {
|
|
@@ -298,6 +346,26 @@ export const sources = {
|
|
|
298
346
|
url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
|
|
299
347
|
models: googleai,
|
|
300
348
|
},
|
|
349
|
+
siliconflow: {
|
|
350
|
+
name: 'SiliconFlow',
|
|
351
|
+
url: 'https://api.siliconflow.com/v1/chat/completions',
|
|
352
|
+
models: siliconflow,
|
|
353
|
+
},
|
|
354
|
+
together: {
|
|
355
|
+
name: 'Together AI',
|
|
356
|
+
url: 'https://api.together.xyz/v1/chat/completions',
|
|
357
|
+
models: together,
|
|
358
|
+
},
|
|
359
|
+
cloudflare: {
|
|
360
|
+
name: 'Cloudflare AI',
|
|
361
|
+
url: 'https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1/chat/completions',
|
|
362
|
+
models: cloudflare,
|
|
363
|
+
},
|
|
364
|
+
perplexity: {
|
|
365
|
+
name: 'Perplexity',
|
|
366
|
+
url: 'https://api.perplexity.ai/chat/completions',
|
|
367
|
+
models: perplexity,
|
|
368
|
+
},
|
|
301
369
|
}
|
|
302
370
|
|
|
303
371
|
// 📖 Flatten all models from all sources — each entry includes providerKey as 6th element
|