free-coding-models 0.1.66 → 0.1.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -30
- package/bin/free-coding-models.js +990 -119
- package/lib/config.js +164 -3
- package/lib/utils.js +293 -30
- package/package.json +1 -1
- package/sources.js +17 -0
package/lib/config.js
CHANGED
|
@@ -28,7 +28,8 @@
|
|
|
28
28
|
* "siliconflow":"sk-xxx",
|
|
29
29
|
* "together": "together-xxx",
|
|
30
30
|
* "cloudflare": "cf-xxx",
|
|
31
|
-
* "perplexity": "pplx-xxx"
|
|
31
|
+
* "perplexity": "pplx-xxx",
|
|
32
|
+
* "zai": "zai-xxx"
|
|
32
33
|
* },
|
|
33
34
|
* "providers": {
|
|
34
35
|
* "nvidia": { "enabled": true },
|
|
@@ -47,7 +48,8 @@
|
|
|
47
48
|
* "siliconflow":{ "enabled": true },
|
|
48
49
|
* "together": { "enabled": true },
|
|
49
50
|
* "cloudflare": { "enabled": true },
|
|
50
|
-
* "perplexity": { "enabled": true }
|
|
51
|
+
* "perplexity": { "enabled": true },
|
|
52
|
+
* "zai": { "enabled": true }
|
|
51
53
|
* },
|
|
52
54
|
* "favorites": [
|
|
53
55
|
* "nvidia/deepseek-ai/deepseek-v3.2"
|
|
@@ -56,9 +58,28 @@
|
|
|
56
58
|
* "enabled": true,
|
|
57
59
|
* "consentVersion": 1,
|
|
58
60
|
* "anonymousId": "anon_550e8400-e29b-41d4-a716-446655440000"
|
|
61
|
+
* "apiKeys": { ... },
|
|
62
|
+
* "providers": { ... },
|
|
63
|
+
* "favorites": [ "nvidia/deepseek-ai/deepseek-v3.2" ],
|
|
64
|
+
* "telemetry": { "enabled": true, "consentVersion": 1, "anonymousId": "anon_..." },
|
|
65
|
+
* "activeProfile": "work",
|
|
66
|
+
* "profiles": {
|
|
67
|
+
* "work": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} },
|
|
68
|
+
* "personal": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} },
|
|
69
|
+
* "fast": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} }
|
|
59
70
|
* }
|
|
60
71
|
* }
|
|
61
72
|
*
|
|
73
|
+
* 📖 Profiles store a snapshot of the user's configuration. Each profile contains:
|
|
74
|
+
* - apiKeys: API keys per provider (can differ between work/personal setups)
|
|
75
|
+
* - providers: enabled/disabled state per provider
|
|
76
|
+
* - favorites: list of pinned favorite models
|
|
77
|
+
* - settings: extra TUI preferences (tierFilter, sortColumn, sortAsc, pingInterval)
|
|
78
|
+
*
|
|
79
|
+
* 📖 When a profile is loaded via --profile <name> or Shift+P, the main config's
|
|
80
|
+
* apiKeys/providers/favorites are replaced with the profile's values. The profile
|
|
81
|
+
* data itself stays in the profiles section — it's a named snapshot, not a fork.
|
|
82
|
+
*
|
|
62
83
|
* 📖 Migration: On first run, if the old plain-text ~/.free-coding-models exists
|
|
63
84
|
* and the new JSON file does not, the old key is auto-migrated as the nvidia key.
|
|
64
85
|
* The old file is left in place (not deleted) for safety.
|
|
@@ -68,8 +89,17 @@
|
|
|
68
89
|
* → saveConfig(config) — Write config to ~/.free-coding-models.json with 0o600 permissions
|
|
69
90
|
* → getApiKey(config, providerKey) — Get effective API key (env var override > config > null)
|
|
70
91
|
* → isProviderEnabled(config, providerKey) — Check if provider is enabled (defaults true)
|
|
92
|
+
* → saveAsProfile(config, name) — Snapshot current apiKeys/providers/favorites/settings into a named profile
|
|
93
|
+
* → loadProfile(config, name) — Apply a named profile's values onto the live config
|
|
94
|
+
* → listProfiles(config) — Return array of profile names
|
|
95
|
+
* → deleteProfile(config, name) — Remove a named profile
|
|
96
|
+
* → getActiveProfileName(config) — Get the currently active profile name (or null)
|
|
97
|
+
* → setActiveProfile(config, name) — Set which profile is active (null to clear)
|
|
98
|
+
* → _emptyProfileSettings() — Default TUI settings for a profile
|
|
71
99
|
*
|
|
72
|
-
* @exports loadConfig, saveConfig, getApiKey
|
|
100
|
+
* @exports loadConfig, saveConfig, getApiKey, isProviderEnabled
|
|
101
|
+
* @exports saveAsProfile, loadProfile, listProfiles, deleteProfile
|
|
102
|
+
* @exports getActiveProfileName, setActiveProfile
|
|
73
103
|
* @exports CONFIG_PATH — path to the JSON config file
|
|
74
104
|
*
|
|
75
105
|
* @see bin/free-coding-models.js — main CLI that uses these functions
|
|
@@ -106,6 +136,7 @@ const ENV_VARS = {
|
|
|
106
136
|
together: 'TOGETHER_API_KEY',
|
|
107
137
|
cloudflare: ['CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_KEY'],
|
|
108
138
|
perplexity: ['PERPLEXITY_API_KEY', 'PPLX_API_KEY'],
|
|
139
|
+
zai: 'ZAI_API_KEY',
|
|
109
140
|
}
|
|
110
141
|
|
|
111
142
|
/**
|
|
@@ -137,6 +168,9 @@ export function loadConfig() {
|
|
|
137
168
|
if (typeof parsed.telemetry.enabled !== 'boolean') parsed.telemetry.enabled = null
|
|
138
169
|
if (typeof parsed.telemetry.consentVersion !== 'number') parsed.telemetry.consentVersion = 0
|
|
139
170
|
if (typeof parsed.telemetry.anonymousId !== 'string' || !parsed.telemetry.anonymousId.trim()) parsed.telemetry.anonymousId = null
|
|
171
|
+
// 📖 Ensure profiles section exists (added in profile system)
|
|
172
|
+
if (!parsed.profiles || typeof parsed.profiles !== 'object') parsed.profiles = {}
|
|
173
|
+
if (parsed.activeProfile && typeof parsed.activeProfile !== 'string') parsed.activeProfile = null
|
|
140
174
|
return parsed
|
|
141
175
|
} catch {
|
|
142
176
|
// 📖 Corrupted JSON — return empty config (user will re-enter keys)
|
|
@@ -222,6 +256,129 @@ export function isProviderEnabled(config, providerKey) {
|
|
|
222
256
|
return providerConfig.enabled !== false
|
|
223
257
|
}
|
|
224
258
|
|
|
259
|
+
// ─── Config Profiles ──────────────────────────────────────────────────────────
|
|
260
|
+
|
|
261
|
+
/**
 * 📖 _emptyProfileSettings: Build the baseline TUI preferences stored with a profile.
 *
 * 📖 A brand-new object is created on every call, so the result can be spread or
 *    mutated by the caller without leaking into any other profile.
 *
 * @returns {{ tierFilter: string|null, sortColumn: string, sortAsc: boolean, pingInterval: number }}
 */
export function _emptyProfileSettings() {
  const defaults = {}
  defaults.tierFilter = null     // 📖 null = no tier filter, every tier is shown
  defaults.sortColumn = 'avg'    // 📖 column the table is sorted by out of the box
  defaults.sortAsc = true        // 📖 ascending order (lowest latency first)
  defaults.pingInterval = 8000   // 📖 milliseconds between pings
  return defaults
}
|
|
277
|
+
|
|
278
|
+
/**
 * 📖 saveAsProfile: Snapshot the current config state into a named profile.
 *
 * 📖 Copies the live apiKeys, providers and favorites (so later edits to the live
 *    config don't bleed into the snapshot) and merges the given TUI settings over
 *    the defaults from _emptyProfileSettings(). The result is stored under
 *    config.profiles[name]. Does NOT touch config.activeProfile.
 *
 * 📖 If a profile with the same name exists, it's overwritten.
 *
 * 📖 The profile is written with Object.defineProperty rather than plain assignment:
 *    assigning to the key "__proto__" would swap the prototype of config.profiles
 *    instead of creating an entry, and the snapshot would silently vanish.
 *
 * @param {object} config — Live config object (will be mutated)
 * @param {string} name — Profile name (e.g. 'work', 'personal', 'fast')
 * @param {object} [settings] — TUI settings to save (tierFilter, sortColumn, etc.)
 * @returns {object} The config object (for chaining)
 */
export function saveAsProfile(config, name, settings = null) {
  if (!config.profiles || typeof config.profiles !== 'object') config.profiles = {}

  const snapshot = {
    apiKeys: JSON.parse(JSON.stringify(config.apiKeys || {})),
    providers: JSON.parse(JSON.stringify(config.providers || {})),
    favorites: [...(config.favorites || [])],
    // 📖 Spreading null/undefined adds nothing, so missing settings fall back to the defaults
    settings: { ..._emptyProfileSettings(), ...(settings || null) },
  }

  // 📖 Always create/overwrite an OWN, enumerable property — even for names like "__proto__"
  Object.defineProperty(config.profiles, name, {
    value: snapshot,
    writable: true,
    enumerable: true,
    configurable: true,
  })

  return config
}
|
|
302
|
+
|
|
303
|
+
/**
 * 📖 loadProfile: Apply a named profile's values onto the live config.
 *
 * 📖 Replaces config.apiKeys, config.providers and config.favorites with copies of
 *    the profile's stored values, and sets config.activeProfile to the loaded name.
 *
 * 📖 Returns the profile's TUI settings (merged over the defaults) so the caller can
 *    apply them to its own state (sortColumn, tierFilter, etc.).
 *
 * 📖 Only OWN entries of config.profiles count as profiles. Without that check a name
 *    such as "constructor" or "toString" resolves to an inherited Object.prototype
 *    member, is treated as a profile, and wipes the live keys/providers/favorites.
 *
 * @param {object} config — Live config object (will be mutated)
 * @param {string} name — Profile name to load
 * @returns {{ tierFilter: string|null, sortColumn: string, sortAsc: boolean, pingInterval: number }|null}
 *          The profile's TUI settings, or null if the profile does not exist
 */
export function loadProfile(config, name) {
  const profiles = config?.profiles
  if (!profiles || typeof profiles !== 'object') return null
  if (!Object.prototype.hasOwnProperty.call(profiles, name)) return null

  const profile = profiles[name]
  // 📖 A corrupted entry (null, string, number…) is not a loadable profile
  if (!profile || typeof profile !== 'object') return null

  // 📖 Deep-copy the profile data into the live config (don't share references)
  config.apiKeys = JSON.parse(JSON.stringify(profile.apiKeys || {}))
  config.providers = JSON.parse(JSON.stringify(profile.providers || {}))
  // 📖 Anything that isn't an array (missing, hand-edited config) becomes an empty list
  config.favorites = Array.isArray(profile.favorites) ? [...profile.favorites] : []
  config.activeProfile = name

  // 📖 Spreading a missing settings value adds nothing, leaving just the defaults
  return { ..._emptyProfileSettings(), ...(profile.settings || null) }
}
|
|
331
|
+
|
|
332
|
+
/**
 * 📖 listProfiles: Names of every profile stored in the config.
 *
 * 📖 A missing config, or a `profiles` value that is absent or not an object,
 *    yields an empty list instead of an error.
 *
 * @param {object} config
 * @returns {string[]} Profile names in default (string) sort order
 */
export function listProfiles(config) {
  const stored = config?.profiles
  const usable = Boolean(stored) && typeof stored === 'object'
  if (!usable) return []

  const names = Object.keys(stored)
  names.sort()
  return names
}
|
|
342
|
+
|
|
343
|
+
/**
 * 📖 deleteProfile: Remove a named profile from the config.
 *
 * 📖 If the deleted profile is the active one, config.activeProfile is cleared.
 *
 * 📖 Existence is decided by an own-property check, not by truthiness of the lookup:
 *    - inherited names ("constructor", "toString") are NOT profiles → returns false
 *    - an own entry whose value is falsy (e.g. null from a hand-edited file) IS
 *      removable → returns true
 *
 * @param {object} config — Live config object (will be mutated)
 * @param {string} name — Profile name to delete
 * @returns {boolean} True if the profile existed and was deleted
 */
export function deleteProfile(config, name) {
  const profiles = config?.profiles
  if (!profiles || typeof profiles !== 'object') return false
  if (!Object.prototype.hasOwnProperty.call(profiles, name)) return false

  delete profiles[name]
  if (config.activeProfile === name) config.activeProfile = null
  return true
}
|
|
358
|
+
|
|
359
|
+
/**
 * 📖 getActiveProfileName: Read the name of the profile currently marked active.
 *
 * 📖 Any falsy stored value (undefined, null, empty string) and a missing config
 *    are all reported as null.
 *
 * @param {object} config
 * @returns {string|null} Profile name, or null if no profile is active
 */
export function getActiveProfileName(config) {
  const active = config?.activeProfile
  if (active) return active
  return null
}
|
|
368
|
+
|
|
369
|
+
/**
 * 📖 setActiveProfile: Record which profile is active (or clear it).
 *
 * 📖 Only the name is stored on config.activeProfile — none of the profile's
 *    apiKeys/providers/favorites are applied here. Falsy names (null, undefined,
 *    empty string) are stored as null.
 *
 * @param {object} config — Live config object (will be mutated)
 * @param {string|null} name — Profile name, or null to clear
 */
export function setActiveProfile(config, name) {
  const nextActive = name ? name : null
  config.activeProfile = nextActive
}
|
|
381
|
+
|
|
225
382
|
// 📖 Internal helper: create a blank config with the right shape
|
|
226
383
|
function _emptyConfig() {
|
|
227
384
|
return {
|
|
@@ -235,5 +392,9 @@ function _emptyConfig() {
|
|
|
235
392
|
consentVersion: 0,
|
|
236
393
|
anonymousId: null,
|
|
237
394
|
},
|
|
395
|
+
// 📖 Active profile name — null means no profile is loaded (using raw config).
|
|
396
|
+
activeProfile: null,
|
|
397
|
+
// 📖 Named profiles: each is a snapshot of apiKeys + providers + favorites + settings.
|
|
398
|
+
profiles: {},
|
|
238
399
|
}
|
|
239
400
|
}
|
package/lib/utils.js
CHANGED
|
@@ -27,15 +27,20 @@
|
|
|
27
27
|
*
|
|
28
28
|
* @functions
|
|
29
29
|
* → getAvg(result) — Calculate average latency from successful pings only
|
|
30
|
-
* → getVerdict(result) — Determine model health verdict based on avg latency and
|
|
30
|
+
* → getVerdict(result) — Determine model health verdict based on avg latency and stability
|
|
31
31
|
* → getUptime(result) — Calculate uptime percentage (successful / total pings)
|
|
32
|
+
* → getP95(result) — Calculate 95th percentile latency from successful pings
|
|
33
|
+
* → getJitter(result) — Calculate latency standard deviation (jitter)
|
|
34
|
+
* → getStabilityScore(result) — Composite 0–100 stability score (p95 + jitter + spikes + uptime)
|
|
32
35
|
* → sortResults(results, sortColumn, sortDirection) — Sort model results by any column
|
|
33
36
|
* → filterByTier(results, tierLetter) — Filter results by tier letter (S/A/B/C)
|
|
34
|
-
* → findBestModel(results) — Pick the best model by status → avg → uptime priority
|
|
37
|
+
* → findBestModel(results) — Pick the best model by status → avg → stability → uptime priority
|
|
35
38
|
* → parseArgs(argv) — Parse CLI arguments into structured flags and values
|
|
36
39
|
*
|
|
37
|
-
* @exports getAvg, getVerdict, getUptime,
|
|
38
|
-
* @exports
|
|
40
|
+
* @exports getAvg, getVerdict, getUptime, getP95, getJitter, getStabilityScore
|
|
41
|
+
* @exports sortResults, filterByTier, findBestModel, parseArgs
|
|
42
|
+
* @exports scoreModelForTask, getTopRecommendations
|
|
43
|
+
* @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP, TASK_TYPES, PRIORITY_TYPES, CONTEXT_BUDGETS
|
|
39
44
|
*
|
|
40
45
|
* @see bin/free-coding-models.js — main CLI that imports these utils
|
|
41
46
|
* @see sources.js — model definitions consumed by these functions
|
|
@@ -54,7 +59,7 @@ export const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
|
|
|
54
59
|
// 📖 Used by sortResults when sorting by the "verdict" column.
|
|
55
60
|
// 📖 "Perfect" means < 400ms avg, "Pending" means no data yet.
|
|
56
61
|
// 📖 The order matters — it determines sort rank in the TUI table.
|
|
57
|
-
export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
|
|
62
|
+
export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Spiky', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
|
|
58
63
|
|
|
59
64
|
// 📖 Maps a CLI tier letter (--tier S/A/B/C) to the full tier strings it includes.
|
|
60
65
|
// 📖 Example: --tier A matches A+, A, and A- models (all "A-family" tiers).
|
|
@@ -91,11 +96,17 @@ export const getAvg = (r) => {
|
|
|
91
96
|
// 2. Timeout/down BUT was previously up → "Unstable" (it worked before, now it doesn't)
|
|
92
97
|
// 3. Timeout/down and never worked → "Not Active" (model might be offline)
|
|
93
98
|
// 4. No successful pings yet → "Pending" (still waiting for first response)
|
|
94
|
-
// 5.
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
+
// 5. Stability-aware speed tiers (avg + p95/jitter penalty):
|
|
100
|
+
// - Avg < 400ms + stable → "Perfect"
|
|
101
|
+
// - Avg < 400ms but spiky p95 → "Spiky" (fast on average, but tail latency hurts)
|
|
102
|
+
// - Avg < 1000ms → "Normal"
|
|
103
|
+
// - Avg < 3000ms → "Slow"
|
|
104
|
+
// - Avg < 5000ms → "Very Slow"
|
|
105
|
+
// - Avg >= 5000ms → "Unstable"
|
|
106
|
+
//
|
|
107
|
+
// 📖 The "Spiky" verdict catches models that look fast on paper (low avg) but randomly
|
|
108
|
+
// stall your IDE/agent with tail-latency spikes. A model with avg 250ms but p95 6000ms
|
|
109
|
+
// gets downgraded from "Perfect" to "Spiky" — because consistency matters more than speed.
|
|
99
110
|
//
|
|
100
111
|
// 📖 The "wasUpBefore" check is key — it distinguishes between a model that's
|
|
101
112
|
// temporarily flaky vs one that was never reachable in the first place.
|
|
@@ -107,8 +118,20 @@ export const getVerdict = (r) => {
|
|
|
107
118
|
if ((r.status === 'timeout' || r.status === 'down') && wasUpBefore) return 'Unstable'
|
|
108
119
|
if (r.status === 'timeout' || r.status === 'down') return 'Not Active'
|
|
109
120
|
if (avg === Infinity) return 'Pending'
|
|
110
|
-
|
|
111
|
-
|
|
121
|
+
|
|
122
|
+
// 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
|
|
123
|
+
const successfulPings = (r.pings || []).filter(p => p.code === '200')
|
|
124
|
+
const p95 = getP95(r)
|
|
125
|
+
|
|
126
|
+
if (avg < 400) {
|
|
127
|
+
// 📖 Only flag as "Spiky" when we have enough data (≥3 pings) to judge stability
|
|
128
|
+
if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
|
|
129
|
+
return 'Perfect'
|
|
130
|
+
}
|
|
131
|
+
if (avg < 1000) {
|
|
132
|
+
if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
|
|
133
|
+
return 'Normal'
|
|
134
|
+
}
|
|
112
135
|
if (avg < 3000) return 'Slow'
|
|
113
136
|
if (avg < 5000) return 'Very Slow'
|
|
114
137
|
if (avg < 10000) return 'Unstable'
|
|
@@ -125,21 +148,84 @@ export const getUptime = (r) => {
|
|
|
125
148
|
return Math.round((successful / r.pings.length) * 100)
|
|
126
149
|
}
|
|
127
150
|
|
|
151
|
+
// 📖 getP95: 95th-percentile latency over the successful pings (code === '200').
// 📖 Reads as "95% of the successful requests were at least this fast" — a high value
//    means the model has slow outliers even if its average looks good.
// 📖 Yields Infinity when there is not a single successful ping.
//
// 📖 Method: sort the latencies ascending and take the entry at ceil(N * 0.95) - 1.
// 📖 Example: [100, 200, 300, 400, 5000] → index ceil(5 * 0.95) - 1 = 4 → 5000ms
export const getP95 = (r) => {
  const latencies = []
  for (const ping of (r.pings || [])) {
    if (ping.code === '200') latencies.push(ping.ms)
  }
  if (latencies.length === 0) return Infinity

  latencies.sort((x, y) => x - y)
  const rank = Math.ceil(latencies.length * 0.95) - 1
  return latencies[rank < 0 ? 0 : rank]
}
|
|
165
|
+
|
|
166
|
+
// 📖 getJitter: Standard deviation (σ) of latency across the successful pings (code === '200').
// 📖 Small σ = response times cluster together; large σ = latency swings widely.
// 📖 With fewer than 2 successful pings there is no spread to measure, so the result is 0.
// 📖 This is the population σ: the squared deviations are averaged over N, not N-1.
// 📖 The result is rounded to the nearest integer (milliseconds).
export const getJitter = (r) => {
  const samples = (r.pings || []).filter(p => p.code === '200')
  const count = samples.length
  if (count < 2) return 0

  let total = 0
  for (const sample of samples) total += sample.ms
  const mean = total / count

  let squaredDeviation = 0
  for (const sample of samples) squaredDeviation += (sample.ms - mean) ** 2

  return Math.round(Math.sqrt(squaredDeviation / count))
}
|
|
177
|
+
|
|
178
|
+
// 📖 getStabilityScore: One 0–100 number summarising how consistent and reliable a model is.
// 📖 Four sub-scores (each on a 0–100 scale, higher = better) are blended:
//   - p95 latency  (weight 0.3) — 100 at 0ms, falling linearly to 0 at 5000ms and beyond
//   - jitter / σ   (weight 0.3) — 100 at 0ms, falling linearly to 0 at 2000ms and beyond
//   - spike rate   (weight 0.2) — share of SUCCESSFUL pings slower than 3000ms, inverted
//   - reliability  (weight 0.2) — the value returned by getUptime()
//
// 📖 The blended value is rounded to an integer.
// 📖 When there are no successful pings (code === '200') the function returns -1
//    as a "no data yet" marker.
//
// 📖 Illustration (approximate):
//   Model A: avg 250ms, p95 6000ms (lots of spikes)     → score ~30
//   Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
//   Model B feels faster in practice because it never randomly stalls.
export const getStabilityScore = (r) => {
  const okPings = (r.pings || []).filter(p => p.code === '200')
  if (okPings.length === 0) return -1

  // 📖 Keep a sub-score inside the 0–100 band
  const clampPct = (value) => Math.max(0, Math.min(100, value))

  const p95 = getP95(r)
  const jitter = getJitter(r)
  const reliabilityScore = getUptime(r)

  const spikes = okPings.reduce((count, p) => (p.ms > 3000 ? count + 1 : count), 0)
  const spikeRate = spikes / okPings.length

  const p95Score = clampPct(100 * (1 - p95 / 5000))
  const jitterScore = clampPct(100 * (1 - jitter / 2000))
  const spikeScore = Math.max(0, 100 * (1 - spikeRate))

  // 📖 Weighted blend: 30% p95, 30% jitter, 20% spikes, 20% reliability
  return Math.round(0.3 * p95Score + 0.3 * jitterScore + 0.2 * spikeScore + 0.2 * reliabilityScore)
}
|
|
212
|
+
|
|
128
213
|
// 📖 sortResults: Sort the results array by any column the user can click/press in the TUI.
|
|
129
214
|
// 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
|
|
130
215
|
//
|
|
131
216
|
// 📖 Supported columns (matching the keyboard shortcuts in the TUI):
|
|
132
|
-
// - 'rank'
|
|
133
|
-
// - 'tier'
|
|
134
|
-
// - 'origin'
|
|
135
|
-
// - 'model'
|
|
136
|
-
// - 'ping'
|
|
137
|
-
// - 'avg'
|
|
138
|
-
// - 'swe'
|
|
139
|
-
// - 'ctx'
|
|
140
|
-
// - 'condition'
|
|
141
|
-
// - 'verdict'
|
|
142
|
-
// - 'uptime'
|
|
217
|
+
// - 'rank' (R key) — original index from sources.js
|
|
218
|
+
// - 'tier' (T key) — tier hierarchy (S+ first, C last)
|
|
219
|
+
// - 'origin' (O key) — provider name (all NIM for now, future-proofed)
|
|
220
|
+
// - 'model' (M key) — alphabetical by display label
|
|
221
|
+
// - 'ping' (L key) — last ping latency (only successful ones count)
|
|
222
|
+
// - 'avg' (A key) — average latency across all successful pings
|
|
223
|
+
// - 'swe' (S key) — SWE-bench score (higher is better)
|
|
224
|
+
// - 'ctx' (N key) — context window size (larger is better)
|
|
225
|
+
// - 'condition' (H key) — health status (alphabetical)
|
|
226
|
+
// - 'verdict' (V key) — verdict order (Perfect → Pending)
|
|
227
|
+
// - 'uptime' (U key) — uptime percentage
|
|
228
|
+
// - 'stability' (B key) — stability score (0–100, higher = more stable)
|
|
143
229
|
//
|
|
144
230
|
// 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
|
|
145
231
|
export const sortResults = (results, sortColumn, sortDirection) => {
|
|
@@ -219,6 +305,11 @@ export const sortResults = (results, sortColumn, sortDirection) => {
|
|
|
219
305
|
case 'uptime':
|
|
220
306
|
cmp = getUptime(a) - getUptime(b)
|
|
221
307
|
break
|
|
308
|
+
case 'stability':
|
|
309
|
+
// 📖 Sort by stability score — higher = more stable = better
|
|
310
|
+
// 📖 Models with no data (-1) sort to the bottom
|
|
311
|
+
cmp = getStabilityScore(a) - getStabilityScore(b)
|
|
312
|
+
break
|
|
222
313
|
}
|
|
223
314
|
|
|
224
315
|
// 📖 Flip comparison for descending order
|
|
@@ -242,16 +333,19 @@ export function filterByTier(results, tierLetter) {
|
|
|
242
333
|
// 📖 findBestModel: Pick the single best model from a results array.
|
|
243
334
|
// 📖 Used by --fiable mode to output the most reliable model after 10s of analysis.
|
|
244
335
|
//
|
|
245
|
-
// 📖 Selection priority (
|
|
336
|
+
// 📖 Selection priority (quad-key sort):
|
|
246
337
|
// 1. Status: "up" models always beat non-up models
|
|
247
338
|
// 2. Average latency: faster average wins (lower is better)
|
|
248
|
-
// 3.
|
|
339
|
+
// 3. Stability score: higher stability wins (more consistent = better)
|
|
340
|
+
// 4. Uptime %: higher uptime wins as final tiebreaker
|
|
249
341
|
//
|
|
250
342
|
// 📖 Returns null if the array is empty.
|
|
251
343
|
export function findBestModel(results) {
|
|
252
344
|
const sorted = [...results].sort((a, b) => {
|
|
253
345
|
const avgA = getAvg(a)
|
|
254
346
|
const avgB = getAvg(b)
|
|
347
|
+
const stabilityA = getStabilityScore(a)
|
|
348
|
+
const stabilityB = getStabilityScore(b)
|
|
255
349
|
const uptimeA = getUptime(a)
|
|
256
350
|
const uptimeB = getUptime(b)
|
|
257
351
|
|
|
@@ -262,7 +356,10 @@ export function findBestModel(results) {
|
|
|
262
356
|
// 📖 Priority 2: Lower average latency = faster = better
|
|
263
357
|
if (avgA !== avgB) return avgA - avgB
|
|
264
358
|
|
|
265
|
-
// 📖 Priority 3: Higher
|
|
359
|
+
// 📖 Priority 3: Higher stability = more consistent = better
|
|
360
|
+
if (stabilityA !== stabilityB) return stabilityB - stabilityA
|
|
361
|
+
|
|
362
|
+
// 📖 Priority 4: Higher uptime = more reliable = better (final tiebreaker)
|
|
266
363
|
return uptimeB - uptimeA
|
|
267
364
|
})
|
|
268
365
|
|
|
@@ -289,17 +386,27 @@ export function parseArgs(argv) {
|
|
|
289
386
|
let apiKey = null
|
|
290
387
|
const flags = []
|
|
291
388
|
|
|
292
|
-
// Determine which arg
|
|
389
|
+
// 📖 Determine which arg indices are consumed by --tier and --profile so we skip them
|
|
293
390
|
const tierIdx = args.findIndex(a => a.toLowerCase() === '--tier')
|
|
294
391
|
const tierValueIdx = (tierIdx !== -1 && args[tierIdx + 1] && !args[tierIdx + 1].startsWith('--'))
|
|
295
392
|
? tierIdx + 1
|
|
296
393
|
: -1
|
|
297
394
|
|
|
395
|
+
const profileIdx = args.findIndex(a => a.toLowerCase() === '--profile')
|
|
396
|
+
const profileValueIdx = (profileIdx !== -1 && args[profileIdx + 1] && !args[profileIdx + 1].startsWith('--'))
|
|
397
|
+
? profileIdx + 1
|
|
398
|
+
: -1
|
|
399
|
+
|
|
400
|
+
// 📖 Set of arg indices that are values for flags (not API keys)
|
|
401
|
+
const skipIndices = new Set()
|
|
402
|
+
if (tierValueIdx !== -1) skipIndices.add(tierValueIdx)
|
|
403
|
+
if (profileValueIdx !== -1) skipIndices.add(profileValueIdx)
|
|
404
|
+
|
|
298
405
|
for (const [i, arg] of args.entries()) {
|
|
299
406
|
if (arg.startsWith('--')) {
|
|
300
407
|
flags.push(arg.toLowerCase())
|
|
301
|
-
} else if (i
|
|
302
|
-
// Skip
|
|
408
|
+
} else if (skipIndices.has(i)) {
|
|
409
|
+
// 📖 Skip — this is a value for --tier or --profile, not an API key
|
|
303
410
|
} else if (!apiKey) {
|
|
304
411
|
apiKey = arg
|
|
305
412
|
}
|
|
@@ -314,5 +421,161 @@ export function parseArgs(argv) {
|
|
|
314
421
|
|
|
315
422
|
let tierFilter = tierValueIdx !== -1 ? args[tierValueIdx].toUpperCase() : null
|
|
316
423
|
|
|
317
|
-
|
|
424
|
+
const profileName = profileValueIdx !== -1 ? args[profileValueIdx] : null
|
|
425
|
+
|
|
426
|
+
// 📖 --recommend — launch directly into Smart Recommend mode (Q key equivalent)
|
|
427
|
+
const recommendMode = flags.includes('--recommend')
|
|
428
|
+
|
|
429
|
+
return { apiKey, bestMode, fiableMode, openCodeMode, openCodeDesktopMode, openClawMode, noTelemetry, tierFilter, profileName, recommendMode }
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
// ─── Smart Recommend — Scoring Engine ─────────────────────────────────────────
|
|
433
|
+
|
|
434
|
+
// 📖 TASK_TYPES: the task choices offered by the Smart Recommend questionnaire.
// 📖 Each entry carries a display label plus four weights — one per scoring signal
//    (SWE quality, speed, context fit, stability). Quick fixes lean on speed, deep
//    refactors on SWE score and context, code review sits in between, and test
//    generation pairs a high SWE weight with the largest stability weight.
export const TASK_TYPES = {
  quickfix: {
    label: 'Quick Fix',
    sweWeight: 0.2,
    speedWeight: 0.5,
    ctxWeight: 0.1,
    stabilityWeight: 0.2,
  },
  refactor: {
    label: 'Deep Refactor',
    sweWeight: 0.4,
    speedWeight: 0.1,
    ctxWeight: 0.3,
    stabilityWeight: 0.2,
  },
  review: {
    label: 'Code Review',
    sweWeight: 0.35,
    speedWeight: 0.2,
    ctxWeight: 0.25,
    stabilityWeight: 0.2,
  },
  testgen: {
    label: 'Test Generation',
    sweWeight: 0.35,
    speedWeight: 0.15,
    ctxWeight: 0.2,
    stabilityWeight: 0.3,
  },
}

// 📖 PRIORITY_TYPES: presets that tilt the scoring toward speed or toward quality.
// 📖 Each preset is a pair of multipliers applied on top of the task's speed and SWE
//    weights: 'speed' boosts the latency signal, 'quality' boosts the SWE signal,
//    'balanced' leaves both untouched.
export const PRIORITY_TYPES = {
  speed: {
    label: 'Speed',
    speedMultiplier: 1.5,
    sweMultiplier: 0.7,
  },
  quality: {
    label: 'Quality',
    speedMultiplier: 0.7,
    sweMultiplier: 1.5,
  },
  balanced: {
    label: 'Balanced',
    speedMultiplier: 1.0,
    sweMultiplier: 1.0,
  },
}

// 📖 CONTEXT_BUDGETS: how much context the user expects to need.
// 📖 minCtx / idealCtx are compared against the model's context window expressed in
//    K tokens: at or above idealCtx is a full fit, below minCtx is no fit at all.
export const CONTEXT_BUDGETS = {
  small: {
    label: 'Small file (<4K)',
    minCtx: 0,
    idealCtx: 32,
  },
  medium: {
    label: 'Medium project (<32K)',
    minCtx: 32,
    idealCtx: 128,
  },
  large: {
    label: 'Large codebase (>32K)',
    minCtx: 128,
    idealCtx: 256,
  },
}
|
|
460
|
+
|
|
461
|
+
// 📖 parseCtxToK: Convert a context-window string ("128k", "1m", "200k") into numeric K tokens.
// 📖 Used by the scoring engine to compare against CONTEXT_BUDGETS thresholds.
//
// 📖 Rules:
//   - a value containing "m" is read as millions of tokens → number × 1000
//   - otherwise a value containing "k" is read as thousands of tokens → number
//   - anything else (missing, "—", no unit, non-string input) → 0
//
// 📖 Always returns a finite number: a string that merely contains one of the unit
//    letters but no leading number (e.g. "unknown") gives 0 rather than NaN, and
//    non-string input gives 0 rather than throwing.
function parseCtxToK(ctx) {
  if (typeof ctx !== 'string' || ctx === '' || ctx === '—') return 0

  const str = ctx.toLowerCase()
  let kTokens
  if (str.includes('m')) {
    kTokens = Number.parseFloat(str.replace('m', '')) * 1000
  } else if (str.includes('k')) {
    kTokens = Number.parseFloat(str.replace('k', ''))
  } else {
    return 0
  }

  // 📖 parseFloat yields NaN when no number precedes the unit — treat that as "unknown size"
  return Number.isFinite(kTokens) ? kTokens : 0
}
|
|
470
|
+
|
|
471
|
+
// 📖 parseSweToNum: Turn a SWE-bench score string ("49.2%", "73.1%") into a plain number.
// 📖 The first "%" is stripped before parsing. Missing values, the "—" placeholder and
//    strings that don't start with a number all come back as 0.
function parseSweToNum(sweScore) {
  const isMissing = !sweScore || sweScore === '—'
  if (isMissing) return 0

  const parsed = Number.parseFloat(sweScore.replace('%', ''))
  if (Number.isNaN(parsed)) return 0
  return parsed
}
|
|
478
|
+
|
|
479
|
+
/**
 * 📖 scoreModelForTask: Rate one model result for a given task / priority / context-budget choice.
 *
 * 📖 Four signals, each on a 0–100 scale, are combined:
 *   - SWE quality: the parsed SWE-bench score, scaled so that 80% maps to 100 (capped at 100)
 *   - Speed: 100 at 0ms average latency, falling linearly to 0 at 5000ms; 0 when there is no data
 *   - Context fit: 100 at or above the budget's idealCtx, linear between minCtx and idealCtx,
 *     0 below minCtx
 *   - Stability: the value from getStabilityScore(), with its -1 "no data" marker treated as 0
 *
 * 📖 The task type supplies a weight per signal; the priority preset additionally multiplies
 *    the SWE and speed weights. The weighted sum is divided by the total effective weight so
 *    the result stays on the 0–100 scale.
 *
 * 📖 A model whose status is 'down' or 'timeout' keeps only 20% of its score — it is
 *    penalised heavily but still ranked.
 *
 * 📖 An unknown taskType, priority or contextBudget key gives a score of 0.
 *
 * @param {object} result — A model result object (from state.results)
 * @param {string} taskType — Key from TASK_TYPES ('quickfix'|'refactor'|'review'|'testgen')
 * @param {string} priority — Key from PRIORITY_TYPES ('speed'|'quality'|'balanced')
 * @param {string} contextBudget — Key from CONTEXT_BUDGETS ('small'|'medium'|'large')
 * @returns {number} Integer score between 0 and 100 (higher = better recommendation)
 */
export function scoreModelForTask(result, taskType, priority, contextBudget) {
  const task = TASK_TYPES[taskType]
  const prio = PRIORITY_TYPES[priority]
  const budget = CONTEXT_BUDGETS[contextBudget]
  if (!(task && prio && budget)) return 0

  // 📖 SWE quality signal — 80% on SWE-bench is treated as a perfect 100
  const sweSignal = Math.min(100, parseSweToNum(result.sweScore) * (100 / 80))

  // 📖 Speed signal — no successful ping yet means the speed cannot be judged
  const avgLatency = getAvg(result)
  const speedSignal = avgLatency === Infinity
    ? 0
    : Math.max(0, Math.min(100, 100 * (1 - avgLatency / 5000)))

  // 📖 Context fit signal — starts at 0 ("too small") and is raised when the window qualifies
  const ctxK = parseCtxToK(result.ctx)
  const { minCtx, idealCtx } = budget
  let ctxSignal = 0
  if (ctxK >= idealCtx) {
    ctxSignal = 100
  } else if (ctxK >= minCtx) {
    ctxSignal = idealCtx === minCtx
      ? 100
      : Math.round(100 * (ctxK - minCtx) / (idealCtx - minCtx))
  }

  // 📖 Stability signal — the -1 "no data" marker counts as 0
  const rawStability = getStabilityScore(result)
  const stabilitySignal = rawStability === -1 ? 0 : rawStability

  // 📖 Weighted sum: task weights × priority multipliers
  const weightedSum =
    (sweSignal * task.sweWeight * prio.sweMultiplier) +
    (speedSignal * task.speedWeight * prio.speedMultiplier) +
    (ctxSignal * task.ctxWeight) +
    (stabilitySignal * task.stabilityWeight)

  // 📖 Total effective weight, used to bring the sum back onto the 0–100 scale
  const weightTotal =
    (task.sweWeight * prio.sweMultiplier) +
    (task.speedWeight * prio.speedMultiplier) +
    task.ctxWeight +
    task.stabilityWeight

  let score = 0
  if (weightTotal > 0) score = weightedSum / weightTotal

  // 📖 Unreachable right now → keep only a fifth of the score
  const isUnreachable = result.status === 'down' || result.status === 'timeout'
  if (isUnreachable) score *= 0.2

  return Math.round(Math.min(100, Math.max(0, score)))
}
|
|
560
|
+
|
|
561
|
+
/**
 * 📖 getTopRecommendations: Rank the visible models for a questionnaire answer and keep the best few.
 *
 * 📖 Steps: drop results flagged `hidden`, score each remaining one with
 *    scoreModelForTask(), order by score from highest to lowest, keep the first topN.
 * 📖 Every returned entry pairs the untouched result object with its computed score.
 *
 * @param {Array} results — Full state.results array
 * @param {string} taskType — Key from TASK_TYPES
 * @param {string} priority — Key from PRIORITY_TYPES
 * @param {string} contextBudget — Key from CONTEXT_BUDGETS
 * @param {number} [topN=3] — How many recommendations to return
 * @returns {Array<{result: object, score: number}>} Top N scored models, descending by score
 */
export function getTopRecommendations(results, taskType, priority, contextBudget, topN = 3) {
  const visible = results.filter((candidate) => !candidate.hidden)

  const ranked = visible.map((candidate) => {
    const score = scoreModelForTask(candidate, taskType, priority, contextBudget)
    return { result: candidate, score }
  })

  // 📖 Highest score first
  ranked.sort((left, right) => right.score - left.score)

  return ranked.slice(0, topN)
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.68",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|