free-coding-models 0.1.66 → 0.1.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.js CHANGED
@@ -28,7 +28,8 @@
28
28
  * "siliconflow":"sk-xxx",
29
29
  * "together": "together-xxx",
30
30
  * "cloudflare": "cf-xxx",
31
- * "perplexity": "pplx-xxx"
31
+ * "perplexity": "pplx-xxx",
32
+ * "zai": "zai-xxx"
32
33
  * },
33
34
  * "providers": {
34
35
  * "nvidia": { "enabled": true },
@@ -47,7 +48,8 @@
47
48
  * "siliconflow":{ "enabled": true },
48
49
  * "together": { "enabled": true },
49
50
  * "cloudflare": { "enabled": true },
50
- * "perplexity": { "enabled": true }
51
+ * "perplexity": { "enabled": true },
52
+ * "zai": { "enabled": true }
51
53
  * },
52
54
  * "favorites": [
53
55
  * "nvidia/deepseek-ai/deepseek-v3.2"
@@ -56,9 +58,28 @@
56
58
  * "enabled": true,
57
59
  * "consentVersion": 1,
58
60
  * "anonymousId": "anon_550e8400-e29b-41d4-a716-446655440000"
61
+ * "apiKeys": { ... },
62
+ * "providers": { ... },
63
+ * "favorites": [ "nvidia/deepseek-ai/deepseek-v3.2" ],
64
+ * "telemetry": { "enabled": true, "consentVersion": 1, "anonymousId": "anon_..." },
65
+ * "activeProfile": "work",
66
+ * "profiles": {
67
+ * "work": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} },
68
+ * "personal": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} },
69
+ * "fast": { "apiKeys": {...}, "providers": {...}, "favorites": [...], "settings": {...} }
59
70
  * }
60
71
  * }
61
72
  *
73
+ * 📖 Profiles store a snapshot of the user's configuration. Each profile contains:
74
+ * - apiKeys: API keys per provider (can differ between work/personal setups)
75
+ * - providers: enabled/disabled state per provider
76
+ * - favorites: list of pinned favorite models
77
+ * - settings: extra TUI preferences (tierFilter, sortColumn, sortAsc, pingInterval)
78
+ *
79
+ * 📖 When a profile is loaded via --profile <name> or Shift+P, the main config's
80
+ * apiKeys/providers/favorites are replaced with the profile's values. The profile
81
+ * data itself stays in the profiles section — it's a named snapshot, not a fork.
82
+ *
62
83
  * 📖 Migration: On first run, if the old plain-text ~/.free-coding-models exists
63
84
  * and the new JSON file does not, the old key is auto-migrated as the nvidia key.
64
85
  * The old file is left in place (not deleted) for safety.
@@ -68,8 +89,17 @@
68
89
  * → saveConfig(config) — Write config to ~/.free-coding-models.json with 0o600 permissions
69
90
  * → getApiKey(config, providerKey) — Get effective API key (env var override > config > null)
70
91
  * → isProviderEnabled(config, providerKey) — Check if provider is enabled (defaults true)
92
+ * → saveAsProfile(config, name) — Snapshot current apiKeys/providers/favorites/settings into a named profile
93
+ * → loadProfile(config, name) — Apply a named profile's values onto the live config
94
+ * → listProfiles(config) — Return array of profile names
95
+ * → deleteProfile(config, name) — Remove a named profile
96
+ * → getActiveProfileName(config) — Get the currently active profile name (or null)
97
+ * → setActiveProfile(config, name) — Set which profile is active (null to clear)
98
+ * → _emptyProfileSettings() — Default TUI settings for a profile
71
99
  *
72
- * @exports loadConfig, saveConfig, getApiKey
100
+ * @exports loadConfig, saveConfig, getApiKey, isProviderEnabled
101
+ * @exports saveAsProfile, loadProfile, listProfiles, deleteProfile
102
+ * @exports getActiveProfileName, setActiveProfile
73
103
  * @exports CONFIG_PATH — path to the JSON config file
74
104
  *
75
105
  * @see bin/free-coding-models.js — main CLI that uses these functions
@@ -106,6 +136,7 @@ const ENV_VARS = {
106
136
  together: 'TOGETHER_API_KEY',
107
137
  cloudflare: ['CLOUDFLARE_API_TOKEN', 'CLOUDFLARE_API_KEY'],
108
138
  perplexity: ['PERPLEXITY_API_KEY', 'PPLX_API_KEY'],
139
+ zai: 'ZAI_API_KEY',
109
140
  }
110
141
 
111
142
  /**
@@ -137,6 +168,9 @@ export function loadConfig() {
137
168
  if (typeof parsed.telemetry.enabled !== 'boolean') parsed.telemetry.enabled = null
138
169
  if (typeof parsed.telemetry.consentVersion !== 'number') parsed.telemetry.consentVersion = 0
139
170
  if (typeof parsed.telemetry.anonymousId !== 'string' || !parsed.telemetry.anonymousId.trim()) parsed.telemetry.anonymousId = null
171
+ // 📖 Ensure profiles section exists (added in profile system)
172
+ if (!parsed.profiles || typeof parsed.profiles !== 'object') parsed.profiles = {}
173
+ if (parsed.activeProfile && typeof parsed.activeProfile !== 'string') parsed.activeProfile = null
140
174
  return parsed
141
175
  } catch {
142
176
  // 📖 Corrupted JSON — return empty config (user will re-enter keys)
@@ -222,6 +256,129 @@ export function isProviderEnabled(config, providerKey) {
222
256
  return providerConfig.enabled !== false
223
257
  }
224
258
 
259
+ // ─── Config Profiles ──────────────────────────────────────────────────────────
260
+
261
+ /**
262
+ * 📖 _emptyProfileSettings: Default TUI settings stored in a profile.
263
+ *
264
+ * 📖 These settings are saved/restored when switching profiles so each profile
265
+ * can have different sort, filter, and ping preferences.
266
+ *
267
+ * @returns {{ tierFilter: string|null, sortColumn: string, sortAsc: boolean, pingInterval: number }}
268
+ */
269
+ export function _emptyProfileSettings() {
270
+ return {
271
+ tierFilter: null, // 📖 null = show all tiers, or 'S'|'A'|'B'|'C'|'D'
272
+ sortColumn: 'avg', // 📖 default sort column
273
+ sortAsc: true, // 📖 true = ascending (fastest first for latency)
274
+ pingInterval: 8000, // 📖 default ms between pings
275
+ }
276
+ }
277
+
278
+ /**
279
+ * 📖 saveAsProfile: Snapshot the current config state into a named profile.
280
+ *
281
+ * 📖 Takes the current apiKeys, providers, favorites, plus explicit TUI settings
282
+ * and stores them under config.profiles[name]. Does NOT change activeProfile —
283
+ * call setActiveProfile() separately if you want to switch to this profile.
284
+ *
285
+ * 📖 If a profile with the same name exists, it's overwritten.
286
+ *
287
+ * @param {object} config — Live config object (will be mutated)
288
+ * @param {string} name — Profile name (e.g. 'work', 'personal', 'fast')
289
+ * @param {object} [settings] — TUI settings to save (tierFilter, sortColumn, etc.)
290
+ * @returns {object} The config object (for chaining)
291
+ */
292
+ export function saveAsProfile(config, name, settings = null) {
293
+ if (!config.profiles || typeof config.profiles !== 'object') config.profiles = {}
294
+ config.profiles[name] = {
295
+ apiKeys: JSON.parse(JSON.stringify(config.apiKeys || {})),
296
+ providers: JSON.parse(JSON.stringify(config.providers || {})),
297
+ favorites: [...(config.favorites || [])],
298
+ settings: settings ? { ..._emptyProfileSettings(), ...settings } : _emptyProfileSettings(),
299
+ }
300
+ return config
301
+ }
302
+
303
+ /**
304
+ * 📖 loadProfile: Apply a named profile's values onto the live config.
305
+ *
306
+ * 📖 Replaces config.apiKeys, config.providers, config.favorites with the
307
+ * profile's stored values. Also sets config.activeProfile to the loaded name.
308
+ *
309
+ * 📖 Returns the profile's TUI settings so the caller (main CLI) can apply them
310
+ * to the live state object (sortColumn, tierFilter, etc.).
311
+ *
312
+ * 📖 If the profile doesn't exist, returns null (caller should show an error).
313
+ *
314
+ * @param {object} config — Live config object (will be mutated)
315
+ * @param {string} name — Profile name to load
316
+ * @returns {{ tierFilter: string|null, sortColumn: string, sortAsc: boolean, pingInterval: number }|null}
317
+ * The profile's TUI settings, or null if profile not found
318
+ */
319
+ export function loadProfile(config, name) {
320
+ const profile = config?.profiles?.[name]
321
+ if (!profile) return null
322
+
323
+ // 📖 Deep-copy the profile data into the live config (don't share references)
324
+ config.apiKeys = JSON.parse(JSON.stringify(profile.apiKeys || {}))
325
+ config.providers = JSON.parse(JSON.stringify(profile.providers || {}))
326
+ config.favorites = [...(profile.favorites || [])]
327
+ config.activeProfile = name
328
+
329
+ return profile.settings ? { ..._emptyProfileSettings(), ...profile.settings } : _emptyProfileSettings()
330
+ }
331
+
332
+ /**
333
+ * 📖 listProfiles: Get all saved profile names.
334
+ *
335
+ * @param {object} config
336
+ * @returns {string[]} Array of profile names, sorted alphabetically
337
+ */
338
+ export function listProfiles(config) {
339
+ if (!config?.profiles || typeof config.profiles !== 'object') return []
340
+ return Object.keys(config.profiles).sort()
341
+ }
342
+
343
+ /**
344
+ * 📖 deleteProfile: Remove a named profile from the config.
345
+ *
346
+ * 📖 If the deleted profile is the active one, clears activeProfile.
347
+ *
348
+ * @param {object} config — Live config object (will be mutated)
349
+ * @param {string} name — Profile name to delete
350
+ * @returns {boolean} True if the profile existed and was deleted
351
+ */
352
+ export function deleteProfile(config, name) {
353
+ if (!config?.profiles?.[name]) return false
354
+ delete config.profiles[name]
355
+ if (config.activeProfile === name) config.activeProfile = null
356
+ return true
357
+ }
358
+
359
+ /**
360
+ * 📖 getActiveProfileName: Get the currently active profile name.
361
+ *
362
+ * @param {object} config
363
+ * @returns {string|null} Profile name, or null if no profile is active
364
+ */
365
+ export function getActiveProfileName(config) {
366
+ return config?.activeProfile || null
367
+ }
368
+
369
+ /**
370
+ * 📖 setActiveProfile: Set which profile is active (or null to clear).
371
+ *
372
+ * 📖 This just stores the name — it does NOT load the profile's data.
373
+ * Call loadProfile() first to actually apply the profile's values.
374
+ *
375
+ * @param {object} config — Live config object (will be mutated)
376
+ * @param {string|null} name — Profile name, or null to clear
377
+ */
378
+ export function setActiveProfile(config, name) {
379
+ config.activeProfile = name || null
380
+ }
381
+
225
382
  // 📖 Internal helper: create a blank config with the right shape
226
383
  function _emptyConfig() {
227
384
  return {
@@ -235,5 +392,9 @@ function _emptyConfig() {
235
392
  consentVersion: 0,
236
393
  anonymousId: null,
237
394
  },
395
+ // 📖 Active profile name — null means no profile is loaded (using raw config).
396
+ activeProfile: null,
397
+ // 📖 Named profiles: each is a snapshot of apiKeys + providers + favorites + settings.
398
+ profiles: {},
238
399
  }
239
400
  }
package/lib/utils.js CHANGED
@@ -27,15 +27,20 @@
27
27
  *
28
28
  * @functions
29
29
  * → getAvg(result) — Calculate average latency from successful pings only
30
- * → getVerdict(result) — Determine model health verdict based on avg latency and status
30
+ * → getVerdict(result) — Determine model health verdict based on avg latency and stability
31
31
  * → getUptime(result) — Calculate uptime percentage (successful / total pings)
32
+ * → getP95(result) — Calculate 95th percentile latency from successful pings
33
+ * → getJitter(result) — Calculate latency standard deviation (jitter)
34
+ * → getStabilityScore(result) — Composite 0–100 stability score (p95 + jitter + spikes + uptime)
32
35
  * → sortResults(results, sortColumn, sortDirection) — Sort model results by any column
33
36
  * → filterByTier(results, tierLetter) — Filter results by tier letter (S/A/B/C)
34
- * → findBestModel(results) — Pick the best model by status → avg → uptime priority
37
+ * → findBestModel(results) — Pick the best model by status → avg → stability → uptime priority
35
38
  * → parseArgs(argv) — Parse CLI arguments into structured flags and values
36
39
  *
37
- * @exports getAvg, getVerdict, getUptime, sortResults, filterByTier, findBestModel, parseArgs
38
- * @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP
40
+ * @exports getAvg, getVerdict, getUptime, getP95, getJitter, getStabilityScore
41
+ * @exports sortResults, filterByTier, findBestModel, parseArgs
42
+ * @exports scoreModelForTask, getTopRecommendations
43
+ * @exports TIER_ORDER, VERDICT_ORDER, TIER_LETTER_MAP, TASK_TYPES, PRIORITY_TYPES, CONTEXT_BUDGETS
39
44
  *
40
45
  * @see bin/free-coding-models.js — main CLI that imports these utils
41
46
  * @see sources.js — model definitions consumed by these functions
@@ -54,7 +59,7 @@ export const TIER_ORDER = ['S+', 'S', 'A+', 'A', 'A-', 'B+', 'B', 'C']
54
59
  // 📖 Used by sortResults when sorting by the "verdict" column.
55
60
  // 📖 "Perfect" means < 400ms avg, "Pending" means no data yet.
56
61
  // 📖 The order matters — it determines sort rank in the TUI table.
57
- export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
62
+ export const VERDICT_ORDER = ['Perfect', 'Normal', 'Slow', 'Spiky', 'Very Slow', 'Overloaded', 'Unstable', 'Not Active', 'Pending']
58
63
 
59
64
  // 📖 Maps a CLI tier letter (--tier S/A/B/C) to the full tier strings it includes.
60
65
  // 📖 Example: --tier A matches A+, A, and A- models (all "A-family" tiers).
@@ -91,11 +96,17 @@ export const getAvg = (r) => {
91
96
  // 2. Timeout/down BUT was previously up → "Unstable" (it worked before, now it doesn't)
92
97
  // 3. Timeout/down and never worked → "Not Active" (model might be offline)
93
98
  // 4. No successful pings yet → "Pending" (still waiting for first response)
94
- // 5. Avg < 400ms "Perfect"
95
- // 6. Avg < 1000ms → "Normal"
96
- // 7. Avg < 3000ms → "Slow"
97
- // 8. Avg < 5000ms → "Very Slow"
98
- // 9. Avg >= 5000ms → "Unstable"
99
+ // 5. Stability-aware speed tiers (avg + p95/jitter penalty):
100
+ // - Avg < 400ms + stable → "Perfect"
101
+ // - Avg < 400ms but spiky p95 → "Spiky" (fast on average, but tail latency hurts)
102
+ // - Avg < 1000ms → "Normal"
103
+ // - Avg < 3000ms → "Slow"
104
+ // - Avg < 5000ms → "Very Slow"
105
+ // - Avg >= 5000ms → "Unstable"
106
+ //
107
+ // 📖 The "Spiky" verdict catches models that look fast on paper (low avg) but randomly
108
+ // stall your IDE/agent with tail-latency spikes. A model with avg 250ms but p95 6000ms
109
+ // gets downgraded from "Perfect" to "Spiky" — because consistency matters more than speed.
99
110
  //
100
111
  // 📖 The "wasUpBefore" check is key — it distinguishes between a model that's
101
112
  // temporarily flaky vs one that was never reachable in the first place.
@@ -107,8 +118,20 @@ export const getVerdict = (r) => {
107
118
  if ((r.status === 'timeout' || r.status === 'down') && wasUpBefore) return 'Unstable'
108
119
  if (r.status === 'timeout' || r.status === 'down') return 'Not Active'
109
120
  if (avg === Infinity) return 'Pending'
110
- if (avg < 400) return 'Perfect'
111
- if (avg < 1000) return 'Normal'
121
+
122
+ // 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
123
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
124
+ const p95 = getP95(r)
125
+
126
+ if (avg < 400) {
127
+ // 📖 Only flag as "Spiky" when we have enough data (≥3 pings) to judge stability
128
+ if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
129
+ return 'Perfect'
130
+ }
131
+ if (avg < 1000) {
132
+ if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
133
+ return 'Normal'
134
+ }
112
135
  if (avg < 3000) return 'Slow'
113
136
  if (avg < 5000) return 'Very Slow'
114
137
  if (avg < 10000) return 'Unstable'
@@ -125,21 +148,84 @@ export const getUptime = (r) => {
125
148
  return Math.round((successful / r.pings.length) * 100)
126
149
  }
127
150
 
151
+ // 📖 getP95: Calculate the 95th percentile latency from successful pings (HTTP 200).
152
+ // 📖 The p95 answers: "95% of requests are faster than this value."
153
+ // 📖 A low p95 means consistently fast responses — a high p95 signals tail-latency spikes.
154
+ // 📖 Returns Infinity when no successful pings exist.
155
+ //
156
+ // 📖 Algorithm: sort latencies ascending, pick the value at ceil(N * 0.95) - 1.
157
+ // 📖 Example: [100, 200, 300, 400, 5000] → p95 index = ceil(5 * 0.95) - 1 = 4 → 5000ms
158
+ export const getP95 = (r) => {
159
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
160
+ if (successfulPings.length === 0) return Infinity
161
+ const sorted = successfulPings.map(p => p.ms).sort((a, b) => a - b)
162
+ const idx = Math.ceil(sorted.length * 0.95) - 1
163
+ return sorted[Math.max(0, idx)]
164
+ }
165
+
166
+ // 📖 getJitter: Calculate latency standard deviation (σ) from successful pings.
167
+ // 📖 Low jitter = predictable response times. High jitter = erratic, spiky latency.
168
+ // 📖 Returns 0 when fewer than 2 successful pings (can't compute variance from 1 point).
169
+ // 📖 Uses population σ (divides by N, not N-1) since we have ALL the data, not a sample.
170
+ export const getJitter = (r) => {
171
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
172
+ if (successfulPings.length < 2) return 0
173
+ const mean = successfulPings.reduce((a, b) => a + b.ms, 0) / successfulPings.length
174
+ const variance = successfulPings.reduce((sum, p) => sum + (p.ms - mean) ** 2, 0) / successfulPings.length
175
+ return Math.round(Math.sqrt(variance))
176
+ }
177
+
178
+ // 📖 getStabilityScore: Composite 0–100 score that rewards consistency and reliability.
179
+ // 📖 Combines four signals into a single number:
180
+ // - p95 latency (30%) — penalizes tail-latency spikes
181
+ // - Jitter / σ (30%) — penalizes erratic response times
182
+ // - Spike rate (20%) — fraction of pings above 3000ms threshold
183
+ // - Uptime / reliability (20%) — fraction of successful pings
184
+ //
185
+ // 📖 Each component is normalized to 0–100, then weighted and combined.
186
+ // 📖 Returns -1 when no successful pings exist (not enough data yet).
187
+ //
188
+ // 📖 Example:
189
+ // Model A: avg 250ms, p95 6000ms (tons of spikes) → score ~30
190
+ // Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
191
+ // In real usage, Model B FEELS faster because it doesn't randomly stall.
192
+ export const getStabilityScore = (r) => {
193
+ const successfulPings = (r.pings || []).filter(p => p.code === '200')
194
+ if (successfulPings.length === 0) return -1
195
+
196
+ const p95 = getP95(r)
197
+ const jitter = getJitter(r)
198
+ const uptime = getUptime(r)
199
+ const spikeCount = successfulPings.filter(p => p.ms > 3000).length
200
+ const spikeRate = spikeCount / successfulPings.length
201
+
202
+ // 📖 Normalize each component to 0–100 (higher = better)
203
+ const p95Score = Math.max(0, Math.min(100, 100 * (1 - p95 / 5000)))
204
+ const jitterScore = Math.max(0, Math.min(100, 100 * (1 - jitter / 2000)))
205
+ const spikeScore = Math.max(0, 100 * (1 - spikeRate))
206
+ const reliabilityScore = uptime
207
+
208
+ // 📖 Weighted composite: 30% p95, 30% jitter, 20% spikes, 20% reliability
209
+ const score = 0.3 * p95Score + 0.3 * jitterScore + 0.2 * spikeScore + 0.2 * reliabilityScore
210
+ return Math.round(score)
211
+ }
212
+
128
213
  // 📖 sortResults: Sort the results array by any column the user can click/press in the TUI.
129
214
  // 📖 Returns a NEW array — never mutates the original (important for React-style re-renders).
130
215
  //
131
216
  // 📖 Supported columns (matching the keyboard shortcuts in the TUI):
132
- // - 'rank' (R key) — original index from sources.js
133
- // - 'tier' (T key) — tier hierarchy (S+ first, C last)
134
- // - 'origin' (O key) — provider name (all NIM for now, future-proofed)
135
- // - 'model' (M key) — alphabetical by display label
136
- // - 'ping' (L key) — last ping latency (only successful ones count)
137
- // - 'avg' (A key) — average latency across all successful pings
138
- // - 'swe' (S key) — SWE-bench score (higher is better)
139
- // - 'ctx' (N key) — context window size (larger is better)
140
- // - 'condition' (H key) — health status (alphabetical)
141
- // - 'verdict' (V key) — verdict order (Perfect → Pending)
142
- // - 'uptime' (U key) — uptime percentage
217
+ // - 'rank' (R key) — original index from sources.js
218
+ // - 'tier' (T key) — tier hierarchy (S+ first, C last)
219
+ // - 'origin' (O key) — provider name (all NIM for now, future-proofed)
220
+ // - 'model' (M key) — alphabetical by display label
221
+ // - 'ping' (L key) — last ping latency (only successful ones count)
222
+ // - 'avg' (A key) — average latency across all successful pings
223
+ // - 'swe' (S key) — SWE-bench score (higher is better)
224
+ // - 'ctx' (N key) — context window size (larger is better)
225
+ // - 'condition' (H key) — health status (alphabetical)
226
+ // - 'verdict' (V key) — verdict order (Perfect → Pending)
227
+ // - 'uptime' (U key) — uptime percentage
228
+ // - 'stability' (B key) — stability score (0–100, higher = more stable)
143
229
  //
144
230
  // 📖 sortDirection 'asc' = ascending (smallest first), 'desc' = descending (largest first)
145
231
  export const sortResults = (results, sortColumn, sortDirection) => {
@@ -219,6 +305,11 @@ export const sortResults = (results, sortColumn, sortDirection) => {
219
305
  case 'uptime':
220
306
  cmp = getUptime(a) - getUptime(b)
221
307
  break
308
+ case 'stability':
309
+ // 📖 Sort by stability score — higher = more stable = better
310
+ // 📖 Models with no data (-1) sort to the bottom
311
+ cmp = getStabilityScore(a) - getStabilityScore(b)
312
+ break
222
313
  }
223
314
 
224
315
  // 📖 Flip comparison for descending order
@@ -242,16 +333,19 @@ export function filterByTier(results, tierLetter) {
242
333
  // 📖 findBestModel: Pick the single best model from a results array.
243
334
  // 📖 Used by --fiable mode to output the most reliable model after 10s of analysis.
244
335
  //
245
- // 📖 Selection priority (tri-key sort):
336
+ // 📖 Selection priority (quad-key sort):
246
337
  // 1. Status: "up" models always beat non-up models
247
338
  // 2. Average latency: faster average wins (lower is better)
248
- // 3. Uptime %: higher uptime wins as tiebreaker
339
+ // 3. Stability score: higher stability wins (more consistent = better)
340
+ // 4. Uptime %: higher uptime wins as final tiebreaker
249
341
  //
250
342
  // 📖 Returns null if the array is empty.
251
343
  export function findBestModel(results) {
252
344
  const sorted = [...results].sort((a, b) => {
253
345
  const avgA = getAvg(a)
254
346
  const avgB = getAvg(b)
347
+ const stabilityA = getStabilityScore(a)
348
+ const stabilityB = getStabilityScore(b)
255
349
  const uptimeA = getUptime(a)
256
350
  const uptimeB = getUptime(b)
257
351
 
@@ -262,7 +356,10 @@ export function findBestModel(results) {
262
356
  // 📖 Priority 2: Lower average latency = faster = better
263
357
  if (avgA !== avgB) return avgA - avgB
264
358
 
265
- // 📖 Priority 3: Higher uptime = more reliable = better (tiebreaker)
359
+ // 📖 Priority 3: Higher stability = more consistent = better
360
+ if (stabilityA !== stabilityB) return stabilityB - stabilityA
361
+
362
+ // 📖 Priority 4: Higher uptime = more reliable = better (final tiebreaker)
266
363
  return uptimeB - uptimeA
267
364
  })
268
365
 
@@ -289,17 +386,27 @@ export function parseArgs(argv) {
289
386
  let apiKey = null
290
387
  const flags = []
291
388
 
292
- // Determine which arg index is consumed by --tier so we skip it
389
+ // 📖 Determine which arg indices are consumed by --tier and --profile so we skip them
293
390
  const tierIdx = args.findIndex(a => a.toLowerCase() === '--tier')
294
391
  const tierValueIdx = (tierIdx !== -1 && args[tierIdx + 1] && !args[tierIdx + 1].startsWith('--'))
295
392
  ? tierIdx + 1
296
393
  : -1
297
394
 
395
+ const profileIdx = args.findIndex(a => a.toLowerCase() === '--profile')
396
+ const profileValueIdx = (profileIdx !== -1 && args[profileIdx + 1] && !args[profileIdx + 1].startsWith('--'))
397
+ ? profileIdx + 1
398
+ : -1
399
+
400
+ // 📖 Set of arg indices that are values for flags (not API keys)
401
+ const skipIndices = new Set()
402
+ if (tierValueIdx !== -1) skipIndices.add(tierValueIdx)
403
+ if (profileValueIdx !== -1) skipIndices.add(profileValueIdx)
404
+
298
405
  for (const [i, arg] of args.entries()) {
299
406
  if (arg.startsWith('--')) {
300
407
  flags.push(arg.toLowerCase())
301
- } else if (i === tierValueIdx) {
302
- // Skip -- this is the --tier value, not an API key
408
+ } else if (skipIndices.has(i)) {
409
+ // 📖 Skip — this is a value for --tier or --profile, not an API key
303
410
  } else if (!apiKey) {
304
411
  apiKey = arg
305
412
  }
@@ -314,5 +421,161 @@ export function parseArgs(argv) {
314
421
 
315
422
  let tierFilter = tierValueIdx !== -1 ? args[tierValueIdx].toUpperCase() : null
316
423
 
317
- return { apiKey, bestMode, fiableMode, openCodeMode, openCodeDesktopMode, openClawMode, noTelemetry, tierFilter }
424
+ const profileName = profileValueIdx !== -1 ? args[profileValueIdx] : null
425
+
426
+ // 📖 --recommend — launch directly into Smart Recommend mode (Q key equivalent)
427
+ const recommendMode = flags.includes('--recommend')
428
+
429
+ return { apiKey, bestMode, fiableMode, openCodeMode, openCodeDesktopMode, openClawMode, noTelemetry, tierFilter, profileName, recommendMode }
430
+ }
431
+
432
+ // ─── Smart Recommend — Scoring Engine ─────────────────────────────────────────
433
+
434
+ // 📖 Task types for the Smart Recommend questionnaire.
435
+ // 📖 Each task type has different weight priorities — quick fixes favor speed,
436
+ // deep refactors favor SWE score and context, code review needs balanced quality,
437
+ // test generation needs high SWE score + medium context.
438
+ export const TASK_TYPES = {
439
+ quickfix: { label: 'Quick Fix', sweWeight: 0.2, speedWeight: 0.5, ctxWeight: 0.1, stabilityWeight: 0.2 },
440
+ refactor: { label: 'Deep Refactor', sweWeight: 0.4, speedWeight: 0.1, ctxWeight: 0.3, stabilityWeight: 0.2 },
441
+ review: { label: 'Code Review', sweWeight: 0.35, speedWeight: 0.2, ctxWeight: 0.25, stabilityWeight: 0.2 },
442
+ testgen: { label: 'Test Generation', sweWeight: 0.35, speedWeight: 0.15, ctxWeight: 0.2, stabilityWeight: 0.3 },
443
+ }
444
+
445
+ // 📖 Priority presets — bias the scoring toward speed or quality.
446
+ // 📖 'speed' amplifies latency weighting, 'quality' amplifies SWE score weighting.
447
+ export const PRIORITY_TYPES = {
448
+ speed: { label: 'Speed', speedMultiplier: 1.5, sweMultiplier: 0.7 },
449
+ quality: { label: 'Quality', speedMultiplier: 0.7, sweMultiplier: 1.5 },
450
+ balanced:{ label: 'Balanced', speedMultiplier: 1.0, sweMultiplier: 1.0 },
451
+ }
452
+
453
+ // 📖 Context budget categories — match against model's context window size.
454
+ // 📖 'small' (<4K tokens) can use any model. 'large' (>32K) strongly penalizes small-ctx models.
455
+ export const CONTEXT_BUDGETS = {
456
+ small: { label: 'Small file (<4K)', minCtx: 0, idealCtx: 32 },
457
+ medium: { label: 'Medium project (<32K)', minCtx: 32, idealCtx: 128 },
458
+ large: { label: 'Large codebase (>32K)', minCtx: 128, idealCtx: 256 },
459
+ }
460
+
461
+ // 📖 parseCtxToK: Convert context window string ("128k", "1m", "200k") into numeric K tokens.
462
+ // 📖 Used by the scoring engine to compare against CONTEXT_BUDGETS thresholds.
463
+ function parseCtxToK(ctx) {
464
+ if (!ctx || ctx === '—') return 0
465
+ const str = ctx.toLowerCase()
466
+ if (str.includes('m')) return parseFloat(str.replace('m', '')) * 1000
467
+ if (str.includes('k')) return parseFloat(str.replace('k', ''))
468
+ return 0
469
+ }
470
+
471
+ // 📖 parseSweToNum: Convert SWE-bench score string ("49.2%", "73.1%") into a 0–100 number.
472
+ // 📖 Returns 0 for missing or invalid scores.
473
+ function parseSweToNum(sweScore) {
474
+ if (!sweScore || sweScore === '—') return 0
475
+ const num = parseFloat(sweScore.replace('%', ''))
476
+ return isNaN(num) ? 0 : num
477
+ }
478
+
479
+ /**
480
+ * 📖 scoreModelForTask: Score a single model result for a specific task/priority/context combination.
481
+ *
482
+ * 📖 The score is a weighted composite of 4 signals:
483
+ * - SWE quality score (0–100): how good the model is at coding (from sources.js benchmarks)
484
+ * - Speed score (0–100): inverse of average latency (faster = higher score)
485
+ * - Context fit score (0–100): how well the model's context window matches the user's budget
486
+ * - Stability score (0–100): composite p95/jitter/uptime from getStabilityScore()
487
+ *
488
+ * 📖 Each signal is weighted by the task type, then further adjusted by the priority multiplier.
489
+ * 📖 Models that are down/timeout get a harsh penalty but aren't completely excluded
490
+ * (they might come back up during the analysis phase).
491
+ *
492
+ * @param {object} result — A model result object (from state.results)
493
+ * @param {string} taskType — Key from TASK_TYPES ('quickfix'|'refactor'|'review'|'testgen')
494
+ * @param {string} priority — Key from PRIORITY_TYPES ('speed'|'quality'|'balanced')
495
+ * @param {string} contextBudget — Key from CONTEXT_BUDGETS ('small'|'medium'|'large')
496
+ * @returns {number} Score between 0 and 100 (higher = better recommendation)
497
+ */
498
+ export function scoreModelForTask(result, taskType, priority, contextBudget) {
499
+ const task = TASK_TYPES[taskType]
500
+ const prio = PRIORITY_TYPES[priority]
501
+ const budget = CONTEXT_BUDGETS[contextBudget]
502
+ if (!task || !prio || !budget) return 0
503
+
504
+ // 📖 SWE quality signal (0–100) — raw SWE-bench score
505
+ const sweNum = parseSweToNum(result.sweScore)
506
+ const sweScore = Math.min(100, sweNum * (100 / 80)) // 📖 Normalize: 80% SWE → 100 score
507
+
508
+ // 📖 Speed signal (0–100) — inverse latency, capped at 5000ms
509
+ const avg = getAvg(result)
510
+ let speedScore
511
+ if (avg === Infinity) {
512
+ speedScore = 0 // 📖 No data yet — can't judge speed
513
+ } else {
514
+ speedScore = Math.max(0, Math.min(100, 100 * (1 - avg / 5000)))
515
+ }
516
+
517
+ // 📖 Context fit signal (0–100):
518
+ // - Full score if model ctx >= idealCtx
519
+ // - Partial score if model ctx >= minCtx but < idealCtx (linear interpolation)
520
+ // - Zero if model ctx < minCtx (too small for the job)
521
+ const modelCtx = parseCtxToK(result.ctx)
522
+ let ctxScore
523
+ if (modelCtx >= budget.idealCtx) {
524
+ ctxScore = 100
525
+ } else if (modelCtx >= budget.minCtx) {
526
+ ctxScore = budget.idealCtx === budget.minCtx
527
+ ? 100
528
+ : Math.round(100 * (modelCtx - budget.minCtx) / (budget.idealCtx - budget.minCtx))
529
+ } else {
530
+ ctxScore = 0
531
+ }
532
+
533
+ // 📖 Stability signal (0–100) — from getStabilityScore(), or 0 if no data
534
+ const stability = getStabilityScore(result)
535
+ const stabScore = stability === -1 ? 0 : stability
536
+
537
+ // 📖 Weighted combination: task weights × priority multipliers
538
+ const rawScore =
539
+ (sweScore * task.sweWeight * prio.sweMultiplier) +
540
+ (speedScore * task.speedWeight * prio.speedMultiplier) +
541
+ (ctxScore * task.ctxWeight) +
542
+ (stabScore * task.stabilityWeight)
543
+
544
+ // 📖 Normalize by total effective weight to keep result in 0–100 range
545
+ const totalWeight =
546
+ (task.sweWeight * prio.sweMultiplier) +
547
+ (task.speedWeight * prio.speedMultiplier) +
548
+ task.ctxWeight +
549
+ task.stabilityWeight
550
+
551
+ let score = totalWeight > 0 ? rawScore / totalWeight : 0
552
+
553
+ // 📖 Penalty for models that are currently down/timeout — still scoreable but penalized
554
+ if (result.status === 'down' || result.status === 'timeout') {
555
+ score *= 0.2
556
+ }
557
+
558
+ return Math.round(Math.min(100, Math.max(0, score)))
559
+ }
560
+
561
+ /**
562
+ * 📖 getTopRecommendations: Score all models and return the top N recommendations.
563
+ *
564
+ * 📖 Filters out hidden models, scores each one, sorts descending, returns topN.
565
+ * 📖 Each returned item includes the original result + computed score for display.
566
+ *
567
+ * @param {Array} results — Full state.results array
568
+ * @param {string} taskType — Key from TASK_TYPES
569
+ * @param {string} priority — Key from PRIORITY_TYPES
570
+ * @param {string} contextBudget — Key from CONTEXT_BUDGETS
571
+ * @param {number} [topN=3] — How many recommendations to return
572
+ * @returns {Array<{result: object, score: number}>} Top N scored models, descending by score
573
+ */
574
+ export function getTopRecommendations(results, taskType, priority, contextBudget, topN = 3) {
575
+ const scored = results
576
+ .filter(r => !r.hidden)
577
+ .map(r => ({ result: r, score: scoreModelForTask(r, taskType, priority, contextBudget) }))
578
+ .sort((a, b) => b.score - a.score)
579
+
580
+ return scored.slice(0, topN)
318
581
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.1.66",
3
+ "version": "0.1.68",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",