free-coding-models 0.1.83 → 0.1.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ /**
2
+ * @file token-usage-reader.js
3
+ * @description Reads historical token usage from request-log.jsonl and aggregates it by exact provider + model pair.
4
+ *
5
+ * @details
6
+ * The TUI already shows live latency and quota state, but that does not tell
7
+ * you how much you've actually consumed on a given Origin. This module reads
8
+ * the persistent JSONL request log once at startup and builds a compact
9
+ * `provider::model -> totalTokens` map for table display.
10
+ *
11
+ * Why this exists:
12
+ * - `token-stats.json` keeps convenience aggregates, but not the exact
13
+ * provider+model sum needed for the new table column.
14
+ * - `request-log.jsonl` is the source of truth because every proxied request
15
+ * records prompt and completion token counts with provider context.
16
+ * - Startup-only parsing keeps runtime overhead negligible during TUI redraws.
17
+ *
18
+ * @functions
19
+ * → `buildProviderModelTokenKey` — creates a stable aggregation key
20
+ * → `loadTokenUsageByProviderModel` — reads request-log.jsonl and returns total tokens by provider+model
21
+ * → `formatTokenTotalCompact` — renders totals as whole-number `k` / `M` strings (e.g. `12k`, `3M`) for narrow columns
22
+ *
23
+ * @exports buildProviderModelTokenKey, loadTokenUsageByProviderModel, formatTokenTotalCompact
24
+ *
25
+ * @see src/log-reader.js
26
+ * @see src/render-table.js
27
+ */
28
+
29
+ import { loadRecentLogs } from './log-reader.js'
30
+
// 📖 buildProviderModelTokenKey joins an Origin key and a model ID into a single
// 📖 `provider::model` string, so the same model ID exposed by two Origins
// 📖 never shares one aggregation slot.
export function buildProviderModelTokenKey(providerKey, modelId) {
  return ''.concat(providerKey, '::', modelId)
}
36
+
// 📖 loadTokenUsageByProviderModel asks log-reader for up to `limit` rows of
// 📖 `logFile` and adds up `row.tokens` per exact provider+model pair.
// 📖 Rows whose token count is missing, non-numeric, zero or negative are skipped;
// 📖 a non-string provider or model is bucketed under 'unknown'.
// 📖 Returns a plain object: { 'provider::model': totalTokens }.
export function loadTokenUsageByProviderModel({ logFile, limit = 50_000 } = {}) {
  const usageByKey = {}

  for (const entry of loadRecentLogs({ logFile, limit })) {
    const tokenCount = Number(entry.tokens) || 0
    if (tokenCount <= 0) continue

    const provider = typeof entry.provider === 'string' ? entry.provider : 'unknown'
    const model = typeof entry.model === 'string' ? entry.model : 'unknown'
    const key = buildProviderModelTokenKey(provider, model)

    usageByKey[key] = (usageByKey[key] || 0) + tokenCount
  }

  return usageByKey
}
55
+
// 📖 formatTokenTotalCompact keeps the token column narrow and scannable:
// 📖 0-999 => raw integer, 1k-999k => Nk, 1M+ => NM, always rounded down, no decimals.
// 📖 Anything that is not a finite positive number (NaN, ±Infinity, negatives,
// 📖 undefined, junk strings) renders as "0" so the column never shows
// 📖 "InfinityM" or a negative count.
export function formatTokenTotalCompact(totalTokens) {
  const parsed = Number(totalTokens)
  const safeTotal = Number.isFinite(parsed) && parsed > 0 ? parsed : 0
  if (safeTotal >= 1_000_000) return `${Math.floor(safeTotal / 1_000_000)}M`
  if (safeTotal >= 1_000) return `${Math.floor(safeTotal / 1_000)}k`
  return String(Math.floor(safeTotal))
}
package/src/updater.js ADDED
@@ -0,0 +1,237 @@
1
+ /**
2
+ * @file updater.js
3
+ * @description Update detection and installation helpers, extracted from bin/free-coding-models.js.
4
+ *
5
+ * @details
6
+ * This module handles all npm version-check and auto-update logic:
7
+ *
8
+ * - `checkForUpdateDetailed()` — hits the npm registry to compare the published version
9
+ * against the locally installed one. Returns `{ latestVersion, error }` so callers
10
+ * can surface meaningful status text in the Settings overlay.
11
+ *
12
+ * - `checkForUpdate()` — thin backward-compatible wrapper used at startup for the
13
+ * auto-update guard. Returns `latestVersion` (string) or `null`.
14
+ *
15
+ * - `runUpdate(latestVersion)` — runs `npm i -g free-coding-models@<version> --prefer-online`,
16
+ * retrying with `sudo` on EACCES/EPERM. On success, relaunches the process with the
17
+ * same argv. On failure, prints manual instructions and exits with code 1.
18
+ * Uses `require('child_process').execSync` inline because ESM dynamic import is async
19
+ * but `execSync` must block to give `stdio: 'inherit'` feedback in the terminal.
20
+ *
21
+ * - `promptUpdateNotification(latestVersion)` — renders a small centered interactive menu
22
+ * that lets the user choose: Update Now / Read Changelogs / Continue without update.
23
+ * Uses raw mode readline keypress events (same pattern as the main TUI).
24
+ * This function is called BEFORE the alt-screen is entered, so it writes to the
25
+ * normal terminal buffer.
26
+ *
27
+ * ⚙️ Notes:
28
+ * - `LOCAL_VERSION` is resolved from package.json via `createRequire` so this module
29
+ * can be imported independently from the bin entry point.
30
+ * - The auto-update flow in `main()` skips update if `isDevMode` is detected (presence of
31
+ * a `.git` directory next to the package root) to avoid an infinite update loop in dev.
32
+ *
33
+ * @functions
34
+ * → checkForUpdateDetailed() — Fetch npm latest with explicit error info
35
+ * → checkForUpdate() — Startup wrapper, returns version string or null
36
+ * → runUpdate(latestVersion) — Install new version via npm global + relaunch
37
+ * → promptUpdateNotification(version) — Interactive pre-TUI update menu
38
+ *
39
+ * @exports
40
+ * checkForUpdateDetailed, checkForUpdate, runUpdate, promptUpdateNotification
41
+ *
42
+ * @see bin/free-coding-models.js — calls checkForUpdate() at startup and runUpdate() on confirm
43
+ */
44
+
45
+ import chalk from 'chalk'
46
+ import { createRequire } from 'module'
47
+
48
+ const require = createRequire(import.meta.url)
49
+ const readline = require('readline')
50
+ const pkg = require('../package.json')
51
+ const LOCAL_VERSION = pkg.version
52
+
/**
 * 📖 checkForUpdateDetailed: Ask the npm registry for the `latest` published version
 * 📖 (5 second timeout) and compare it with the locally installed one.
 * 📖 `latestVersion` is set only when the registry reports a version string that
 * 📖 differs from LOCAL_VERSION; `error` is set on a non-OK HTTP status or any
 * 📖 thrown failure (network, timeout, bad JSON). Never rejects.
 * @returns {Promise<{ latestVersion: string|null, error: string|null }>}
 */
export async function checkForUpdateDetailed() {
  try {
    const response = await fetch('https://registry.npmjs.org/free-coding-models/latest', { signal: AbortSignal.timeout(5000) })
    if (!response.ok) {
      return { latestVersion: null, error: `HTTP ${response.status}` }
    }

    const payload = await response.json()
    const published = payload.version
    const hasDifferentVersion = Boolean(published) && published !== LOCAL_VERSION
    return { latestVersion: hasDifferentVersion ? published : null, error: null }
  } catch (error) {
    return {
      latestVersion: null,
      error: error instanceof Error ? error.message : 'Unknown error',
    }
  }
}
70
+
/**
 * 📖 checkForUpdate: Backward-compatible startup wrapper around checkForUpdateDetailed().
 * 📖 Drops the error detail and hands back only the version (or null when there is
 * 📖 nothing to install or the check failed).
 * @returns {Promise<string|null>}
 */
export async function checkForUpdate() {
  const result = await checkForUpdateDetailed()
  return result.latestVersion
}
79
+
/**
 * 📖 runUpdate: Run npm global install to update to latestVersion, then relaunch.
 * 📖 Retries the install with sudo when the failure looks permission-related.
 * 📖 This function never returns:
 * 📖   - install succeeded → relaunches the CLI with the same arguments and exits
 * 📖     with the relaunched process's exit code
 * 📖   - install failed    → prints manual instructions and exits with code 1
 * @param {string} latestVersion - semver string to install (e.g. "0.1.85")
 */
export function runUpdate(latestVersion) {
  const { execSync, execFileSync } = require('child_process')

  // 📖 The version ends up inside a shell command, so only accept a plain semver
  // 📖 string; anything else is refused rather than passed to the shell.
  const semverPattern = /^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$/
  if (typeof latestVersion !== 'string' || !semverPattern.test(latestVersion)) {
    console.log()
    console.log(chalk.red(' ✖ Update aborted: invalid version string.'))
    console.log()
    process.exit(1)
  }

  // 📖 Force install from npm registry (ignore local cache);
  // 📖 --prefer-online ensures we get the latest published version.
  const installCommand = `npm i -g free-coding-models@${latestVersion} --prefer-online`

  // 📖 A failure counts as permission-related when the error code or any captured
  // 📖 text mentions EACCES / EPERM / "permission".
  // 📖 NOTE(review): with stdio 'inherit' npm's stderr is not captured on the error
  // 📖 object, so this mostly relies on err.code / err.message — confirm that is enough.
  const isPermissionError = (err) => {
    if (err.code === 'EACCES' || err.code === 'EPERM') return true
    const details = `${err.stderr || ''}\n${err.message || ''}`
    return ['EACCES', 'EPERM', 'permission'].some((marker) => details.includes(marker))
  }

  // 📖 Relaunch automatically with the same arguments. Kept OUTSIDE the install
  // 📖 try/catch so a non-zero exit of the relaunched app is not mistaken for a
  // 📖 failed update. execFileSync with an argv array avoids shell quoting issues
  // 📖 (paths or arguments containing spaces).
  const relaunchAndExit = () => {
    console.log(chalk.dim(' 🔄 Restarting with new version...'))
    console.log()
    let exitCode = 0
    try {
      execFileSync(process.execPath, process.argv.slice(1), { stdio: 'inherit' })
    } catch (relaunchErr) {
      exitCode = typeof relaunchErr.status === 'number' ? relaunchErr.status : 1
    }
    process.exit(exitCode)
  }

  console.log()
  console.log(chalk.bold.cyan(' ⬆ Updating free-coding-models to v' + latestVersion + '...'))
  console.log()

  let installed = false
  try {
    execSync(installCommand, { stdio: 'inherit' })
    console.log()
    console.log(chalk.green(' ✅ Update complete! Version ' + latestVersion + ' installed.'))
    console.log()
    installed = true
  } catch (err) {
    console.log()
    if (isPermissionError(err)) {
      console.log(chalk.yellow(' ⚠️ Permission denied. Retrying with sudo...'))
      console.log()
      try {
        execSync(`sudo ${installCommand}`, { stdio: 'inherit' })
        console.log()
        console.log(chalk.green(' ✅ Update complete with sudo! Version ' + latestVersion + ' installed.'))
        console.log()
        installed = true
      } catch {
        console.log()
        console.log(chalk.red(' ✖ Update failed even with sudo. Try manually:'))
        console.log(chalk.dim(' sudo npm i -g free-coding-models@' + latestVersion))
        console.log()
      }
    } else {
      console.log(chalk.red(' ✖ Update failed. Try manually: npm i -g free-coding-models@' + latestVersion))
      console.log()
    }
  }

  if (installed) relaunchAndExit()
  process.exit(1)
}
142
+
/**
 * 📖 promptUpdateNotification: Interactive pre-TUI menu shown when a new version exists.
 * 📖 Clears the screen, draws three centered choices and listens for keypresses:
 * 📖   ↑ / ↓   move the selection (clamped at both ends)
 * 📖   Enter   resolve with the selected choice
 * 📖   Ctrl+C  resolve with null (continue without update)
 * 📖 Raw mode is only toggled when stdin is a TTY. Enter also pauses stdin;
 * 📖 the Ctrl+C path does not.
 * @param {string|null} latestVersion - falsy means "nothing to offer": resolves null immediately
 * @returns {Promise<'update'|'changelogs'|null>}
 */
export async function promptUpdateNotification(latestVersion) {
  if (!latestVersion) return null

  // 📖 `value` is what the promise resolves with when that entry is confirmed.
  const choices = [
    {
      value: 'update',
      label: 'Update now',
      icon: '⬆',
      description: `Update free-coding-models to v${latestVersion}`,
    },
    {
      value: 'changelogs',
      label: 'Read Changelogs',
      icon: '📋',
      description: 'Open GitHub changelog',
    },
    {
      value: null,
      label: 'Continue without update',
      icon: '▶',
      description: 'Use current version',
    },
  ]

  return new Promise((resolve) => {
    let cursor = 0

    // 📖 Redraw the whole menu from scratch, horizontally centered.
    const draw = () => {
      process.stdout.write('\x1b[2J\x1b[H') // clear screen + cursor home

      const columns = process.stdout.columns || 80
      const boxWidth = Math.min(columns - 4, 70)
      const indent = ' '.repeat(Math.max(0, Math.floor((columns - boxWidth) / 2)))
      const line = (text) => console.log(indent + text)

      console.log()
      line(chalk.bold.red(' ⚠ UPDATE AVAILABLE'))
      line(chalk.red(` Version ${latestVersion} is ready to install`))
      console.log()
      line(chalk.bold(' ⚡ Free Coding Models') + chalk.dim(` v${LOCAL_VERSION}`))
      console.log()

      choices.forEach((choice, index) => {
        const title = choice.icon + ' ' + choice.label
        if (index === cursor) {
          line(chalk.bold.cyan(' ❯ ') + chalk.bold.white(title))
        } else {
          line(chalk.dim(' ') + chalk.dim(title))
        }
        line(chalk.dim(' ' + choice.description))
        console.log()
      })

      line(chalk.dim(' ↑↓ Navigate • Enter Select • Ctrl+C Continue'))
      console.log()
    }

    const leaveRawMode = () => {
      if (process.stdin.isTTY) process.stdin.setRawMode(false)
    }

    const handleKeypress = (_chunk, key) => {
      if (!key) return

      if (key.ctrl && key.name === 'c') {
        leaveRawMode()
        process.stdin.removeListener('keypress', handleKeypress)
        resolve(null) // Continue without update
        return
      }

      switch (key.name) {
        case 'up':
          if (cursor > 0) {
            cursor -= 1
            draw()
          }
          break
        case 'down':
          if (cursor < choices.length - 1) {
            cursor += 1
            draw()
          }
          break
        case 'return':
          leaveRawMode()
          process.stdin.removeListener('keypress', handleKeypress)
          process.stdin.pause()
          resolve(choices[cursor].value)
          break
        default:
          break
      }
    }

    draw()

    readline.emitKeypressEvents(process.stdin)
    if (process.stdin.isTTY) process.stdin.setRawMode(true)

    process.stdin.on('keypress', handleKeypress)
  })
}
@@ -1,9 +1,13 @@
1
1
  /**
2
2
  * @file lib/usage-reader.js
3
- * @description Pure functions to read model quota usage from token-stats.json.
3
+ * @description Pure functions to read provider-scoped Usage snapshots from token-stats.json.
4
4
  *
5
- * Designed for TUI consumption: reads the pre-computed `quotaSnapshots.byModel`
6
- * section from the JSON file written by TokenStats. Never reads the JSONL log.
5
+ * Designed for TUI consumption: reads the pre-computed provider-scoped quota
6
+ * snapshots written by TokenStats. Never reads the JSONL log.
7
+ *
8
+ * The UI must distinguish the same model served by different Origins
9
+ * (for example NVIDIA vs Groq). Because of that, the canonical snapshot source
10
+ * is `quotaSnapshots.byProviderModel`, not the legacy `byModel` aggregate.
7
11
  *
8
12
  * All functions are pure (no shared mutable state) and handle missing/malformed
9
13
  * files gracefully by returning safe fallback values.
@@ -30,6 +34,7 @@
30
34
  * @exports CACHE_TTL_MS
31
35
  * @exports clearUsageCache
32
36
  * @exports loadUsageSnapshot
37
+ * @exports buildUsageSnapshotKey
33
38
  * @exports loadUsageMap
34
39
  * @exports usageForModelId
35
40
  * @exports usageForRow
@@ -38,6 +43,7 @@
38
43
  import { readFileSync, existsSync } from 'node:fs'
39
44
  import { join } from 'node:path'
40
45
  import { homedir } from 'node:os'
46
+ import { supportsUsagePercent, usageResetsDaily } from './quota-capabilities.js'
41
47
 
42
48
  const DEFAULT_STATS_FILE = join(homedir(), '.free-coding-models', 'token-stats.json')
43
49
 
@@ -57,7 +63,7 @@ export const CACHE_TTL_MS = 750
57
63
 
58
64
  /**
59
65
  * Module-level cache: path → { snapshot, expiresAt }
60
- * @type {Map<string, { snapshot: { byModel: Record<string, number>, byProvider: Record<string, number> }, expiresAt: number }>}
66
+ * @type {Map<string, { snapshot: { byProviderModel: Record<string, number>, byProvider: Record<string, number>, legacyByModel: Record<string, number> }, expiresAt: number }>}
61
67
  */
62
68
  const _cache = new Map()
63
69
 
@@ -81,13 +87,29 @@ export function clearUsageCache() {
81
87
  * @param {number} [nowMs] - optional current time (ms) for testability
82
88
  * @returns {boolean}
83
89
  */
function isSnapshotFresh(entry, nowMs = Date.now(), providerKey = null) {
  // Entries without a usable `updatedAt` are treated as fresh (backward compat).
  if (!entry || typeof entry.updatedAt !== 'string') return true
  const updatedMs = Date.parse(entry.updatedAt)
  if (!Number.isFinite(updatedMs)) return true // unparseable: be generous

  // Providers flagged by usageResetsDaily() must have been updated on the same
  // UTC calendar day as `nowMs`. Both days are derived from parsed instants, so
  // a timestamp carrying a non-UTC offset (or any other parseable format) is
  // compared on the same basis instead of by its raw string prefix.
  if (providerKey && usageResetsDaily(providerKey)) {
    const nowDay = new Date(nowMs).toISOString().slice(0, 10)
    const updatedDay = new Date(updatedMs).toISOString().slice(0, 10)
    if (updatedDay !== nowDay) return false
  }

  return nowMs - updatedMs < SNAPSHOT_TTL_MS
}
90
101
 
/**
 * Compose the `provider::model` key under which one Origin + model pair is
 * stored in the provider-scoped usage map.
 *
 * @param {string} providerKey
 * @param {string} modelId
 * @returns {string} e.g. 'groq::openai/gpt-oss-120b'
 */
export function buildUsageSnapshotKey(providerKey, modelId) {
  return ''.concat(providerKey, '::', modelId)
}
112
+
91
113
  /**
92
114
  * Load token-stats.json and return model/provider usage maps.
93
115
  * Entries with stale `updatedAt` (older than SNAPSHOT_TTL_MS) are excluded.
@@ -96,7 +118,7 @@ function isSnapshotFresh(entry, nowMs = Date.now()) {
96
118
  * The 30-minute data freshness filter is re-applied on every cache miss (parse).
97
119
  *
98
120
  * @param {string} [statsFile]
99
- * @returns {{ byModel: Record<string, number>, byProvider: Record<string, number> }}
121
+ * @returns {{ byProviderModel: Record<string, number>, byProvider: Record<string, number>, legacyByModel: Record<string, number> }}
100
122
  */
101
123
  export function loadUsageSnapshot(statsFile = DEFAULT_STATS_FILE) {
102
124
  const now = Date.now()
@@ -118,23 +140,40 @@ export function loadUsageSnapshot(statsFile = DEFAULT_STATS_FILE) {
118
140
  *
119
141
  * @param {string} statsFile
120
142
  * @param {number} now - current time in ms (for freshness checks)
121
- * @returns {{ byModel: Record<string, number>, byProvider: Record<string, number> }}
143
+ * @returns {{ byProviderModel: Record<string, number>, byProvider: Record<string, number>, legacyByModel: Record<string, number> }}
122
144
  */
123
145
  function _parseSnapshot(statsFile, now) {
124
146
  try {
125
- if (!existsSync(statsFile)) return { byModel: {}, byProvider: {} }
147
+ if (!existsSync(statsFile)) return { byProviderModel: {}, byProvider: {}, legacyByModel: {} }
126
148
  const raw = readFileSync(statsFile, 'utf8')
127
149
  const data = JSON.parse(raw)
128
150
 
151
+ const byProviderModelSrc = data?.quotaSnapshots?.byProviderModel
129
152
  const byModelSrc = data?.quotaSnapshots?.byModel
130
153
  const byProviderSrc = data?.quotaSnapshots?.byProvider
131
154
 
132
- const byModel = {}
155
+ const byProviderModel = {}
156
+ if (byProviderModelSrc && typeof byProviderModelSrc === 'object') {
157
+ for (const [snapshotKey, entry] of Object.entries(byProviderModelSrc)) {
158
+ const providerKey = typeof entry?.providerKey === 'string'
159
+ ? entry.providerKey
160
+ : snapshotKey.split('::', 1)[0]
161
+ if (!supportsUsagePercent(providerKey)) continue
162
+ if (entry && typeof entry.quotaPercent === 'number' && Number.isFinite(entry.quotaPercent)) {
163
+ if (isSnapshotFresh(entry, now, providerKey)) {
164
+ byProviderModel[snapshotKey] = entry.quotaPercent
165
+ }
166
+ }
167
+ }
168
+ }
169
+
170
+ // 📖 Legacy map kept only for backward compatibility helpers/tests.
171
+ const legacyByModel = {}
133
172
  if (byModelSrc && typeof byModelSrc === 'object') {
134
173
  for (const [modelId, entry] of Object.entries(byModelSrc)) {
135
174
  if (entry && typeof entry.quotaPercent === 'number' && Number.isFinite(entry.quotaPercent)) {
136
175
  if (isSnapshotFresh(entry, now)) {
137
- byModel[modelId] = entry.quotaPercent
176
+ legacyByModel[modelId] = entry.quotaPercent
138
177
  }
139
178
  }
140
179
  }
@@ -143,44 +182,45 @@ function _parseSnapshot(statsFile, now) {
143
182
  const byProvider = {}
144
183
  if (byProviderSrc && typeof byProviderSrc === 'object') {
145
184
  for (const [providerKey, entry] of Object.entries(byProviderSrc)) {
185
+ if (!supportsUsagePercent(providerKey)) continue
146
186
  if (entry && typeof entry.quotaPercent === 'number' && Number.isFinite(entry.quotaPercent)) {
147
- if (isSnapshotFresh(entry, now)) {
187
+ if (isSnapshotFresh(entry, now, providerKey)) {
148
188
  byProvider[providerKey] = entry.quotaPercent
149
189
  }
150
190
  }
151
191
  }
152
192
  }
153
193
 
154
- return { byModel, byProvider }
194
+ return { byProviderModel, byProvider, legacyByModel }
155
195
  } catch {
156
- return { byModel: {}, byProvider: {} }
196
+ return { byProviderModel: {}, byProvider: {}, legacyByModel: {} }
157
197
  }
158
198
  }
159
199
 
/**
 * Load token-stats.json and return the provider-scoped usage map
 * (provider+model key → quotaPercent).
 *
 * This is the `byProviderModel` part of loadUsageSnapshot(); see that function
 * for the filtering and error fallbacks applied while the file is read.
 *
 * @param {string} [statsFile] - Path to token-stats.json (defaults to ~/.free-coding-models/token-stats.json)
 * @returns {Record<string, number>} e.g. { 'groq::openai/gpt-oss-120b': 37 }
 */
export function loadUsageMap(statsFile = DEFAULT_STATS_FILE) {
  const { byProviderModel } = loadUsageSnapshot(statsFile)
  return byProviderModel
}
173
213
 
/**
 * Look up the legacy (model-only) quota percent for `modelId`.
 * Reads the `legacyByModel` map of the snapshot; retained for backward
 * compatibility tests only.
 *
 * @param {string} modelId
 * @param {string} [statsFile] - Path to token-stats.json (defaults to ~/.free-coding-models/token-stats.json)
 * @returns {number | null} quota percent (0–100), or null when the map has no entry for modelId
 */
export function usageForModelId(modelId, statsFile = DEFAULT_STATS_FILE) {
  const { legacyByModel } = loadUsageSnapshot(statsFile)
  const percent = legacyByModel[modelId]
  return percent === undefined ? null : percent
}
@@ -196,8 +236,10 @@ export function usageForModelId(modelId, statsFile = DEFAULT_STATS_FILE) {
196
236
  * @returns {number | null}
197
237
  */
export function usageForRow(providerKey, modelId, statsFile = DEFAULT_STATS_FILE) {
  // Providers without a usage-percent capability never get a value.
  if (!supportsUsagePercent(providerKey)) return null

  const snapshot = loadUsageSnapshot(statsFile)

  // Prefer the exact provider+model entry...
  const exactMatch = snapshot.byProviderModel[buildUsageSnapshotKey(providerKey, modelId)]
  if (exactMatch !== undefined) return exactMatch

  // ...then fall back to the provider-wide entry, or null when neither exists.
  const providerWide = snapshot.byProvider[providerKey]
  return providerWide !== undefined ? providerWide : null
}
@@ -74,18 +74,23 @@ export const TIER_LETTER_MAP = {
74
74
 
75
75
  // ─── Core Logic Functions ────────────────────────────────────────────────────
76
76
 
// 📖 measurablePingCodes: HTTP codes that still give us a real round-trip latency sample.
// 📖 200 = normal success, 401 = no key / bad key but the provider endpoint is reachable.
const measurablePingCodes = new Set(['200', '401'])

// 📖 getAvg: Average latency over pings that produced a real latency sample.
// 📖 HTTP 200 and 401 both count because a 401 still proves the endpoint responded in X ms.
// 📖 Every other code (timeouts, server failures, ...) is left out so availability
// 📖 problems do not get mixed into raw latency.
// 📖 Returns Infinity when no measurable pings exist — this sorts "unknown" models to the bottom.
// 📖 The result is rounded to an integer so the TUI never shows fractional milliseconds.
//
// 📖 Example:
//   pings = [{ms: 200, code: '200'}, {ms: 320, code: '401'}, {ms: 999, code: '500'}]
//   → getAvg returns 260 (only the measurable pings count: (200+320)/2)
export const getAvg = (r) => {
  const samples = []
  for (const ping of r.pings || []) {
    if (measurablePingCodes.has(ping.code)) samples.push(ping.ms)
  }
  if (samples.length === 0) return Infinity

  let total = 0
  for (const ms of samples) total += ms
  return Math.round(total / samples.length)
}
90
95
 
91
96
  // 📖 getVerdict: Determine a human-readable health verdict for a model.
@@ -120,16 +125,16 @@ export const getVerdict = (r) => {
120
125
  if (avg === Infinity) return 'Pending'
121
126
 
122
127
  // 📖 Stability-aware verdict: penalize models with good avg but terrible tail latency
123
- const successfulPings = (r.pings || []).filter(p => p.code === '200')
128
+ const measurablePings = (r.pings || []).filter(p => measurablePingCodes.has(p.code))
124
129
  const p95 = getP95(r)
125
130
 
126
131
  if (avg < 400) {
127
132
  // 📖 Only flag as "Spiky" when we have enough data (≥3 pings) to judge stability
128
- if (successfulPings.length >= 3 && p95 > 3000) return 'Spiky'
133
+ if (measurablePings.length >= 3 && p95 > 3000) return 'Spiky'
129
134
  return 'Perfect'
130
135
  }
131
136
  if (avg < 1000) {
132
- if (successfulPings.length >= 3 && p95 > 5000) return 'Spiky'
137
+ if (measurablePings.length >= 3 && p95 > 5000) return 'Spiky'
133
138
  return 'Normal'
134
139
  }
135
140
  if (avg < 3000) return 'Slow'
@@ -148,30 +153,30 @@ export const getUptime = (r) => {
148
153
  return Math.round((successful / r.pings.length) * 100)
149
154
  }
150
155
 
// 📖 getP95: 95th percentile latency over measurable pings (HTTP 200/401).
// 📖 The p95 answers: "95% of requests are faster than this value."
// 📖 A low p95 means consistently fast responses — a high p95 signals tail-latency spikes.
// 📖 Returns Infinity when no measurable pings exist.
//
// 📖 Algorithm: sort latencies ascending, pick the value at ceil(N * 0.95) - 1.
// 📖 Example: [100, 200, 300, 400, 5000] → p95 index = ceil(5 * 0.95) - 1 = 4 → 5000ms
export const getP95 = (r) => {
  const latencies = (r.pings || [])
    .filter((ping) => measurablePingCodes.has(ping.code))
    .map((ping) => ping.ms)
  if (latencies.length === 0) return Infinity

  latencies.sort((lo, hi) => lo - hi)
  const rank = Math.ceil(latencies.length * 0.95)
  return latencies[Math.max(0, rank - 1)]
}
165
170
 
// 📖 getJitter: Latency standard deviation (σ) over measurable pings, rounded to an integer.
// 📖 Low jitter = predictable response times. High jitter = erratic, spiky latency.
// 📖 Returns 0 when fewer than 2 measurable pings (can't compute variance from 1 point).
// 📖 Uses population σ (divides by N, not N-1) since we have ALL the data, not a sample.
export const getJitter = (r) => {
  const samples = (r.pings || [])
    .filter((ping) => measurablePingCodes.has(ping.code))
    .map((ping) => ping.ms)
  const count = samples.length
  if (count < 2) return 0

  let total = 0
  for (const ms of samples) total += ms
  const mean = total / count

  let squaredDeviation = 0
  for (const ms of samples) squaredDeviation += (ms - mean) ** 2

  return Math.round(Math.sqrt(squaredDeviation / count))
}
177
182
 
@@ -190,14 +195,14 @@ export const getJitter = (r) => {
190
195
  // Model B: avg 400ms, p95 650ms (boringly consistent) → score ~85
191
196
  // In real usage, Model B FEELS faster because it doesn't randomly stall.
192
197
  export const getStabilityScore = (r) => {
193
- const successfulPings = (r.pings || []).filter(p => p.code === '200')
194
- if (successfulPings.length === 0) return -1
198
+ const measurablePings = (r.pings || []).filter(p => measurablePingCodes.has(p.code))
199
+ if (measurablePings.length === 0) return -1
195
200
 
196
201
  const p95 = getP95(r)
197
202
  const jitter = getJitter(r)
198
203
  const uptime = getUptime(r)
199
- const spikeCount = successfulPings.filter(p => p.ms > 3000).length
200
- const spikeRate = spikeCount / successfulPings.length
204
+ const spikeCount = measurablePings.filter(p => p.ms > 3000).length
205
+ const spikeRate = spikeCount / measurablePings.length
201
206
 
202
207
  // 📖 Normalize each component to 0–100 (higher = better)
203
208
  const p95Score = Math.max(0, Math.min(100, 100 * (1 - p95 / 5000)))