free-coding-models 0.1.82 → 0.1.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -40
- package/bin/free-coding-models.js +676 -66
- package/lib/account-manager.js +600 -0
- package/lib/config.js +122 -0
- package/lib/error-classifier.js +154 -0
- package/lib/log-reader.js +174 -0
- package/lib/model-merger.js +78 -0
- package/lib/opencode-sync.js +159 -0
- package/lib/provider-quota-fetchers.js +319 -0
- package/lib/proxy-server.js +543 -0
- package/lib/quota-capabilities.js +79 -0
- package/lib/request-transformer.js +180 -0
- package/lib/token-stats.js +242 -0
- package/lib/usage-reader.js +203 -0
- package/lib/utils.js +55 -0
- package/package.json +1 -1
- package/sources.js +3 -2
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* request-transformer.js
|
|
3
|
+
*
|
|
4
|
+
* Utilities for transforming outgoing API request bodies before they are
|
|
5
|
+
* forwarded to a model provider:
|
|
6
|
+
* - applyThinkingBudget — control Anthropic-style "thinking" budget
|
|
7
|
+
* - compressContext — reduce prompt size at increasing compression levels
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Internal helpers
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Count the total characters contributed by a single message.
|
|
16
|
+
* Handles both plain-string content and array-of-blocks content.
|
|
17
|
+
*
|
|
18
|
+
* @param {object} msg
|
|
19
|
+
* @returns {number}
|
|
20
|
+
*/
|
|
21
|
+
function messageCharCount(msg) {
|
|
22
|
+
if (typeof msg.content === 'string') return msg.content.length
|
|
23
|
+
if (Array.isArray(msg.content)) {
|
|
24
|
+
return msg.content.reduce((sum, block) => {
|
|
25
|
+
if (typeof block === 'string') return sum + block.length
|
|
26
|
+
if (block.type === 'text') return sum + (block.text?.length || 0)
|
|
27
|
+
if (block.type === 'thinking') return sum + (block.thinking?.length || 0)
|
|
28
|
+
return sum
|
|
29
|
+
}, 0)
|
|
30
|
+
}
|
|
31
|
+
return 0
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// applyThinkingBudget
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Attach (or omit) an Anthropic-style "thinking" budget to the request body.
 *
 * Modes:
 *   'passthrough' — return a shallow copy of body with no changes
 *   'custom'      — add thinking: { budget_tokens: config.budget_tokens }
 *   'auto'        — add thinking only when the total prompt is > 2 000 chars;
 *                   budget is proportional: min(totalChars * 2, 32 000)
 *
 * The original body is NEVER mutated.
 *
 * @param {object} body - The request body (OpenAI-compatible shape)
 * @param {{ mode: string, budget_tokens?: number }} config
 * @returns {object} - A new body object
 */
export function applyThinkingBudget(body, config) {
  const { mode } = config

  if (mode === 'custom') {
    return { ...body, thinking: { budget_tokens: config.budget_tokens } }
  }

  if (mode === 'auto') {
    const messages = Array.isArray(body.messages) ? body.messages : []
    // Reuse the shared counter so 'auto' sizing agrees with compressContext.
    // (The previous inline reducer counted .text/.thinking on blocks of ANY
    // type, silently diverging from messageCharCount's type-gated counting.)
    const totalChars = messages.reduce((sum, msg) => sum + messageCharCount(msg), 0)

    if (totalChars > 2000) {
      const budget_tokens = Math.min(Math.floor(totalChars * 2), 32000)
      return { ...body, thinking: { budget_tokens } }
    }
    return { ...body }
  }

  // 'passthrough' or unknown mode — shallow copy, no changes
  return { ...body }
}
|
|
88
|
+
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// compressContext
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
/**
 * Reduce the size of the messages array at increasing compression levels.
 *
 * Levels:
 *   0 — no change (shallow copy of array)
 *   1 — truncate tool-result messages whose content exceeds toolResultMaxChars
 *   2 — L1 + truncate thinking blocks in assistant messages
 *   3 — L2 + drop oldest non-system messages when total chars exceed maxTotalChars
 *
 * The original messages array and its objects are NEVER mutated.
 *
 * @param {object[]} messages
 * @param {{
 *   level?: number,
 *   toolResultMaxChars?: number,
 *   thinkingMaxChars?: number,
 *   maxTotalChars?: number
 * }} opts
 * @returns {object[]}
 */
export function compressContext(messages, opts = {}) {
  const {
    level = 0,
    toolResultMaxChars = 4000,
    thinkingMaxChars = 1000,
    maxTotalChars = 100000,
  } = opts

  if (level === 0) {
    return [...messages]
  }

  // L1: trim oversized tool results (copy-on-write — untouched messages keep identity)
  let result = messages.map(msg => {
    if (
      msg.role === 'tool' &&
      typeof msg.content === 'string' &&
      msg.content.length > toolResultMaxChars
    ) {
      return {
        ...msg,
        content: msg.content.slice(0, toolResultMaxChars) + '\n[truncated]',
      }
    }
    return msg
  })

  if (level === 1) {
    return result
  }

  // L2: trim thinking blocks in assistant messages
  result = result.map(msg => {
    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg
    const newContent = msg.content.map(block => {
      if (
        block.type === 'thinking' &&
        typeof block.thinking === 'string' &&
        block.thinking.length > thinkingMaxChars
      ) {
        return { ...block, thinking: block.thinking.slice(0, thinkingMaxChars) }
      }
      return block
    })
    // Only create a new message object when something actually changed
    const changed = newContent.some((b, i) => b !== msg.content[i])
    return changed ? { ...msg, content: newContent } : msg
  })

  if (level === 2) {
    return result
  }

  // L3: drop oldest non-system messages until total chars is within budget.
  // Always preserve: every 'system' message, and the last message in the array.
  // Maintain a running total instead of re-summing the whole array on every
  // drop iteration (the previous version was O(n²) on long histories).
  let total = result.reduce((sum, msg) => sum + messageCharCount(msg), 0)

  while (total > maxTotalChars && result.length > 1) {
    // Find the first droppable message: not 'system', not the last one
    const dropIdx = result.findIndex(
      (msg, idx) => msg.role !== 'system' && idx !== result.length - 1
    )
    if (dropIdx === -1) break // nothing left to drop
    total -= messageCharCount(result[dropIdx])
    result = [...result.slice(0, dropIdx), ...result.slice(dropIdx + 1)]
  }

  return result
}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/token-stats.js
|
|
3
|
+
* @description Persistent token usage tracking for the multi-account proxy.
|
|
4
|
+
*
|
|
5
|
+
* Records per-account and per-model token usage, hourly/daily aggregates,
|
|
6
|
+
* an in-memory ring buffer of the 100 most-recent requests, and an
|
|
7
|
+
* append-only JSONL log file for detailed history.
|
|
8
|
+
*
|
|
9
|
+
* Storage locations:
|
|
10
|
+
* ~/.free-coding-models/token-stats.json — aggregated stats (auto-saved every 10 records)
|
|
11
|
+
* ~/.free-coding-models/request-log.jsonl — timestamped per-request log (pruned after 30 days)
|
|
12
|
+
*
|
|
13
|
+
* @exports TokenStats
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { readFileSync, writeFileSync, appendFileSync, mkdirSync, existsSync } from 'node:fs'
|
|
17
|
+
import { join } from 'node:path'
|
|
18
|
+
import { homedir } from 'node:os'
|
|
19
|
+
|
|
20
|
+
const DEFAULT_DATA_DIR = join(homedir(), '.free-coding-models')
// Maximum number of recent requests kept in the in-memory ring buffer.
const MAX_RING_BUFFER = 100
// JSONL request-log entries older than this many days are pruned.
const RETENTION_DAYS = 30

/**
 * Convert a timestamp of unknown shape to epoch milliseconds.
 *
 * Accepts a finite number (already epoch ms), a numeric string, or any
 * Date.parse-able string.
 *
 * @param {*} value
 * @returns {number|null} epoch milliseconds, or null when unparseable
 */
function timestampToMillis(value) {
  if (typeof value === 'number' && Number.isFinite(value)) return value
  // Fix: Number('') and Number('   ') coerce to 0, which previously made
  // blank timestamp strings parse as the 1970 epoch instead of null.
  if (typeof value === 'string' && value.trim() !== '') {
    const numeric = Number(value)
    if (Number.isFinite(numeric)) return numeric
    const parsed = Date.parse(value)
    if (!Number.isNaN(parsed)) return parsed
  }
  return null
}
|
|
34
|
+
|
|
35
|
+
export class TokenStats {
  /**
   * @param {{ dataDir?: string }} [opts]
   *   dataDir — override the default ~/.free-coding-models directory (used in tests)
   */
  constructor({ dataDir } = {}) {
    this._dataDir = dataDir || DEFAULT_DATA_DIR
    this._statsFile = join(this._dataDir, 'token-stats.json')
    this._logFile = join(this._dataDir, 'request-log.jsonl')
    this._stats = {
      byAccount: {},
      byModel: {},
      hourly: {},
      daily: {},
      quotaSnapshots: { byAccount: {}, byModel: {}, byProvider: {} },
    }
    this._ringBuffer = []
    this._recordsSinceLastSave = 0
    this._load()
    // Prune the JSONL log off the construction hot path.
    setImmediate(() => this._pruneOldLogs())
  }

  /**
   * Load token-stats.json from disk (creating the data dir if needed) and
   * backfill every missing top-level section so later code can assume all
   * sections exist.
   */
  _load() {
    try {
      mkdirSync(this._dataDir, { recursive: true })
      if (existsSync(this._statsFile)) {
        this._stats = JSON.parse(readFileSync(this._statsFile, 'utf8'))
      }
    } catch { /* missing or corrupt file — start fresh */ }
    // Backward compat for older files: previously only quotaSnapshots was
    // backfilled, so a stats file missing byAccount/byModel/hourly/daily
    // would crash record() with a TypeError on first use.
    if (!this._stats || typeof this._stats !== 'object') this._stats = {}
    for (const key of ['byAccount', 'byModel', 'hourly', 'daily']) {
      if (!this._stats[key] || typeof this._stats[key] !== 'object') this._stats[key] = {}
    }
    if (!this._stats.quotaSnapshots || typeof this._stats.quotaSnapshots !== 'object') {
      this._stats.quotaSnapshots = { byAccount: {}, byModel: {}, byProvider: {} }
    }
    if (!this._stats.quotaSnapshots.byAccount) this._stats.quotaSnapshots.byAccount = {}
    if (!this._stats.quotaSnapshots.byModel) this._stats.quotaSnapshots.byModel = {}
    if (!this._stats.quotaSnapshots.byProvider) this._stats.quotaSnapshots.byProvider = {}
  }

  /**
   * Rewrite request-log.jsonl keeping only entries newer than RETENTION_DAYS.
   * Lines that fail to parse, or whose timestamp is unparseable, are dropped.
   * Best-effort: any I/O error is swallowed.
   */
  _pruneOldLogs() {
    try {
      if (!existsSync(this._logFile)) return
      const cutoff = Date.now() - RETENTION_DAYS * 86400000
      const lines = readFileSync(this._logFile, 'utf8').split('\n').filter(Boolean)
      const kept = lines.filter(line => {
        try {
          const millis = timestampToMillis(JSON.parse(line).timestamp)
          return millis !== null && millis >= cutoff
        } catch {
          return false
        }
      })
      writeFileSync(this._logFile, kept.join('\n') + (kept.length ? '\n' : ''))
    } catch { /* ignore */ }
  }

  /**
   * Record a single request's token usage.
   *
   * Updates per-account, per-model, hourly and daily aggregates, appends to
   * the in-memory ring buffer and the JSONL log, and auto-saves the stats
   * file every 10 records.
   *
   * @param {{ accountId: string, modelId: string, providerKey?: string, statusCode?: number|string, requestType?: string, promptTokens?: number, completionTokens?: number, latencyMs?: number, success?: boolean }} entry
   */
  record(entry) {
    const {
      accountId,
      modelId,
      providerKey = 'unknown',
      statusCode = 200,
      requestType = 'chat.completions',
      promptTokens = 0,
      completionTokens = 0,
      latencyMs = 0,
      success = true,
    } = entry
    const totalTokens = promptTokens + completionTokens
    const now = new Date()
    const hourKey = now.toISOString().slice(0, 13) // e.g. '2024-01-02T15'
    const dayKey = now.toISOString().slice(0, 10)  // e.g. '2024-01-02'

    // By account
    const acct = this._stats.byAccount[accountId] ||= { requests: 0, tokens: 0, errors: 0 }
    acct.requests++
    acct.tokens += totalTokens
    if (!success) acct.errors++

    // By model
    const model = this._stats.byModel[modelId] ||= { requests: 0, tokens: 0 }
    model.requests++
    model.tokens += totalTokens

    // Hourly
    this._stats.hourly[hourKey] ||= { requests: 0, tokens: 0 }
    this._stats.hourly[hourKey].requests++
    this._stats.hourly[hourKey].tokens += totalTokens

    // Daily
    this._stats.daily[dayKey] ||= { requests: 0, tokens: 0 }
    this._stats.daily[dayKey].requests++
    this._stats.daily[dayKey].tokens += totalTokens

    // Ring buffer (newest at end)
    this._ringBuffer.push({ ...entry, timestamp: now.toISOString() })
    if (this._ringBuffer.length > MAX_RING_BUFFER) this._ringBuffer.shift()

    // JSONL log (best-effort; failures must never break request handling)
    try {
      const logEntry = {
        timestamp: now.toISOString(),
        accountId,
        modelId,
        providerKey,
        statusCode,
        requestType,
        promptTokens,
        completionTokens,
        latencyMs,
        success,
      }
      appendFileSync(this._logFile, JSON.stringify(logEntry) + '\n')
    } catch { /* ignore */ }

    // Auto-save every 10 records
    this._recordsSinceLastSave++
    if (this._recordsSinceLastSave >= 10) this.save()
  }

  /**
   * Persist aggregated stats to token-stats.json and reset the auto-save
   * counter. Best-effort: I/O errors are swallowed.
   */
  save() {
    try {
      mkdirSync(this._dataDir, { recursive: true })
      writeFileSync(this._statsFile, JSON.stringify(this._stats, null, 2))
      this._recordsSinceLastSave = 0
    } catch { /* ignore */ }
  }

  /**
   * Persist a quota snapshot for a single account.
   * Also recomputes the per-model aggregate quota if modelId is provided.
   * Tracks latest provider-level quota snapshot when providerKey is provided.
   *
   * Quota snapshots are lightweight (not per-request) and are written to
   * token-stats.json immediately so the TUI can read them without waiting
   * for the next 10-record auto-save cycle.
   *
   * @param {string} accountId
   * @param {{ quotaPercent: number, providerKey?: string, modelId?: string, updatedAt?: string }} opts
   */
  updateQuotaSnapshot(accountId, { quotaPercent, providerKey, modelId, updatedAt } = {}) {
    const snap = {
      quotaPercent,
      updatedAt: updatedAt || new Date().toISOString(),
    }
    if (providerKey !== undefined) snap.providerKey = providerKey
    if (modelId !== undefined) snap.modelId = modelId

    this._stats.quotaSnapshots.byAccount[accountId] = snap

    if (modelId !== undefined) {
      this._recomputeModelQuota(modelId)
    }

    if (providerKey !== undefined) {
      this._stats.quotaSnapshots.byProvider[providerKey] = {
        quotaPercent,
        updatedAt: snap.updatedAt,
      }
    }

    // Persist immediately (quota data must be fresh for TUI reads)
    this.save()
  }

  /**
   * Recompute the per-model quota snapshot by averaging all account snapshots
   * that share the given modelId. No-op when no account snapshot references
   * the model.
   *
   * @param {string} modelId
   */
  _recomputeModelQuota(modelId) {
    const accountSnaps = Object.values(this._stats.quotaSnapshots.byAccount)
      .filter(s => s.modelId === modelId)

    if (accountSnaps.length === 0) return

    const avgPercent = Math.round(
      accountSnaps.reduce((sum, s) => sum + s.quotaPercent, 0) / accountSnaps.length
    )
    // ISO-8601 strings compare lexicographically in chronological order.
    const latestUpdatedAt = accountSnaps.reduce(
      (latest, s) => (s.updatedAt > latest ? s.updatedAt : latest),
      accountSnaps[0].updatedAt
    )

    this._stats.quotaSnapshots.byModel[modelId] = {
      quotaPercent: avgPercent,
      updatedAt: latestUpdatedAt,
    }
  }

  /**
   * Return a summary snapshot including the 10 most-recent requests.
   *
   * @returns {{ byAccount: object, byModel: object, hourly: object, daily: object, recentRequests: object[] }}
   */
  getSummary() {
    return {
      ...this._stats,
      recentRequests: this._ringBuffer.slice(-10),
    }
  }
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/usage-reader.js
|
|
3
|
+
* @description Pure functions to read model quota usage from token-stats.json.
|
|
4
|
+
*
|
|
5
|
+
* Designed for TUI consumption: reads the pre-computed `quotaSnapshots.byModel`
|
|
6
|
+
* section from the JSON file written by TokenStats. Never reads the JSONL log.
|
|
7
|
+
*
|
|
8
|
+
* All functions are pure (no shared mutable state) and handle missing/malformed
|
|
9
|
+
* files gracefully by returning safe fallback values.
|
|
10
|
+
*
|
|
11
|
+
* Default path: ~/.free-coding-models/token-stats.json
|
|
12
|
+
*
|
|
13
|
+
* ## Freshness contract
|
|
14
|
+
* Usage snapshots carry an `updatedAt` ISO timestamp. Any entry whose
|
|
15
|
+
* `updatedAt` is older than SNAPSHOT_TTL_MS (30 minutes) is excluded and
|
|
16
|
+
* treated as `N/A` by the UI. Entries that predate this feature (no
|
|
17
|
+
* `updatedAt` field) are included for backward compatibility.
|
|
18
|
+
*
|
|
19
|
+
* ## Parse cache
|
|
20
|
+
* `loadUsageSnapshot` maintains a module-level in-memory cache keyed by the
|
|
21
|
+
* resolved stats-file path. Each cache entry is valid for CACHE_TTL_MS
|
|
22
|
+
* (500 ms – 1 000 ms). This avoids redundant synchronous disk reads when the
|
|
23
|
+
* TUI rerenders multiple times within the same tick or across a few frames.
|
|
24
|
+
* The 30-minute data-freshness filter (SNAPSHOT_TTL_MS) is applied every time
|
|
25
|
+
* the snapshot is parsed — caching never bypasses it.
|
|
26
|
+
*
|
|
27
|
+
* Use `clearUsageCache()` to evict all entries (useful in tests).
|
|
28
|
+
*
|
|
29
|
+
* @exports SNAPSHOT_TTL_MS
|
|
30
|
+
* @exports CACHE_TTL_MS
|
|
31
|
+
* @exports clearUsageCache
|
|
32
|
+
* @exports loadUsageSnapshot
|
|
33
|
+
* @exports loadUsageMap
|
|
34
|
+
* @exports usageForModelId
|
|
35
|
+
* @exports usageForRow
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import { readFileSync, existsSync } from 'node:fs'
|
|
39
|
+
import { join } from 'node:path'
|
|
40
|
+
import { homedir } from 'node:os'
|
|
41
|
+
|
|
42
|
+
const DEFAULT_STATS_FILE = join(homedir(), '.free-coding-models', 'token-stats.json')

/**
 * Data-freshness TTL for quota snapshots, in milliseconds (30 minutes).
 * Entries whose `updatedAt` is older than this are considered stale and
 * filtered out; the UI shows them as `N/A`.
 */
export const SNAPSHOT_TTL_MS = 30 * 60 * 1000

/**
 * Lifetime of a parsed-snapshot cache entry, in milliseconds (750 ms).
 * Repeated loadUsageSnapshot calls with the same path inside this window
 * reuse the parsed result instead of re-reading the file.
 */
export const CACHE_TTL_MS = 750

/**
 * Parse cache: resolved stats-file path → { snapshot, expiresAt }.
 * @type {Map<string, { snapshot: { byModel: Record<string, number>, byProvider: Record<string, number> }, expiresAt: number }>}
 */
const _cache = new Map()

/**
 * Drop every cached parse result so the next loadUsageSnapshot call hits
 * disk again. Mainly useful in tests.
 */
export function clearUsageCache() {
  _cache.clear()
}

/**
 * Decide whether a snapshot entry is fresh enough to display.
 *
 * - No `updatedAt` (pre-feature entry) → fresh, for backward compatibility.
 * - Unparseable `updatedAt` → fresh (be generous).
 * - Otherwise fresh iff strictly less than SNAPSHOT_TTL_MS old.
 *
 * @param {{ updatedAt?: string }} entry
 * @param {number} [nowMs] - current time in ms, injectable for tests
 * @returns {boolean}
 */
function isSnapshotFresh(entry, nowMs = Date.now()) {
  const stamp = entry?.updatedAt
  if (typeof stamp !== 'string') return true // legacy entry without timestamp
  const updatedMs = Date.parse(stamp)
  if (Number.isNaN(updatedMs)) return true // unparseable — keep it
  return nowMs - updatedMs < SNAPSHOT_TTL_MS
}
|
|
90
|
+
|
|
91
|
+
/**
 * Load token-stats.json and return model/provider usage maps.
 * Entries with stale `updatedAt` (older than SNAPSHOT_TTL_MS) are excluded.
 *
 * A module-level cache avoids re-reading the file when called repeatedly
 * within CACHE_TTL_MS; the 30-minute freshness filter is re-applied on every
 * parse, so caching never resurrects stale data.
 *
 * @param {string} [statsFile]
 * @returns {{ byModel: Record<string, number>, byProvider: Record<string, number> }}
 */
export function loadUsageSnapshot(statsFile = DEFAULT_STATS_FILE) {
  const now = Date.now()

  // Fast path: a still-valid cached parse for this path
  const hit = _cache.get(statsFile)
  if (hit !== undefined && hit.expiresAt > now) {
    return hit.snapshot
  }

  // Slow path: parse from disk and refresh the cache entry
  const snapshot = _parseSnapshot(statsFile, now)
  _cache.set(statsFile, { snapshot, expiresAt: now + CACHE_TTL_MS })
  return snapshot
}
|
|
115
|
+
|
|
116
|
+
/**
 * Internal: read and parse token-stats.json without caching.
 *
 * Keeps only entries whose `quotaPercent` is a finite number and whose
 * snapshot passes the isSnapshotFresh TTL check. Returns empty maps on any
 * error (missing file, bad JSON, missing keys).
 *
 * @param {string} statsFile
 * @param {number} now - current time in ms (for freshness checks)
 * @returns {{ byModel: Record<string, number>, byProvider: Record<string, number> }}
 */
function _parseSnapshot(statsFile, now) {
  // Shared extraction — the byModel and byProvider loops were previously
  // byte-for-byte duplicates; one helper keeps the filter rules in one place.
  const pickFresh = (src) => {
    const out = {}
    if (src && typeof src === 'object') {
      for (const [key, entry] of Object.entries(src)) {
        if (
          entry &&
          typeof entry.quotaPercent === 'number' &&
          Number.isFinite(entry.quotaPercent) &&
          isSnapshotFresh(entry, now)
        ) {
          out[key] = entry.quotaPercent
        }
      }
    }
    return out
  }

  try {
    if (!existsSync(statsFile)) return { byModel: {}, byProvider: {} }
    const data = JSON.parse(readFileSync(statsFile, 'utf8'))

    return {
      byModel: pickFresh(data?.quotaSnapshots?.byModel),
      byProvider: pickFresh(data?.quotaSnapshots?.byProvider),
    }
  } catch {
    return { byModel: {}, byProvider: {} }
  }
}
|
|
159
|
+
|
|
160
|
+
/**
 * Convenience wrapper: modelId → quotaPercent map from token-stats.json.
 *
 * Includes only models with a finite `quotaPercent` and a fresh snapshot
 * (within SNAPSHOT_TTL_MS); yields an empty object on any error.
 *
 * @param {string} [statsFile] - Path to token-stats.json (defaults to ~/.free-coding-models/token-stats.json)
 * @returns {Record<string, number>} e.g. { 'claude-3-5': 80, 'gpt-4o': 45 }
 */
export function loadUsageMap(statsFile = DEFAULT_STATS_FILE) {
  const { byModel } = loadUsageSnapshot(statsFile)
  return byModel
}
|
|
173
|
+
|
|
174
|
+
/**
 * Quota percent remaining for one model, or null when the model has no
 * snapshot or its snapshot is stale.
 *
 * @param {string} modelId
 * @param {string} [statsFile] - Path to token-stats.json (defaults to ~/.free-coding-models/token-stats.json)
 * @returns {number | null} quota percent (0–100), or null if unknown/stale
 */
export function usageForModelId(modelId, statsFile = DEFAULT_STATS_FILE) {
  const percent = loadUsageMap(statsFile)[modelId]
  return percent === undefined ? null : percent
}
|
|
187
|
+
|
|
188
|
+
/**
 * Quota percent for a table row: prefer the model-level snapshot, fall back
 * to the provider-level one, return null when both are absent or stale.
 * Freshness of each snapshot kind is evaluated independently at parse time.
 *
 * @param {string} providerKey
 * @param {string} modelId
 * @param {string} [statsFile]
 * @returns {number | null}
 */
export function usageForRow(providerKey, modelId, statsFile = DEFAULT_STATS_FILE) {
  const { byModel, byProvider } = loadUsageSnapshot(statsFile)
  const modelPercent = byModel[modelId]
  if (modelPercent !== undefined) return modelPercent
  const providerPercent = byProvider[providerKey]
  if (providerPercent !== undefined) return providerPercent
  return null
}
|
package/lib/utils.js
CHANGED
|
@@ -310,6 +310,12 @@ export const sortResults = (results, sortColumn, sortDirection) => {
|
|
|
310
310
|
// 📖 Models with no data (-1) sort to the bottom
|
|
311
311
|
cmp = getStabilityScore(a) - getStabilityScore(b)
|
|
312
312
|
break
|
|
313
|
+
case 'usage':
|
|
314
|
+
// 📖 Sort by quota usage percent (usagePercent numeric field, 0–100)
|
|
315
|
+
// 📖 Models with no usage data (undefined/null) are treated as 0 — stable tie-break
|
|
316
|
+
// 📖 via JS stable sort preserving original order when values are equal
|
|
317
|
+
cmp = (a.usagePercent ?? 0) - (b.usagePercent ?? 0)
|
|
318
|
+
break
|
|
313
319
|
}
|
|
314
320
|
|
|
315
321
|
// 📖 Flip comparison for descending order
|
|
@@ -598,3 +604,52 @@ export function getTopRecommendations(results, taskType, priority, contextBudget
|
|
|
598
604
|
|
|
599
605
|
return scored.slice(0, topN)
|
|
600
606
|
}
|
|
607
|
+
|
|
608
|
+
/**
 * 📖 getProxyStatusInfo: Pure function that maps startup proxy status + active proxy state
 * 📖 to a normalised descriptor object consumed by the TUI footer indicator.
 *
 * 📖 Priority of evaluation:
 *   1. proxyStartupStatus.phase === 'starting' → state:'starting'
 *   2. proxyStartupStatus.phase === 'running'  → state:'running' with port/accountCount
 *   3. proxyStartupStatus.phase === 'failed'   → state:'failed' with truncated reason
 *   4. isProxyActive (legacy activeProxy flag) → state:'running' (no port detail)
 *   5. otherwise                               → state:'stopped'
 *
 * 📖 Reason is coerced to a string and clamped to 80 characters to keep the
 * 📖 footer readable (no stack traces, no non-string objects).
 *
 * @param {object|null} proxyStartupStatus — state.proxyStartupStatus value
 * @param {boolean} isProxyActive — truthy when the module-level activeProxy is non-null
 * @returns {{ state: string, port?: number, accountCount?: number, reason?: string }}
 */
export function getProxyStatusInfo(proxyStartupStatus, isProxyActive) {
  const MAX_REASON = 80

  if (proxyStartupStatus) {
    const { phase } = proxyStartupStatus
    if (phase === 'starting') {
      return { state: 'starting' }
    }
    if (phase === 'running') {
      return {
        state: 'running',
        port: proxyStartupStatus.port,
        accountCount: proxyStartupStatus.accountCount,
      }
    }
    if (phase === 'failed') {
      // 📖 Coerce before measuring: a non-string reason (e.g. an Error object)
      // 📖 has no usable .length and previously bypassed truncation entirely,
      // 📖 leaking a non-string value into the footer.
      const raw = String(proxyStartupStatus.reason ?? 'unknown error')
      return {
        state: 'failed',
        reason: raw.length > MAX_REASON ? raw.slice(0, MAX_REASON - 1) + '…' : raw,
      }
    }
  }

  // 📖 Legacy fallback: activeProxy set directly (e.g. from manual proxy start without startup status)
  if (isProxyActive) {
    return { state: 'running' }
  }

  return { state: 'stopped' }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.83",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|