free-coding-models 0.3.75 → 0.3.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -379,18 +379,14 @@ Press **`Z`** in the TUI to cycle between tools without restarting.
379
379
 
380
380
  ### OpenCode Zen Free Models
381
381
 
382
- [OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering 8 free coding models exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
382
+ [OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering **4 free coding models** exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
383
383
 
384
384
  | Model | Tier | SWE-bench | Context |
385
385
  |-------|------|-----------|---------|
386
386
  | Big Pickle | S+ | 72.0% | 200k |
387
- | MiniMax M2.5 Free | S+ | 80.2% | 200k |
388
- | Nemotron 3 Super Free | A+ | 52.0% | 1M |
389
- | GPT 5 Nano | S | 65.0% | 400k |
390
- | HY3 Preview Free | A+ | - | 128k |
391
- | Ling 2.6 Flash Free | S | - | 128k |
392
- | Trinity Large Preview Free | S | - | 128k |
393
- | Trinity Mini Preview Free | A | - | 128k |
387
+ | DeepSeek V4 Flash Free | S+ | 79.0% | 200k |
388
+ | MiMo-V2.5 Free | S+ | - | 200k |
389
+ | Nemotron 3 Super Free | A+ | 52.0% | 200k |
394
390
 
395
391
  To use Zen models: sign up at [opencode.ai/auth](https://opencode.ai/auth) and enter your Zen API key via `P` (Settings). Zen models appear in the main table and auto-switch to OpenCode CLI on launch.
396
392
 
@@ -0,0 +1,4 @@
1
+ # Changelog v0.3.76 - 2026-05-28
2
+
3
+ ### Fixed
4
+ - **ZAI and Zen providers removed from install blocklist** — `opencode-zen` and `zai` were in `DIRECT_INSTALL_UNSUPPORTED_PROVIDERS` preventing auto-install into tools like Pi, Aider, Cline, etc. Both providers use OpenAI-compatible endpoints and should be installable everywhere. Only `replicate` (non-standard `/v1/predictions`), `rovo` and `gemini` (CLI-only, no API endpoint) remain blocked.
@@ -0,0 +1,12 @@
1
+ # Changelog v0.3.77 - 2026-05-28
2
+
3
+ ### Fixed
4
+ - **OpenCode Zen model catalog sync** — removed 3 models that are not (or are no longer) free and added 1 new free model:
5
+ - ❌ Removed `qwen3.6-plus-free` — free promotion ended, now requires OpenCode Go subscription or PAYG
6
+ - ❌ Removed `minimax-m2.5-free` — 7-day launch promo expired, now paid
7
+ - ❌ Removed `gpt-5-nano` — was incorrectly cataloged as free, actually requires Zen PAYG
8
+ - ✅ Added `mimo-v2.5-free` — newly confirmed free on Zen
9
+ - Result: Zen free catalog updated from 6 → 4 confirmed free models
10
+
11
+ ### Changed
12
+ - Updated README Zen models table to reflect the current 4-model free tier
@@ -0,0 +1,14 @@
1
+ # Changelog v0.3.78 - 2026-05-28
2
+
3
+ ### Added
4
+ - **Real-Answer Benchmark feature** — Press `Ctrl+A` to run a live completion benchmark on the currently selected model. Measures actual wall-clock response time and tokens-per-second (TPS) with a real chat completion request (`"Why is the sky blue? Answer in exactly one short sentence."`). Results appear in the new **Answer Speed** column as `4.3s / 13 TPS`.
5
+ - New `Answer Speed` column in the TUI table, positioned after the Uptime column. Defaults to `—`, shows a green spinner while benchmarking, and displays compact error codes (`ERR`, `TIMEOUT`, `401`, `429`) on failure.
6
+ - New module `src/benchmark.js` with lightweight, native Node.js benchmark logic:
7
+ - `benchmarkModel({ apiKey, modelId, providerKey, url })` — sends one completion, measures time, parses `usage.completion_tokens` with a `Math.ceil(outputText.length / 4)` fallback clearly labeled as an estimate.
8
+ - Guards against division-by-zero and unsupported providers (rovo, gemini, opencode-zen return `UNSUPPORTED`).
9
+ - Respects existing API key handling and fails gracefully on missing credentials, rate limits, or timeouts.
10
+ - Benchmark state stored in TUI state keyed by `${providerKey}/${modelId}`, so results survive re-renders and table refreshes.
11
+ - `Ctrl+A` documented in the Help overlay (`I` key).
12
+
13
+ ### Changed
14
+ - Responsive column breakpoints adjusted to accommodate the new `Answer Speed` column (14 cols). Progressive hiding order: Rank → Answer Speed → Up% → Tier → Stability. Compact mode still active at ~163+ cols.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.3.75",
3
+ "version": "0.3.77",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -355,11 +355,12 @@ export const gemini = [
355
355
  // 📖 Config: set provider to opencode/<model-id> in OpenCode config
356
356
  export const opencodeZen = [
357
357
  ['big-pickle', 'Big Pickle', 'S+', '72.0%', '200k'],
358
- ['minimax-m2.5-free', 'MiniMax M2.5 Free', 'S+', '80.2%', '200k'],
359
358
  ['deepseek-v4-flash-free', 'DeepSeek V4 Flash Free', 'S+', '79.0%', '200k'],
360
- ['qwen3.6-plus-free', 'Qwen3.6 Plus Free', 'S+', '78.8%', '1M'],
359
+ ['mimo-v2.5-free', 'MiMo-V2.5 Free', 'S+', '-', '200k'],
361
360
  ['nemotron-3-super-free', 'Nemotron 3 Super Free', 'A+', '52.0%', '200k'],
362
- ['gpt-5-nano', 'GPT 5 Nano', 'S', '65.0%', '400k'],
361
+ // Removed (2026-05-28): qwen3.6-plus-free (free promotion ended — now requires OpenCode Go)
362
+ // Removed (2026-05-28): minimax-m2.5-free (7-day launch promo expired — now paid)
363
+ // Removed (2026-05-28): gpt-5-nano (was never free — incorrectly cataloged as free, requires Zen PAYG)
363
364
  // Removed (2026-05-26): hy3-preview-free (deleted from Zen)
364
365
  // Removed (2026-05-26): ling-2.6-flash-free (deleted from Zen)
365
366
  // Removed (2026-05-26): trinity-mini-free (deleted from Zen)
package/src/app.js CHANGED
@@ -845,6 +845,8 @@ export async function runApp(cliArgs, config) {
845
845
  verdictFilterMode: state.verdictFilterMode,
846
846
  healthFilterMode: state.healthFilterMode,
847
847
  bestModeOnly: state.bestModeOnly,
848
+ benchmarkResults: state.benchmarkResults,
849
+ benchmarkRunning: state.benchmarkRunning,
848
850
  }
849
851
  if (state.commandPaletteOpen) {
850
852
  if (!state.commandPaletteFrozenTable) {
@@ -932,6 +934,8 @@ export async function runApp(cliArgs, config) {
932
934
  verdictFilterMode: state.verdictFilterMode,
933
935
  healthFilterMode: state.healthFilterMode,
934
936
  bestModeOnly: state.bestModeOnly,
937
+ benchmarkResults: state.benchmarkResults,
938
+ benchmarkRunning: state.benchmarkRunning,
935
939
  }))
936
940
  if (process.stdout.isTTY) {
937
941
  process.stdout.flush && process.stdout.flush()
@@ -0,0 +1,241 @@
1
+ /**
2
+ * @file benchmark.js
3
+ * @description Real-answer benchmark for measuring model response speed and throughput.
4
+ *
5
+ * @details
6
+ * This module sends a single small chat completion to a model and measures:
7
+ * - Total wall-clock response time (ms)
8
+ * - Output tokens generated
9
+ * - Tokens per second (TPS)
10
+ *
11
+ * 🎯 Key features:
12
+ * - Provider-specific request building (done locally in buildBenchmarkRequest; buildPingRequest is imported but not called)
13
+ * - Async benchmark with timeout and abort controller
14
+ * - Prefers `usage.completion_tokens` from the API response
15
+ * - Falls back to character-length estimate when usage is missing
16
+ * - Returns structured success/failure objects for TUI consumption
17
+ *
18
+ * → Functions:
19
+ * - `buildBenchmarkRequest`: Build provider-specific benchmark request
20
+ * - `benchmarkModel`: Run a single benchmark and return timing + token metrics
21
+ * - `formatBenchmarkResult`: Format a benchmark result for the TUI column
22
+ * - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
23
+ *
24
+ * 📦 Dependencies:
25
+ * - ./ping.js: buildPingRequest, resolveCloudflareUrl
26
+ *
27
+ * @see {@link ./ping.js} Provider-specific request building
28
+ * @see {@link ./render-table.js} Answer Speed column rendering
29
+ */
30
+
31
+ import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
32
+
33
// 📖 BENCHMARK_PROMPT: The single user message sent with every benchmark request.
// 📖 It asks for exactly one short sentence so runs stay quick and comparable across models.
export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one short sentence.'

// 📖 BENCHMARK_MAX_TOKENS: Upper bound on generated tokens, sent as `max_tokens`.
// 📖 Keeps verbose models from writing essays and distorting the TPS figure.
export const BENCHMARK_MAX_TOKENS = 32

// 📖 BENCHMARK_TEMPERATURE: Sent as `temperature`; zero is used to aim for repeatable answers.
export const BENCHMARK_TEMPERATURE = 0

// 📖 BENCHMARK_TIMEOUT_MS: Default time budget (milliseconds) before a benchmark is treated as failed.
export const BENCHMARK_TIMEOUT_MS = 20_000
46
+
47
// 📖 estimateTokensFromText: Rough token count used only when the API response has no usage data.
// 📖 Heuristic: one token per 4 characters, rounded up. This is an ESTIMATE — never use it for billing.
// 📖 Anything that is not a non-empty string counts as 0 tokens.
export function estimateTokensFromText(text) {
  const hasText = typeof text === 'string' && text.length > 0
  return hasText ? Math.ceil(text.length / 4) : 0
}
54
+
55
// 📖 formatBenchmarkResult: Turn a raw benchmark result into a compact display string.
// 📖 Handles all four states: running, empty, error, and success.
// 📖
// 📖   Running: one braille spinner glyph selected by `frame`
// 📖   Empty:   "—" (no result recorded yet)
// 📖   Error:   result.code ("ERR", "TIMEOUT", "401", "429", ...), or "ERR" when no code is set
// 📖   Success: "4.3s / 13 TPS"
// 📖
// 📖 @param result  Object returned by benchmarkModel(), or null/undefined when nothing ran yet.
// 📖 @param options.running  When true, the spinner is returned and `result` is ignored.
// 📖 @param options.frame    Animation frame counter used to pick the spinner glyph.
// 📖 @returns Display string for the Answer Speed column.
export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
  if (running) {
    const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
    // 📖 Normalise the counter so negative or non-finite frames still map to a glyph.
    const step = Number.isFinite(frame) ? Math.trunc(frame) : 0
    const spinIdx = ((step % spinnerFrames.length) + spinnerFrames.length) % spinnerFrames.length
    return spinnerFrames[spinIdx]
  }

  if (!result) {
    return '—'
  }

  if (!result.ok) {
    return result.code || 'ERR'
  }

  // 📖 A missing or non-finite duration is shown as 0.0s instead of "NaNs".
  const totalMs = Number.isFinite(result.totalMs) ? Math.max(0, result.totalMs) : 0

  // 📖 Round to tenths BEFORE choosing the format, so 9.96s renders as "10s" (not "10.0s")
  // 📖 and the label width stays consistent around the 10-second boundary.
  const tenths = Math.round(totalMs / 100) / 10
  const secondsLabel = tenths >= 10
    ? `${Math.round(totalMs / 1000)}s`
    : `${tenths.toFixed(1)}s`

  // 📖 Missing or non-finite throughput is displayed as 0 TPS.
  const tps = Number.isFinite(result.tokensPerSecond) ? result.tokensPerSecond : 0
  const tpsLabel = Math.round(tps)

  return `${secondsLabel} / ${tpsLabel} TPS`
}
87
+
88
// 📖 buildBenchmarkRequest: Assemble the URL, headers and JSON body for one benchmark call.
// 📖 The request is built here (the ping module's builder is not called): a single user
// 📖 message carrying BENCHMARK_PROMPT, capped at BENCHMARK_MAX_TOKENS.
// 📖
// 📖   replicate  → `Token` auth, `Prefer: wait=4`, body { version, input: { prompt, max_tokens } }
// 📖   cloudflare → `Bearer` auth, URL passed through resolveCloudflareUrl(), chat-style body
// 📖   openrouter → `Bearer` auth plus HTTP-Referer / X-Title headers, chat-style body
// 📖   others     → `Bearer` auth, chat-style body
// 📖
// 📖 The Authorization header is only set when `apiKey` is truthy.
// 📖 @returns { url, headers, body } — `body` is a plain object, not yet serialised.
export function buildBenchmarkRequest(apiKey, modelId, providerKey, url) {
  if (providerKey === 'replicate') {
    const tokenHeaders = { 'Content-Type': 'application/json', Prefer: 'wait=4' }
    if (apiKey) tokenHeaders.Authorization = `Token ${apiKey}`
    const input = { prompt: BENCHMARK_PROMPT, max_tokens: BENCHMARK_MAX_TOKENS }
    return { url, headers: tokenHeaders, body: { version: modelId, input } }
  }

  // 📖 Every remaining provider takes the same bearer-auth chat payload.
  const bearerHeaders = { 'Content-Type': 'application/json' }
  if (apiKey) bearerHeaders.Authorization = `Bearer ${apiKey}`

  let targetUrl = url
  switch (providerKey) {
    case 'cloudflare':
      targetUrl = resolveCloudflareUrl(url)
      break
    case 'openrouter':
      bearerHeaders['HTTP-Referer'] = 'https://github.com/vava-nessa/free-coding-models'
      bearerHeaders['X-Title'] = 'free-coding-models'
      break
    default:
      break
  }

  // 📖 ZAI ids carry a "zai/" prefix in the catalog; the prefix is stripped for the API call.
  const wireModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId

  return {
    url: targetUrl,
    headers: bearerHeaders,
    body: {
      model: wireModelId,
      messages: [{ role: 'user', content: BENCHMARK_PROMPT }],
      max_tokens: BENCHMARK_MAX_TOKENS,
      temperature: BENCHMARK_TEMPERATURE,
    },
  }
}
138
+
139
// 📖 benchmarkModel: Send one real completion request and measure response speed.
// 📖
// 📖 @param apiKey       Provider credential, or null/undefined to send no Authorization header.
// 📖 @param modelId      Catalog model id (passed to buildBenchmarkRequest).
// 📖 @param providerKey  Provider identifier; rovo, gemini and opencode-zen are rejected up front.
// 📖 @param url          Provider endpoint URL.
// 📖 @param timeoutMs    Abort budget in milliseconds (defaults to BENCHMARK_TIMEOUT_MS).
// 📖
// 📖 Never rejects: every failure is converted into a result object.
// 📖
// 📖 Returns on success:
// 📖   {
// 📖     ok: true,
// 📖     totalMs: 4300,            // wall clock from request start until the body is fully read
// 📖     outputTokens: 28,         // usage.completion_tokens, or a length-based ESTIMATE
// 📖     tokensPerSecond: 6.5,
// 📖     answerPreview: "The sky is blue because..."   // first 60 characters
// 📖   }
// 📖
// 📖 Returns on failure:
// 📖   {
// 📖     ok: false,
// 📖     code: "TIMEOUT" | "ERR" | "UNSUPPORTED" | "<HTTP status>" (e.g. "401", "429"),
// 📖     totalMs: 15000,
// 📖     error: "Request timed out"
// 📖   }
export async function benchmarkModel({ apiKey, modelId, providerKey, url, timeoutMs = BENCHMARK_TIMEOUT_MS }) {
  // 📖 Guard: providers this benchmark does not call.
  // 📖 NOTE(review): elsewhere in this package opencode-zen is described as OpenAI-compatible —
  // 📖 confirm whether it should stay blocked here.
  if (providerKey === 'rovo' || providerKey === 'gemini' || providerKey === 'opencode-zen') {
    return {
      ok: false,
      code: 'UNSUPPORTED',
      totalMs: 0,
      error: 'Provider does not support chat completions',
    }
  }

  const ctrl = new AbortController()
  const timer = setTimeout(() => ctrl.abort(), timeoutMs)
  const t0 = performance.now()

  try {
    const req = buildBenchmarkRequest(apiKey, modelId, providerKey, url)
    const resp = await fetch(req.url, {
      method: 'POST',
      signal: ctrl.signal,
      headers: req.headers,
      body: JSON.stringify(req.body),
    })

    // 📖 fetch() resolves once the response headers arrive; the body may still be in flight.
    // 📖 Read it BEFORE stopping the clock so totalMs covers the whole answer.
    let bodyText = ''
    let bodyError = null
    try {
      bodyText = await resp.text()
    } catch (err) {
      // 📖 Kept (not swallowed): a failed read on a 2xx response must not count as success.
      bodyError = err
    }

    const totalMs = Math.round(performance.now() - t0)

    let data = null
    try {
      data = JSON.parse(bodyText)
    } catch {
      // 📖 Non-JSON or empty body: `data` stays null and the fallbacks below apply.
    }

    // 📖 Non-2xx: return compact error code (the status wins even if the body was unreadable)
    if (!resp.ok) {
      return {
        ok: false,
        code: String(resp.status),
        totalMs,
        error: data?.error?.message || `HTTP ${resp.status}`,
      }
    }

    // 📖 2xx but the body could not be read (e.g. the timeout fired mid-download):
    // 📖 hand over to the catch block so it is reported as TIMEOUT / ERR.
    if (bodyError) throw bodyError

    // 📖 Extract generated text from an OpenAI-compatible response
    let content = data?.choices?.[0]?.message?.content || data?.choices?.[0]?.text || ''

    // 📖 Replicate-style responses carry the text in `output` (a string or a list of string chunks).
    if (!content && data?.output != null) {
      if (typeof data.output === 'string') {
        content = data.output
      } else if (Array.isArray(data.output)) {
        content = data.output.filter((part) => typeof part === 'string').join('')
      }
    }

    const answerPreview = typeof content === 'string' ? content.slice(0, 60) : ''

    // 📖 Prefer usage.completion_tokens when available
    let outputTokens = 0
    if (data?.usage?.completion_tokens != null) {
      outputTokens = Number(data.usage.completion_tokens) || 0
    } else {
      // 📖 FALLBACK: estimate from character count when API omits usage
      outputTokens = estimateTokensFromText(content)
    }

    // 📖 Guard division by zero
    const seconds = totalMs / 1000
    const tokensPerSecond = seconds > 0 ? outputTokens / seconds : 0

    return {
      ok: true,
      totalMs,
      outputTokens,
      tokensPerSecond,
      answerPreview,
    }
  } catch (err) {
    const totalMs = Math.round(performance.now() - t0)
    // 📖 Only the timer above aborts `ctrl`, so an aborted signal means the time budget ran out
    // 📖 even when the runtime surfaces the abort under a different error name.
    const isTimeout = ctrl.signal.aborted || err?.name === 'AbortError'
    return {
      ok: false,
      code: isTimeout ? 'TIMEOUT' : 'ERR',
      totalMs,
      error: isTimeout ? 'Request timed out' : (err?.message || 'Network error'),
    }
  } finally {
    clearTimeout(timer)
  }
}
@@ -48,8 +48,10 @@ import { getApiKey, saveConfig } from './config.js'
48
48
  import { ENV_VAR_NAMES, PROVIDER_METADATA } from './provider-metadata.js'
49
49
  import { getToolMeta } from './tool-metadata.js'
50
50
 
51
- // 📖 CLI-only providers (rovo, gemini) and Zen-only (opencode-zen) cannot be installed into other tools.
52
- const DIRECT_INSTALL_UNSUPPORTED_PROVIDERS = new Set(['replicate', 'zai', 'rovo', 'gemini', 'opencode-zen'])
51
+ // 📖 CLI-only providers (rovo, gemini) cannot be installed into other tools — they manage their own auth.
52
+ // 📖 replicate uses /v1/predictions (not /chat/completions), so it's not OpenAI-compatible.
53
+ // 📖 zai and opencode-zen ARE OpenAI-compatible and CAN be installed into any tool.
54
+ const DIRECT_INSTALL_UNSUPPORTED_PROVIDERS = new Set(['replicate', 'rovo', 'gemini'])
53
55
  // 📖 Install Endpoints only lists tools whose persisted config shape is actually supported here.
54
56
  // 📖 Claude Code, Codex, and Gemini stay out while their dedicated bridges are being rebuilt.
55
57
  const INSTALL_TARGET_MODES = ['opencode', 'opencode-desktop', 'opencode-web', 'openclaw', 'kilo', 'crush', 'goose', 'pi', 'aider', 'qwen', 'openhands', 'amp', 'hermes', 'continue', 'cline', 'forgecode', 'fcm_router']
@@ -54,6 +54,7 @@ import {
54
54
  restartRouterDashboardDaemon,
55
55
  toggleRouterDashboardProbePause,
56
56
  } from './router-dashboard.js'
57
+ import { benchmarkModel } from './benchmark.js'
57
58
 
58
59
  // 📖 Some providers need an explicit probe model because the first catalog entry
59
60
  // 📖 is not guaranteed to be accepted by their chat endpoint.
@@ -1026,6 +1027,42 @@ export function createKeyHandler(ctx) {
1026
1027
  saveConfig(state.config)
1027
1028
  }
1028
1029
 
1030
// 📖 runBenchmarkOnSelected: Fire a real-answer benchmark on the currently selected row.
// 📖 Triggered by Ctrl+A. Async — does not block the UI. Results are stored in state
// 📖 keyed by `${providerKey}/${modelId}` so they survive re-renders.
// 📖
// 📖 The caller invokes this with `void`, so it must never reject: every failure path
// 📖 ends in a result object written to state.benchmarkResults.
async function runBenchmarkOnSelected() {
  // 📖 Optional chaining: the list may not be populated yet.
  const selected = state.visibleSorted?.[state.cursor]
  if (!selected) return

  const benchmarkKey = `${selected.providerKey}/${selected.modelId}`
  // 📖 One run per model at a time — repeated key presses while running are ignored.
  if (state.benchmarkRunning.has(benchmarkKey)) return

  const providerUrl = sources[selected.providerKey]?.url ?? null
  if (!providerUrl) {
    // 📖 No endpoint to call: record it so the column shows feedback instead of nothing.
    state.benchmarkResults[benchmarkKey] = {
      ok: false,
      code: 'UNSUPPORTED',
      totalMs: 0,
      error: 'Provider has no benchmarkable endpoint',
    }
    return
  }

  state.benchmarkRunning.add(benchmarkKey)

  try {
    // 📖 Key lookup lives inside the try so a config error becomes an ERR result.
    const apiKey = getApiKey(state.config, selected.providerKey) ?? null
    const result = await benchmarkModel({
      apiKey,
      modelId: selected.modelId,
      providerKey: selected.providerKey,
      url: providerUrl,
    })
    state.benchmarkResults[benchmarkKey] = result
  } catch (err) {
    state.benchmarkResults[benchmarkKey] = {
      ok: false,
      code: 'ERR',
      totalMs: 0,
      error: err?.message || 'Benchmark failed',
    }
  } finally {
    // 📖 Always clear the running flag so the spinner stops and the row can be re-run.
    state.benchmarkRunning.delete(benchmarkKey)
  }
}
1065
+
1029
1066
  // 📖 Favorites display mode:
1030
1067
  // 📖 - true => favorites stay pinned + always visible (legacy behavior)
1031
1068
  // 📖 - false => favorites are just starred rows and obey normal sort/filter rules
@@ -2811,6 +2848,13 @@ export function createKeyHandler(ctx) {
2811
2848
  return
2812
2849
  }
2813
2850
 
2851
+ // 📖 Ctrl+A: benchmark the currently selected model with a real completion.
2852
+ // 📖 Measures wall-clock response time and tokens per second (TPS).
2853
+ if (key.ctrl && key.name === 'a') {
2854
+ void runBenchmarkOnSelected()
2855
+ return
2856
+ }
2857
+
2814
2858
  if (key.shift && key.name === 'up') {
2815
2859
  const selected = state.visibleSorted?.[state.cursor]
2816
2860
  if (selected?.isFavorite) {
package/src/overlays.js CHANGED
@@ -928,6 +928,7 @@ export function createOverlayRenderers(state, deps) {
928
928
  lines.push(` ${heading('Controls')}`)
929
929
  lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
930
930
  lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
931
+ lines.push(` ${key('Ctrl+A')} Benchmark answer speed ${hint('(real completion on selected model → time + TPS)')}`)
931
932
  lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
932
933
  lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
933
934
  lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
@@ -49,6 +49,7 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
49
49
  import { TIER_COLOR } from './tier-colors.js'
50
50
  import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
51
51
  import { usagePlaceholderForProvider } from './ping.js'
52
+ import { formatBenchmarkResult } from './benchmark.js'
52
53
  import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
53
54
  import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
54
55
  import { getColumnSpacing } from './ui-config.js'
@@ -181,6 +182,8 @@ export function renderTable({
181
182
  routerFooterTodayTokens = 0,
182
183
  routerFooterAllTimeTokens = 0,
183
184
  routerFooterRequests = 0,
185
+ benchmarkResults = {},
186
+ benchmarkRunning = new Set(),
184
187
  } = {}) {
185
188
  // 📖 Filter out hidden models for display
186
189
  const visibleResults = results.filter(r => !r.hidden)
@@ -274,6 +277,7 @@ export function renderTable({
274
277
  const W_STATUS = 18
275
278
  const W_VERDICT = 14
276
279
  const W_UPTIME = 6
280
+ const W_ANSWER = 14
277
281
 
278
282
  // const W_TOKENS = 7 // Used column removed
279
283
  // const W_USAGE = 7 // Usage column removed
@@ -281,16 +285,17 @@ export function renderTable({
281
285
 
282
286
  // 📖 Responsive column visibility: progressively hide least-useful columns
283
287
  // 📖 and shorten header labels when terminal width is insufficient.
284
- // 📖 Hiding order (least useful first): Rank → Up% → Tier → Stability
288
+ // 📖 Hiding order (least useful first): Rank → Answer Speed → Up% → Tier → Stability
285
289
  // 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
286
290
  // 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
287
- // 📖 Breakpoints: full=169 | compact=146 | -Rank=137 | -Up%=128 | -Tier=120 | -Stab=109
291
+ // 📖 Breakpoints: full=183 | compact=160 | -Rank=151 | -Answer=142 | -Up%=133 | -Tier=125 | -Stab=114
288
292
  let wPing = 14
289
293
  let wAvg = 11
290
294
  let wStab = 11
291
295
  let wSource = W_SOURCE
292
296
  let wStatus = W_STATUS
293
297
  let showRank = true
298
+ let showAnswerSpeed = true
294
299
  let showUptime = true
295
300
  let showTier = true
296
301
  let showStability = true
@@ -305,6 +310,7 @@ export function renderTable({
305
310
  cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
306
311
  if (showStability) cols.push(wStab)
307
312
  if (showUptime) cols.push(W_UPTIME)
313
+ if (showAnswerSpeed) cols.push(W_ANSWER)
308
314
  return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
309
315
  }
310
316
 
@@ -317,8 +323,9 @@ export function renderTable({
317
323
  wSource = 7 // Provider truncated to 4 chars + '…', 7 cols total
318
324
  wStatus = 13 // Health truncated after 6 chars + '…'
319
325
  }
320
- // 📖 Steps 2–5: Progressive column hiding (least useful first)
326
+ // 📖 Steps 2–6: Progressive column hiding (least useful first)
321
327
  if (calcWidth() > terminalCols) showRank = false
328
+ if (calcWidth() > terminalCols) showAnswerSpeed = false
322
329
  if (calcWidth() > terminalCols) showUptime = false
323
330
  if (calcWidth() > terminalCols) showTier = false
324
331
  if (calcWidth() > terminalCols) showStability = false
@@ -341,6 +348,7 @@ export function renderTable({
341
348
  colDefs.push({ name: 'verdict', width: W_VERDICT })
342
349
  if (showStability) colDefs.push({ name: 'stability', width: wStab })
343
350
  if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
351
+ if (showAnswerSpeed) colDefs.push({ name: 'answerSpeed', width: W_ANSWER })
344
352
  let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
345
353
  const columns = []
346
354
  for (let i = 0; i < colDefs.length; i++) {
@@ -467,6 +475,14 @@ export function renderTable({
467
475
  return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
468
476
  })()
469
477
 
478
// 📖 Answer Speed header — no sort hotkey, so the whole label is rendered dim with no
// 📖 highlighted letter. The text that is rendered is the same text used to size the
// 📖 padding, so the header is exactly W_ANSWER columns wide in both full ('Answer Speed')
// 📖 and compact ('Answ.') layouts.
const answerLabel = isCompact ? 'Answ.' : 'Answer Speed'
const answerH_c = (() => {
  const padding = ' '.repeat(Math.max(0, W_ANSWER - answerLabel.length))
  return themeColors.dim(answerLabel + padding)
})()
485
+
470
486
  // 📖 Usage column removed from UI – no header or separator for it.
471
487
  // 📖 Header row: conditionally include columns based on responsive visibility
472
488
  const headerParts = []
@@ -475,6 +491,7 @@ export function renderTable({
475
491
  headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
476
492
  if (showStability) headerParts.push(stabH_c)
477
493
  if (showUptime) headerParts.push(uptimeH_c)
494
+ if (showAnswerSpeed) headerParts.push(answerH_c)
478
495
  lines.push(' ' + headerParts.join(COL_SEP))
479
496
 
480
497
  // 📖 Mouse support: the column header row is the last line we just pushed.
@@ -776,6 +793,25 @@ export function renderTable({
776
793
  // (We keep the logic but do not render it.)
777
794
  const usageCell = ''
778
795
 
796
+ // 📖 Answer Speed column — show benchmark result, running spinner, or dash
797
+ const benchmarkKey = `${r.providerKey}/${r.modelId}`
798
+ const benchmarkResult = benchmarkResults[benchmarkKey]
799
+ const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
800
+ let answerSpeedCell
801
+ if (isBenchmarkRunning) {
802
+ const spinner = FRAMES[frame % FRAMES.length]
803
+ answerSpeedCell = themeColors.success(spinner.padEnd(W_ANSWER))
804
+ } else if (benchmarkResult) {
805
+ const text = formatBenchmarkResult(benchmarkResult)
806
+ // 📖 Colorize: success = green, error = red/dim
807
+ const isError = !benchmarkResult.ok
808
+ answerSpeedCell = isError
809
+ ? themeColors.metricBad(text.padEnd(W_ANSWER))
810
+ : themeColors.metricGood(text.padEnd(W_ANSWER))
811
+ } else {
812
+ answerSpeedCell = themeColors.dim('—'.padEnd(W_ANSWER))
813
+ }
814
+
779
815
  // 📖 Build row: conditionally include columns based on responsive visibility
780
816
  const rowParts = []
781
817
  if (showRank) rowParts.push(num)
@@ -783,6 +819,7 @@ export function renderTable({
783
819
  rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
784
820
  if (showStability) rowParts.push(stabCell)
785
821
  if (showUptime) rowParts.push(uptimeCell)
822
+ if (showAnswerSpeed) rowParts.push(answerSpeedCell)
786
823
  const row = ' ' + rowParts.join(COL_SEP)
787
824
 
788
825
  if (isCursor) {
package/src/tui-state.js CHANGED
@@ -261,5 +261,12 @@ export function createTuiState({
261
261
 
262
262
  // 📖 Token usage overlay scroll state (used when overlay opens from footer)
263
263
  tokenUsageOpen: false,
264
+
265
+ // 📖 Benchmark results: keyed by `${providerKey}/${modelId}`
266
+ // 📖 Each entry is the raw result object from benchmarkModel() or null.
267
+ benchmarkResults: {},
268
+
269
+ // 📖 Set of benchmark keys currently running (for spinner display)
270
+ benchmarkRunning: new Set(),
264
271
  }
265
272
  }