free-coding-models 0.3.76 → 0.3.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -379,18 +379,14 @@ Press **`Z`** in the TUI to cycle between tools without restarting.
379
379
 
380
380
  ### OpenCode Zen Free Models
381
381
 
382
- [OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering 8 free coding models exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
382
+ [OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering **4 free coding models** exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
383
383
 
384
384
  | Model | Tier | SWE-bench | Context |
385
385
  |-------|------|-----------|---------|
386
386
  | Big Pickle | S+ | 72.0% | 200k |
387
- | MiniMax M2.5 Free | S+ | 80.2% | 200k |
388
- | Nemotron 3 Super Free | A+ | 52.0% | 1M |
389
- | GPT 5 Nano | S | 65.0% | 400k |
390
- | HY3 Preview Free | A+ | - | 128k |
391
- | Ling 2.6 Flash Free | S | - | 128k |
392
- | Trinity Large Preview Free | S | - | 128k |
393
- | Trinity Mini Preview Free | A | - | 128k |
387
+ | DeepSeek V4 Flash Free | S+ | 79.0% | 200k |
388
+ | MiMo-V2.5 Free | S+ | - | 200k |
389
+ | Nemotron 3 Super Free | A+ | 52.0% | 200k |
394
390
 
395
391
  To use Zen models: sign up at [opencode.ai/auth](https://opencode.ai/auth) and enter your Zen API key via `P` (Settings). Zen models appear in the main table and auto-switch to OpenCode CLI on launch.
396
392
 
@@ -0,0 +1,12 @@
1
+ # Changelog v0.3.77 - 2026-05-28
2
+
3
+ ### Fixed
4
+ - **OpenCode Zen model catalog sync** — removed 3 models that are no longer free and added 1 new free model:
5
+ - ❌ Removed `qwen3.6-plus-free` — free promotion ended, now requires OpenCode Go subscription or PAYG
6
+ - ❌ Removed `minimax-m2.5-free` — 7-day launch promo expired, now paid
7
+ - ❌ Removed `gpt-5-nano` — was incorrectly cataloged as free, actually requires Zen PAYG
8
+ - ✅ Added `mimo-v2.5-free` — newly confirmed free on Zen
9
+ - Result: Zen free catalog updated from 6 → 4 confirmed free models
10
+
11
+ ### Changed
12
+ - Updated README Zen models table to reflect the current 4-model free tier
@@ -0,0 +1,20 @@
1
+ # Changelog v0.3.78 - 2026-05-28
2
+
3
+ ### Added
4
+ - **Real-Answer Benchmark feature** — Press `Ctrl+A` to run a live completion benchmark on the currently selected model. Measures actual wall-clock response time and tokens-per-second (TPS) with a real chat completion request (`"Why is the sky blue? Answer in exactly one short sentence."`). Results appear in the new **Answer Speed** column as `4.3s / 13 TPS`.
5
+ - New `Answer Speed` column in the TUI table, positioned after the Uptime column. Defaults to `—`, shows a green spinner while benchmarking, and displays compact error codes (`ERR`, `TIMEOUT`, `401`, `429`) on failure.
6
+ - New module `src/benchmark.js` with lightweight, native Node.js benchmark logic:
7
+ - `benchmarkModel({ apiKey, modelId, providerKey, url })` — sends one completion, measures time, parses `usage.completion_tokens` with a `Math.ceil(outputText.length / 4)` fallback clearly labeled as an estimate.
8
+ - Guards against division-by-zero and unsupported providers (rovo, gemini, opencode-zen return `UNSUPPORTED`).
9
+ - Respects existing API key handling and fails gracefully on missing credentials, rate limits, or timeouts.
10
+ - Benchmark state stored in TUI state keyed by `${providerKey}/${modelId}`, so results survive re-renders and table refreshes.
11
+ - `Ctrl+A` documented in the Help overlay (`I` key).
12
+
13
+ ### Changed
14
+ - Responsive column breakpoints adjusted to accommodate the new `Answer Speed` column (14 cols). Progressive hiding order: Rank → Answer Speed → Up% → Tier → Stability. Compact mode still active at ~163+ cols.
15
+ - **Removed the startup Shell Environment popup** — The modal that asked existing users whether to export API keys to shell on every launch has been eliminated. It was intrusive and re-appeared even after being skipped.
16
+ - Shell Environment setup is now **on-demand via Settings (P key)**. The "Shell Env Export" row shows:
17
+ - `🔘 Not configured — Enter to set up` for users who never configured it (previously, these users were shown the startup popup)
18
+ - `✅ Enabled` when active
19
+ - `❌ Disabled` when explicitly turned off
20
+ - New users who add their first API key still get shell env enabled automatically (zero-friction default). Existing users can press `Enter` on the Settings row to enable it at any time.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.3.76",
3
+ "version": "0.3.78",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/sources.js CHANGED
@@ -355,11 +355,12 @@ export const gemini = [
355
355
  // 📖 Config: set provider to opencode/<model-id> in OpenCode config
356
356
  export const opencodeZen = [
357
357
  ['big-pickle', 'Big Pickle', 'S+', '72.0%', '200k'],
358
- ['minimax-m2.5-free', 'MiniMax M2.5 Free', 'S+', '80.2%', '200k'],
359
358
  ['deepseek-v4-flash-free', 'DeepSeek V4 Flash Free', 'S+', '79.0%', '200k'],
360
- ['qwen3.6-plus-free', 'Qwen3.6 Plus Free', 'S+', '78.8%', '1M'],
359
+ ['mimo-v2.5-free', 'MiMo-V2.5 Free', 'S+', '-', '200k'],
361
360
  ['nemotron-3-super-free', 'Nemotron 3 Super Free', 'A+', '52.0%', '200k'],
362
- ['gpt-5-nano', 'GPT 5 Nano', 'S', '65.0%', '400k'],
361
+ // Removed (2026-05-28): qwen3.6-plus-free (free promotion ended — now requires OpenCode Go)
362
+ // Removed (2026-05-28): minimax-m2.5-free (7-day launch promo expired — now paid)
363
+ // Removed (2026-05-28): gpt-5-nano (was never free — incorrectly cataloged as free, requires Zen PAYG)
363
364
  // Removed (2026-05-26): hy3-preview-free (deleted from Zen)
364
365
  // Removed (2026-05-26): ling-2.6-flash-free (deleted from Zen)
365
366
  // Removed (2026-05-26): trinity-mini-free (deleted from Zen)
package/src/app.js CHANGED
@@ -117,7 +117,7 @@ import { createTuiState, PING_MODE_INTERVALS, PING_MODE_CYCLE, SPEED_MODE_DURATI
117
117
  import { createPingLoop } from './ping-loop.js'
118
118
  import { createTuiFilters } from './tui-filters.js'
119
119
  import { promptApiKey } from '../src/setup.js'
120
- import { syncShellEnv, ensureShellRcSource, promptShellEnvMigration, removeShellEnv } from '../src/shell-env.js'
120
+ import { syncShellEnv, ensureShellRcSource, removeShellEnv } from '../src/shell-env.js'
121
121
  import { stripAnsi, maskApiKey, displayWidth, padEndDisplay, tintOverlayLines, keepOverlayTargetVisible, sliceOverlayLines, calculateViewport, sortResultsWithPinnedFavorites, adjustScrollOffset } from '../src/render-helpers.js'
122
122
  import { renderTable, PROVIDER_COLOR } from '../src/render-table.js'
123
123
  import { setOpenCodeModelData, startOpenCode, startOpenCodeDesktop, startOpenCodeWeb } from '../src/opencode.js'
@@ -230,28 +230,6 @@ export async function runApp(cliArgs, config) {
230
230
  }
231
231
  }
232
232
 
233
- // 📖 Shell env migration popup for existing users who haven't been asked yet
234
- // 📖 Only show when user has keys but shellEnvEnabled is still undefined (never prompted)
235
- // 📖 shellEnvPromptSeen flag ensures it only shows ONCE even after adding new keys
236
- if (hasAnyKey && config.settings.shellEnvEnabled === undefined && config.settings.shellEnvPromptSeen !== true) {
237
- const choice = await promptShellEnvMigration(config)
238
- if (!config.settings) config.settings = {}
239
- config.settings.shellEnvPromptSeen = true
240
- if (choice === 'enable') {
241
- config.settings.shellEnvEnabled = true
242
- saveConfig(config)
243
- syncShellEnv(config)
244
- ensureShellRcSource()
245
- } else if (choice === 'never') {
246
- config.settings.shellEnvEnabled = false
247
- saveConfig(config)
248
- }
249
- if (choice === 'skip') {
250
- config.settings.shellEnvEnabled = false
251
- saveConfig(config)
252
- }
253
- }
254
-
255
233
  // 📖 Default mode: use the last persisted launcher choice when valid,
256
234
  // 📖 otherwise fall back to OpenCode CLI.
257
235
  let mode = getToolModeOrder().includes(config.settings?.preferredToolMode)
@@ -845,6 +823,8 @@ export async function runApp(cliArgs, config) {
845
823
  verdictFilterMode: state.verdictFilterMode,
846
824
  healthFilterMode: state.healthFilterMode,
847
825
  bestModeOnly: state.bestModeOnly,
826
+ benchmarkResults: state.benchmarkResults,
827
+ benchmarkRunning: state.benchmarkRunning,
848
828
  }
849
829
  if (state.commandPaletteOpen) {
850
830
  if (!state.commandPaletteFrozenTable) {
@@ -932,6 +912,8 @@ export async function runApp(cliArgs, config) {
932
912
  verdictFilterMode: state.verdictFilterMode,
933
913
  healthFilterMode: state.healthFilterMode,
934
914
  bestModeOnly: state.bestModeOnly,
915
+ benchmarkResults: state.benchmarkResults,
916
+ benchmarkRunning: state.benchmarkRunning,
935
917
  }))
936
918
  if (process.stdout.isTTY) {
937
919
  process.stdout.flush && process.stdout.flush()
@@ -0,0 +1,241 @@
1
+ /**
2
+ * @file benchmark.js
3
+ * @description Real-answer benchmark for measuring model response speed and throughput.
4
+ *
5
+ * @details
6
+ * This module sends a single small chat completion to a model and measures:
7
+ * - Total wall-clock response time (ms)
8
+ * - Output tokens generated
9
+ * - Tokens per second (TPS)
10
+ *
11
+ * 🎯 Key features:
12
+ * - Provider-specific request building (Replicate, Cloudflare, OpenRouter, and a generic OpenAI-compatible default)
13
+ * - Async benchmark with timeout and abort controller
14
+ * - Prefers `usage.completion_tokens` from the API response
15
+ * - Falls back to character-length estimate when usage is missing
16
+ * - Returns structured success/failure objects for TUI consumption
17
+ *
18
+ * → Functions:
19
+ * - `buildBenchmarkRequest`: Build provider-specific benchmark request
20
+ * - `benchmarkModel`: Run a single benchmark and return timing + token metrics
21
+ * - `formatBenchmarkResult`: Format a benchmark result for the TUI column
22
+ * - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
23
+ *
24
+ * 📦 Dependencies:
25
+ * - ./ping.js: buildPingRequest, resolveCloudflareUrl
26
+ *
27
+ * @see {@link ./ping.js} Provider-specific request building
28
+ * @see {@link ./render-table.js} Answer Speed column rendering
29
+ */
30
+
31
+ import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
32
+
33
+ // 📖 BENCHMARK_PROMPT: A short, unambiguous question that any model can answer.
34
+ // 📖 Constrained to one sentence to keep benchmarks fast and consistent.
35
+ export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one short sentence.'
36
+
37
+ // 📖 BENCHMARK_MAX_TOKENS: Hard cap on generation length to prevent slow models
38
+ // 📖 from producing essays and skewing the TPS calculation.
39
+ export const BENCHMARK_MAX_TOKENS = 32
40
+
41
+ // 📖 BENCHMARK_TEMPERATURE: Zero temperature for deterministic, reproducible results.
42
+ export const BENCHMARK_TEMPERATURE = 0
43
+
44
+ // 📖 BENCHMARK_TIMEOUT_MS: How long to wait before treating a benchmark as failed.
45
+ export const BENCHMARK_TIMEOUT_MS = 20_000
46
+
47
// 📖 estimateTokensFromText: Fallback token counter when the API does not return usage.
// 📖 Uses a simple heuristic: avg English token ≈ 4 chars. This is explicitly an ESTIMATE,
// 📖 not a tokenizer — do not use for billing.
// 📖
// 📖 @param {string} text - Generated text to size. Non-strings and empty strings yield 0.
// 📖 @param {number} [charsPerToken=4] - Average characters per token used as the divisor.
// 📖   Non-finite or non-positive values fall back to 4 so the result is never NaN/Infinity.
// 📖 @returns {number} Estimated token count, rounded up.
export function estimateTokensFromText(text, charsPerToken = 4) {
  if (typeof text !== 'string' || text.length === 0) return 0
  const divisor = Number.isFinite(charsPerToken) && charsPerToken > 0 ? charsPerToken : 4
  return Math.ceil(text.length / divisor)
}
54
+
55
// 📖 formatBenchmarkResult: Turn a raw benchmark result into a compact display string.
// 📖 Handles all four states: empty, running, success, and error.
// 📖
// 📖 Success: "4.3s / 13 TPS" (one decimal below 10s, whole seconds from 10s up)
// 📖 Running: a braille spinner glyph picked by `frame`
// 📖 Error:   `result.code` (e.g. "TIMEOUT", "401", "429"), or "ERR" when no code is set
// 📖 Empty:   "—"
// 📖
// 📖 @param {object|null|undefined} result - Object with `ok`, and either `code` (failure)
// 📖   or `totalMs` + `tokensPerSecond` (success).
// 📖 @param {{ running?: boolean, frame?: number }} [options] - `running` wins over `result`.
// 📖 @returns {string} Plain (uncolored) cell text.
export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
  if (running) {
    const glyphs = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
    // 📖 Normalize so negative or fractional frame counters still map to a glyph.
    const step = Number.isFinite(frame) ? Math.trunc(frame) : 0
    return glyphs[((step % glyphs.length) + glyphs.length) % glyphs.length]
  }

  if (!result) return '—'
  if (!result.ok) return result.code || 'ERR'

  // 📖 Treat missing / non-finite timings as 0 so the cell never shows "NaNs".
  const totalMs = Number(result.totalMs)
  const totalSeconds = Number.isFinite(totalMs) && totalMs > 0 ? totalMs / 1000 : 0

  // 📖 Values just under 10s (e.g. 9.96) round to "10.0" with one decimal; switch those to
  // 📖 the whole-second form too, so the label stays "10s" and does not grow by two chars.
  const oneDecimal = totalSeconds.toFixed(1)
  const secondsLabel = totalSeconds >= 10 || oneDecimal === '10.0'
    ? `${totalSeconds.toFixed(0)}s`
    : `${oneDecimal}s`

  const tps = Number(result.tokensPerSecond)
  const tpsLabel = Number.isFinite(tps) && tps > 0 ? Math.round(tps) : 0

  return `${secondsLabel} / ${tpsLabel} TPS`
}
87
+
88
// 📖 buildBenchmarkRequest: Build the provider-specific request for one benchmark completion.
// 📖 Builds its own payload (it does not call the ping request builder): a single user
// 📖 message containing BENCHMARK_PROMPT, capped at BENCHMARK_MAX_TOKENS, sent with
// 📖 BENCHMARK_TEMPERATURE.
// 📖
// 📖 @param {string|null} apiKey - Provider API key; the Authorization header is omitted when falsy.
// 📖 @param {string} modelId - Model id as stored in the catalog.
// 📖 @param {string} providerKey - Provider identifier; selects the request shape.
// 📖 @param {string} url - Provider endpoint (passed through resolveCloudflareUrl for Cloudflare).
// 📖 @returns {{ url: string, headers: object, body: object }} `body` is NOT yet JSON-stringified.
export function buildBenchmarkRequest(apiKey, modelId, providerKey, url) {
  // 📖 Replicate gets its own shape: `Token` auth, a `Prefer: wait=4` header, and a
  // 📖 { version, input } body carrying the raw model id instead of a chat payload.
  if (providerKey === 'replicate') {
    const replicateHeaders = { 'Content-Type': 'application/json', Prefer: 'wait=4' }
    if (apiKey) replicateHeaders.Authorization = `Token ${apiKey}`
    return {
      url,
      headers: replicateHeaders,
      body: { version: modelId, input: { prompt: BENCHMARK_PROMPT, max_tokens: BENCHMARK_MAX_TOKENS } },
    }
  }

  // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
  const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId

  // 📖 Every other provider shares the same Bearer-auth chat-completions payload; only the
  // 📖 URL (Cloudflare) and two extra headers (OpenRouter) differ.
  const headers = { 'Content-Type': 'application/json' }
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`
  if (providerKey === 'openrouter') {
    headers['HTTP-Referer'] = 'https://github.com/vava-nessa/free-coding-models'
    headers['X-Title'] = 'free-coding-models'
  }

  return {
    url: providerKey === 'cloudflare' ? resolveCloudflareUrl(url) : url,
    headers,
    body: {
      model: apiModelId,
      messages: [{ role: 'user', content: BENCHMARK_PROMPT }],
      max_tokens: BENCHMARK_MAX_TOKENS,
      temperature: BENCHMARK_TEMPERATURE,
    },
  }
}
138
+
139
// 📖 benchmarkModel: Send one real completion request and measure response speed.
// 📖 Never throws: every failure path resolves to an `{ ok: false, ... }` object.
// 📖
// 📖 Returns on success:
// 📖 {
// 📖   ok: true,
// 📖   totalMs: 2000,          // wall-clock, measured after the body has been fully read
// 📖   outputTokens: 26,       // usage.completion_tokens, else a character-length estimate
// 📖   tokensPerSecond: 13,
// 📖   answerPreview: "The sky is blue because..."   // first 60 chars
// 📖 }
// 📖
// 📖 Returns on failure:
// 📖 {
// 📖   ok: false,
// 📖   code: "TIMEOUT" | "ERR" | "UNSUPPORTED" | "<HTTP status>" (e.g. "401", "429"),
// 📖   totalMs: 20000,
// 📖   error: "Request timed out"
// 📖 }
// 📖
// 📖 @param {object} options
// 📖 @param {string|null} options.apiKey
// 📖 @param {string} options.modelId
// 📖 @param {string} options.providerKey
// 📖 @param {string} options.url
// 📖 @param {number} [options.timeoutMs=BENCHMARK_TIMEOUT_MS] - Abort deadline covering both
// 📖   the request and the body download.
// 📖 @returns {Promise<object>}
export async function benchmarkModel({ apiKey, modelId, providerKey, url, timeoutMs = BENCHMARK_TIMEOUT_MS }) {
  // 📖 Guard: these providers are rejected up front, before any network call.
  if (providerKey === 'rovo' || providerKey === 'gemini' || providerKey === 'opencode-zen') {
    return {
      ok: false,
      code: 'UNSUPPORTED',
      totalMs: 0,
      error: 'Provider does not support chat completions',
    }
  }

  const ctrl = new AbortController()
  const timer = setTimeout(() => ctrl.abort(), timeoutMs)
  const t0 = performance.now()

  try {
    const req = buildBenchmarkRequest(apiKey, modelId, providerKey, url)
    const resp = await fetch(req.url, {
      method: 'POST',
      signal: ctrl.signal,
      headers: req.headers,
      body: JSON.stringify(req.body),
    })

    // 📖 Read the body regardless of HTTP status so error payloads can be surfaced.
    // 📖 fetch() resolves once headers arrive, so the body read is part of the measured time.
    let bodyText = ''
    try {
      bodyText = await resp.text()
    } catch (readErr) {
      // 📖 A timeout/abort or a broken 2xx stream must NOT be reported as a successful
      // 📖 0-token answer — hand it to the outer catch. For non-2xx responses the status
      // 📖 code alone is still a useful result, so a failed body read is tolerated there.
      if (ctrl.signal.aborted || resp.ok) throw readErr
    }

    const totalMs = Math.round(performance.now() - t0)

    // 📖 Best-effort JSON parse: non-JSON bodies simply leave `data` as null.
    let data = null
    try {
      data = JSON.parse(bodyText)
    } catch {}

    // 📖 Non-2xx: return compact error code
    if (!resp.ok) {
      return {
        ok: false,
        code: String(resp.status),
        totalMs,
        error: data?.error?.message || `HTTP ${resp.status}`,
      }
    }

    // 📖 Extract generated text from OpenAI-compatible response
    const content = data?.choices?.[0]?.message?.content || data?.choices?.[0]?.text || ''
    const answerPreview = typeof content === 'string' ? content.slice(0, 60) : ''

    // 📖 Prefer usage.completion_tokens; FALLBACK to a character-count estimate when omitted.
    const outputTokens = data?.usage?.completion_tokens != null
      ? Number(data.usage.completion_tokens) || 0
      : estimateTokensFromText(content)

    // 📖 Guard division by zero
    const seconds = totalMs / 1000
    const tokensPerSecond = seconds > 0 ? outputTokens / seconds : 0

    return {
      ok: true,
      totalMs,
      outputTokens,
      tokensPerSecond,
      answerPreview,
    }
  } catch (err) {
    const totalMs = Math.round(performance.now() - t0)
    // 📖 The signal check also covers runtimes that surface an abort under a different error name.
    const isTimeout = err?.name === 'AbortError' || ctrl.signal.aborted
    return {
      ok: false,
      code: isTimeout ? 'TIMEOUT' : 'ERR',
      totalMs,
      error: isTimeout ? 'Request timed out' : (err?.message || 'Network error'),
    }
  } finally {
    clearTimeout(timer)
  }
}
@@ -54,6 +54,7 @@ import {
54
54
  restartRouterDashboardDaemon,
55
55
  toggleRouterDashboardProbePause,
56
56
  } from './router-dashboard.js'
57
+ import { benchmarkModel } from './benchmark.js'
57
58
 
58
59
  // 📖 Some providers need an explicit probe model because the first catalog entry
59
60
  // 📖 is not guaranteed to be accepted by their chat endpoint.
@@ -783,6 +784,18 @@ export function createKeyHandler(ctx) {
783
784
  function toggleShellEnv() {
784
785
  if (!state.config.settings) state.config.settings = {}
785
786
  const currentlyEnabled = state.config.settings.shellEnvEnabled === true
787
+ const isUndefined = state.config.settings.shellEnvEnabled === undefined
788
+
789
+ if (isUndefined) {
790
+ // 📖 First-time setup: enable + sync immediately (previously done by startup popup)
791
+ state.config.settings.shellEnvEnabled = true
792
+ saveConfig(state.config)
793
+ syncShellEnv(state.config)
794
+ ensureShellRcSource()
795
+ trackAppAction('shell_env_export_toggled', { enabled: true })
796
+ return
797
+ }
798
+
786
799
  state.config.settings.shellEnvEnabled = !currentlyEnabled
787
800
  saveConfig(state.config)
788
801
  if (!currentlyEnabled) {
@@ -1026,6 +1039,42 @@ export function createKeyHandler(ctx) {
1026
1039
  saveConfig(state.config)
1027
1040
  }
1028
1041
 
1042
// 📖 runBenchmarkOnSelected: Fire a real-answer benchmark on the currently selected row.
// 📖 Triggered by Ctrl+A. Async — does not block the UI. Results are stored in state
// 📖 keyed by `${providerKey}/${modelId}` so they survive re-renders.
// 📖
// 📖 The key handler launches this with `void` (fire-and-forget), so nothing here may
// 📖 reject: every lookup before the try block is null-safe, and everything that can
// 📖 throw runs inside it and is recorded as an `ERR` result instead.
// 📖 Silently does nothing when no row is selected, when the same model is already
// 📖 being benchmarked, or when the provider has no `url` in `sources`.
async function runBenchmarkOnSelected() {
  const selected = state.visibleSorted?.[state.cursor]
  if (!selected) return

  const benchmarkKey = `${selected.providerKey}/${selected.modelId}`
  if (state.benchmarkRunning.has(benchmarkKey)) return

  const providerUrl = sources[selected.providerKey]?.url ?? null
  if (!providerUrl) return

  // 📖 Mark as running before the first await so a rapid second Ctrl+A is ignored.
  state.benchmarkRunning.add(benchmarkKey)

  try {
    const apiKey = getApiKey(state.config, selected.providerKey) ?? null
    state.benchmarkResults[benchmarkKey] = await benchmarkModel({
      apiKey,
      modelId: selected.modelId,
      providerKey: selected.providerKey,
      url: providerUrl,
    })
  } catch (err) {
    state.benchmarkResults[benchmarkKey] = {
      ok: false,
      code: 'ERR',
      totalMs: 0,
      error: err?.message || 'Benchmark failed',
    }
  } finally {
    state.benchmarkRunning.delete(benchmarkKey)
  }
}
1077
+
1029
1078
  // 📖 Favorites display mode:
1030
1079
  // 📖 - true => favorites stay pinned + always visible (legacy behavior)
1031
1080
  // 📖 - false => favorites are just starred rows and obey normal sort/filter rules
@@ -2811,6 +2860,13 @@ export function createKeyHandler(ctx) {
2811
2860
  return
2812
2861
  }
2813
2862
 
2863
+ // 📖 Ctrl+A: benchmark the currently selected model with a real completion.
2864
+ // 📖 Measures wall-clock response time and tokens per second (TPS).
2865
+ if (key.ctrl && key.name === 'a') {
2866
+ void runBenchmarkOnSelected()
2867
+ return
2868
+ }
2869
+
2814
2870
  if (key.shift && key.name === 'up') {
2815
2871
  const selected = state.visibleSorted?.[state.cursor]
2816
2872
  if (selected?.isFavorite) {
package/src/overlays.js CHANGED
@@ -274,10 +274,12 @@ export function createOverlayRenderers(state, deps) {
274
274
  lines.push(state.settingsCursor === changelogViewRowIdx ? themeColors.bgCursorSettingsList(changelogViewRow) : changelogViewRow)
275
275
 
276
276
  // 📖 Shell env toggle — expose API keys as shell environment variables
277
- const shellEnvEnabled = state.config.settings?.shellEnvEnabled === true
278
- const shellEnvStatus = shellEnvEnabled
277
+ const shellEnvSetting = state.config.settings?.shellEnvEnabled
278
+ const shellEnvStatus = shellEnvSetting === true
279
279
  ? themeColors.successBold('✅ Enabled — keys available in shell')
280
- : themeColors.dim('❌ Disabled')
280
+ : shellEnvSetting === false
281
+ ? themeColors.dim('❌ Disabled')
282
+ : themeColors.warning('🔘 Not configured — Enter to set up')
281
283
  const shellEnvRow = `${bullet(state.settingsCursor === shellEnvRowIdx)}${themeColors.textBold('Shell Env Export').padEnd(44)} ${shellEnvStatus}`
282
284
  cursorLineByRow[shellEnvRowIdx] = lines.length
283
285
  lines.push(state.settingsCursor === shellEnvRowIdx ? themeColors.bgCursorSettingsList(shellEnvRow) : shellEnvRow)
@@ -928,6 +930,7 @@ export function createOverlayRenderers(state, deps) {
928
930
  lines.push(` ${heading('Controls')}`)
929
931
  lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
930
932
  lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
933
+ lines.push(` ${key('Ctrl+A')} Benchmark answer speed ${hint('(real completion on selected model → time + TPS)')}`)
931
934
  lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
932
935
  lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
933
936
  lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
@@ -49,6 +49,7 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
49
49
  import { TIER_COLOR } from './tier-colors.js'
50
50
  import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
51
51
  import { usagePlaceholderForProvider } from './ping.js'
52
+ import { formatBenchmarkResult } from './benchmark.js'
52
53
  import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
53
54
  import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
54
55
  import { getColumnSpacing } from './ui-config.js'
@@ -181,6 +182,8 @@ export function renderTable({
181
182
  routerFooterTodayTokens = 0,
182
183
  routerFooterAllTimeTokens = 0,
183
184
  routerFooterRequests = 0,
185
+ benchmarkResults = {},
186
+ benchmarkRunning = new Set(),
184
187
  } = {}) {
185
188
  // 📖 Filter out hidden models for display
186
189
  const visibleResults = results.filter(r => !r.hidden)
@@ -274,6 +277,7 @@ export function renderTable({
274
277
  const W_STATUS = 18
275
278
  const W_VERDICT = 14
276
279
  const W_UPTIME = 6
280
+ const W_ANSWER = 14
277
281
 
278
282
  // const W_TOKENS = 7 // Used column removed
279
283
  // const W_USAGE = 7 // Usage column removed
@@ -281,16 +285,17 @@ export function renderTable({
281
285
 
282
286
  // 📖 Responsive column visibility: progressively hide least-useful columns
283
287
  // 📖 and shorten header labels when terminal width is insufficient.
284
- // 📖 Hiding order (least useful first): Rank → Up% → Tier → Stability
288
+ // 📖 Hiding order (least useful first): Rank → Answer Speed → Up% → Tier → Stability
285
289
  // 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
286
290
  // 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
287
- // 📖 Breakpoints: full=169 | compact=146 | -Rank=137 | -Up%=128 | -Tier=120 | -Stab=109
291
+ // 📖 Breakpoints: full=183 | compact=160 | -Rank=151 | -Answer=142 | -Up%=133 | -Tier=125 | -Stab=114
288
292
  let wPing = 14
289
293
  let wAvg = 11
290
294
  let wStab = 11
291
295
  let wSource = W_SOURCE
292
296
  let wStatus = W_STATUS
293
297
  let showRank = true
298
+ let showAnswerSpeed = true
294
299
  let showUptime = true
295
300
  let showTier = true
296
301
  let showStability = true
@@ -305,6 +310,7 @@ export function renderTable({
305
310
  cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
306
311
  if (showStability) cols.push(wStab)
307
312
  if (showUptime) cols.push(W_UPTIME)
313
+ if (showAnswerSpeed) cols.push(W_ANSWER)
308
314
  return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
309
315
  }
310
316
 
@@ -317,8 +323,9 @@ export function renderTable({
317
323
  wSource = 7 // Provider truncated to 4 chars + '…', 7 cols total
318
324
  wStatus = 13 // Health truncated after 6 chars + '…'
319
325
  }
320
- // 📖 Steps 2–5: Progressive column hiding (least useful first)
326
+ // 📖 Steps 2–6: Progressive column hiding (least useful first)
321
327
  if (calcWidth() > terminalCols) showRank = false
328
+ if (calcWidth() > terminalCols) showAnswerSpeed = false
322
329
  if (calcWidth() > terminalCols) showUptime = false
323
330
  if (calcWidth() > terminalCols) showTier = false
324
331
  if (calcWidth() > terminalCols) showStability = false
@@ -341,6 +348,7 @@ export function renderTable({
341
348
  colDefs.push({ name: 'verdict', width: W_VERDICT })
342
349
  if (showStability) colDefs.push({ name: 'stability', width: wStab })
343
350
  if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
351
+ if (showAnswerSpeed) colDefs.push({ name: 'answerSpeed', width: W_ANSWER })
344
352
  let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
345
353
  const columns = []
346
354
  for (let i = 0; i < colDefs.length; i++) {
@@ -467,6 +475,14 @@ export function renderTable({
467
475
  return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
468
476
  })()
469
477
 
478
+ // 📖 Answer Speed header — no sort hotkey, just the label
479
+ const answerLabel = isCompact ? 'Answ.' : 'Answer Speed'
480
+ const answerH_c = (() => {
481
+ const plain = answerLabel
482
+ const padding = ' '.repeat(Math.max(0, W_ANSWER - plain.length))
483
+ return themeColors.dim('Ans') + themeColors.hotkey('w') + themeColors.dim('er' + (isCompact ? '.' : ' Speed') + padding)
484
+ })()
485
+
470
486
  // 📖 Usage column removed from UI – no header or separator for it.
471
487
  // 📖 Header row: conditionally include columns based on responsive visibility
472
488
  const headerParts = []
@@ -475,6 +491,7 @@ export function renderTable({
475
491
  headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
476
492
  if (showStability) headerParts.push(stabH_c)
477
493
  if (showUptime) headerParts.push(uptimeH_c)
494
+ if (showAnswerSpeed) headerParts.push(answerH_c)
478
495
  lines.push(' ' + headerParts.join(COL_SEP))
479
496
 
480
497
  // 📖 Mouse support: the column header row is the last line we just pushed.
@@ -776,6 +793,25 @@ export function renderTable({
776
793
  // (We keep the logic but do not render it.)
777
794
  const usageCell = ''
778
795
 
796
+ // 📖 Answer Speed column — show benchmark result, running spinner, or dash
797
+ const benchmarkKey = `${r.providerKey}/${r.modelId}`
798
+ const benchmarkResult = benchmarkResults[benchmarkKey]
799
+ const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
800
+ let answerSpeedCell
801
+ if (isBenchmarkRunning) {
802
+ const spinner = FRAMES[frame % FRAMES.length]
803
+ answerSpeedCell = themeColors.success(spinner.padEnd(W_ANSWER))
804
+ } else if (benchmarkResult) {
805
+ const text = formatBenchmarkResult(benchmarkResult)
806
+ // 📖 Colorize: success = green, error = red/dim
807
+ const isError = !benchmarkResult.ok
808
+ answerSpeedCell = isError
809
+ ? themeColors.metricBad(text.padEnd(W_ANSWER))
810
+ : themeColors.metricGood(text.padEnd(W_ANSWER))
811
+ } else {
812
+ answerSpeedCell = themeColors.dim('—'.padEnd(W_ANSWER))
813
+ }
814
+
779
815
  // 📖 Build row: conditionally include columns based on responsive visibility
780
816
  const rowParts = []
781
817
  if (showRank) rowParts.push(num)
@@ -783,6 +819,7 @@ export function renderTable({
783
819
  rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
784
820
  if (showStability) rowParts.push(stabCell)
785
821
  if (showUptime) rowParts.push(uptimeCell)
822
+ if (showAnswerSpeed) rowParts.push(answerSpeedCell)
786
823
  const row = ' ' + rowParts.join(COL_SEP)
787
824
 
788
825
  if (isCursor) {
package/src/tui-state.js CHANGED
@@ -261,5 +261,12 @@ export function createTuiState({
261
261
 
262
262
  // 📖 Token usage overlay scroll state (used when overlay opens from footer)
263
263
  tokenUsageOpen: false,
264
+
265
+ // 📖 Benchmark results: keyed by `${providerKey}/${modelId}`
266
+ // 📖 Each entry is the raw result object from benchmarkModel() or null.
267
+ benchmarkResults: {},
268
+
269
+ // 📖 Set of benchmark keys currently running (for spinner display)
270
+ benchmarkRunning: new Set(),
264
271
  }
265
272
  }