free-coding-models 0.3.76 → 0.3.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -8
- package/changelog/v0.3.77.md +12 -0
- package/changelog/v0.3.78.md +20 -0
- package/package.json +1 -1
- package/sources.js +4 -3
- package/src/app.js +5 -23
- package/src/benchmark.js +241 -0
- package/src/key-handler.js +56 -0
- package/src/overlays.js +6 -3
- package/src/render-table.js +40 -3
- package/src/tui-state.js +7 -0
- package/web/dist/assets/{index-8pEx_hYi.js → index-BjX9NY0U.js} +1 -1
- package/web/dist/index.html +1 -1
package/README.md
CHANGED
|
@@ -379,18 +379,14 @@ Press **`Z`** in the TUI to cycle between tools without restarting.
|
|
|
379
379
|
|
|
380
380
|
### OpenCode Zen Free Models
|
|
381
381
|
|
|
382
|
-
[OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering
|
|
382
|
+
[OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering **4 free coding models** exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
|
|
383
383
|
|
|
384
384
|
| Model | Tier | SWE-bench | Context |
|
|
385
385
|
|-------|------|-----------|---------|
|
|
386
386
|
| Big Pickle | S+ | 72.0% | 200k |
|
|
387
|
-
|
|
|
388
|
-
|
|
|
389
|
-
|
|
|
390
|
-
| HY3 Preview Free | A+ | - | 128k |
|
|
391
|
-
| Ling 2.6 Flash Free | S | - | 128k |
|
|
392
|
-
| Trinity Large Preview Free | S | - | 128k |
|
|
393
|
-
| Trinity Mini Preview Free | A | - | 128k |
|
|
387
|
+
| DeepSeek V4 Flash Free | S+ | 79.0% | 200k |
|
|
388
|
+
| MiMo-V2.5 Free | S+ | - | 200k |
|
|
389
|
+
| Nemotron 3 Super Free | A+ | 52.0% | 200k |
|
|
394
390
|
|
|
395
391
|
To use Zen models: sign up at [opencode.ai/auth](https://opencode.ai/auth) and enter your Zen API key via `P` (Settings). Zen models appear in the main table and auto-switch to OpenCode CLI on launch.
|
|
396
392
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Changelog v0.3.77 - 2026-05-28
|
|
2
|
+
|
|
3
|
+
### Fixed
|
|
4
|
+
- **OpenCode Zen model catalog sync** — removed 3 models that are no longer free and added 1 new free model:
|
|
5
|
+
- ❌ Removed `qwen3.6-plus-free` — free promotion ended, now requires OpenCode Go subscription or PAYG
|
|
6
|
+
- ❌ Removed `minimax-m2.5-free` — 7-day launch promo expired, now paid
|
|
7
|
+
- ❌ Removed `gpt-5-nano` — was incorrectly cataloged as free, actually requires Zen PAYG
|
|
8
|
+
- ✅ Added `mimo-v2.5-free` — newly confirmed free on Zen
|
|
9
|
+
- Result: Zen free catalog updated from 6 → 4 confirmed free models
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
- Updated README Zen models table to reflect the current 4-model free tier
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog v0.3.78 - 2026-05-28
|
|
2
|
+
|
|
3
|
+
### Added
|
|
4
|
+
- **Real-Answer Benchmark feature** — Press `Ctrl+A` to run a live completion benchmark on the currently selected model. Measures actual wall-clock response time and tokens-per-second (TPS) with a real chat completion request (`"Why is the sky blue? Answer in exactly one short sentence."`). Results appear in the new **Answer Speed** column as `4.3s / 13 TPS`.
|
|
5
|
+
- New `Answer Speed` column in the TUI table, positioned after the Uptime column. Defaults to `—`, shows a green spinner while benchmarking, and displays compact error codes (`ERR`, `TIMEOUT`, `401`, `429`) on failure.
|
|
6
|
+
- New module `src/benchmark.js` with lightweight, native Node.js benchmark logic:
|
|
7
|
+
- `benchmarkModel({ apiKey, modelId, providerKey, url })` — sends one completion, measures time, parses `usage.completion_tokens` with a `Math.ceil(outputText.length / 4)` fallback clearly labeled as an estimate.
|
|
8
|
+
- Guards against division-by-zero and unsupported providers (rovo, gemini, opencode-zen return `UNSUPPORTED`).
|
|
9
|
+
- Respects existing API key handling and fails gracefully on missing credentials, rate limits, or timeouts.
|
|
10
|
+
- Benchmark state stored in TUI state keyed by `${providerKey}/${modelId}`, so results survive re-renders and table refreshes.
|
|
11
|
+
- `Ctrl+A` documented in the Help overlay (`I` key).
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- Responsive column breakpoints adjusted to accommodate the new `Answer Speed` column (14 cols). Progressive hiding order: Rank → Answer Speed → Up% → Tier → Stability. Compact mode still active at ~163+ cols.
|
|
15
|
+
- **Removed the startup Shell Environment popup** — The modal that asked existing users whether to export API keys to shell on every launch has been eliminated. It was intrusive and re-appeared even after being skipped.
|
|
16
|
+
- Shell Environment setup is now **on-demand via Settings (P key)**. The "Shell Env Export" row shows:
|
|
17
|
+
- `🔘 Not configured — Enter to set up` for users who never configured it (these users previously saw the startup popup)
|
|
18
|
+
- `✅ Enabled` when active
|
|
19
|
+
- `❌ Disabled` when explicitly turned off
|
|
20
|
+
- New users who add their first API key still get shell env enabled automatically (zero-friction default). Existing users can press `Enter` on the Settings row to enable it at any time.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.3.76",
|
|
3
|
+
"version": "0.3.78",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|
package/sources.js
CHANGED
|
@@ -355,11 +355,12 @@ export const gemini = [
|
|
|
355
355
|
// 📖 Config: set provider to opencode/<model-id> in OpenCode config
|
|
356
356
|
export const opencodeZen = [
|
|
357
357
|
['big-pickle', 'Big Pickle', 'S+', '72.0%', '200k'],
|
|
358
|
-
['minimax-m2.5-free', 'MiniMax M2.5 Free', 'S+', '80.2%', '200k'],
|
|
359
358
|
['deepseek-v4-flash-free', 'DeepSeek V4 Flash Free', 'S+', '79.0%', '200k'],
|
|
360
|
-
['
|
|
359
|
+
['mimo-v2.5-free', 'MiMo-V2.5 Free', 'S+', '-', '200k'],
|
|
361
360
|
['nemotron-3-super-free', 'Nemotron 3 Super Free', 'A+', '52.0%', '200k'],
|
|
362
|
-
|
|
361
|
+
// Removed (2026-05-28): qwen3.6-plus-free (free promotion ended — now requires OpenCode Go)
|
|
362
|
+
// Removed (2026-05-28): minimax-m2.5-free (7-day launch promo expired — now paid)
|
|
363
|
+
// Removed (2026-05-28): gpt-5-nano (was never free — incorrectly cataloged as free, requires Zen PAYG)
|
|
363
364
|
// Removed (2026-05-26): hy3-preview-free (deleted from Zen)
|
|
364
365
|
// Removed (2026-05-26): ling-2.6-flash-free (deleted from Zen)
|
|
365
366
|
// Removed (2026-05-26): trinity-mini-free (deleted from Zen)
|
package/src/app.js
CHANGED
|
@@ -117,7 +117,7 @@ import { createTuiState, PING_MODE_INTERVALS, PING_MODE_CYCLE, SPEED_MODE_DURATI
|
|
|
117
117
|
import { createPingLoop } from './ping-loop.js'
|
|
118
118
|
import { createTuiFilters } from './tui-filters.js'
|
|
119
119
|
import { promptApiKey } from '../src/setup.js'
|
|
120
|
-
import { syncShellEnv, ensureShellRcSource,
|
|
120
|
+
import { syncShellEnv, ensureShellRcSource, removeShellEnv } from '../src/shell-env.js'
|
|
121
121
|
import { stripAnsi, maskApiKey, displayWidth, padEndDisplay, tintOverlayLines, keepOverlayTargetVisible, sliceOverlayLines, calculateViewport, sortResultsWithPinnedFavorites, adjustScrollOffset } from '../src/render-helpers.js'
|
|
122
122
|
import { renderTable, PROVIDER_COLOR } from '../src/render-table.js'
|
|
123
123
|
import { setOpenCodeModelData, startOpenCode, startOpenCodeDesktop, startOpenCodeWeb } from '../src/opencode.js'
|
|
@@ -230,28 +230,6 @@ export async function runApp(cliArgs, config) {
|
|
|
230
230
|
}
|
|
231
231
|
}
|
|
232
232
|
|
|
233
|
-
// 📖 Shell env migration popup for existing users who haven't been asked yet
|
|
234
|
-
// 📖 Only show when user has keys but shellEnvEnabled is still undefined (never prompted)
|
|
235
|
-
// 📖 shellEnvPromptSeen flag ensures it only shows ONCE even after adding new keys
|
|
236
|
-
if (hasAnyKey && config.settings.shellEnvEnabled === undefined && config.settings.shellEnvPromptSeen !== true) {
|
|
237
|
-
const choice = await promptShellEnvMigration(config)
|
|
238
|
-
if (!config.settings) config.settings = {}
|
|
239
|
-
config.settings.shellEnvPromptSeen = true
|
|
240
|
-
if (choice === 'enable') {
|
|
241
|
-
config.settings.shellEnvEnabled = true
|
|
242
|
-
saveConfig(config)
|
|
243
|
-
syncShellEnv(config)
|
|
244
|
-
ensureShellRcSource()
|
|
245
|
-
} else if (choice === 'never') {
|
|
246
|
-
config.settings.shellEnvEnabled = false
|
|
247
|
-
saveConfig(config)
|
|
248
|
-
}
|
|
249
|
-
if (choice === 'skip') {
|
|
250
|
-
config.settings.shellEnvEnabled = false
|
|
251
|
-
saveConfig(config)
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
|
|
255
233
|
// 📖 Default mode: use the last persisted launcher choice when valid,
|
|
256
234
|
// 📖 otherwise fall back to OpenCode CLI.
|
|
257
235
|
let mode = getToolModeOrder().includes(config.settings?.preferredToolMode)
|
|
@@ -845,6 +823,8 @@ export async function runApp(cliArgs, config) {
|
|
|
845
823
|
verdictFilterMode: state.verdictFilterMode,
|
|
846
824
|
healthFilterMode: state.healthFilterMode,
|
|
847
825
|
bestModeOnly: state.bestModeOnly,
|
|
826
|
+
benchmarkResults: state.benchmarkResults,
|
|
827
|
+
benchmarkRunning: state.benchmarkRunning,
|
|
848
828
|
}
|
|
849
829
|
if (state.commandPaletteOpen) {
|
|
850
830
|
if (!state.commandPaletteFrozenTable) {
|
|
@@ -932,6 +912,8 @@ export async function runApp(cliArgs, config) {
|
|
|
932
912
|
verdictFilterMode: state.verdictFilterMode,
|
|
933
913
|
healthFilterMode: state.healthFilterMode,
|
|
934
914
|
bestModeOnly: state.bestModeOnly,
|
|
915
|
+
benchmarkResults: state.benchmarkResults,
|
|
916
|
+
benchmarkRunning: state.benchmarkRunning,
|
|
935
917
|
}))
|
|
936
918
|
if (process.stdout.isTTY) {
|
|
937
919
|
process.stdout.flush && process.stdout.flush()
|
package/src/benchmark.js
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file benchmark.js
|
|
3
|
+
* @description Real-answer benchmark for measuring model response speed and throughput.
|
|
4
|
+
*
|
|
5
|
+
* @details
|
|
6
|
+
* This module sends a single small chat completion to a model and measures:
|
|
7
|
+
* - Total wall-clock response time (ms)
|
|
8
|
+
* - Output tokens generated
|
|
9
|
+
* - Tokens per second (TPS)
|
|
10
|
+
*
|
|
11
|
+
* 🎯 Key features:
|
|
12
|
+
* - Provider-specific request building (Replicate, Cloudflare, OpenRouter, ZAI and generic OpenAI-compatible endpoints)
|
|
13
|
+
* - Async benchmark with timeout and abort controller
|
|
14
|
+
* - Prefers `usage.completion_tokens` from the API response
|
|
15
|
+
* - Falls back to character-length estimate when usage is missing
|
|
16
|
+
* - Returns structured success/failure objects for TUI consumption
|
|
17
|
+
*
|
|
18
|
+
* → Functions:
|
|
19
|
+
* - `buildBenchmarkRequest`: Build provider-specific benchmark request
|
|
20
|
+
* - `benchmarkModel`: Run a single benchmark and return timing + token metrics
|
|
21
|
+
* - `formatBenchmarkResult`: Format a benchmark result for the TUI column
|
|
22
|
+
* - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
|
|
23
|
+
*
|
|
24
|
+
* 📦 Dependencies:
|
|
25
|
+
* - ./ping.js: buildPingRequest, resolveCloudflareUrl
|
|
26
|
+
*
|
|
27
|
+
* @see {@link ./ping.js} Provider-specific request building
|
|
28
|
+
* @see {@link ./render-table.js} Answer Speed column rendering
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
|
|
32
|
+
|
|
33
|
+
// 📖 BENCHMARK_PROMPT: The one user message sent for every benchmark run.
// 📖 It asks for a single short sentence so runs stay quick and comparable.
export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one short sentence.'

// 📖 BENCHMARK_MAX_TOKENS: Upper bound on generated tokens, so a verbose model
// 📖 cannot write an essay and distort the tokens-per-second figure.
export const BENCHMARK_MAX_TOKENS = 32

// 📖 BENCHMARK_TEMPERATURE: Sampling temperature sent with chat requests (0 = as deterministic as the provider allows).
export const BENCHMARK_TEMPERATURE = 0

// 📖 BENCHMARK_TIMEOUT_MS: Default deadline (20 seconds) after which a benchmark is aborted and counted as failed.
export const BENCHMARK_TIMEOUT_MS = 20 * 1000
|
|
46
|
+
|
|
47
|
+
// 📖 estimateTokensFromText: Rough token count used only when the API response carries no usage data.
// 📖 Heuristic: one token per 4 characters, rounded up. This is an ESTIMATE, not a
// 📖 tokenizer result, and must not be used for billing.
// 📖 Returns 0 for empty strings and for anything that is not a string.
export function estimateTokensFromText(text) {
  const isNonEmptyString = typeof text === 'string' && text.length > 0
  return isNonEmptyString ? Math.ceil(text.length / 4) : 0
}
|
|
54
|
+
|
|
55
|
+
// 📖 formatBenchmarkResult: Turn a raw benchmark result into a compact display string.
// 📖 Handles all four states: running, empty, error, and success.
// 📖
// 📖   Running: one spinner glyph chosen from `frame` (any number; wrapped into range)
// 📖   Empty:   "—" when `result` is null/undefined
// 📖   Error:   `result.code` (e.g. "TIMEOUT", "401", "429"), or "ERR" when no code is set
// 📖   Success: "4.3s / 13 TPS" — one decimal below 10 seconds, whole seconds from 10 up
// 📖
// 📖 `running` takes precedence over whatever `result` holds.
export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
  if (running) {
    const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
    // 📖 Wrap negative or fractional frame counters so a glyph is always returned.
    const wholeFrame = Number.isFinite(frame) ? Math.trunc(frame) : 0
    const count = spinnerFrames.length
    return spinnerFrames[((wholeFrame % count) + count) % count]
  }

  if (!result) return '—'
  if (!result.ok) return result.code || 'ERR'

  // 📖 Missing or non-finite metrics render as 0 instead of "NaN".
  const totalSeconds = Number.isFinite(result.totalMs) ? result.totalMs / 1000 : 0
  const oneDecimal = totalSeconds.toFixed(1)
  // 📖 Decide on the rounded value: 9.96s rounds to "10.0", which must use the
  // 📖 whole-second form ("10s") like every other value from 10 seconds up.
  const secondsLabel = Number(oneDecimal) >= 10
    ? `${totalSeconds.toFixed(0)}s`
    : `${oneDecimal}s`

  const tps = Number.isFinite(result.tokensPerSecond) ? result.tokensPerSecond : 0

  return `${secondsLabel} / ${Math.round(tps)} TPS`
}
|
|
87
|
+
|
|
88
|
+
// 📖 buildBenchmarkRequest: Assemble the { url, headers, body } triple for one benchmark call.
// 📖
// 📖 Three request shapes are produced:
// 📖   - replicate:  `Token` auth, `Prefer: wait=4`, body { version, input: { prompt, max_tokens } }
// 📖   - cloudflare: chat-completion body, URL passed through resolveCloudflareUrl()
// 📖   - all others: chat-completion body sent to `url` as-is (OpenRouter also gets
// 📖                 the HTTP-Referer / X-Title attribution headers)
// 📖
// 📖 The Authorization header is only added when `apiKey` is truthy.
export function buildBenchmarkRequest(apiKey, modelId, providerKey, url) {
  if (providerKey === 'replicate') {
    return {
      url,
      headers: {
        'Content-Type': 'application/json',
        Prefer: 'wait=4',
        ...(apiKey ? { Authorization: `Token ${apiKey}` } : {}),
      },
      body: { version: modelId, input: { prompt: BENCHMARK_PROMPT, max_tokens: BENCHMARK_MAX_TOKENS } },
    }
  }

  // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
  const chatBody = {
    model: providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId,
    messages: [{ role: 'user', content: BENCHMARK_PROMPT }],
    max_tokens: BENCHMARK_MAX_TOKENS,
    temperature: BENCHMARK_TEMPERATURE,
  }

  const chatHeaders = { 'Content-Type': 'application/json' }
  if (apiKey) chatHeaders.Authorization = `Bearer ${apiKey}`

  if (providerKey === 'cloudflare') {
    return { url: resolveCloudflareUrl(url), headers: chatHeaders, body: chatBody }
  }

  if (providerKey === 'openrouter') {
    chatHeaders['HTTP-Referer'] = 'https://github.com/vava-nessa/free-coding-models'
    chatHeaders['X-Title'] = 'free-coding-models'
  }

  return { url, headers: chatHeaders, body: chatBody }
}
|
|
138
|
+
|
|
139
|
+
// 📖 benchmarkModel: Send one real completion request and measure response speed.
// 📖
// 📖 Options:
// 📖   apiKey      — credential handed to buildBenchmarkRequest (may be null)
// 📖   modelId     — model identifier handed to buildBenchmarkRequest
// 📖   providerKey — provider identifier; rovo / gemini / opencode-zen are rejected up front
// 📖   url         — provider endpoint handed to buildBenchmarkRequest
// 📖   timeoutMs   — abort deadline covering the request AND the body download
// 📖
// 📖 Failures while building, sending or reading the request are caught and
// 📖 reported as a result object rather than thrown.
// 📖
// 📖 Returns on success:
// 📖   {
// 📖     ok: true,
// 📖     totalMs: 2000,          // wall-clock time until the full body was received
// 📖     outputTokens: 26,       // usage.completion_tokens, or a length-based estimate
// 📖     tokensPerSecond: 13,
// 📖     answerPreview: "The sky is blue because..."   // first 60 chars
// 📖   }
// 📖
// 📖 Returns on failure:
// 📖   {
// 📖     ok: false,
// 📖     code: "TIMEOUT" | "ERR" | "<HTTP status>" | "UNSUPPORTED",
// 📖     totalMs: 20000,
// 📖     error: "Request timed out"
// 📖   }
export async function benchmarkModel({ apiKey, modelId, providerKey, url, timeoutMs = BENCHMARK_TIMEOUT_MS }) {
  // 📖 Guard: unsupported providers that don't do chat completions
  if (providerKey === 'rovo' || providerKey === 'gemini' || providerKey === 'opencode-zen') {
    return {
      ok: false,
      code: 'UNSUPPORTED',
      totalMs: 0,
      error: 'Provider does not support chat completions',
    }
  }

  const ctrl = new AbortController()
  const timer = setTimeout(() => ctrl.abort(), timeoutMs)
  const t0 = performance.now()
  const elapsedMs = () => Math.round(performance.now() - t0)

  try {
    const req = buildBenchmarkRequest(apiKey, modelId, providerKey, url)
    const resp = await fetch(req.url, {
      method: 'POST',
      signal: ctrl.signal,
      headers: req.headers,
      body: JSON.stringify(req.body),
    })

    // 📖 Non-2xx: return compact error code. The body is read best-effort only,
    // 📖 to surface the provider's error message when it is JSON.
    if (!resp.ok) {
      let detail = null
      try {
        detail = JSON.parse(await resp.text())?.error?.message
      } catch {
        // 📖 Error bodies are often HTML or empty — the status code is enough.
      }
      return {
        ok: false,
        code: String(resp.status),
        totalMs: elapsedMs(),
        error: detail || `HTTP ${resp.status}`,
      }
    }

    // 📖 fetch() resolves as soon as the response headers arrive, so the clock is
    // 📖 stopped only after the whole body has been read. A read failure or a
    // 📖 timeout during the download falls through to the outer catch instead of
    // 📖 being reported as a successful 0-TPS run.
    const bodyText = await resp.text()
    const totalMs = elapsedMs()

    let data
    try {
      data = JSON.parse(bodyText)
    } catch {
      return {
        ok: false,
        code: 'ERR',
        totalMs,
        error: 'Invalid JSON in response body',
      }
    }

    // 📖 Extract generated text from OpenAI-compatible response
    const content = data?.choices?.[0]?.message?.content || data?.choices?.[0]?.text || ''
    const answerPreview = typeof content === 'string' ? content.slice(0, 60) : ''

    // 📖 Prefer usage.completion_tokens when available;
    // 📖 FALLBACK: estimate from character count when API omits usage
    const reportedTokens = data?.usage?.completion_tokens
    const outputTokens = reportedTokens != null
      ? (Number(reportedTokens) || 0)
      : estimateTokensFromText(content)

    // 📖 Guard division by zero
    const seconds = totalMs / 1000
    const tokensPerSecond = seconds > 0 ? outputTokens / seconds : 0

    return {
      ok: true,
      totalMs,
      outputTokens,
      tokensPerSecond,
      answerPreview,
    }
  } catch (err) {
    // 📖 An aborted fetch / body read surfaces as an error named "AbortError".
    const isTimeout = err?.name === 'AbortError'
    return {
      ok: false,
      code: isTimeout ? 'TIMEOUT' : 'ERR',
      totalMs: elapsedMs(),
      error: isTimeout ? 'Request timed out' : (err?.message || 'Network error'),
    }
  } finally {
    clearTimeout(timer)
  }
}
|
package/src/key-handler.js
CHANGED
|
@@ -54,6 +54,7 @@ import {
|
|
|
54
54
|
restartRouterDashboardDaemon,
|
|
55
55
|
toggleRouterDashboardProbePause,
|
|
56
56
|
} from './router-dashboard.js'
|
|
57
|
+
import { benchmarkModel } from './benchmark.js'
|
|
57
58
|
|
|
58
59
|
// 📖 Some providers need an explicit probe model because the first catalog entry
|
|
59
60
|
// 📖 is not guaranteed to be accepted by their chat endpoint.
|
|
@@ -783,6 +784,18 @@ export function createKeyHandler(ctx) {
|
|
|
783
784
|
function toggleShellEnv() {
|
|
784
785
|
if (!state.config.settings) state.config.settings = {}
|
|
785
786
|
const currentlyEnabled = state.config.settings.shellEnvEnabled === true
|
|
787
|
+
const isUndefined = state.config.settings.shellEnvEnabled === undefined
|
|
788
|
+
|
|
789
|
+
if (isUndefined) {
|
|
790
|
+
// 📖 First-time setup: enable + sync immediately (previously done by startup popup)
|
|
791
|
+
state.config.settings.shellEnvEnabled = true
|
|
792
|
+
saveConfig(state.config)
|
|
793
|
+
syncShellEnv(state.config)
|
|
794
|
+
ensureShellRcSource()
|
|
795
|
+
trackAppAction('shell_env_export_toggled', { enabled: true })
|
|
796
|
+
return
|
|
797
|
+
}
|
|
798
|
+
|
|
786
799
|
state.config.settings.shellEnvEnabled = !currentlyEnabled
|
|
787
800
|
saveConfig(state.config)
|
|
788
801
|
if (!currentlyEnabled) {
|
|
@@ -1026,6 +1039,42 @@ export function createKeyHandler(ctx) {
|
|
|
1026
1039
|
saveConfig(state.config)
|
|
1027
1040
|
}
|
|
1028
1041
|
|
|
1042
|
+
// 📖 runBenchmarkOnSelected: Fire a real-answer benchmark on the currently selected row.
// 📖 Triggered by Ctrl+A. Async — does not block the UI. Results are stored in
// 📖 state.benchmarkResults keyed by `${providerKey}/${modelId}`; the same key sits in
// 📖 state.benchmarkRunning for the duration of the request (drives the spinner).
// 📖
// 📖 Does nothing when no row is selected, when a benchmark for that row is
// 📖 already running, or when the provider has no URL in `sources`.
async function runBenchmarkOnSelected() {
  // 📖 Optional chaining: the row list may not exist yet (same guard as the Shift+Up handler).
  const selected = state.visibleSorted?.[state.cursor]
  if (!selected) return

  const { providerKey, modelId } = selected
  const benchmarkKey = `${providerKey}/${modelId}`
  if (state.benchmarkRunning.has(benchmarkKey)) return

  const apiKey = getApiKey(state.config, providerKey) ?? null
  const providerUrl = sources[providerKey]?.url ?? null
  if (!providerUrl) return

  state.benchmarkRunning.add(benchmarkKey)

  try {
    state.benchmarkResults[benchmarkKey] = await benchmarkModel({
      apiKey,
      modelId,
      providerKey,
      url: providerUrl,
    })
  } catch (err) {
    // 📖 Safety net: record a generic failure so the row shows ERR.
    state.benchmarkResults[benchmarkKey] = {
      ok: false,
      code: 'ERR',
      totalMs: 0,
      error: err?.message || 'Benchmark failed',
    }
  } finally {
    state.benchmarkRunning.delete(benchmarkKey)
  }
}
|
|
1077
|
+
|
|
1029
1078
|
// 📖 Favorites display mode:
|
|
1030
1079
|
// 📖 - true => favorites stay pinned + always visible (legacy behavior)
|
|
1031
1080
|
// 📖 - false => favorites are just starred rows and obey normal sort/filter rules
|
|
@@ -2811,6 +2860,13 @@ export function createKeyHandler(ctx) {
|
|
|
2811
2860
|
return
|
|
2812
2861
|
}
|
|
2813
2862
|
|
|
2863
|
+
// 📖 Ctrl+A: benchmark the currently selected model with a real completion.
|
|
2864
|
+
// 📖 Measures wall-clock response time and tokens per second (TPS).
|
|
2865
|
+
if (key.ctrl && key.name === 'a') {
|
|
2866
|
+
void runBenchmarkOnSelected()
|
|
2867
|
+
return
|
|
2868
|
+
}
|
|
2869
|
+
|
|
2814
2870
|
if (key.shift && key.name === 'up') {
|
|
2815
2871
|
const selected = state.visibleSorted?.[state.cursor]
|
|
2816
2872
|
if (selected?.isFavorite) {
|
package/src/overlays.js
CHANGED
|
@@ -274,10 +274,12 @@ export function createOverlayRenderers(state, deps) {
|
|
|
274
274
|
lines.push(state.settingsCursor === changelogViewRowIdx ? themeColors.bgCursorSettingsList(changelogViewRow) : changelogViewRow)
|
|
275
275
|
|
|
276
276
|
// 📖 Shell env toggle — expose API keys as shell environment variables
|
|
277
|
-
const
|
|
278
|
-
const shellEnvStatus =
|
|
277
|
+
const shellEnvSetting = state.config.settings?.shellEnvEnabled
|
|
278
|
+
const shellEnvStatus = shellEnvSetting === true
|
|
279
279
|
? themeColors.successBold('✅ Enabled — keys available in shell')
|
|
280
|
-
:
|
|
280
|
+
: shellEnvSetting === false
|
|
281
|
+
? themeColors.dim('❌ Disabled')
|
|
282
|
+
: themeColors.warning('🔘 Not configured — Enter to set up')
|
|
281
283
|
const shellEnvRow = `${bullet(state.settingsCursor === shellEnvRowIdx)}${themeColors.textBold('Shell Env Export').padEnd(44)} ${shellEnvStatus}`
|
|
282
284
|
cursorLineByRow[shellEnvRowIdx] = lines.length
|
|
283
285
|
lines.push(state.settingsCursor === shellEnvRowIdx ? themeColors.bgCursorSettingsList(shellEnvRow) : shellEnvRow)
|
|
@@ -928,6 +930,7 @@ export function createOverlayRenderers(state, deps) {
|
|
|
928
930
|
lines.push(` ${heading('Controls')}`)
|
|
929
931
|
lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
|
|
930
932
|
lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
|
|
933
|
+
lines.push(` ${key('Ctrl+A')} Benchmark answer speed ${hint('(real completion on selected model → time + TPS)')}`)
|
|
931
934
|
lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
|
|
932
935
|
lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
|
|
933
936
|
lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
|
package/src/render-table.js
CHANGED
|
@@ -49,6 +49,7 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
|
|
|
49
49
|
import { TIER_COLOR } from './tier-colors.js'
|
|
50
50
|
import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
|
|
51
51
|
import { usagePlaceholderForProvider } from './ping.js'
|
|
52
|
+
import { formatBenchmarkResult } from './benchmark.js'
|
|
52
53
|
import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
|
|
53
54
|
import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
|
|
54
55
|
import { getColumnSpacing } from './ui-config.js'
|
|
@@ -181,6 +182,8 @@ export function renderTable({
|
|
|
181
182
|
routerFooterTodayTokens = 0,
|
|
182
183
|
routerFooterAllTimeTokens = 0,
|
|
183
184
|
routerFooterRequests = 0,
|
|
185
|
+
benchmarkResults = {},
|
|
186
|
+
benchmarkRunning = new Set(),
|
|
184
187
|
} = {}) {
|
|
185
188
|
// 📖 Filter out hidden models for display
|
|
186
189
|
const visibleResults = results.filter(r => !r.hidden)
|
|
@@ -274,6 +277,7 @@ export function renderTable({
|
|
|
274
277
|
const W_STATUS = 18
|
|
275
278
|
const W_VERDICT = 14
|
|
276
279
|
const W_UPTIME = 6
|
|
280
|
+
const W_ANSWER = 14
|
|
277
281
|
|
|
278
282
|
// const W_TOKENS = 7 // Used column removed
|
|
279
283
|
// const W_USAGE = 7 // Usage column removed
|
|
@@ -281,16 +285,17 @@ export function renderTable({
|
|
|
281
285
|
|
|
282
286
|
// 📖 Responsive column visibility: progressively hide least-useful columns
|
|
283
287
|
// 📖 and shorten header labels when terminal width is insufficient.
|
|
284
|
-
// 📖 Hiding order (least useful first): Rank → Up% → Tier → Stability
|
|
288
|
+
// 📖 Hiding order (least useful first): Rank → Answer Speed → Up% → Tier → Stability
|
|
285
289
|
// 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
|
|
286
290
|
// 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
|
|
287
|
-
// 📖 Breakpoints: full=
|
|
291
|
+
// 📖 Breakpoints: full=183 | compact=160 | -Rank=151 | -Answer=142 | -Up%=133 | -Tier=125 | -Stab=114
|
|
288
292
|
let wPing = 14
|
|
289
293
|
let wAvg = 11
|
|
290
294
|
let wStab = 11
|
|
291
295
|
let wSource = W_SOURCE
|
|
292
296
|
let wStatus = W_STATUS
|
|
293
297
|
let showRank = true
|
|
298
|
+
let showAnswerSpeed = true
|
|
294
299
|
let showUptime = true
|
|
295
300
|
let showTier = true
|
|
296
301
|
let showStability = true
|
|
@@ -305,6 +310,7 @@ export function renderTable({
|
|
|
305
310
|
cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
|
|
306
311
|
if (showStability) cols.push(wStab)
|
|
307
312
|
if (showUptime) cols.push(W_UPTIME)
|
|
313
|
+
if (showAnswerSpeed) cols.push(W_ANSWER)
|
|
308
314
|
return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
|
|
309
315
|
}
|
|
310
316
|
|
|
@@ -317,8 +323,9 @@ export function renderTable({
|
|
|
317
323
|
wSource = 7 // Provider truncated to 4 chars + '…', 7 cols total
|
|
318
324
|
wStatus = 13 // Health truncated after 6 chars + '…'
|
|
319
325
|
}
|
|
320
|
-
// 📖 Steps 2–
|
|
326
|
+
// 📖 Steps 2–6: Progressive column hiding (least useful first)
|
|
321
327
|
if (calcWidth() > terminalCols) showRank = false
|
|
328
|
+
if (calcWidth() > terminalCols) showAnswerSpeed = false
|
|
322
329
|
if (calcWidth() > terminalCols) showUptime = false
|
|
323
330
|
if (calcWidth() > terminalCols) showTier = false
|
|
324
331
|
if (calcWidth() > terminalCols) showStability = false
|
|
@@ -341,6 +348,7 @@ export function renderTable({
|
|
|
341
348
|
colDefs.push({ name: 'verdict', width: W_VERDICT })
|
|
342
349
|
if (showStability) colDefs.push({ name: 'stability', width: wStab })
|
|
343
350
|
if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
|
|
351
|
+
if (showAnswerSpeed) colDefs.push({ name: 'answerSpeed', width: W_ANSWER })
|
|
344
352
|
let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
|
|
345
353
|
const columns = []
|
|
346
354
|
for (let i = 0; i < colDefs.length; i++) {
|
|
@@ -467,6 +475,14 @@ export function renderTable({
|
|
|
467
475
|
return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
|
|
468
476
|
})()
|
|
469
477
|
|
|
478
|
+
// 📖 Answer Speed header — this column has no sort hotkey, so the whole label is dim
// 📖 (no highlighted letter). The printed text and the padding both come from
// 📖 `answerLabel`, so the cell is exactly W_ANSWER wide in compact mode ('Answ.')
// 📖 as well as in full mode ('Answer Speed').
const answerLabel = isCompact ? 'Answ.' : 'Answer Speed'
const answerH_c = themeColors.dim(answerLabel.padEnd(W_ANSWER))
|
|
485
|
+
|
|
470
486
|
// 📖 Usage column removed from UI – no header or separator for it.
|
|
471
487
|
// 📖 Header row: conditionally include columns based on responsive visibility
|
|
472
488
|
const headerParts = []
|
|
@@ -475,6 +491,7 @@ export function renderTable({
|
|
|
475
491
|
headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
|
|
476
492
|
if (showStability) headerParts.push(stabH_c)
|
|
477
493
|
if (showUptime) headerParts.push(uptimeH_c)
|
|
494
|
+
if (showAnswerSpeed) headerParts.push(answerH_c)
|
|
478
495
|
lines.push(' ' + headerParts.join(COL_SEP))
|
|
479
496
|
|
|
480
497
|
// 📖 Mouse support: the column header row is the last line we just pushed.
|
|
@@ -776,6 +793,25 @@ export function renderTable({
|
|
|
776
793
|
// (We keep the logic but do not render it.)
|
|
777
794
|
const usageCell = ''
|
|
778
795
|
|
|
796
|
+
// 📖 Answer Speed column — show benchmark result, running spinner, or dash
|
|
797
|
+
const benchmarkKey = `${r.providerKey}/${r.modelId}`
|
|
798
|
+
const benchmarkResult = benchmarkResults[benchmarkKey]
|
|
799
|
+
const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
|
|
800
|
+
let answerSpeedCell
|
|
801
|
+
if (isBenchmarkRunning) {
|
|
802
|
+
const spinner = FRAMES[frame % FRAMES.length]
|
|
803
|
+
answerSpeedCell = themeColors.success(spinner.padEnd(W_ANSWER))
|
|
804
|
+
} else if (benchmarkResult) {
|
|
805
|
+
const text = formatBenchmarkResult(benchmarkResult)
|
|
806
|
+
// 📖 Colorize: success = green, error = red/dim
|
|
807
|
+
const isError = !benchmarkResult.ok
|
|
808
|
+
answerSpeedCell = isError
|
|
809
|
+
? themeColors.metricBad(text.padEnd(W_ANSWER))
|
|
810
|
+
: themeColors.metricGood(text.padEnd(W_ANSWER))
|
|
811
|
+
} else {
|
|
812
|
+
answerSpeedCell = themeColors.dim('—'.padEnd(W_ANSWER))
|
|
813
|
+
}
|
|
814
|
+
|
|
779
815
|
// 📖 Build row: conditionally include columns based on responsive visibility
|
|
780
816
|
const rowParts = []
|
|
781
817
|
if (showRank) rowParts.push(num)
|
|
@@ -783,6 +819,7 @@ export function renderTable({
|
|
|
783
819
|
rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
|
|
784
820
|
if (showStability) rowParts.push(stabCell)
|
|
785
821
|
if (showUptime) rowParts.push(uptimeCell)
|
|
822
|
+
if (showAnswerSpeed) rowParts.push(answerSpeedCell)
|
|
786
823
|
const row = ' ' + rowParts.join(COL_SEP)
|
|
787
824
|
|
|
788
825
|
if (isCursor) {
|
package/src/tui-state.js
CHANGED
|
@@ -261,5 +261,12 @@ export function createTuiState({
|
|
|
261
261
|
|
|
262
262
|
// 📖 Token usage overlay scroll state (used when overlay opens from footer)
|
|
263
263
|
tokenUsageOpen: false,
|
|
264
|
+
|
|
265
|
+
// 📖 Benchmark results: keyed by `${providerKey}/${modelId}`
|
|
266
|
+
// 📖 Each entry is the raw result object from benchmarkModel() or null.
|
|
267
|
+
benchmarkResults: {},
|
|
268
|
+
|
|
269
|
+
// 📖 Set of benchmark keys currently running (for spinner display)
|
|
270
|
+
benchmarkRunning: new Set(),
|
|
264
271
|
}
|
|
265
272
|
}
|