npm - free-coding-models - Versions diffs - 0.3.75 → 0.3.77 - Mend

free-coding-models 0.3.75 → 0.3.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +4 -8
package/changelog/v0.3.76.md +4 -0
package/changelog/v0.3.77.md +12 -0
package/changelog/v0.3.78.md +14 -0
package/package.json +1 -1
package/sources.js +4 -3
package/src/app.js +4 -0
package/src/benchmark.js +241 -0
package/src/endpoint-installer.js +4 -2
package/src/key-handler.js +44 -0
package/src/overlays.js +1 -0
package/src/render-table.js +40 -3
package/src/tui-state.js +7 -0
package/web/dist/assets/{index-BTxvE1Bz.js → index-Ch6dT36p.js} +1 -1
package/web/dist/index.html +1 -1

package/README.md CHANGED Viewed

@@ -379,18 +379,14 @@ Press **`Z`** in the TUI to cycle between tools without restarting.
 ### OpenCode Zen Free Models
-[OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering 8 free coding models exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
+[OpenCode Zen](https://opencode.ai/zen) is a hosted AI gateway offering **4 free coding models** exclusively through OpenCode CLI and OpenCode Desktop. These models are **not** available through other tools.
 | Model | Tier | SWE-bench | Context |
 |-------|------|-----------|---------|
 | Big Pickle | S+ | 72.0% | 200k |
-| MiniMax M2.5 Free | S+ | 80.2% | 200k |
-| Nemotron 3 Super Free | A+ | 52.0% | 1M |
-| GPT 5 Nano | S | 65.0% | 400k |
-| HY3 Preview Free | A+ | - | 128k |
-| Ling 2.6 Flash Free | S | - | 128k |
-| Trinity Large Preview Free | S | - | 128k |
-| Trinity Mini Preview Free | A | - | 128k |
+| DeepSeek V4 Flash Free | S+ | 79.0% | 200k |
+| MiMo-V2.5 Free | S+ | - | 200k |
+| Nemotron 3 Super Free | A+ | 52.0% | 200k |
 To use Zen models: sign up at [opencode.ai/auth](https://opencode.ai/auth) and enter your Zen API key via `P` (Settings). Zen models appear in the main table and auto-switch to OpenCode CLI on launch.

package/changelog/v0.3.76.md ADDED Viewed

@@ -0,0 +1,4 @@
+# Changelog v0.3.76 - 2026-05-28
+### Fixed
+- **ZAI and Zen providers removed from install blocklist** — `opencode-zen` and `zai` were in `DIRECT_INSTALL_UNSUPPORTED_PROVIDERS` preventing auto-install into tools like Pi, Aider, Cline, etc. Both providers use OpenAI-compatible endpoints and should be installable everywhere. Only `replicate` (non-standard `/v1/predictions`), `rovo` and `gemini` (CLI-only, no API endpoint) remain blocked.

package/changelog/v0.3.77.md ADDED Viewed

@@ -0,0 +1,12 @@
+# Changelog v0.3.77 - 2026-05-28
+### Fixed
+- **OpenCode Zen model catalog sync** — removed 3 models that are not free (two promotions ended, one was never free) and added 1 new free model:
+  - ❌ Removed `qwen3.6-plus-free` — free promotion ended, now requires OpenCode Go subscription or PAYG
+  - ❌ Removed `minimax-m2.5-free` — 7-day launch promo expired, now paid
+  - ❌ Removed `gpt-5-nano` — was incorrectly cataloged as free, actually requires Zen PAYG
+  - ✅ Added `mimo-v2.5-free` — newly confirmed free on Zen
+  - Result: Zen free catalog updated from 6 → 4 confirmed free models
+### Changed
+- Updated README Zen models table to reflect the current 4-model free tier

package/changelog/v0.3.78.md ADDED Viewed

@@ -0,0 +1,14 @@
+# Changelog v0.3.78 - 2026-05-28
+### Added
+- **Real-Answer Benchmark feature** — Press `Ctrl+A` to run a live completion benchmark on the currently selected model. Measures actual wall-clock response time and tokens-per-second (TPS) with a real chat completion request (`"Why is the sky blue? Answer in exactly one short sentence."`). Results appear in the new **Answer Speed** column as `4.3s / 13 TPS`.
+- New `Answer Speed` column in the TUI table, positioned after the Uptime column. Defaults to `—`, shows a green spinner while benchmarking, and displays compact error codes (`ERR`, `TIMEOUT`, `401`, `429`) on failure.
+- New module `src/benchmark.js` with lightweight, native Node.js benchmark logic:
+  - `benchmarkModel({ apiKey, modelId, providerKey, url })` — sends one completion, measures time, parses `usage.completion_tokens` with a `Math.ceil(outputText.length / 4)` fallback clearly labeled as an estimate.
+  - Guards against division-by-zero and unsupported providers (rovo, gemini, opencode-zen return `UNSUPPORTED`).
+  - Respects existing API key handling and fails gracefully on missing credentials, rate limits, or timeouts.
+- Benchmark state stored in TUI state keyed by `${providerKey}/${modelId}`, so results survive re-renders and table refreshes.
+- `Ctrl+A` documented in the Help overlay (`I` key).
+### Changed
+- Responsive column breakpoints adjusted to accommodate the new `Answer Speed` column (14 cols). Progressive hiding order: Rank → Answer Speed → Up% → Tier → Stability. Compact mode still active at ~163+ cols.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "free-coding-models",
-  "version": "0.3.75",
+  "version": "0.3.77",
   "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
   "keywords": [
     "nvidia",

package/sources.js CHANGED Viewed

@@ -355,11 +355,12 @@ export const gemini = [
 // 📖 Config: set provider to opencode/<model-id> in OpenCode config
 export const opencodeZen = [
   ['big-pickle',                       'Big Pickle',              'S+', '72.0%', '200k'],
-  ['minimax-m2.5-free',                'MiniMax M2.5 Free',      'S+', '80.2%', '200k'],
   ['deepseek-v4-flash-free',           'DeepSeek V4 Flash Free',  'S+', '79.0%', '200k'],
-  ['qwen3.6-plus-free',                'Qwen3.6 Plus Free',       'S+', '78.8%', '1M'],
+  ['mimo-v2.5-free',                   'MiMo-V2.5 Free',          'S+', '-',     '200k'],
   ['nemotron-3-super-free',            'Nemotron 3 Super Free',   'A+', '52.0%', '200k'],
-  ['gpt-5-nano',                       'GPT 5 Nano',              'S',  '65.0%', '400k'],
+  // Removed (2026-05-28): qwen3.6-plus-free (free promotion ended — now requires OpenCode Go)
+  // Removed (2026-05-28): minimax-m2.5-free (7-day launch promo expired — now paid)
+  // Removed (2026-05-28): gpt-5-nano (was never free — incorrectly cataloged as free, requires Zen PAYG)
   // Removed (2026-05-26): hy3-preview-free (deleted from Zen)
   // Removed (2026-05-26): ling-2.6-flash-free (deleted from Zen)
   // Removed (2026-05-26): trinity-mini-free (deleted from Zen)

package/src/app.js CHANGED Viewed

@@ -845,6 +845,8 @@ export async function runApp(cliArgs, config) {
       verdictFilterMode: state.verdictFilterMode,
       healthFilterMode: state.healthFilterMode,
       bestModeOnly: state.bestModeOnly,
+      benchmarkResults: state.benchmarkResults,
+      benchmarkRunning: state.benchmarkRunning,
     }
     if (state.commandPaletteOpen) {
       if (!state.commandPaletteFrozenTable) {
@@ -932,6 +934,8 @@ export async function runApp(cliArgs, config) {
     verdictFilterMode: state.verdictFilterMode,
     healthFilterMode: state.healthFilterMode,
     bestModeOnly: state.bestModeOnly,
+    benchmarkResults: state.benchmarkResults,
+    benchmarkRunning: state.benchmarkRunning,
   }))
   if (process.stdout.isTTY) {
     process.stdout.flush && process.stdout.flush()

package/src/benchmark.js ADDED Viewed

@@ -0,0 +1,241 @@
+/**
+ * @file benchmark.js
+ * @description Real-answer benchmark for measuring model response speed and throughput.
+ *
+ * @details
+ *   This module sends a single small chat completion to a model and measures:
+ *   - Total wall-clock response time (ms)
+ *   - Output tokens generated
+ *   - Tokens per second (TPS)
+ *
+ *   🎯 Key features:
+ *   - Provider-specific request building via buildBenchmarkRequest (Replicate, Cloudflare, OpenRouter headers, ZAI model-id prefix)
+ *   - Async benchmark with timeout and abort controller
+ *   - Prefers `usage.completion_tokens` from the API response
+ *   - Falls back to character-length estimate when usage is missing
+ *   - Returns structured success/failure objects for TUI consumption
+ *
+ *   → Functions:
+ *   - `buildBenchmarkRequest`: Build provider-specific benchmark request
+ *   - `benchmarkModel`: Run a single benchmark and return timing + token metrics
+ *   - `formatBenchmarkResult`: Format a benchmark result for the TUI column
+ *   - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
+ *
+ *   📦 Dependencies:
+ *   - ./ping.js: buildPingRequest, resolveCloudflareUrl
+ *
+ *   @see {@link ./ping.js} Provider-specific request building
+ *   @see {@link ./render-table.js} Answer Speed column rendering
+ */
+import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
+// 📖 BENCHMARK_PROMPT: A short, unambiguous question that any model can answer.
+// 📖 Constrained to one sentence to keep benchmarks fast and consistent.
+export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one short sentence.'
+// 📖 BENCHMARK_MAX_TOKENS: Hard cap on generation length to prevent slow models
+// 📖 from producing essays and skewing the TPS calculation.
+export const BENCHMARK_MAX_TOKENS = 32
+// 📖 BENCHMARK_TEMPERATURE: Zero temperature for deterministic, reproducible results.
+export const BENCHMARK_TEMPERATURE = 0
+// 📖 BENCHMARK_TIMEOUT_MS: How long to wait before treating a benchmark as failed.
+export const BENCHMARK_TIMEOUT_MS = 20_000
+// 📖 estimateTokensFromText: Fallback token counter when the API does not return usage.
+// 📖 Uses a simple heuristic: avg English token ≈ 4 chars. This is explicitly an ESTIMATE
+// 📖 and is labeled as such everywhere it surfaces. Do not use for billing.
+export function estimateTokensFromText(text) {
+  if (!text || typeof text !== 'string') return 0
+  return Math.ceil(text.length / 4)
+}
+// 📖 formatBenchmarkResult: Turn a raw benchmark result into a compact display string.
+// 📖 Handles all four states: empty, running, success, and error.
+// 📖
+// 📖 Success: "4.3s / 13 TPS"
+// 📖 Running: spinner (caller passes spinner char)
+// 📖 Error: compact error code like "ERR", "TIMEOUT", "401", "429"
+// 📖 Empty: "—"
+export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
+  if (running) {
+    const spinIdx = frame % 10
+    const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'][spinIdx]
+    return spinner
+  }
+  if (!result) {
+    return '—'
+  }
+  if (!result.ok) {
+    return result.code || 'ERR'
+  }
+  const totalSeconds = result.totalMs / 1000
+  const secondsLabel = totalSeconds >= 10
+    ? totalSeconds.toFixed(0) + 's'
+    : totalSeconds.toFixed(1) + 's'
+  const tps = result.tokensPerSecond ?? 0
+  const tpsLabel = Math.round(tps)
+  return `${secondsLabel} / ${tpsLabel} TPS`
+}
+// 📖 buildBenchmarkRequest: Build provider-specific benchmark request.
+// 📖 Assembles the URL, headers, and body itself rather than calling buildPingRequest;
+// 📖 the payload is a real completion with max_tokens=32 and, for every provider
+// 📖 except Replicate, temperature=0.
+export function buildBenchmarkRequest(apiKey, modelId, providerKey, url) {
+  // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
+  const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId
+  if (providerKey === 'replicate') {
+    const replicateHeaders = { 'Content-Type': 'application/json', Prefer: 'wait=4' }
+    if (apiKey) replicateHeaders.Authorization = `Token ${apiKey}`
+    return {
+      url,
+      headers: replicateHeaders,
+      body: { version: modelId, input: { prompt: BENCHMARK_PROMPT, max_tokens: BENCHMARK_MAX_TOKENS } },
+    }
+  }
+  if (providerKey === 'cloudflare') {
+    const headers = { 'Content-Type': 'application/json' }
+    if (apiKey) headers.Authorization = `Bearer ${apiKey}`
+    return {
+      url: resolveCloudflareUrl(url),
+      headers,
+      body: {
+        model: apiModelId,
+        messages: [{ role: 'user', content: BENCHMARK_PROMPT }],
+        max_tokens: BENCHMARK_MAX_TOKENS,
+        temperature: BENCHMARK_TEMPERATURE,
+      },
+    }
+  }
+  const headers = { 'Content-Type': 'application/json' }
+  if (apiKey) headers.Authorization = `Bearer ${apiKey}`
+  if (providerKey === 'openrouter') {
+    headers['HTTP-Referer'] = 'https://github.com/vava-nessa/free-coding-models'
+    headers['X-Title'] = 'free-coding-models'
+  }
+  return {
+    url,
+    headers,
+    body: {
+      model: apiModelId,
+      messages: [{ role: 'user', content: BENCHMARK_PROMPT }],
+      max_tokens: BENCHMARK_MAX_TOKENS,
+      temperature: BENCHMARK_TEMPERATURE,
+    },
+  }
+}
+// 📖 benchmarkModel: Send one real completion request and measure response speed.
+// 📖
+// 📖 Returns on success:
+// 📖   {
+// 📖     ok: true,
+// 📖     totalMs: 4300,
+// 📖     outputTokens: 56,
+// 📖     tokensPerSecond: 13,
+// 📖     answerPreview: "The sky is blue because..."
+// 📖   }
+// 📖
+// 📖 Returns on failure:
+// 📖   {
+// 📖     ok: false,
+// 📖     code: "TIMEOUT" | "ERR" | "401" | "429" | "UNSUPPORTED",
+// 📖     totalMs: 15000,
+// 📖     error: "Request timed out"
+// 📖   }
+export async function benchmarkModel({ apiKey, modelId, providerKey, url, timeoutMs = BENCHMARK_TIMEOUT_MS }) {
+  // 📖 Guard: unsupported providers that don't do chat completions
+  if (providerKey === 'rovo' || providerKey === 'gemini' || providerKey === 'opencode-zen') {
+    return {
+      ok: false,
+      code: 'UNSUPPORTED',
+      totalMs: 0,
+      error: 'Provider does not support chat completions',
+    }
+  }
+  const ctrl = new AbortController()
+  const timer = setTimeout(() => ctrl.abort(), timeoutMs)
+  const t0 = performance.now()
+  try {
+    const req = buildBenchmarkRequest(apiKey, modelId, providerKey, url)
+    const resp = await fetch(req.url, {
+      method: 'POST',
+      signal: ctrl.signal,
+      headers: req.headers,
+      body: JSON.stringify(req.body),
+    })
+    const totalMs = Math.round(performance.now() - t0)
+    // 📖 Parse response body regardless of HTTP status so we can extract partial data
+    let bodyText = ''
+    try {
+      bodyText = await resp.text()
+    } catch {}
+    let data = null
+    try {
+      data = JSON.parse(bodyText)
+    } catch {}
+    // 📖 Non-2xx: return compact error code
+    if (!resp.ok) {
+      const code = String(resp.status)
+      return {
+        ok: false,
+        code,
+        totalMs,
+        error: data?.error?.message || `HTTP ${resp.status}`,
+      }
+    }
+    // 📖 Extract generated text from OpenAI-compatible response
+    const content = data?.choices?.[0]?.message?.content || data?.choices?.[0]?.text || ''
+    const answerPreview = typeof content === 'string' ? content.slice(0, 60) : ''
+    // 📖 Prefer usage.completion_tokens when available
+    let outputTokens = 0
+    if (data?.usage?.completion_tokens != null) {
+      outputTokens = Number(data.usage.completion_tokens) || 0
+    } else {
+      // 📖 FALLBACK: estimate from character count when API omits usage
+      outputTokens = estimateTokensFromText(content)
+    }
+    // 📖 Guard division by zero
+    const seconds = totalMs / 1000
+    const tokensPerSecond = seconds > 0 ? outputTokens / seconds : 0
+    return {
+      ok: true,
+      totalMs,
+      outputTokens,
+      tokensPerSecond,
+      answerPreview,
+    }
+  } catch (err) {
+    const totalMs = Math.round(performance.now() - t0)
+    const isTimeout = err.name === 'AbortError'
+    return {
+      ok: false,
+      code: isTimeout ? 'TIMEOUT' : 'ERR',
+      totalMs,
+      error: isTimeout ? 'Request timed out' : (err.message || 'Network error'),
+    }
+  } finally {
+    clearTimeout(timer)
+  }
+}

package/src/endpoint-installer.js CHANGED Viewed

@@ -48,8 +48,10 @@ import { getApiKey, saveConfig } from './config.js'
 import { ENV_VAR_NAMES, PROVIDER_METADATA } from './provider-metadata.js'
 import { getToolMeta } from './tool-metadata.js'
-// 📖 CLI-only providers (rovo, gemini) and Zen-only (opencode-zen) cannot be installed into other tools.
-const DIRECT_INSTALL_UNSUPPORTED_PROVIDERS = new Set(['replicate', 'zai', 'rovo', 'gemini', 'opencode-zen'])
+// 📖 CLI-only providers (rovo, gemini) cannot be installed into other tools — they manage their own auth.
+// 📖 replicate uses /v1/predictions (not /chat/completions), so it's not OpenAI-compatible.
+// 📖 zai and opencode-zen ARE OpenAI-compatible and CAN be installed into any tool.
+const DIRECT_INSTALL_UNSUPPORTED_PROVIDERS = new Set(['replicate', 'rovo', 'gemini'])
 // 📖 Install Endpoints only lists tools whose persisted config shape is actually supported here.
 // 📖 Claude Code, Codex, and Gemini stay out while their dedicated bridges are being rebuilt.
 const INSTALL_TARGET_MODES = ['opencode', 'opencode-desktop', 'opencode-web', 'openclaw', 'kilo', 'crush', 'goose', 'pi', 'aider', 'qwen', 'openhands', 'amp', 'hermes', 'continue', 'cline', 'forgecode', 'fcm_router']

package/src/key-handler.js CHANGED Viewed

@@ -54,6 +54,7 @@ import {
   restartRouterDashboardDaemon,
   toggleRouterDashboardProbePause,
 } from './router-dashboard.js'
+import { benchmarkModel } from './benchmark.js'
 // 📖 Some providers need an explicit probe model because the first catalog entry
 // 📖 is not guaranteed to be accepted by their chat endpoint.
@@ -1026,6 +1027,42 @@ export function createKeyHandler(ctx) {
     saveConfig(state.config)
   }
+  // 📖 runBenchmarkOnSelected: Fire a real-answer benchmark on the currently selected row.
+  // 📖 Triggered by Ctrl+A. Async — does not block the UI. Results are stored in state
+  // 📖 keyed by `${providerKey}/${modelId}` so they survive re-renders.
+  async function runBenchmarkOnSelected() {
+    const selected = state.visibleSorted[state.cursor]
+    if (!selected) return
+    const benchmarkKey = `${selected.providerKey}/${selected.modelId}`
+    if (state.benchmarkRunning.has(benchmarkKey)) return
+    const apiKey = getApiKey(state.config, selected.providerKey) ?? null
+    const providerUrl = sources[selected.providerKey]?.url ?? null
+    if (!providerUrl) return
+    state.benchmarkRunning.add(benchmarkKey)
+    try {
+      const result = await benchmarkModel({
+        apiKey,
+        modelId: selected.modelId,
+        providerKey: selected.providerKey,
+        url: providerUrl,
+      })
+      state.benchmarkResults[benchmarkKey] = result
+    } catch (err) {
+      state.benchmarkResults[benchmarkKey] = {
+        ok: false,
+        code: 'ERR',
+        totalMs: 0,
+        error: err?.message || 'Benchmark failed',
+      }
+    } finally {
+      state.benchmarkRunning.delete(benchmarkKey)
+    }
+  }
   // 📖 Favorites display mode:
   // 📖 - true  => favorites stay pinned + always visible (legacy behavior)
   // 📖 - false => favorites are just starred rows and obey normal sort/filter rules
@@ -2811,6 +2848,13 @@ export function createKeyHandler(ctx) {
       return
     }
+    // 📖 Ctrl+A: benchmark the currently selected model with a real completion.
+    // 📖 Measures wall-clock response time and tokens per second (TPS).
+    if (key.ctrl && key.name === 'a') {
+      void runBenchmarkOnSelected()
+      return
+    }
     if (key.shift && key.name === 'up') {
       const selected = state.visibleSorted?.[state.cursor]
       if (selected?.isFavorite) {

package/src/overlays.js CHANGED Viewed

@@ -928,6 +928,7 @@ export function createOverlayRenderers(state, deps) {
     lines.push(`  ${heading('Controls')}`)
     lines.push(`  ${key('W')}  Toggle ping mode  ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
     lines.push(`  ${key('Ctrl+P')}  Open ⚡️ command palette  ${hint('(search and run actions quickly)')}`)
+    lines.push(`  ${key('Ctrl+A')}  Benchmark answer speed  ${hint('(real completion on selected model → time + TPS)')}`)
     lines.push(`  ${key('E')}  Cycle filter mode  ${hint('(Normal → Configured only → Usable only)')}`)
     lines.push(`  ${key('Z')}  Cycle tool mode  ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
     lines.push(`  ${key('F')}  Toggle favorite on selected row  ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)

package/src/render-table.js CHANGED Viewed

@@ -49,6 +49,7 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
 import { TIER_COLOR } from './tier-colors.js'
 import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
 import { usagePlaceholderForProvider } from './ping.js'
+import { formatBenchmarkResult } from './benchmark.js'
 import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
 import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
 import { getColumnSpacing } from './ui-config.js'
@@ -181,6 +182,8 @@ export function renderTable({
   routerFooterTodayTokens = 0,
   routerFooterAllTimeTokens = 0,
   routerFooterRequests = 0,
+  benchmarkResults = {},
+  benchmarkRunning = new Set(),
 } = {}) {
   // 📖 Filter out hidden models for display
   const visibleResults = results.filter(r => !r.hidden)
@@ -274,6 +277,7 @@ export function renderTable({
   const W_STATUS = 18
   const W_VERDICT = 14
   const W_UPTIME = 6
+  const W_ANSWER = 14
   // const W_TOKENS = 7 // Used column removed
   // const W_USAGE = 7 // Usage column removed
@@ -281,16 +285,17 @@ export function renderTable({
   // 📖 Responsive column visibility: progressively hide least-useful columns
   // 📖 and shorten header labels when terminal width is insufficient.
-  // 📖 Hiding order (least useful first): Rank → Up% → Tier → Stability
+  // 📖 Hiding order (least useful first): Rank → Answer Speed → Up% → Tier → Stability
   // 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
   // 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
-  // 📖 Breakpoints: full=169 | compact=146 | -Rank=137 | -Up%=128 | -Tier=120 | -Stab=109
+  // 📖 Breakpoints: full=183 | compact=160 | -Rank=151 | -Answer=142 | -Up%=133 | -Tier=125 | -Stab=114
   let wPing = 14
   let wAvg = 11
   let wStab = 11
   let wSource = W_SOURCE
   let wStatus = W_STATUS
   let showRank = true
+  let showAnswerSpeed = true
   let showUptime = true
   let showTier = true
   let showStability = true
@@ -305,6 +310,7 @@ export function renderTable({
       cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
       if (showStability) cols.push(wStab)
       if (showUptime) cols.push(W_UPTIME)
+      if (showAnswerSpeed) cols.push(W_ANSWER)
       return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
     }
@@ -317,8 +323,9 @@ export function renderTable({
       wSource = 7    // Provider truncated to 4 chars + '…', 7 cols total
       wStatus = 13   // Health truncated after 6 chars + '…'
     }
-    // 📖 Steps 2–5: Progressive column hiding (least useful first)
+    // 📖 Steps 2–6: Progressive column hiding (least useful first)
     if (calcWidth() > terminalCols) showRank = false
+    if (calcWidth() > terminalCols) showAnswerSpeed = false
     if (calcWidth() > terminalCols) showUptime = false
     if (calcWidth() > terminalCols) showTier = false
     if (calcWidth() > terminalCols) showStability = false
@@ -341,6 +348,7 @@ export function renderTable({
     colDefs.push({ name: 'verdict', width: W_VERDICT })
     if (showStability) colDefs.push({ name: 'stability', width: wStab })
     if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
+    if (showAnswerSpeed) colDefs.push({ name: 'answerSpeed', width: W_ANSWER })
     let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
     const columns = []
     for (let i = 0; i < colDefs.length; i++) {
@@ -467,6 +475,14 @@ export function renderTable({
     return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
   })()
+  // 📖 Answer Speed header — no sort hotkey, just the label
+  const answerLabel = isCompact ? 'Answ.' : 'Answer Speed'
+  const answerH_c = (() => {
+    const plain = answerLabel
+    const padding = ' '.repeat(Math.max(0, W_ANSWER - plain.length))
+    return themeColors.dim('Ans') + themeColors.hotkey('w') + themeColors.dim('er' + (isCompact ? '.' : ' Speed') + padding)
+  })()
   // 📖 Usage column removed from UI – no header or separator for it.
   // 📖 Header row: conditionally include columns based on responsive visibility
   const headerParts = []
@@ -475,6 +491,7 @@ export function renderTable({
   headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
   if (showStability) headerParts.push(stabH_c)
   if (showUptime) headerParts.push(uptimeH_c)
+  if (showAnswerSpeed) headerParts.push(answerH_c)
   lines.push('  ' + headerParts.join(COL_SEP))
   // 📖 Mouse support: the column header row is the last line we just pushed.
@@ -776,6 +793,25 @@ export function renderTable({
     // (We keep the logic but do not render it.)
     const usageCell = ''
+    // 📖 Answer Speed column — show benchmark result, running spinner, or dash
+    const benchmarkKey = `${r.providerKey}/${r.modelId}`
+    const benchmarkResult = benchmarkResults[benchmarkKey]
+    const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
+    let answerSpeedCell
+    if (isBenchmarkRunning) {
+      const spinner = FRAMES[frame % FRAMES.length]
+      answerSpeedCell = themeColors.success(spinner.padEnd(W_ANSWER))
+    } else if (benchmarkResult) {
+      const text = formatBenchmarkResult(benchmarkResult)
+      // 📖 Colorize: success = green, error = red/dim
+      const isError = !benchmarkResult.ok
+      answerSpeedCell = isError
+        ? themeColors.metricBad(text.padEnd(W_ANSWER))
+        : themeColors.metricGood(text.padEnd(W_ANSWER))
+    } else {
+      answerSpeedCell = themeColors.dim('—'.padEnd(W_ANSWER))
+    }
     // 📖 Build row: conditionally include columns based on responsive visibility
     const rowParts = []
     if (showRank) rowParts.push(num)
@@ -783,6 +819,7 @@ export function renderTable({
     rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
     if (showStability) rowParts.push(stabCell)
     if (showUptime) rowParts.push(uptimeCell)
+    if (showAnswerSpeed) rowParts.push(answerSpeedCell)
     const row = '  ' + rowParts.join(COL_SEP)
     if (isCursor) {

package/src/tui-state.js CHANGED Viewed

@@ -261,5 +261,12 @@ export function createTuiState({
     // 📖 Token usage overlay scroll state (used when overlay opens from footer)
     tokenUsageOpen: false,
+    // 📖 Benchmark results: keyed by `${providerKey}/${modelId}`
+    // 📖 Each entry is the raw result object from benchmarkModel() or null.
+    benchmarkResults: {},
+    // 📖 Set of benchmark keys currently running (for spinner display)
+    benchmarkRunning: new Set(),
   }
 }