free-coding-models 0.3.78 → 0.3.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -456,6 +456,7 @@ When a tool mode is active (via `Z`), models incompatible with that tool are hig
456
456
  ## ✨ Features
457
457
 
458
458
  - **Parallel pings** — all ~165 API/Zen-callable models tested simultaneously via native `fetch` (~170 total cataloged models including CLI-only Gemini rows)
459
+ - **AI benchmark columns** — `Ctrl+A` benchmarks the selected model, `Ctrl+U` benchmarks visible models, and results split cleanly into **AI Latency** plus **TPS**.
459
460
  - **Adaptive monitoring** — 2s burst for 60s → 10s normal → 30s idle
460
461
  - **Stability score** — composite 0–100 (p95 latency, jitter, spike rate, uptime)
461
462
  - **Smart ranking** — top 3 highlighted 🥇🥈🥉
@@ -0,0 +1,21 @@
1
+ # Changelog v0.3.79 - 2026-05-28
2
+
3
+ ### Added
4
+ - **Global AI Speed Test (Ctrl+U)** — Benchmark all currently visible models (respecting all active filters) in parallel with a 5-request concurrency limit to avoid rate limits. Results populate the **AI Speed** column for each model. Displays a prominent violet/purple footer badge warning about high request volume.
5
+ - New TUI footer layout with two speed test controls aligned left:
6
+ - `NEW ⭐️ Ctrl+A 🤖 AI Speed Test` (green badge) — benchmark selected model only
7
+ - `NEW Ctrl+U : Global AI Speed Test (Uses a lot of requests!)` (violet/purple badge) — benchmark all visible models
8
+ - Concurrency utility `runWithConcurrency(tasks, maxConcurrent)` to manage parallel benchmark requests safely.
9
+ - Global benchmark state tracking in TUI:
10
+ - `globalBenchmarkRunning` (boolean)
11
+ - `globalBenchmarkTotal` (number of models to test)
12
+ - `globalBenchmarkCompleted` (progress counter)
13
+
14
+ ### Changed
15
+ - **Renamed column** `Answer Speed` → `AI Speed` (compact: `AI Sp.`) to better reflect that it measures model inference speed, not network ping.
16
+ - **Removed sort hotkey** `w` from the AI Speed column header — the column is not sortable, so the misleading yellow "w" has been removed. The column remains responsive and hides/shows with other optional columns.
17
+ - Help overlay text updated: `Ctrl+A` now labeled "AI Speed Test" with description "(benchmark selected model → time + TPS)".
18
+ - TUI footer structure updated to 3 lines (`TABLE_FOOTER_LINES = 3`) to accommodate both speed test badges and the last release date.
19
+
20
+ ### Fixed
21
+ - Footer layout consistency with new multi-part line construction using `parts` array and proper hotkey zone registration for both `Ctrl+A` and `Ctrl+U` badges.
@@ -0,0 +1,9 @@
1
+ # Changelog v0.3.80 - 2026-05-30
2
+
3
+ ### Changed
4
+ - Split the former single **AI Speed** table value into two clearer columns: **AI Latency** for benchmark wall-clock response time and **TPS** for rounded tokens per second. When a model's Health is not good, AI Latency now mirrors the exact Health text and TPS stays `—`, so benchmark columns do not introduce a second, conflicting error language.
5
+ - Made the AI benchmark prompt request one cohesive 80–100 word paragraph instead of a tiny one-sentence answer, and raised the benchmark cap to 140 output tokens. This gives latency and TPS measurements enough generated text to be more stable and meaningful.
6
+
7
+ ### Fixed
8
+ - Disabled reasoning/thinking for lightweight model ping payloads by sending `thinking: { type: "disabled" }` on OpenAI-compatible chat-completion probes. This keeps latency checks focused on network/model availability instead of accidentally paying for hidden reasoning tokens.
9
+ - Reused the same disabled-thinking ping payload for router health probes, while leaving Replicate prediction probes unchanged because they do not use the OpenAI chat-completions schema.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "free-coding-models",
3
- "version": "0.3.78",
3
+ "version": "0.3.80",
4
4
  "description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
5
5
  "keywords": [
6
6
  "nvidia",
package/src/benchmark.js CHANGED
@@ -18,25 +18,27 @@
18
18
  * → Functions:
19
19
  * - `buildBenchmarkRequest`: Build provider-specific benchmark request
20
20
  * - `benchmarkModel`: Run a single benchmark and return timing + token metrics
21
- * - `formatBenchmarkResult`: Format a benchmark result for the TUI column
21
+ * - `formatBenchmarkLatency`: Format benchmark latency for the AI Latency TUI column
22
+ * - `formatBenchmarkTps`: Format benchmark throughput for the TPS TUI column
23
+ * - `formatBenchmarkResult`: Legacy combined formatter for compatibility
22
24
  * - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
23
25
  *
24
26
  * 📦 Dependencies:
25
27
  * - ./ping.js: buildPingRequest, resolveCloudflareUrl
26
28
  *
27
29
  * @see {@link ./ping.js} Provider-specific request building
28
- * @see {@link ./render-table.js} Answer Speed column rendering
30
+ * @see {@link ./render-table.js} AI Latency + TPS column rendering
29
31
  */
30
32
 
31
33
  import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
32
34
 
33
- // 📖 BENCHMARK_PROMPT: A short, unambiguous question that any model can answer.
34
- // 📖 Constrained to one sentence to keep benchmarks fast and consistent.
35
- export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one short sentence.'
35
+ // 📖 BENCHMARK_PROMPT: A deterministic one-paragraph task that any model can answer.
36
+ // 📖 The longer target gives latency + TPS measurements enough generated tokens to be reliable.
37
+ export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one cohesive paragraph of 80 to 100 words. Do not use bullet points, headings, or multiple paragraphs.'
36
38
 
37
- // 📖 BENCHMARK_MAX_TOKENS: Hard cap on generation length to prevent slow models
38
- // 📖 from producing essays and skewing the TPS calculation.
39
- export const BENCHMARK_MAX_TOKENS = 32
39
+ // 📖 BENCHMARK_MAX_TOKENS: Hard cap high enough for a real paragraph, but low enough
40
+ // 📖 to avoid accidental essays when benchmarking many models at once.
41
+ export const BENCHMARK_MAX_TOKENS = 140
40
42
 
41
43
  // 📖 BENCHMARK_TEMPERATURE: Zero temperature for deterministic, reproducible results.
42
44
  export const BENCHMARK_TEMPERATURE = 0
@@ -52,37 +54,41 @@ export function estimateTokensFromText(text) {
52
54
  return Math.ceil(text.length / 4)
53
55
  }
54
56
 
55
- // 📖 formatBenchmarkResult: Turn a raw benchmark result into a compact display string.
56
- // 📖 Handles all three states: empty, running, success, and error.
57
- // 📖
58
- // 📖 Success: "4.3s / 13 TPS"
59
- // 📖 Running: spinner (caller passes spinner char)
60
- // 📖 Error: compact error code like "ERR", "TIMEOUT", "401", "429"
61
- // 📖 Empty: "—"
62
- export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
63
- if (running) {
64
- const spinIdx = frame % 10
65
- const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'][spinIdx]
66
- return spinner
67
- }
68
-
69
- if (!result) {
70
- return '—'
71
- }
// 📖 BENCHMARK_SPINNER_FRAMES: Braille spinner glyphs cycled while a benchmark request runs.
const BENCHMARK_SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']

// 📖 benchmarkSpinner: Shared tiny spinner for benchmark columns while a request runs.
// 📖 `frame` is the animation counter; it wraps around the glyph list.
// 📖 Negative, fractional, or non-numeric frames are normalized so the result is
// 📖 always a glyph (never `undefined`, which would render as the text "undefined").
function benchmarkSpinner(frame) {
  const count = BENCHMARK_SPINNER_FRAMES.length
  const step = Number.isFinite(frame) ? Math.trunc(frame) : 0
  // 📖 Double modulo keeps the index in [0, count) even for negative steps.
  return BENCHMARK_SPINNER_FRAMES[((step % count) + count) % count]
}
72
62
 
73
- if (!result.ok) {
74
- return result.code || 'ERR'
75
- }
// 📖 formatBenchmarkLatency: Turn a raw benchmark result into the AI Latency column value.
// 📖 Success: "4.3s" / "12s". Error: compact error code. Empty: "—".
// 📖
// 📖 result  — benchmark result object (reads `ok`, `code`, `totalMs`), or null/undefined
// 📖 options — `running: true` returns the spinner glyph for `frame` instead of a value
// 📖 returns — unpadded cell text; the caller handles padding and coloring
export function formatBenchmarkLatency(result, { running = false, frame = 0 } = {}) {
  if (running) return benchmarkSpinner(frame)
  if (!result) return '—'
  if (!result.ok) return result.code || 'ERR'

  const totalSeconds = result.totalMs / 1000
  // 📖 A "successful" result without a usable duration has nothing to show; avoid "NaNs".
  if (!Number.isFinite(totalSeconds)) return '—'

  // 📖 Apply the 10s threshold to the rounded value: 9.96s must print "10s", not "10.0s".
  const oneDecimal = totalSeconds.toFixed(1)
  return Number(oneDecimal) >= 10
    ? totalSeconds.toFixed(0) + 's'
    : oneDecimal + 's'
}
81
75
 
82
- const tps = result.tokensPerSecond ?? 0
83
- const tpsLabel = Math.round(tps)
// 📖 formatBenchmarkTps: Turn a raw benchmark result into the TPS column value.
// 📖 Success is the rounded tokens/second number only because the header carries "TPS".
// 📖 Errors and empty state stay as a dash in the table to avoid duplicating codes.
// 📖
// 📖 result  — benchmark result object (reads `ok`, `tokensPerSecond`), or null/undefined
// 📖 options — `running: true` returns the spinner glyph for `frame` instead of a value
// 📖 returns — unpadded cell text; the caller handles padding and coloring
export function formatBenchmarkTps(result, { running = false, frame = 0 } = {}) {
  if (running) return benchmarkSpinner(frame)
  if (!result || !result.ok) return '—'

  // 📖 `?? 0` only covers null/undefined; NaN or Infinity (e.g. tokens / 0ms) would
  // 📖 otherwise be printed verbatim, so treat any non-finite throughput as "no value".
  const tps = Number(result.tokensPerSecond ?? 0)
  if (!Number.isFinite(tps)) return '—'
  return String(Math.round(tps))
}
84
84
 
85
- return `${secondsLabel} / ${tpsLabel} TPS`
85
+ // 📖 formatBenchmarkResult: legacy combined formatter retained for integrations/tests
86
+ // 📖 that still expect the old single-column "latency / TPS" string.
87
+ export function formatBenchmarkResult(result, options = {}) {
88
+ if (options.running) return benchmarkSpinner(options.frame ?? 0)
89
+ if (!result) return '—'
90
+ if (!result.ok) return result.code || 'ERR'
91
+ return `${formatBenchmarkLatency(result)} / ${formatBenchmarkTps(result)} TPS`
86
92
  }
87
93
 
88
94
  // 📖 buildBenchmarkRequest: Build provider-specific benchmark request.
package/src/constants.js CHANGED
@@ -102,7 +102,7 @@ export const WIDTH_WARNING_MIN_COLS = 80
102
102
  // 📖 Table row-budget constants — must stay in sync with renderTable()'s actual output.
103
103
  // 📖 If this drifts, model rows overflow and can push the title row out of view.
104
104
  export const TABLE_HEADER_LINES = 2 // 📖 title, column headers
105
- export const TABLE_FOOTER_LINES = 2 // 📖 actions, links
105
+ export const TABLE_FOOTER_LINES = 3 // 📖 actions, links, speed test
106
106
  export const TABLE_FIXED_LINES = TABLE_HEADER_LINES + TABLE_FOOTER_LINES
107
107
 
108
108
  // ─── Small cell-formatting helpers ────────────────────────────────────────────
@@ -33,7 +33,8 @@
33
33
 
34
34
  import { loadChangelog } from './changelog-loader.js'
35
35
  import { getToolMeta, isModelCompatibleWithTool, getCompatibleTools, findSimilarCompatibleModels } from './tool-metadata.js'
36
- import { loadConfig, saveConfig, replaceConfigContents } from './config.js'
36
+ import { loadConfig, saveConfig, replaceConfigContents, getApiKey } from './config.js'
37
+ import { sources } from '../sources.js'
37
38
  import { join, dirname } from 'node:path'
38
39
  import { fileURLToPath } from 'node:url'
39
40
  import { spawn } from 'node:child_process'
@@ -1075,6 +1076,80 @@ export function createKeyHandler(ctx) {
1075
1076
  }
1076
1077
  }
1077
1078
 
// 📖 runWithConcurrency: Execute tasks with limited parallelism (maxConcurrent simultaneous).
// 📖
// 📖 tasks         — array of zero-argument functions, each returning a value or a promise
// 📖 maxConcurrent — upper bound on tasks in flight; clamped to [1, tasks.length] so an
// 📖                 invalid limit (0, negative, NaN, fractional) still runs every task
// 📖                 instead of silently returning an unfilled array or throwing
// 📖 returns       — promise of a results array aligned with `tasks`; a task that throws
// 📖                 or rejects yields `{ error }` at its index rather than rejecting the batch
async function runWithConcurrency(tasks, maxConcurrent) {
  const results = new Array(tasks.length)
  const requested = Math.floor(Number(maxConcurrent))
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested)
  // 📖 Never start more workers than there are tasks to pull.
  const workerCount = Math.min(tasks.length, limit)

  // 📖 Shared cursor: each worker claims the next unclaimed index. The claim happens
  // 📖 synchronously (no await between read and increment), so no index is taken twice.
  let nextIndex = 0
  const worker = async () => {
    while (nextIndex < tasks.length) {
      const index = nextIndex++
      try {
        results[index] = await tasks[index]()
      } catch (err) {
        results[index] = { error: err }
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
  return results
}
1096
+
// 📖 runGlobalBenchmark: Benchmark all visible models with up to 5 concurrent requests.
// 📖 Results are stored in state.benchmarkResults (same format as individual benchmarks).
// 📖
// 📖 Reads:  state.visibleSorted, state.config, state.benchmarkRunning
// 📖 Writes: state.benchmarkResults[`${providerKey}/${modelId}`], state.benchmarkRunning,
// 📖         state.globalBenchmarkRunning / globalBenchmarkTotal / globalBenchmarkCompleted
// 📖 A second call while a run is in progress is a no-op.
async function runGlobalBenchmark(state) {
  if (state.globalBenchmarkRunning) return
  state.globalBenchmarkRunning = true

  // 📖 Everything after the flag is set lives in try/finally: if anything throws
  // 📖 (e.g. visibleSorted not populated yet) the flag must not stay stuck at true,
  // 📖 otherwise Ctrl+U would be ignored for the rest of the session.
  try {
    const models = Array.isArray(state.visibleSorted) ? state.visibleSorted : []
    state.globalBenchmarkTotal = models.length
    state.globalBenchmarkCompleted = 0

    const tasks = models.map(model => async () => {
      try {
        const benchmarkKey = `${model.providerKey}/${model.modelId}`
        // 📖 Skip if already running (e.g., from Ctrl+A)
        if (state.benchmarkRunning.has(benchmarkKey)) return { skipped: true }

        const apiKey = getApiKey(state.config, model.providerKey) ?? null
        const providerUrl = sources[model.providerKey]?.url ?? null
        // 📖 No URL for this provider in sources → nothing to call, count it as done.
        if (!providerUrl) return { skipped: true }

        state.benchmarkRunning.add(benchmarkKey)
        try {
          const result = await benchmarkModel({
            apiKey,
            modelId: model.modelId,
            providerKey: model.providerKey,
            url: providerUrl,
          })
          state.benchmarkResults[benchmarkKey] = result
          return { ok: result.ok }
        } catch (err) {
          // 📖 Store a failure in the same result format so the row shows an error code.
          state.benchmarkResults[benchmarkKey] = {
            ok: false,
            code: 'ERR',
            totalMs: 0,
            error: err?.message || 'Benchmark failed',
          }
          return { ok: false }
        } finally {
          state.benchmarkRunning.delete(benchmarkKey)
        }
      } finally {
        // 📖 Single place where progress advances: skipped, succeeded, or failed.
        state.globalBenchmarkCompleted++
      }
    })

    await runWithConcurrency(tasks, 5)
  } finally {
    state.globalBenchmarkRunning = false
    state.globalBenchmarkTotal = 0
    state.globalBenchmarkCompleted = 0
  }
}
1152
+
1078
1153
  // 📖 Favorites display mode:
1079
1154
  // 📖 - true => favorites stay pinned + always visible (legacy behavior)
1080
1155
  // 📖 - false => favorites are just starred rows and obey normal sort/filter rules
@@ -1386,6 +1461,12 @@ export function createKeyHandler(ctx) {
1386
1461
  return
1387
1462
  }
1388
1463
 
1464
+ // 📖 Ctrl+U: Global AI Speed Benchmark (benchmark all visible models, 5 concurrent)
1465
+ if (key.ctrl && key.name === 'u') {
1466
+ await runGlobalBenchmark(state)
1467
+ return
1468
+ }
1469
+
1389
1470
  // 📖 Command palette captures the keyboard while active.
1390
1471
  if (state.commandPaletteOpen) {
1391
1472
  if (key.ctrl && key.name === 'c') { exit(0); return }
package/src/overlays.js CHANGED
@@ -930,7 +930,7 @@ export function createOverlayRenderers(state, deps) {
930
930
  lines.push(` ${heading('Controls')}`)
931
931
  lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
932
932
  lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
933
- lines.push(` ${key('Ctrl+A')} Benchmark answer speed ${hint('(real completion on selected model → time + TPS)')}`)
933
+ lines.push(` ${key('Ctrl+A')} AI Speed Test ${hint('(benchmark selected model → time + TPS)')}`)
934
934
  lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
935
935
  lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
936
936
  lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
package/src/ping.js CHANGED
@@ -16,6 +16,9 @@
16
16
  *
17
17
  * → Functions:
18
18
  * - `resolveCloudflareUrl`: Resolve {account_id} placeholder from CLOUDFLARE_ACCOUNT_ID env var
19
+ * - `buildChatCompletionPingBody`: Build minimal chat-completion probe payloads with thinking disabled
20
+ * - `markDisabledThinkingUnsupported`: Cache strict providers that reject the optional thinking control
21
+ * - `shouldUseDisabledThinkingForProvider`: Decide whether a provider should receive disabled-thinking probes
19
22
  * - `buildPingRequest`: Build provider-specific HTTP request for pinging
20
23
  * - `ping`: Send async ping request with timeout; returns { code, ms, quotaPercent }
21
24
  * - `getHeaderValue`: Helper to extract header value from Headers object or plain object
@@ -41,6 +44,9 @@ import { PING_TIMEOUT } from './constants.js'
41
44
  import { fetchProviderQuota as _fetchProviderQuotaFromModule } from './provider-quota-fetchers.js'
42
45
  import { supportsUsagePercent } from './quota-capabilities.js'
43
46
 
47
+ const DISABLED_THINKING_RETRY_STATUSES = new Set([400, 422])
48
+ const disabledThinkingUnsupportedProviders = new Set()
49
+
44
50
  // 📖 resolveCloudflareUrl: Cloudflare's OpenAI-compatible endpoint is account-scoped.
45
51
  // 📖 We resolve {account_id} from env so provider setup can stay simple in config.
46
52
  export function resolveCloudflareUrl(url) {
@@ -50,10 +56,37 @@ export function resolveCloudflareUrl(url) {
50
56
  return url.replace('{account_id}', encodeURIComponent(accountId))
51
57
  }
52
58
 
59
+ // 📖 buildChatCompletionPingBody: Use the smallest useful chat-completion probe.
60
+ // 📖 The explicit thinking toggle prevents reasoning-capable endpoints from spending
61
+ // 📖 hidden tokens or adding thinking latency when we only need availability + RTT.
62
+ export function buildChatCompletionPingBody(modelId, overrides = {}, options = {}) {
63
+ const body = {
64
+ model: modelId,
65
+ messages: [{ role: 'user', content: 'hi' }],
66
+ max_tokens: 1,
67
+ thinking: { type: 'disabled' },
68
+ ...overrides,
69
+ }
70
+ if (options.disableThinking === false) delete body.thinking
71
+ return body
72
+ }
73
+
74
+ // 📖 markDisabledThinkingUnsupported: remember strict providers that reject the
75
+ // 📖 optional `thinking` field so future pings avoid repeated 400/422 retries.
76
+ export function markDisabledThinkingUnsupported(providerKey) {
77
+ if (providerKey) disabledThinkingUnsupportedProviders.add(providerKey)
78
+ }
79
+
80
+ // 📖 shouldUseDisabledThinkingForProvider: central policy for OpenAI-compatible
81
+ // 📖 probes, shared by regular pings and router health probes.
82
+ export function shouldUseDisabledThinkingForProvider(providerKey) {
83
+ return !disabledThinkingUnsupportedProviders.has(providerKey)
84
+ }
85
+
53
86
  // 📖 buildPingRequest: Build provider-specific ping request.
54
87
  // 📖 Handles Replicate's /v1/predictions format, Cloudflare's account_id in URL,
55
88
  // 📖 and standard OpenAI-compliant chat completions with provider-specific headers.
56
- export function buildPingRequest(apiKey, modelId, providerKey, url) {
89
+ export function buildPingRequest(apiKey, modelId, providerKey, url, options = {}) {
57
90
  // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
58
91
  const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId
59
92
 
@@ -75,7 +108,9 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
75
108
  return {
76
109
  url: resolveCloudflareUrl(url),
77
110
  headers,
78
- body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
111
+ body: buildChatCompletionPingBody(apiModelId, {}, {
112
+ disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
113
+ }),
79
114
  }
80
115
  }
81
116
 
@@ -90,7 +125,31 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
90
125
  return {
91
126
  url,
92
127
  headers,
93
- body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
128
+ body: buildChatCompletionPingBody(apiModelId, {}, {
129
+ disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
130
+ }),
131
+ }
132
+ }
133
+
134
+ // 📖 sendPingFetch: keep retry code tiny and ensure both attempts use the same abort signal.
135
+ async function sendPingFetch(req, signal) {
136
+ return fetch(req.url, {
137
+ method: 'POST', signal,
138
+ headers: req.headers,
139
+ body: JSON.stringify(req.body),
140
+ })
141
+ }
142
+
143
+ // 📖 isDisabledThinkingRejected: strict OpenAI-compatible gateways may reject
144
+ // 📖 unknown root fields. We only retry when the status is 400/422 and the error text names
145
+ // 📖 the optional `thinking` control, avoiding retries for real model failures.
146
+ async function isDisabledThinkingRejected(resp, req) {
147
+ if (!req?.body?.thinking || !DISABLED_THINKING_RETRY_STATUSES.has(resp.status)) return false
148
+ try {
149
+ const text = await resp.clone().text()
150
+ return /thinking/i.test(text)
151
+ } catch {
152
+ return false
94
153
  }
95
154
  }
96
155
 
@@ -104,12 +163,13 @@ export async function ping(apiKey, modelId, providerKey, url) {
104
163
  const timer = setTimeout(() => ctrl.abort(), PING_TIMEOUT)
105
164
  const t0 = performance.now()
106
165
  try {
107
- const req = buildPingRequest(apiKey, modelId, providerKey, url)
108
- const resp = await fetch(req.url, {
109
- method: 'POST', signal: ctrl.signal,
110
- headers: req.headers,
111
- body: JSON.stringify(req.body),
112
- })
166
+ let req = buildPingRequest(apiKey, modelId, providerKey, url)
167
+ let resp = await sendPingFetch(req, ctrl.signal)
168
+ if (await isDisabledThinkingRejected(resp, req)) {
169
+ markDisabledThinkingUnsupported(providerKey)
170
+ req = buildPingRequest(apiKey, modelId, providerKey, url, { disableThinking: false })
171
+ resp = await sendPingFetch(req, ctrl.signal)
172
+ }
113
173
  // 📖 Normalize all HTTP 2xx statuses to "200" so existing verdict/avg logic still works.
114
174
  const code = resp.status >= 200 && resp.status < 300 ? '200' : String(resp.status)
115
175
  return {
@@ -49,8 +49,8 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
49
49
  import { TIER_COLOR } from './tier-colors.js'
50
50
  import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
51
51
  import { usagePlaceholderForProvider } from './ping.js'
52
- import { formatBenchmarkResult } from './benchmark.js'
53
- import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
52
+ import { formatBenchmarkLatency, formatBenchmarkTps } from './benchmark.js'
53
+ import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth, stripAnsi } from './render-helpers.js'
54
54
  import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
55
55
  import { getColumnSpacing } from './ui-config.js'
56
56
  import { detectPackageManager, getManualInstallCmd } from './updater.js'
@@ -91,6 +91,8 @@ const COLUMN_SORT_MAP = {
91
91
  verdict: 'verdict',
92
92
  stability: 'stability',
93
93
  uptime: 'uptime',
94
+ aiLatency: null,
95
+ tps: null,
94
96
  }
95
97
  export { COLUMN_SORT_MAP }
96
98
 
@@ -277,7 +279,8 @@ export function renderTable({
277
279
  const W_STATUS = 18
278
280
  const W_VERDICT = 14
279
281
  const W_UPTIME = 6
280
- const W_ANSWER = 14
282
+ const W_AI_LATENCY = 18
283
+ const W_TPS = 5
281
284
 
282
285
  // const W_TOKENS = 7 // Used column removed
283
286
  // const W_USAGE = 7 // Usage column removed
@@ -285,17 +288,18 @@ export function renderTable({
285
288
 
286
289
  // 📖 Responsive column visibility: progressively hide least-useful columns
287
290
  // 📖 and shorten header labels when terminal width is insufficient.
288
- // 📖 Hiding order (least useful first): Rank → Answer Speed → Up% → Tier → Stability
291
+ // 📖 Hiding order (least useful first): Rank → AI Latency/TPS → Up% → Tier → Stability
289
292
  // 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
290
293
  // 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
291
- // 📖 Breakpoints: full=183 | compact=160 | -Rank=151 | -Answer=142 | -Up%=133 | -Tier=125 | -Stab=114
294
+ // 📖 Breakpoints are computed dynamically from active column widths.
292
295
  let wPing = 14
293
296
  let wAvg = 11
294
297
  let wStab = 11
295
298
  let wSource = W_SOURCE
296
299
  let wStatus = W_STATUS
300
+ let wAiLatency = W_AI_LATENCY
297
301
  let showRank = true
298
- let showAnswerSpeed = true
302
+ let showBenchmarkColumns = true
299
303
  let showUptime = true
300
304
  let showTier = true
301
305
  let showStability = true
@@ -310,7 +314,7 @@ export function renderTable({
310
314
  cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
311
315
  if (showStability) cols.push(wStab)
312
316
  if (showUptime) cols.push(W_UPTIME)
313
- if (showAnswerSpeed) cols.push(W_ANSWER)
317
+ if (showBenchmarkColumns) cols.push(wAiLatency, W_TPS)
314
318
  return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
315
319
  }
316
320
 
@@ -322,10 +326,11 @@ export function renderTable({
322
326
  wStab = 8 // 'StaB.' instead of 'Stability'
323
327
  wSource = 7 // Provider truncated to 4 chars + '…', 7 cols total
324
328
  wStatus = 13 // Health truncated after 6 chars + '…'
329
+ wAiLatency = 13 // Mirror compact Health text when health is not good
325
330
  }
326
331
  // 📖 Steps 2–6: Progressive column hiding (least useful first)
327
332
  if (calcWidth() > terminalCols) showRank = false
328
- if (calcWidth() > terminalCols) showAnswerSpeed = false
333
+ if (calcWidth() > terminalCols) showBenchmarkColumns = false
329
334
  if (calcWidth() > terminalCols) showUptime = false
330
335
  if (calcWidth() > terminalCols) showTier = false
331
336
  if (calcWidth() > terminalCols) showStability = false
@@ -348,7 +353,10 @@ export function renderTable({
348
353
  colDefs.push({ name: 'verdict', width: W_VERDICT })
349
354
  if (showStability) colDefs.push({ name: 'stability', width: wStab })
350
355
  if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
351
- if (showAnswerSpeed) colDefs.push({ name: 'answerSpeed', width: W_ANSWER })
356
+ if (showBenchmarkColumns) {
357
+ colDefs.push({ name: 'aiLatency', width: wAiLatency })
358
+ colDefs.push({ name: 'tps', width: W_TPS })
359
+ }
352
360
  let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
353
361
  const columns = []
354
362
  for (let i = 0; i < colDefs.length; i++) {
@@ -475,12 +483,17 @@ export function renderTable({
475
483
  return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
476
484
  })()
477
485
 
478
- // 📖 Answer Speed header — no sort hotkey, just the label
479
- const answerLabel = isCompact ? 'Answ.' : 'Answer Speed'
480
- const answerH_c = (() => {
481
- const plain = answerLabel
482
- const padding = ' '.repeat(Math.max(0, W_ANSWER - plain.length))
483
- return themeColors.dim('Ans') + themeColors.hotkey('w') + themeColors.dim('er' + (isCompact ? '.' : ' Speed') + padding)
486
+ // 📖 Benchmark headers — split the old combined AI Speed field into latency + throughput.
487
+ const aiLatencyLabel = isCompact ? 'AI Lat.' : 'AI Latency'
488
+ const aiLatencyH_c = (() => {
489
+ const plain = aiLatencyLabel
490
+ const padding = ' '.repeat(Math.max(0, wAiLatency - plain.length))
491
+ return themeColors.dim(plain + padding)
492
+ })()
493
+ const tpsH_c = (() => {
494
+ const plain = 'TPS'
495
+ const padding = ' '.repeat(Math.max(0, W_TPS - plain.length))
496
+ return themeColors.dim(plain + padding)
484
497
  })()
485
498
 
486
499
  // 📖 Usage column removed from UI – no header or separator for it.
@@ -491,7 +504,7 @@ export function renderTable({
491
504
  headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
492
505
  if (showStability) headerParts.push(stabH_c)
493
506
  if (showUptime) headerParts.push(uptimeH_c)
494
- if (showAnswerSpeed) headerParts.push(answerH_c)
507
+ if (showBenchmarkColumns) headerParts.push(aiLatencyH_c, tpsH_c)
495
508
  lines.push(' ' + headerParts.join(COL_SEP))
496
509
 
497
510
  // 📖 Mouse support: the column header row is the last line we just pushed.
@@ -793,24 +806,28 @@ export function renderTable({
793
806
  // (We keep the logic but do not render it.)
794
807
  const usageCell = ''
795
808
 
796
- // 📖 Answer Speed column — show benchmark result, running spinner, or dash
809
+ // 📖 AI Latency + TPS columns — same benchmark result, split into two readable metrics.
797
810
  const benchmarkKey = `${r.providerKey}/${r.modelId}`
798
811
  const benchmarkResult = benchmarkResults[benchmarkKey]
799
812
  const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
800
- let answerSpeedCell
801
- if (isBenchmarkRunning) {
802
- const spinner = FRAMES[frame % FRAMES.length]
803
- answerSpeedCell = themeColors.success(spinner.padEnd(W_ANSWER))
804
- } else if (benchmarkResult) {
805
- const text = formatBenchmarkResult(benchmarkResult)
806
- // 📖 Colorize: success = green, error = red/dim
807
- const isError = !benchmarkResult.ok
808
- answerSpeedCell = isError
809
- ? themeColors.metricBad(text.padEnd(W_ANSWER))
810
- : themeColors.metricGood(text.padEnd(W_ANSWER))
811
- } else {
812
- answerSpeedCell = themeColors.dim('—'.padEnd(W_ANSWER))
813
- }
813
+ const healthIsGood = r.status === 'up'
814
+ const latencyText = healthIsGood
815
+ ? formatBenchmarkLatency(benchmarkResult, { running: isBenchmarkRunning, frame })
816
+ : statusDisplayText
817
+ const tpsText = healthIsGood
818
+ ? formatBenchmarkTps(benchmarkResult, { running: isBenchmarkRunning, frame })
819
+ : '—'
820
+ const benchmarkIsError = healthIsGood && benchmarkResult && !benchmarkResult.ok
821
+ const latencyCell = !healthIsGood
822
+ ? statusColor(padEndDisplay(latencyText, wAiLatency))
823
+ : benchmarkIsError
824
+ ? themeColors.metricBad(latencyText.padEnd(wAiLatency))
825
+ : benchmarkResult || isBenchmarkRunning
826
+ ? themeColors.metricGood(latencyText.padEnd(wAiLatency))
827
+ : themeColors.dim(latencyText.padEnd(wAiLatency))
828
+ const tpsCell = healthIsGood && (benchmarkResult?.ok || isBenchmarkRunning)
829
+ ? themeColors.metricGood(tpsText.padEnd(W_TPS))
830
+ : themeColors.dim(tpsText.padEnd(W_TPS))
814
831
 
815
832
  // 📖 Build row: conditionally include columns based on responsive visibility
816
833
  const rowParts = []
@@ -819,7 +836,7 @@ export function renderTable({
819
836
  rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
820
837
  if (showStability) rowParts.push(stabCell)
821
838
  if (showUptime) rowParts.push(uptimeCell)
822
- if (showAnswerSpeed) rowParts.push(answerSpeedCell)
839
+ if (showBenchmarkColumns) rowParts.push(latencyCell, tpsCell)
823
840
  const row = ' ' + rowParts.join(COL_SEP)
824
841
 
825
842
  if (isCursor) {
@@ -942,9 +959,8 @@ export function renderTable({
942
959
  }
943
960
  }
944
961
 
945
- // 📖 Line 2: command palette (highlighted as new) + GitHub link.
946
- // 📖 Ctrl+P Cmd Palette uses neon-green-on-dark-green background to highlight the feature.
947
- const paletteLabel = chalk.bgRgb(0, 60, 0).rgb(57, 255, 20).bold(' Ctrl+P Cmd Palette ')
962
+ // 📖 Line 2: command palette (simple color, no background) + GitHub link.
963
+ const paletteLabel = chalk.rgb(57, 255, 20).bold('Ctrl+P Cmd Palette')
948
964
  const starLink = '⭐ ' + themeColors.link('\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\GitHub\x1b]8;;\x1b\\')
949
965
  lines.push(
950
966
  ' ' + paletteLabel + themeColors.dim(` • `) + starLink + themeColors.dim(` • `) +
@@ -998,8 +1014,29 @@ export function renderTable({
998
1014
  const releaseLabel = lastReleaseDate
999
1015
  ? chalk.rgb(255, 182, 193)(`Last release: ${lastReleaseDate}`)
1000
1016
  : ''
1017
+ const speedTestLabel = chalk.bgRgb(0, 60, 0).rgb(57, 255, 20).bold(' NEW ⭐️ Ctrl+A 🤖 AI Speed Test ')
1018
+ const globalBenchmarkLabel = chalk.bgRgb(180, 0, 255).white.bold(' NEW Ctrl+U : Global AI Speed Test (Uses a lot of requests!) ')
1001
1019
 
1002
- if (releaseLabel) lines.push(' ' + releaseLabel)
1020
+ // 📖 Line 3: Speed Test (Ctrl+A) + Global Benchmark (Ctrl+U) + Last release
1021
+ if (releaseLabel || speedTestLabel || globalBenchmarkLabel) {
1022
+ const parts = [
1023
+ { text: ' ', key: null },
1024
+ { text: speedTestLabel, key: 'a' },
1025
+ { text: ' ', key: null },
1026
+ { text: globalBenchmarkLabel, key: 'u' },
1027
+ { text: ' ', key: null },
1028
+ { text: releaseLabel, key: null },
1029
+ ]
1030
+ const footerRow3 = lines.length + 1
1031
+ let xPos = 1
1032
+ for (const part of parts) {
1033
+ const w = displayWidth(part.text)
1034
+ if (part.key) footerHotkeys.push({ key: part.key, row: footerRow3, xStart: xPos, xEnd: xPos + w - 1 })
1035
+ xPos += w
1036
+ }
1037
+ const line = parts.map(p => p.text).join('')
1038
+ lines.push(line)
1039
+ }
1003
1040
  _lastLayout.footerHotkeys = footerHotkeys
1004
1041
 
1005
1042
  // 📖 Append \x1b[K (erase to EOL) to each line so leftover chars from previous
@@ -47,7 +47,7 @@ import {
47
47
  normalizeRouterConfig,
48
48
  saveConfig,
49
49
  } from './config.js'
50
- import { resolveCloudflareUrl } from './ping.js'
50
+ import { buildChatCompletionPingBody, resolveCloudflareUrl, shouldUseDisabledThinkingForProvider } from './ping.js'
51
51
  import { sendUsageTelemetry } from './telemetry.js'
52
52
 
53
53
  export const ROUTER_DEFAULT_PORT = 19280
@@ -1200,12 +1200,11 @@ class RouterRuntime {
1200
1200
  : await fetch(providerUrl, {
1201
1201
  method: 'POST',
1202
1202
  headers: cloneHeadersForUpstream({}, apiKey, candidate.provider),
1203
- body: JSON.stringify({
1204
- model: getApiModelId(candidate.provider, candidate.model),
1205
- messages: [{ role: 'user', content: 'hi' }],
1206
- max_tokens: 1,
1207
- stream: false,
1208
- }),
1203
+ body: JSON.stringify(buildChatCompletionPingBody(
1204
+ getApiModelId(candidate.provider, candidate.model),
1205
+ { stream: false },
1206
+ { disableThinking: shouldUseDisabledThinkingForProvider(candidate.provider) }
1207
+ )),
1209
1208
  signal: controller.signal,
1210
1209
  })
1211
1210
  const latencyMs = Math.round(performance.now() - started)