free-coding-models 0.3.78 → 0.3.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/changelog/v0.3.79.md +21 -0
- package/changelog/v0.3.80.md +9 -0
- package/package.json +1 -1
- package/src/benchmark.js +38 -32
- package/src/constants.js +1 -1
- package/src/key-handler.js +82 -1
- package/src/overlays.js +1 -1
- package/src/ping.js +69 -9
- package/src/render-table.js +73 -36
- package/src/router-daemon.js +6 -7
- package/src/tui-state.js +5 -0
- package/web/dist/assets/{index-BjX9NY0U.js → index-DDz3_efL.js} +1 -1
- package/web/dist/index.html +1 -1
package/README.md
CHANGED
|
@@ -456,6 +456,7 @@ When a tool mode is active (via `Z`), models incompatible with that tool are hig
|
|
|
456
456
|
## ✨ Features
|
|
457
457
|
|
|
458
458
|
- **Parallel pings** — all ~165 API/Zen-callable models tested simultaneously via native `fetch` (~170 total cataloged models including CLI-only Gemini rows)
|
|
459
|
+
- **AI benchmark columns** — `Ctrl+A` benchmarks the selected model, `Ctrl+U` benchmarks visible models, and results split cleanly into **AI Latency** plus **TPS**.
|
|
459
460
|
- **Adaptive monitoring** — 2s burst for 60s → 10s normal → 30s idle
|
|
460
461
|
- **Stability score** — composite 0–100 (p95 latency, jitter, spike rate, uptime)
|
|
461
462
|
- **Smart ranking** — top 3 highlighted 🥇🥈🥉
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Changelog v0.3.79 - 2026-05-28
|
|
2
|
+
|
|
3
|
+
### Added
|
|
4
|
+
- **Global AI Speed Test (Ctrl+U)** — Benchmark all currently visible models (respecting all active filters) in parallel with a 5-request concurrency limit to avoid rate limits. Results populate the **AI Speed** column for each model. Displays a prominent violet/purple footer badge warning about high request volume.
|
|
5
|
+
- New TUI footer layout with two speed test controls aligned left:
|
|
6
|
+
- `NEW ⭐️ Ctrl+A 🤖 AI Speed Test` (green badge) — benchmark selected model only
|
|
7
|
+
- `NEW Ctrl+U : Global AI Speed Test (Uses a lot of requests!)` (violet/purple badge) — benchmark all visible models
|
|
8
|
+
- Concurrency utility `runWithConcurrency(tasks, maxConcurrent)` to manage parallel benchmark requests safely.
|
|
9
|
+
- Global benchmark state tracking in TUI:
|
|
10
|
+
- `globalBenchmarkRunning` (boolean)
|
|
11
|
+
- `globalBenchmarkTotal` (number of models to test)
|
|
12
|
+
- `globalBenchmarkCompleted` (progress counter)
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- **Renamed column** `Answer Speed` → `AI Speed` (compact: `AI Sp.`) to better reflect that it measures model inference speed, not network ping.
|
|
16
|
+
- **Removed sort hotkey** `w` from the AI Speed column header — the column is not sortable, so the misleading yellow "w" has been removed. The column remains responsive and hides/shows with other optional columns.
|
|
17
|
+
- Help overlay text updated: `Ctrl+A` now labeled "AI Speed Test" with description "(benchmark selected model → time + TPS)".
|
|
18
|
+
- TUI footer structure updated to 3 lines (`TABLE_FOOTER_LINES = 3`) to accommodate both speed test badges and the last release date.
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- Footer layout consistency with new multi-part line construction using `parts` array and proper hotkey zone registration for both `Ctrl+A` and `Ctrl+U` badges.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Changelog v0.3.80 - 2026-05-30
|
|
2
|
+
|
|
3
|
+
### Changed
|
|
4
|
+
- Split the former single **AI Speed** table value into two clearer columns: **AI Latency** for benchmark wall-clock response time and **TPS** for rounded tokens per second. When a model's Health is not good, AI Latency now mirrors the exact Health text and TPS stays `—`, so benchmark columns do not introduce a second, conflicting error language.
|
|
5
|
+
- Made the AI benchmark prompt request one cohesive 80–100 word paragraph instead of a tiny one-sentence answer, and raised the benchmark cap to 140 output tokens. This gives latency and TPS measurements enough generated text to be more stable and meaningful.
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- Disabled reasoning/thinking for lightweight model ping payloads by sending `thinking: { type: "disabled" }` on OpenAI-compatible chat-completion probes. This keeps latency checks focused on network/model availability instead of accidentally paying for hidden reasoning tokens.
|
|
9
|
+
- Reused the same disabled-thinking ping payload for router health probes, while leaving Replicate prediction probes unchanged because they do not use the OpenAI chat-completions schema.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "free-coding-models",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.80",
|
|
4
4
|
"description": "Find the fastest coding LLM models in seconds — ping free models from multiple providers, pick the best one for OpenCode, Cursor, or any AI coding assistant.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"nvidia",
|
package/src/benchmark.js
CHANGED
|
@@ -18,25 +18,27 @@
|
|
|
18
18
|
* → Functions:
|
|
19
19
|
* - `buildBenchmarkRequest`: Build provider-specific benchmark request
|
|
20
20
|
* - `benchmarkModel`: Run a single benchmark and return timing + token metrics
|
|
21
|
-
* - `
|
|
21
|
+
* - `formatBenchmarkLatency`: Format benchmark latency for the AI Latency TUI column
|
|
22
|
+
* - `formatBenchmarkTps`: Format benchmark throughput for the TPS TUI column
|
|
23
|
+
* - `formatBenchmarkResult`: Legacy combined formatter for compatibility
|
|
22
24
|
* - `estimateTokensFromText`: Fallback token estimator (clearly labeled)
|
|
23
25
|
*
|
|
24
26
|
* 📦 Dependencies:
|
|
25
27
|
* - ./ping.js: buildPingRequest, resolveCloudflareUrl
|
|
26
28
|
*
|
|
27
29
|
* @see {@link ./ping.js} Provider-specific request building
|
|
28
|
-
* @see {@link ./render-table.js}
|
|
30
|
+
* @see {@link ./render-table.js} AI Latency + TPS column rendering
|
|
29
31
|
*/
|
|
30
32
|
|
|
31
33
|
import { buildPingRequest, resolveCloudflareUrl } from './ping.js'
|
|
32
34
|
|
|
33
|
-
// 📖 BENCHMARK_PROMPT: A
|
|
34
|
-
// 📖
|
|
35
|
-
export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one
|
|
35
|
+
// 📖 BENCHMARK_PROMPT: A deterministic one-paragraph task that any model can answer.
|
|
36
|
+
// 📖 The longer target gives latency + TPS measurements enough generated tokens to be reliable.
|
|
37
|
+
export const BENCHMARK_PROMPT = 'Why is the sky blue? Answer in exactly one cohesive paragraph of 80 to 100 words. Do not use bullet points, headings, or multiple paragraphs.'
|
|
36
38
|
|
|
37
|
-
// 📖 BENCHMARK_MAX_TOKENS: Hard cap
|
|
38
|
-
// 📖
|
|
39
|
-
export const BENCHMARK_MAX_TOKENS =
|
|
39
|
+
// 📖 BENCHMARK_MAX_TOKENS: Hard cap high enough for a real paragraph, but low enough
|
|
40
|
+
// 📖 to avoid accidental essays when benchmarking many models at once.
|
|
41
|
+
export const BENCHMARK_MAX_TOKENS = 140
|
|
40
42
|
|
|
41
43
|
// 📖 BENCHMARK_TEMPERATURE: Zero temperature for deterministic, reproducible results.
|
|
42
44
|
export const BENCHMARK_TEMPERATURE = 0
|
|
@@ -52,37 +54,41 @@ export function estimateTokensFromText(text) {
|
|
|
52
54
|
return Math.ceil(text.length / 4)
|
|
53
55
|
}
|
|
54
56
|
|
|
55
|
-
// 📖
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
// 📖 Error: compact error code like "ERR", "TIMEOUT", "401", "429"
|
|
61
|
-
// 📖 Empty: "—"
|
|
62
|
-
export function formatBenchmarkResult(result, { running = false, frame = 0 } = {}) {
|
|
63
|
-
if (running) {
|
|
64
|
-
const spinIdx = frame % 10
|
|
65
|
-
const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'][spinIdx]
|
|
66
|
-
return spinner
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
if (!result) {
|
|
70
|
-
return '—'
|
|
71
|
-
}
|
|
57
|
+
// 📖 benchmarkSpinner: Shared tiny spinner for benchmark columns while a request runs.
|
|
58
|
+
/**
 * Pick the braille spinner glyph for an animation frame.
 *
 * @param {number} frame - Animation frame counter. Any number is accepted;
 *   negative, fractional and non-finite values are normalized.
 * @returns {string} One of the ten spinner glyphs (never `undefined`).
 */
function benchmarkSpinner(frame) {
  const glyphs = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
  // 📖 A bare `frame % 10` yields a negative or fractional index for unusual
  // 📖 frame values, which made the cell render "undefined". Normalize first.
  const numericFrame = Number(frame)
  const step = Number.isFinite(numericFrame) ? Math.trunc(numericFrame) : 0
  const spinIdx = ((step % glyphs.length) + glyphs.length) % glyphs.length
  return glyphs[spinIdx]
}
|
|
72
62
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
63
|
+
// 📖 formatBenchmarkLatency: Turn a raw benchmark result into the AI Latency column value.
|
|
64
|
+
// 📖 Success: "4.3s" / "12s". Error: compact error code. Empty: "—".
|
|
65
|
+
/**
 * Format a benchmark result for the AI Latency column.
 *
 * @param {{ ok?: boolean, code?: string, totalMs?: number } | null | undefined} result
 *   Raw benchmark result, or a nullish value when the model was never benchmarked.
 * @param {{ running?: boolean, frame?: number }} [options]
 *   `running` shows the spinner for `frame` instead of a value.
 * @returns {string} Spinner glyph, "—" (no usable measurement), an error code,
 *   or a duration such as "4.3s" / "12s".
 */
export function formatBenchmarkLatency(result, { running = false, frame = 0 } = {}) {
  if (running) return benchmarkSpinner(frame)
  if (!result) return '—'
  if (!result.ok) return result.code || 'ERR'

  const totalSeconds = result.totalMs / 1000
  // 📖 A successful result without a numeric totalMs used to render "NaNs".
  if (!Number.isFinite(totalSeconds)) return '—'

  // 📖 Choose the precision from the value as it will be printed, so 9.96s
  // 📖 becomes "10s" (like 10.0s does) instead of the inconsistent "10.0s".
  const roundedToTenth = Number(totalSeconds.toFixed(1))
  return roundedToTenth >= 10
    ? totalSeconds.toFixed(0) + 's'
    : totalSeconds.toFixed(1) + 's'
}
|
|
81
75
|
|
|
82
|
-
|
|
83
|
-
|
|
76
|
+
// 📖 formatBenchmarkTps: Turn a raw benchmark result into the TPS column value.
|
|
77
|
+
// 📖 Success is the rounded tokens/second number only because the header carries "TPS".
|
|
78
|
+
// 📖 Errors and empty state stay as a dim dash in the table to avoid duplicating codes.
|
|
79
|
+
/**
 * Format a benchmark result for the TPS column.
 *
 * @param {{ ok?: boolean, tokensPerSecond?: number } | null | undefined} result
 *   Raw benchmark result, or a nullish value when the model was never benchmarked.
 * @param {{ running?: boolean, frame?: number }} [options]
 *   `running` shows the spinner for `frame` instead of a value.
 * @returns {string} Spinner glyph, "—" for missing/failed/unusable results,
 *   or the rounded tokens-per-second number as a string.
 */
export function formatBenchmarkTps(result, { running = false, frame = 0 } = {}) {
  if (running) return benchmarkSpinner(frame)
  if (!result || !result.ok) return '—'

  const tps = Number(result.tokensPerSecond ?? 0)
  // 📖 A NaN or Infinity rate (e.g. a zero-duration measurement) would print
  // 📖 "NaN"/"Infinity" and overflow the narrow TPS cell; show the dash instead.
  if (!Number.isFinite(tps)) return '—'
  return String(Math.round(tps))
}
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
// 📖 formatBenchmarkResult: legacy combined formatter retained for integrations/tests
|
|
86
|
+
// 📖 that still expect the old single-column "latency / TPS" string.
|
|
87
|
+
/**
 * Legacy single-string formatter combining latency and throughput.
 *
 * @param {object | null | undefined} result - Raw benchmark result.
 * @param {{ running?: boolean, frame?: number }} [options] - Spinner controls.
 * @returns {string} Spinner glyph, "—", an error code, or "<latency> / <tps> TPS".
 */
export function formatBenchmarkResult(result, options = {}) {
  const { running, frame } = options
  if (running) {
    return benchmarkSpinner(frame ?? 0)
  }
  if (!result) {
    return '—'
  }
  if (result.ok) {
    const latencyPart = formatBenchmarkLatency(result)
    const tpsPart = formatBenchmarkTps(result)
    return `${latencyPart} / ${tpsPart} TPS`
  }
  return result.code || 'ERR'
}
|
|
87
93
|
|
|
88
94
|
// 📖 buildBenchmarkRequest: Build provider-specific benchmark request.
|
package/src/constants.js
CHANGED
|
@@ -102,7 +102,7 @@ export const WIDTH_WARNING_MIN_COLS = 80
|
|
|
102
102
|
// 📖 Table row-budget constants — must stay in sync with renderTable()'s actual output.
|
|
103
103
|
// 📖 If this drifts, model rows overflow and can push the title row out of view.
|
|
104
104
|
export const TABLE_HEADER_LINES = 2 // 📖 title, column headers
|
|
105
|
-
export const TABLE_FOOTER_LINES =
|
|
105
|
+
export const TABLE_FOOTER_LINES = 3 // 📖 actions, links, speed test
|
|
106
106
|
export const TABLE_FIXED_LINES = TABLE_HEADER_LINES + TABLE_FOOTER_LINES
|
|
107
107
|
|
|
108
108
|
// ─── Small cell-formatting helpers ────────────────────────────────────────────
|
package/src/key-handler.js
CHANGED
|
@@ -33,7 +33,8 @@
|
|
|
33
33
|
|
|
34
34
|
import { loadChangelog } from './changelog-loader.js'
|
|
35
35
|
import { getToolMeta, isModelCompatibleWithTool, getCompatibleTools, findSimilarCompatibleModels } from './tool-metadata.js'
|
|
36
|
-
import { loadConfig, saveConfig, replaceConfigContents } from './config.js'
|
|
36
|
+
import { loadConfig, saveConfig, replaceConfigContents, getApiKey } from './config.js'
|
|
37
|
+
import { sources } from '../sources.js'
|
|
37
38
|
import { join, dirname } from 'node:path'
|
|
38
39
|
import { fileURLToPath } from 'node:url'
|
|
39
40
|
import { spawn } from 'node:child_process'
|
|
@@ -1075,6 +1076,80 @@ export function createKeyHandler(ctx) {
|
|
|
1075
1076
|
}
|
|
1076
1077
|
}
|
|
1077
1078
|
|
|
1079
|
+
// 📖 runWithConcurrency: Execute tasks with limited parallelism (maxConcurrent simultaneous).
|
|
1080
|
+
/**
 * Run async task factories with a cap on how many are in flight at once.
 *
 * @param {Array<() => Promise<any>>} tasks - Zero-argument functions, each
 *   starting one unit of work when called.
 * @param {number} maxConcurrent - Desired parallelism. Values below 1 or NaN
 *   fall back to 1; fractional values are floored; values above the task
 *   count (including Infinity) are clamped to the task count.
 * @returns {Promise<any[]>} Resolves (never rejects because of a task) with
 *   one entry per task in input order: the task's resolved value, or
 *   `{ error }` when that task threw or rejected.
 */
function runWithConcurrency(tasks, maxConcurrent) {
  const results = new Array(tasks.length)

  // 📖 `new Array(maxConcurrent)` threw RangeError for negative/fractional/Infinity
  // 📖 limits and silently ran nothing for 0. Sanitize the limit and never
  // 📖 spawn more workers than there are tasks.
  const requested = Math.floor(maxConcurrent)
  const workerCount = Math.min(requested >= 1 ? requested : 1, tasks.length)

  let nextIndex = 0
  const worker = async () => {
    // 📖 Claiming an index is synchronous, so two workers cannot take the same task.
    while (nextIndex < tasks.length) {
      const index = nextIndex++
      try {
        results[index] = await tasks[index]()
      } catch (err) {
        results[index] = { error: err }
      }
    }
  }

  const workers = Array.from({ length: workerCount }, () => worker())
  return Promise.all(workers).then(() => results)
}
|
|
1096
|
+
|
|
1097
|
+
// 📖 runGlobalBenchmark: Benchmark all visible models with up to 5 concurrent requests.
|
|
1098
|
+
// 📖 Results are stored in state.benchmarkResults (same format as individual benchmarks).
|
|
1099
|
+
/**
 * Benchmark every model in `state.visibleSorted`, at most 5 at a time.
 *
 * Writes each outcome to `state.benchmarkResults["<providerKey>/<modelId>"]`,
 * tracks in-flight keys in `state.benchmarkRunning`, and maintains the
 * `globalBenchmarkRunning` / `globalBenchmarkTotal` / `globalBenchmarkCompleted`
 * progress fields. Models that are already being benchmarked, or whose
 * provider has no URL in `sources`, are counted as completed and skipped.
 *
 * @param {object} state - TUI state object (mutated in place).
 * @returns {Promise<void>} Settles when the whole run finishes; a no-op when a
 *   global run is already in progress.
 */
async function runGlobalBenchmark(state) {
  if (state.globalBenchmarkRunning) return
  state.globalBenchmarkRunning = true

  // 📖 The progress flags are reset in `finally`: previously any throw while
  // 📖 preparing or scheduling left globalBenchmarkRunning stuck at true, which
  // 📖 made every later Ctrl+U a silent no-op.
  try {
    // 📖 Snapshot the list so re-sorting/filtering during the run cannot shift it,
    // 📖 and tolerate a list that has not been computed yet.
    const models = [...(state.visibleSorted ?? [])]
    state.globalBenchmarkTotal = models.length
    state.globalBenchmarkCompleted = 0

    const tasks = models.map(model => async () => {
      const benchmarkKey = `${model.providerKey}/${model.modelId}`
      // Skip if already running (e.g., from Ctrl+A)
      if (state.benchmarkRunning.has(benchmarkKey)) {
        state.globalBenchmarkCompleted++
        return { skipped: true }
      }

      const apiKey = getApiKey(state.config, model.providerKey) ?? null
      const providerUrl = sources[model.providerKey]?.url ?? null
      if (!providerUrl) {
        state.globalBenchmarkCompleted++
        return { skipped: true }
      }

      state.benchmarkRunning.add(benchmarkKey)
      try {
        const result = await benchmarkModel({
          apiKey,
          modelId: model.modelId,
          providerKey: model.providerKey,
          url: providerUrl,
        })
        state.benchmarkResults[benchmarkKey] = result
        return { ok: result.ok }
      } catch (err) {
        // 📖 Store a failure in the same shape as a benchmark result so the
        // 📖 table can render it like any other error.
        state.benchmarkResults[benchmarkKey] = {
          ok: false,
          code: 'ERR',
          totalMs: 0,
          error: err?.message || 'Benchmark failed',
        }
        return { ok: false }
      } finally {
        state.benchmarkRunning.delete(benchmarkKey)
        state.globalBenchmarkCompleted++
      }
    })

    await runWithConcurrency(tasks, 5)
  } finally {
    state.globalBenchmarkRunning = false
    state.globalBenchmarkTotal = 0
    state.globalBenchmarkCompleted = 0
  }
}
|
|
1152
|
+
|
|
1078
1153
|
// 📖 Favorites display mode:
|
|
1079
1154
|
// 📖 - true => favorites stay pinned + always visible (legacy behavior)
|
|
1080
1155
|
// 📖 - false => favorites are just starred rows and obey normal sort/filter rules
|
|
@@ -1386,6 +1461,12 @@ export function createKeyHandler(ctx) {
|
|
|
1386
1461
|
return
|
|
1387
1462
|
}
|
|
1388
1463
|
|
|
1464
|
+
// 📖 Ctrl+U: Global AI Speed Benchmark (benchmark all visible models, 5 concurrent)
|
|
1465
|
+
if (key.ctrl && key.name === 'u') {
|
|
1466
|
+
await runGlobalBenchmark(state)
|
|
1467
|
+
return
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1389
1470
|
// 📖 Command palette captures the keyboard while active.
|
|
1390
1471
|
if (state.commandPaletteOpen) {
|
|
1391
1472
|
if (key.ctrl && key.name === 'c') { exit(0); return }
|
package/src/overlays.js
CHANGED
|
@@ -930,7 +930,7 @@ export function createOverlayRenderers(state, deps) {
|
|
|
930
930
|
lines.push(` ${heading('Controls')}`)
|
|
931
931
|
lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
|
|
932
932
|
lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
|
|
933
|
-
lines.push(` ${key('Ctrl+A')}
|
|
933
|
+
lines.push(` ${key('Ctrl+A')} AI Speed Test ${hint('(benchmark selected model → time + TPS)')}`)
|
|
934
934
|
lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
|
|
935
935
|
lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
|
|
936
936
|
lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
|
package/src/ping.js
CHANGED
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
*
|
|
17
17
|
* → Functions:
|
|
18
18
|
* - `resolveCloudflareUrl`: Resolve {account_id} placeholder from CLOUDFLARE_ACCOUNT_ID env var
|
|
19
|
+
* - `buildChatCompletionPingBody`: Build minimal chat-completion probe payloads with thinking disabled
|
|
20
|
+
* - `markDisabledThinkingUnsupported`: Cache strict providers that reject the optional thinking control
|
|
21
|
+
* - `shouldUseDisabledThinkingForProvider`: Decide whether a provider should receive disabled-thinking probes
|
|
19
22
|
* - `buildPingRequest`: Build provider-specific HTTP request for pinging
|
|
20
23
|
* - `ping`: Send async ping request with timeout; returns { code, ms, quotaPercent }
|
|
21
24
|
* - `getHeaderValue`: Helper to extract header value from Headers object or plain object
|
|
@@ -41,6 +44,9 @@ import { PING_TIMEOUT } from './constants.js'
|
|
|
41
44
|
import { fetchProviderQuota as _fetchProviderQuotaFromModule } from './provider-quota-fetchers.js'
|
|
42
45
|
import { supportsUsagePercent } from './quota-capabilities.js'
|
|
43
46
|
|
|
47
|
+
const DISABLED_THINKING_RETRY_STATUSES = new Set([400, 422])
|
|
48
|
+
const disabledThinkingUnsupportedProviders = new Set()
|
|
49
|
+
|
|
44
50
|
// 📖 resolveCloudflareUrl: Cloudflare's OpenAI-compatible endpoint is account-scoped.
|
|
45
51
|
// 📖 We resolve {account_id} from env so provider setup can stay simple in config.
|
|
46
52
|
export function resolveCloudflareUrl(url) {
|
|
@@ -50,10 +56,37 @@ export function resolveCloudflareUrl(url) {
|
|
|
50
56
|
return url.replace('{account_id}', encodeURIComponent(accountId))
|
|
51
57
|
}
|
|
52
58
|
|
|
59
|
+
// 📖 buildChatCompletionPingBody: Use the smallest useful chat-completion probe.
|
|
60
|
+
// 📖 The explicit thinking toggle prevents reasoning-capable endpoints from spending
|
|
61
|
+
// 📖 hidden tokens or adding thinking latency when we only need availability + RTT.
|
|
62
|
+
/**
 * Build the minimal chat-completion payload used for availability probes.
 *
 * @param {string} modelId - Model identifier sent as `model`.
 * @param {object} [overrides] - Extra root fields merged over the defaults.
 * @param {{ disableThinking?: boolean }} [options] - Pass
 *   `disableThinking: false` to omit the `thinking` field entirely.
 * @returns {object} Request body ready for `JSON.stringify`.
 */
export function buildChatCompletionPingBody(modelId, overrides = {}, options = {}) {
  const defaults = {
    model: modelId,
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 1,
    thinking: { type: 'disabled' },
  }
  const payload = Object.assign(defaults, overrides)

  // 📖 Only an explicit `false` drops the field; undefined keeps thinking disabled.
  const omitThinking = options.disableThinking === false
  if (omitThinking) {
    delete payload.thinking
  }
  return payload
}
|
|
73
|
+
|
|
74
|
+
// 📖 markDisabledThinkingUnsupported: remember strict providers that reject the
|
|
75
|
+
// 📖 optional `thinking` field so future pings avoid repeated 400/422 retries.
|
|
76
|
+
/**
 * Record that a provider rejected the optional `thinking` field.
 *
 * @param {string} providerKey - Provider identifier; falsy values are ignored.
 * @returns {void}
 */
export function markDisabledThinkingUnsupported(providerKey) {
  if (!providerKey) return
  disabledThinkingUnsupportedProviders.add(providerKey)
}
|
|
79
|
+
|
|
80
|
+
// 📖 shouldUseDisabledThinkingForProvider: central policy for OpenAI-compatible
|
|
81
|
+
// 📖 probes, shared by regular pings and router health probes.
|
|
82
|
+
/**
 * Decide whether probes for a provider should carry the disabled-thinking field.
 *
 * @param {string} providerKey - Provider identifier.
 * @returns {boolean} `false` once the provider has been marked as rejecting
 *   the field, `true` otherwise.
 */
export function shouldUseDisabledThinkingForProvider(providerKey) {
  const wasRejected = disabledThinkingUnsupportedProviders.has(providerKey)
  return !wasRejected
}
|
|
85
|
+
|
|
53
86
|
// 📖 buildPingRequest: Build provider-specific ping request.
|
|
54
87
|
// 📖 Handles Replicate's /v1/predictions format, Cloudflare's account_id in URL,
|
|
55
88
|
// 📖 and standard OpenAI-compliant chat completions with provider-specific headers.
|
|
56
|
-
export function buildPingRequest(apiKey, modelId, providerKey, url) {
|
|
89
|
+
export function buildPingRequest(apiKey, modelId, providerKey, url, options = {}) {
|
|
57
90
|
// 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
|
|
58
91
|
const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId
|
|
59
92
|
|
|
@@ -75,7 +108,9 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
|
|
|
75
108
|
return {
|
|
76
109
|
url: resolveCloudflareUrl(url),
|
|
77
110
|
headers,
|
|
78
|
-
body:
|
|
111
|
+
body: buildChatCompletionPingBody(apiModelId, {}, {
|
|
112
|
+
disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
|
|
113
|
+
}),
|
|
79
114
|
}
|
|
80
115
|
}
|
|
81
116
|
|
|
@@ -90,7 +125,31 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
|
|
|
90
125
|
return {
|
|
91
126
|
url,
|
|
92
127
|
headers,
|
|
93
|
-
body:
|
|
128
|
+
body: buildChatCompletionPingBody(apiModelId, {}, {
|
|
129
|
+
disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
|
|
130
|
+
}),
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// 📖 sendPingFetch: keep retry code tiny and ensure both attempts use the same abort signal.
|
|
135
|
+
/**
 * POST a prepared ping request.
 *
 * @param {{ url: string, headers: object, body: object }} req - Prepared
 *   request; `body` is JSON-serialized here.
 * @param {AbortSignal} signal - Abort signal passed through to `fetch`.
 * @returns {Promise<Response>} The `fetch` response.
 */
async function sendPingFetch(req, signal) {
  const { url, headers, body } = req
  const init = {
    method: 'POST',
    signal,
    headers,
    body: JSON.stringify(body),
  }
  return fetch(url, init)
}
|
|
142
|
+
|
|
143
|
+
// 📖 isDisabledThinkingRejected: strict OpenAI-compatible gateways may reject
|
|
144
|
+
// 📖 unknown root fields. We only retry when the status and error text names
|
|
145
|
+
// 📖 the optional `thinking` control, avoiding retries for real model failures.
|
|
146
|
+
/**
 * Detect whether a failed probe was rejected because of the `thinking` field.
 *
 * @param {Response} resp - Response of the probe; read through a clone so the
 *   caller can still consume the body.
 * @param {{ body?: { thinking?: object, model?: string } }} req - The request
 *   that produced `resp`.
 * @returns {Promise<boolean>} `true` only when the request carried `thinking`,
 *   the status is one of the retryable statuses, and the error text mentions
 *   "thinking" outside of the echoed model id. Unreadable bodies yield `false`.
 */
async function isDisabledThinkingRejected(resp, req) {
  if (!req?.body?.thinking || !DISABLED_THINKING_RETRY_STATUSES.has(resp.status)) return false
  try {
    let text = await resp.clone().text()
    // 📖 Providers often echo the model id in error messages. Ids such as
    // 📖 "kimi-k2-thinking" would otherwise make an ordinary "model not found"
    // 📖 look like a rejected `thinking` field and flag the whole provider.
    const modelId = req.body.model
    if (typeof modelId === 'string' && modelId !== '') {
      text = text.replaceAll(modelId, '')
    }
    return /thinking/i.test(text)
  } catch {
    return false
  }
}
|
|
96
155
|
|
|
@@ -104,12 +163,13 @@ export async function ping(apiKey, modelId, providerKey, url) {
|
|
|
104
163
|
const timer = setTimeout(() => ctrl.abort(), PING_TIMEOUT)
|
|
105
164
|
const t0 = performance.now()
|
|
106
165
|
try {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
166
|
+
let req = buildPingRequest(apiKey, modelId, providerKey, url)
|
|
167
|
+
let resp = await sendPingFetch(req, ctrl.signal)
|
|
168
|
+
if (await isDisabledThinkingRejected(resp, req)) {
|
|
169
|
+
markDisabledThinkingUnsupported(providerKey)
|
|
170
|
+
req = buildPingRequest(apiKey, modelId, providerKey, url, { disableThinking: false })
|
|
171
|
+
resp = await sendPingFetch(req, ctrl.signal)
|
|
172
|
+
}
|
|
113
173
|
// 📖 Normalize all HTTP 2xx statuses to "200" so existing verdict/avg logic still works.
|
|
114
174
|
const code = resp.status >= 200 && resp.status < 300 ? '200' : String(resp.status)
|
|
115
175
|
return {
|
package/src/render-table.js
CHANGED
|
@@ -49,8 +49,8 @@ import { themeColors, getProviderRgb, getTierRgb, getReadableTextRgb, getTheme }
|
|
|
49
49
|
import { TIER_COLOR } from './tier-colors.js'
|
|
50
50
|
import { getAvg, getVerdict, getUptime, getStabilityScore, getVersionStatusInfo } from './utils.js'
|
|
51
51
|
import { usagePlaceholderForProvider } from './ping.js'
|
|
52
|
-
import {
|
|
53
|
-
import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth } from './render-helpers.js'
|
|
52
|
+
import { formatBenchmarkLatency, formatBenchmarkTps } from './benchmark.js'
|
|
53
|
+
import { calculateViewport, sortResultsWithPinnedFavorites, padEndDisplay, displayWidth, stripAnsi } from './render-helpers.js'
|
|
54
54
|
import { getToolMeta, TOOL_METADATA, TOOL_MODE_ORDER, isModelCompatibleWithTool } from './tool-metadata.js'
|
|
55
55
|
import { getColumnSpacing } from './ui-config.js'
|
|
56
56
|
import { detectPackageManager, getManualInstallCmd } from './updater.js'
|
|
@@ -91,6 +91,8 @@ const COLUMN_SORT_MAP = {
|
|
|
91
91
|
verdict: 'verdict',
|
|
92
92
|
stability: 'stability',
|
|
93
93
|
uptime: 'uptime',
|
|
94
|
+
aiLatency: null,
|
|
95
|
+
tps: null,
|
|
94
96
|
}
|
|
95
97
|
export { COLUMN_SORT_MAP }
|
|
96
98
|
|
|
@@ -277,7 +279,8 @@ export function renderTable({
|
|
|
277
279
|
const W_STATUS = 18
|
|
278
280
|
const W_VERDICT = 14
|
|
279
281
|
const W_UPTIME = 6
|
|
280
|
-
const
|
|
282
|
+
const W_AI_LATENCY = 18
|
|
283
|
+
const W_TPS = 5
|
|
281
284
|
|
|
282
285
|
// const W_TOKENS = 7 // Used column removed
|
|
283
286
|
// const W_USAGE = 7 // Usage column removed
|
|
@@ -285,17 +288,18 @@ export function renderTable({
|
|
|
285
288
|
|
|
286
289
|
// 📖 Responsive column visibility: progressively hide least-useful columns
|
|
287
290
|
// 📖 and shorten header labels when terminal width is insufficient.
|
|
288
|
-
// 📖 Hiding order (least useful first): Rank →
|
|
291
|
+
// 📖 Hiding order (least useful first): Rank → AI Latency/TPS → Up% → Tier → Stability
|
|
289
292
|
// 📖 Compact mode shrinks: Latest Ping→Lat. P (9), Avg Ping→Avg. P (8),
|
|
290
293
|
// 📖 Stability→StaB. (8), Provider→4chars+… (7), Health→6chars+… (13)
|
|
291
|
-
// 📖 Breakpoints
|
|
294
|
+
// 📖 Breakpoints are computed dynamically from active column widths.
|
|
292
295
|
let wPing = 14
|
|
293
296
|
let wAvg = 11
|
|
294
297
|
let wStab = 11
|
|
295
298
|
let wSource = W_SOURCE
|
|
296
299
|
let wStatus = W_STATUS
|
|
300
|
+
let wAiLatency = W_AI_LATENCY
|
|
297
301
|
let showRank = true
|
|
298
|
-
let
|
|
302
|
+
let showBenchmarkColumns = true
|
|
299
303
|
let showUptime = true
|
|
300
304
|
let showTier = true
|
|
301
305
|
let showStability = true
|
|
@@ -310,7 +314,7 @@ export function renderTable({
|
|
|
310
314
|
cols.push(W_SWE, W_CTX, W_MODEL, wSource, wPing, wAvg, wStatus, W_VERDICT)
|
|
311
315
|
if (showStability) cols.push(wStab)
|
|
312
316
|
if (showUptime) cols.push(W_UPTIME)
|
|
313
|
-
if (
|
|
317
|
+
if (showBenchmarkColumns) cols.push(wAiLatency, W_TPS)
|
|
314
318
|
return ROW_MARGIN + cols.reduce((a, b) => a + b, 0) + (cols.length - 1) * SEP_W
|
|
315
319
|
}
|
|
316
320
|
|
|
@@ -322,10 +326,11 @@ export function renderTable({
|
|
|
322
326
|
wStab = 8 // 'StaB.' instead of 'Stability'
|
|
323
327
|
wSource = 7 // Provider truncated to 4 chars + '…', 7 cols total
|
|
324
328
|
wStatus = 13 // Health truncated after 6 chars + '…'
|
|
329
|
+
wAiLatency = 13 // Mirror compact Health text when health is not good
|
|
325
330
|
}
|
|
326
331
|
// 📖 Steps 2–6: Progressive column hiding (least useful first)
|
|
327
332
|
if (calcWidth() > terminalCols) showRank = false
|
|
328
|
-
if (calcWidth() > terminalCols)
|
|
333
|
+
if (calcWidth() > terminalCols) showBenchmarkColumns = false
|
|
329
334
|
if (calcWidth() > terminalCols) showUptime = false
|
|
330
335
|
if (calcWidth() > terminalCols) showTier = false
|
|
331
336
|
if (calcWidth() > terminalCols) showStability = false
|
|
@@ -348,7 +353,10 @@ export function renderTable({
|
|
|
348
353
|
colDefs.push({ name: 'verdict', width: W_VERDICT })
|
|
349
354
|
if (showStability) colDefs.push({ name: 'stability', width: wStab })
|
|
350
355
|
if (showUptime) colDefs.push({ name: 'uptime', width: W_UPTIME })
|
|
351
|
-
if (
|
|
356
|
+
if (showBenchmarkColumns) {
|
|
357
|
+
colDefs.push({ name: 'aiLatency', width: wAiLatency })
|
|
358
|
+
colDefs.push({ name: 'tps', width: W_TPS })
|
|
359
|
+
}
|
|
352
360
|
let x = ROW_MARGIN + 1 // 📖 1-based: first column starts after the 2-char left margin
|
|
353
361
|
const columns = []
|
|
354
362
|
for (let i = 0; i < colDefs.length; i++) {
|
|
@@ -475,12 +483,17 @@ export function renderTable({
|
|
|
475
483
|
return themeColors.hotkey('U') + themeColors.dim('p%' + padding)
|
|
476
484
|
})()
|
|
477
485
|
|
|
478
|
-
// 📖
|
|
479
|
-
const
|
|
480
|
-
const
|
|
481
|
-
const plain =
|
|
482
|
-
const padding = ' '.repeat(Math.max(0,
|
|
483
|
-
return themeColors.dim(
|
|
486
|
+
// 📖 Benchmark headers — split the old combined AI Speed field into latency + throughput.
|
|
487
|
+
const aiLatencyLabel = isCompact ? 'AI Lat.' : 'AI Latency'
|
|
488
|
+
const aiLatencyH_c = (() => {
|
|
489
|
+
const plain = aiLatencyLabel
|
|
490
|
+
const padding = ' '.repeat(Math.max(0, wAiLatency - plain.length))
|
|
491
|
+
return themeColors.dim(plain + padding)
|
|
492
|
+
})()
|
|
493
|
+
const tpsH_c = (() => {
|
|
494
|
+
const plain = 'TPS'
|
|
495
|
+
const padding = ' '.repeat(Math.max(0, W_TPS - plain.length))
|
|
496
|
+
return themeColors.dim(plain + padding)
|
|
484
497
|
})()
|
|
485
498
|
|
|
486
499
|
// 📖 Usage column removed from UI – no header or separator for it.
|
|
@@ -491,7 +504,7 @@ export function renderTable({
|
|
|
491
504
|
headerParts.push(sweH_c, ctxH_c, modelH_c, originH_c, pingH_c, avgH_c, healthH_c, verdictH_c)
|
|
492
505
|
if (showStability) headerParts.push(stabH_c)
|
|
493
506
|
if (showUptime) headerParts.push(uptimeH_c)
|
|
494
|
-
if (
|
|
507
|
+
if (showBenchmarkColumns) headerParts.push(aiLatencyH_c, tpsH_c)
|
|
495
508
|
lines.push(' ' + headerParts.join(COL_SEP))
|
|
496
509
|
|
|
497
510
|
// 📖 Mouse support: the column header row is the last line we just pushed.
|
|
@@ -793,24 +806,28 @@ export function renderTable({
|
|
|
793
806
|
// (We keep the logic but do not render it.)
|
|
794
807
|
const usageCell = ''
|
|
795
808
|
|
|
796
|
-
// 📖
|
|
809
|
+
// 📖 AI Latency + TPS columns — same benchmark result, split into two readable metrics.
|
|
797
810
|
const benchmarkKey = `${r.providerKey}/${r.modelId}`
|
|
798
811
|
const benchmarkResult = benchmarkResults[benchmarkKey]
|
|
799
812
|
const isBenchmarkRunning = benchmarkRunning.has(benchmarkKey)
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
813
|
+
const healthIsGood = r.status === 'up'
|
|
814
|
+
const latencyText = healthIsGood
|
|
815
|
+
? formatBenchmarkLatency(benchmarkResult, { running: isBenchmarkRunning, frame })
|
|
816
|
+
: statusDisplayText
|
|
817
|
+
const tpsText = healthIsGood
|
|
818
|
+
? formatBenchmarkTps(benchmarkResult, { running: isBenchmarkRunning, frame })
|
|
819
|
+
: '—'
|
|
820
|
+
const benchmarkIsError = healthIsGood && benchmarkResult && !benchmarkResult.ok
|
|
821
|
+
const latencyCell = !healthIsGood
|
|
822
|
+
? statusColor(padEndDisplay(latencyText, wAiLatency))
|
|
823
|
+
: benchmarkIsError
|
|
824
|
+
? themeColors.metricBad(latencyText.padEnd(wAiLatency))
|
|
825
|
+
: benchmarkResult || isBenchmarkRunning
|
|
826
|
+
? themeColors.metricGood(latencyText.padEnd(wAiLatency))
|
|
827
|
+
: themeColors.dim(latencyText.padEnd(wAiLatency))
|
|
828
|
+
const tpsCell = healthIsGood && (benchmarkResult?.ok || isBenchmarkRunning)
|
|
829
|
+
? themeColors.metricGood(tpsText.padEnd(W_TPS))
|
|
830
|
+
: themeColors.dim(tpsText.padEnd(W_TPS))
|
|
814
831
|
|
|
815
832
|
// 📖 Build row: conditionally include columns based on responsive visibility
|
|
816
833
|
const rowParts = []
|
|
@@ -819,7 +836,7 @@ export function renderTable({
|
|
|
819
836
|
rowParts.push(sweCell, ctxCell, nameCell, sourceCell, pingCell, avgCell, status, speedCell)
|
|
820
837
|
if (showStability) rowParts.push(stabCell)
|
|
821
838
|
if (showUptime) rowParts.push(uptimeCell)
|
|
822
|
-
if (
|
|
839
|
+
if (showBenchmarkColumns) rowParts.push(latencyCell, tpsCell)
|
|
823
840
|
const row = ' ' + rowParts.join(COL_SEP)
|
|
824
841
|
|
|
825
842
|
if (isCursor) {
|
|
@@ -942,9 +959,8 @@ export function renderTable({
|
|
|
942
959
|
}
|
|
943
960
|
}
|
|
944
961
|
|
|
945
|
-
// 📖 Line 2: command palette (
|
|
946
|
-
|
|
947
|
-
const paletteLabel = chalk.bgRgb(0, 60, 0).rgb(57, 255, 20).bold(' Ctrl+P Cmd Palette ')
|
|
962
|
+
// 📖 Line 2: command palette (simple color, no background) + GitHub link.
|
|
963
|
+
const paletteLabel = chalk.rgb(57, 255, 20).bold('Ctrl+P Cmd Palette')
|
|
948
964
|
const starLink = '⭐ ' + themeColors.link('\x1b]8;;https://github.com/vava-nessa/free-coding-models\x1b\\GitHub\x1b]8;;\x1b\\')
|
|
949
965
|
lines.push(
|
|
950
966
|
' ' + paletteLabel + themeColors.dim(` • `) + starLink + themeColors.dim(` • `) +
|
|
@@ -998,8 +1014,29 @@ export function renderTable({
|
|
|
998
1014
|
const releaseLabel = lastReleaseDate
|
|
999
1015
|
? chalk.rgb(255, 182, 193)(`Last release: ${lastReleaseDate}`)
|
|
1000
1016
|
: ''
|
|
1017
|
+
const speedTestLabel = chalk.bgRgb(0, 60, 0).rgb(57, 255, 20).bold(' NEW ⭐️ Ctrl+A 🤖 AI Speed Test ')
|
|
1018
|
+
const globalBenchmarkLabel = chalk.bgRgb(180, 0, 255).white.bold(' NEW Ctrl+U : Global AI Speed Test (Uses a lot of requests!) ')
|
|
1001
1019
|
|
|
1002
|
-
|
|
1020
|
+
// 📖 Line 3: Speed Test (Ctrl+A) + Global Benchmark (Ctrl+U) + Last release
|
|
1021
|
+
if (releaseLabel || speedTestLabel || globalBenchmarkLabel) {
|
|
1022
|
+
const parts = [
|
|
1023
|
+
{ text: ' ', key: null },
|
|
1024
|
+
{ text: speedTestLabel, key: 'a' },
|
|
1025
|
+
{ text: ' ', key: null },
|
|
1026
|
+
{ text: globalBenchmarkLabel, key: 'u' },
|
|
1027
|
+
{ text: ' ', key: null },
|
|
1028
|
+
{ text: releaseLabel, key: null },
|
|
1029
|
+
]
|
|
1030
|
+
const footerRow3 = lines.length + 1
|
|
1031
|
+
let xPos = 1
|
|
1032
|
+
for (const part of parts) {
|
|
1033
|
+
const w = displayWidth(part.text)
|
|
1034
|
+
if (part.key) footerHotkeys.push({ key: part.key, row: footerRow3, xStart: xPos, xEnd: xPos + w - 1 })
|
|
1035
|
+
xPos += w
|
|
1036
|
+
}
|
|
1037
|
+
const line = parts.map(p => p.text).join('')
|
|
1038
|
+
lines.push(line)
|
|
1039
|
+
}
|
|
1003
1040
|
_lastLayout.footerHotkeys = footerHotkeys
|
|
1004
1041
|
|
|
1005
1042
|
// 📖 Append \x1b[K (erase to EOL) to each line so leftover chars from previous
|
package/src/router-daemon.js
CHANGED
|
@@ -47,7 +47,7 @@ import {
|
|
|
47
47
|
normalizeRouterConfig,
|
|
48
48
|
saveConfig,
|
|
49
49
|
} from './config.js'
|
|
50
|
-
import { resolveCloudflareUrl } from './ping.js'
|
|
50
|
+
import { buildChatCompletionPingBody, resolveCloudflareUrl, shouldUseDisabledThinkingForProvider } from './ping.js'
|
|
51
51
|
import { sendUsageTelemetry } from './telemetry.js'
|
|
52
52
|
|
|
53
53
|
export const ROUTER_DEFAULT_PORT = 19280
|
|
@@ -1200,12 +1200,11 @@ class RouterRuntime {
|
|
|
1200
1200
|
: await fetch(providerUrl, {
|
|
1201
1201
|
method: 'POST',
|
|
1202
1202
|
headers: cloneHeadersForUpstream({}, apiKey, candidate.provider),
|
|
1203
|
-
body: JSON.stringify(
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
}),
|
|
1203
|
+
body: JSON.stringify(buildChatCompletionPingBody(
|
|
1204
|
+
getApiModelId(candidate.provider, candidate.model),
|
|
1205
|
+
{ stream: false },
|
|
1206
|
+
{ disableThinking: shouldUseDisabledThinkingForProvider(candidate.provider) }
|
|
1207
|
+
)),
|
|
1209
1208
|
signal: controller.signal,
|
|
1210
1209
|
})
|
|
1211
1210
|
const latencyMs = Math.round(performance.now() - started)
|