free-coding-models 0.3.79 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/overlays.js CHANGED
@@ -118,7 +118,8 @@ export function createOverlayRenderers(state, deps) {
118
118
  const updateRowIdx = providerKeys.length
119
119
  const themeRowIdx = updateRowIdx + 1
120
120
  const favoritesModeRowIdx = themeRowIdx + 1
121
- const cleanupLegacyProxyRowIdx = favoritesModeRowIdx + 1
121
+ const startupAiSpeedScanRowIdx = favoritesModeRowIdx + 1
122
+ const cleanupLegacyProxyRowIdx = startupAiSpeedScanRowIdx + 1
122
123
  const changelogViewRowIdx = cleanupLegacyProxyRowIdx + 1
123
124
  const shellEnvRowIdx = changelogViewRowIdx + 1
124
125
  const EL = '\x1b[K'
@@ -259,6 +260,15 @@ export function createOverlayRenderers(state, deps) {
259
260
  cursorLineByRow[favoritesModeRowIdx] = lines.length
260
261
  lines.push(state.settingsCursor === favoritesModeRowIdx ? themeColors.bgCursorSettingsList(favoritesModeRow) : favoritesModeRow)
261
262
 
263
+ // 📖 Startup AI Speed Scan row controls the opt-in Ctrl+U auto-run at launch.
264
+ const startupAiSpeedScanEnabled = state.config.settings?.runAiSpeedTestOnStartup === true
265
+ const startupAiSpeedScanStatus = startupAiSpeedScanEnabled
266
+ ? themeColors.successBold('✅ Enabled — runs Ctrl+U after startup')
267
+ : themeColors.dim('❌ Disabled — manual Ctrl+U only')
268
+ const startupAiSpeedScanRow = `${bullet(state.settingsCursor === startupAiSpeedScanRowIdx)}${themeColors.textBold('Startup AI Speed Scan').padEnd(44)} ${startupAiSpeedScanStatus}`
269
+ cursorLineByRow[startupAiSpeedScanRowIdx] = lines.length
270
+ lines.push(state.settingsCursor === startupAiSpeedScanRowIdx ? themeColors.bgCursorSettingsList(startupAiSpeedScanRow) : startupAiSpeedScanRow)
271
+
262
272
  if (updateState === 'error' && state.settingsUpdateError) {
263
273
  lines.push(themeColors.error(` ${state.settingsUpdateError}`))
264
274
  }
@@ -290,7 +300,7 @@ export function createOverlayRenderers(state, deps) {
290
300
  if (state.settingsEditMode) {
291
301
  lines.push(themeColors.dim(' Type API key • Enter Save • Esc Cancel'))
292
302
  } else {
293
- lines.push(themeColors.dim(' ↑↓ Navigate • Enter Edit/Run/Cycle • + Add key • - Remove key • Space Toggle/Cycle • T Test key • U Updates • G Global theme • Y Favorites mode • Esc Close'))
303
+ lines.push(themeColors.dim(' ↑↓ Navigate • Enter Edit/Run/Cycle • + Add key • - Remove key • Space Toggle/Cycle • T Test key • U Updates • G Theme • Y Favorites • Esc Close'))
294
304
  }
295
305
  // 📖 Show sync/restore status message if set
296
306
  if (state.settingsSyncStatus) {
@@ -897,10 +907,10 @@ export function createOverlayRenderers(state, deps) {
897
907
  lines.push(` ${label('Provider')} Provider source (NIM, Groq, Cerebras, etc.) ${hint('Sort:')} ${key('O')} ${hint('Cycle:')} ${key('D')}`)
898
908
  lines.push(` ${hint('Same model on different providers can have very different speed and uptime.')}`)
899
909
  lines.push('')
900
- lines.push(` ${label('Latest')} Most recent ping response time (ms) ${hint('Sort:')} ${key('L')}`)
910
+ lines.push(` ${label('Last Ping')} Most recent ping response time (ms) ${hint('Sort:')} ${key('L')}`)
901
911
  lines.push(` ${hint('Shows how fast the server is responding right now — useful to catch live slowdowns.')}`)
902
912
  lines.push('')
903
- lines.push(` ${label('Avg Ping')} Average response time across all measurable pings (200 + 401) (ms) ${hint('Sort:')} ${key('A')}`)
913
+ lines.push(` ${label('Avg Ping')} Average response time across all measurable pings (200 + 401) (ms) ${hint('Sort:')} ${key('A')}`)
904
914
  lines.push(` ${hint('The long-term truth. Even without a key, a 401 still gives real latency so the average stays useful.')}`)
905
915
  lines.push('')
906
916
  lines.push(` ${label('Health')} Live status: ✅ UP / 🔥 429 / ⏳ TIMEOUT / ❌ ERR / 🔑 NO KEY ${hint('Sort:')} ${key('H')}`)
@@ -931,6 +941,7 @@ export function createOverlayRenderers(state, deps) {
931
941
  lines.push(` ${key('W')} Toggle ping mode ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
932
942
  lines.push(` ${key('Ctrl+P')} Open ⚡️ command palette ${hint('(search and run actions quickly)')}`)
933
943
  lines.push(` ${key('Ctrl+A')} AI Speed Test ${hint('(benchmark selected model → time + TPS)')}`)
944
+ lines.push(` ${key('Ctrl+U')} Global AI Speed Test ${hint('(benchmark all models; Settings can auto-run it on startup)')}`)
934
945
  lines.push(` ${key('E')} Cycle filter mode ${hint('(Normal → Configured only → Usable only)')}`)
935
946
  lines.push(` ${key('Z')} Cycle tool mode ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
936
947
  lines.push(` ${key('F')} Toggle favorite on selected row ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)
package/src/ping.js CHANGED
@@ -16,6 +16,9 @@
16
16
  *
17
17
  * → Functions:
18
18
  * - `resolveCloudflareUrl`: Resolve {account_id} placeholder from CLOUDFLARE_ACCOUNT_ID env var
19
+ * - `buildChatCompletionPingBody`: Build minimal chat-completion probe payloads with thinking disabled
20
+ * - `markDisabledThinkingUnsupported`: Cache strict providers that reject the optional thinking control
21
+ * - `shouldUseDisabledThinkingForProvider`: Decide whether a provider should receive disabled-thinking probes
19
22
  * - `buildPingRequest`: Build provider-specific HTTP request for pinging
20
23
  * - `ping`: Send async ping request with timeout; returns { code, ms, quotaPercent }
21
24
  * - `getHeaderValue`: Helper to extract header value from Headers object or plain object
@@ -41,6 +44,9 @@ import { PING_TIMEOUT } from './constants.js'
41
44
  import { fetchProviderQuota as _fetchProviderQuotaFromModule } from './provider-quota-fetchers.js'
42
45
  import { supportsUsagePercent } from './quota-capabilities.js'
43
46
 
47
+ const DISABLED_THINKING_RETRY_STATUSES = new Set([400, 422])
48
+ const disabledThinkingUnsupportedProviders = new Set()
49
+
44
50
  // 📖 resolveCloudflareUrl: Cloudflare's OpenAI-compatible endpoint is account-scoped.
45
51
  // 📖 We resolve {account_id} from env so provider setup can stay simple in config.
46
52
  export function resolveCloudflareUrl(url) {
@@ -50,10 +56,37 @@ export function resolveCloudflareUrl(url) {
50
56
  return url.replace('{account_id}', encodeURIComponent(accountId))
51
57
  }
52
58
 
// 📖 buildChatCompletionPingBody: Use the smallest useful chat-completion probe.
// 📖 The explicit thinking toggle prevents reasoning-capable endpoints from spending
// 📖 hidden tokens or adding thinking latency when we only need availability + RTT.
/**
 * @param {string} modelId - Model identifier sent as the `model` field.
 * @param {object} [overrides] - Extra or replacement root fields; spread last so they win over the defaults.
 * @param {{ disableThinking?: boolean }} [options] - Pass `disableThinking: false` to omit the `thinking` field.
 * @returns {object} Plain request body for the probe.
 */
export function buildChatCompletionPingBody(modelId, overrides = {}, options = {}) {
  const body = {
    model: modelId,
    messages: [{ role: 'user', content: 'hi' }],
    max_tokens: 1,
    thinking: { type: 'disabled' },
    ...overrides,
  }
  // 📖 `options?.` tolerates an explicit `null`; default parameters only cover `undefined`.
  if (options?.disableThinking !== false) return body
  // 📖 Omit the field (including any override of it) without mutating `body` via `delete`.
  const { thinking: _omittedThinking, ...bodyWithoutThinking } = body
  return bodyWithoutThinking
}
73
+
// 📖 markDisabledThinkingUnsupported: record a provider key in the module-level
// 📖 set of providers that rejected the optional `thinking` field, so later
// 📖 probes can skip it. Falsy keys are ignored.
export function markDisabledThinkingUnsupported(providerKey) {
  if (!providerKey) return
  disabledThinkingUnsupportedProviders.add(providerKey)
}
79
+
// 📖 shouldUseDisabledThinkingForProvider: single policy check for whether a
// 📖 probe to this provider should carry the disabled-thinking field. Returns
// 📖 false only for providers already recorded as rejecting it.
export function shouldUseDisabledThinkingForProvider(providerKey) {
  const isKnownUnsupported = disabledThinkingUnsupportedProviders.has(providerKey)
  return !isKnownUnsupported
}
85
+
53
86
  // 📖 buildPingRequest: Build provider-specific ping request.
54
87
  // 📖 Handles Replicate's /v1/predictions format, Cloudflare's account_id in URL,
55
88
  // 📖 and standard OpenAI-compliant chat completions with provider-specific headers.
56
- export function buildPingRequest(apiKey, modelId, providerKey, url) {
89
+ export function buildPingRequest(apiKey, modelId, providerKey, url, options = {}) {
57
90
  // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
58
91
  const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId
59
92
 
@@ -75,7 +108,9 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
75
108
  return {
76
109
  url: resolveCloudflareUrl(url),
77
110
  headers,
78
- body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
111
+ body: buildChatCompletionPingBody(apiModelId, {}, {
112
+ disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
113
+ }),
79
114
  }
80
115
  }
81
116
 
@@ -90,7 +125,31 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
90
125
  return {
91
126
  url,
92
127
  headers,
93
- body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
128
+ body: buildChatCompletionPingBody(apiModelId, {}, {
129
+ disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
130
+ }),
131
+ }
132
+ }
133
+
// 📖 sendPingFetch: one place that turns a built ping request into a POST, so
// 📖 the first attempt and any retry go out the same way with the same abort signal.
async function sendPingFetch(req, signal) {
  const { url, headers, body } = req
  const init = {
    method: 'POST',
    signal,
    headers,
    body: JSON.stringify(body),
  }
  return fetch(url, init)
}
142
+
// 📖 isDisabledThinkingRejected: strict OpenAI-compatible gateways may reject
// 📖 unknown root fields. We only retry when the status and error text names
// 📖 the optional `thinking` control, avoiding retries for real model failures.
// 📖 The requested model id is removed from the error text before matching: an
// 📖 id that itself contains "thinking" may be echoed back by an unrelated
// 📖 error and must not be read as a complaint about the `thinking` field.
/**
 * @param {{ status: number, clone: Function }} resp - Response of the first probe attempt.
 * @param {{ body?: { model?: string, thinking?: object } }} req - Request that produced `resp`.
 * @returns {Promise<boolean>} true when the response looks like a rejection of the `thinking` field.
 */
async function isDisabledThinkingRejected(resp, req) {
  if (!req?.body?.thinking || !DISABLED_THINKING_RETRY_STATUSES.has(resp.status)) return false
  try {
    // 📖 Read from a clone so the original response body stays unread.
    const text = await resp.clone().text()
    const modelId = typeof req.body.model === 'string' ? req.body.model : ''
    const searchableText = modelId ? text.split(modelId).join('') : text
    return /thinking/i.test(searchableText)
  } catch {
    // 📖 Best effort: an unreadable body means "do not retry".
    return false
  }
}
96
155
 
@@ -104,12 +163,13 @@ export async function ping(apiKey, modelId, providerKey, url) {
104
163
  const timer = setTimeout(() => ctrl.abort(), PING_TIMEOUT)
105
164
  const t0 = performance.now()
106
165
  try {
107
- const req = buildPingRequest(apiKey, modelId, providerKey, url)
108
- const resp = await fetch(req.url, {
109
- method: 'POST', signal: ctrl.signal,
110
- headers: req.headers,
111
- body: JSON.stringify(req.body),
112
- })
166
+ let req = buildPingRequest(apiKey, modelId, providerKey, url)
167
+ let resp = await sendPingFetch(req, ctrl.signal)
168
+ if (await isDisabledThinkingRejected(resp, req)) {
169
+ markDisabledThinkingUnsupported(providerKey)
170
+ req = buildPingRequest(apiKey, modelId, providerKey, url, { disableThinking: false })
171
+ resp = await sendPingFetch(req, ctrl.signal)
172
+ }
113
173
  // 📖 Normalize all HTTP 2xx statuses to "200" so existing verdict/avg logic still works.
114
174
  const code = resp.status >= 200 && resp.status < 300 ? '200' : String(resp.status)
115
175
  return {
@@ -202,7 +202,7 @@ export function calculateViewport(terminalRows, scrollOffset, totalModels, lineB
202
202
  // 📖 Non-favorites: active sort column/direction.
203
203
  // 📖 Models that are both recommended AND favorite — show in recommended section.
204
204
  // 📖 pinFavorites=false keeps favorites highlighted but lets normal sort/filter order apply.
205
- export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirection, { pinFavorites = true } = {}) {
205
+ export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirection, { pinFavorites = true, benchmarkResults = {} } = {}) {
206
206
  if (!pinFavorites) {
207
207
  const recommendedRows = results
208
208
  .filter((r) => r.isRecommended)
@@ -210,7 +210,8 @@ export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirectio
210
210
  const nonRecommendedRows = sortResults(
211
211
  results.filter((r) => !r.isRecommended),
212
212
  sortColumn,
213
- sortDirection
213
+ sortDirection,
214
+ { benchmarkResults }
214
215
  )
215
216
  return [...recommendedRows, ...nonRecommendedRows]
216
217
  }
@@ -224,7 +225,7 @@ export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirectio
224
225
  const bothRows = results
225
226
  .filter((r) => r.isRecommended && r.isFavorite)
226
227
  .sort((a, b) => (b.recommendScore || 0) - (a.recommendScore || 0))
227
- const nonSpecialRows = sortResults(results.filter((r) => !r.isFavorite && !r.isRecommended), sortColumn, sortDirection)
228
+ const nonSpecialRows = sortResults(results.filter((r) => !r.isFavorite && !r.isRecommended), sortColumn, sortDirection, { benchmarkResults })
228
229
  return [...bothRows, ...recommendedRows, ...favoriteRows, ...nonSpecialRows]
229
230
  }
230
231