npm - free-coding-models - Versions diffs - 0.3.79 → 0.4.0 - Mend

free-coding-models 0.3.79 → 0.4.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (21)

package/README.md +4 -0
package/changelog/v0.3.80.md +9 -0
package/changelog/v0.3.81.md +14 -0
package/changelog/v0.4.0.md +23 -0
package/package.json +1 -1
package/src/app.js +19 -1
package/src/benchmark.js +102 -52
package/src/cli-help.js +1 -1
package/src/config.js +3 -1
package/src/key-handler.js +82 -8
package/src/overlays.js +15 -4
package/src/ping.js +69 -9
package/src/render-helpers.js +4 -3
package/src/render-table.js +222 -101
package/src/router-daemon.js +6 -7
package/src/setup.js +16 -6
package/src/theme.js +1 -1
package/src/tui-state.js +5 -0
package/src/utils.js +25 -1
package/web/dist/assets/{index-DCD5slDY.js → index-Dr33-jga.js} +1 -1
package/web/dist/index.html +1 -1

package/src/overlays.js CHANGED

@@ -118,7 +118,8 @@ export function createOverlayRenderers(state, deps) {
     const updateRowIdx = providerKeys.length
     const themeRowIdx = updateRowIdx + 1
     const favoritesModeRowIdx = themeRowIdx + 1
-    const cleanupLegacyProxyRowIdx = favoritesModeRowIdx + 1
+    const startupAiSpeedScanRowIdx = favoritesModeRowIdx + 1
+    const cleanupLegacyProxyRowIdx = startupAiSpeedScanRowIdx + 1
     const changelogViewRowIdx = cleanupLegacyProxyRowIdx + 1
     const shellEnvRowIdx = changelogViewRowIdx + 1
     const EL = '\x1b[K'
@@ -259,6 +260,15 @@ export function createOverlayRenderers(state, deps) {
     cursorLineByRow[favoritesModeRowIdx] = lines.length
     lines.push(state.settingsCursor === favoritesModeRowIdx ? themeColors.bgCursorSettingsList(favoritesModeRow) : favoritesModeRow)
+    // 📖 Startup AI Speed Scan row controls the opt-in Ctrl+U auto-run at launch.
+    const startupAiSpeedScanEnabled = state.config.settings?.runAiSpeedTestOnStartup === true
+    const startupAiSpeedScanStatus = startupAiSpeedScanEnabled
+      ? themeColors.successBold('✅ Enabled — runs Ctrl+U after startup')
+      : themeColors.dim('❌ Disabled — manual Ctrl+U only')
+    const startupAiSpeedScanRow = `${bullet(state.settingsCursor === startupAiSpeedScanRowIdx)}${themeColors.textBold('Startup AI Speed Scan').padEnd(44)} ${startupAiSpeedScanStatus}`
+    cursorLineByRow[startupAiSpeedScanRowIdx] = lines.length
+    lines.push(state.settingsCursor === startupAiSpeedScanRowIdx ? themeColors.bgCursorSettingsList(startupAiSpeedScanRow) : startupAiSpeedScanRow)
     if (updateState === 'error' && state.settingsUpdateError) {
       lines.push(themeColors.error(`      ${state.settingsUpdateError}`))
     }
@@ -290,7 +300,7 @@ export function createOverlayRenderers(state, deps) {
     if (state.settingsEditMode) {
       lines.push(themeColors.dim('  Type API key  •  Enter Save  •  Esc Cancel'))
     } else {
-      lines.push(themeColors.dim('  ↑↓ Navigate  •  Enter Edit/Run/Cycle  •  + Add key  •  - Remove key  •  Space Toggle/Cycle  •  T Test key  •  U Updates  •  G Global theme  •  Y Favorites mode  •  Esc Close'))
+      lines.push(themeColors.dim('  ↑↓ Navigate  •  Enter Edit/Run/Cycle  •  + Add key  •  - Remove key  •  Space Toggle/Cycle  •  T Test key  •  U Updates  •  G Theme  •  Y Favorites  •  Esc Close'))
     }
     // 📖 Show sync/restore status message if set
     if (state.settingsSyncStatus) {
@@ -897,10 +907,10 @@ export function createOverlayRenderers(state, deps) {
     lines.push(`  ${label('Provider')}    Provider source (NIM, Groq, Cerebras, etc.)  ${hint('Sort:')} ${key('O')}  ${hint('Cycle:')} ${key('D')}`)
     lines.push(`              ${hint('Same model on different providers can have very different speed and uptime.')}`)
     lines.push('')
-    lines.push(`  ${label('Latest')}      Most recent ping response time (ms)  ${hint('Sort:')} ${key('L')}`)
+    lines.push(`  ${label('Last Ping')}   Most recent ping response time (ms)  ${hint('Sort:')} ${key('L')}`)
     lines.push(`              ${hint('Shows how fast the server is responding right now — useful to catch live slowdowns.')}`)
     lines.push('')
-    lines.push(`  ${label('Avg Ping')}    Average response time across all measurable pings (200 + 401) (ms)  ${hint('Sort:')} ${key('A')}`)
+    lines.push(`  ${label('Avg Ping')}   Average response time across all measurable pings (200 + 401) (ms)  ${hint('Sort:')} ${key('A')}`)
     lines.push(`              ${hint('The long-term truth. Even without a key, a 401 still gives real latency so the average stays useful.')}`)
     lines.push('')
     lines.push(`  ${label('Health')}      Live status: ✅ UP / 🔥 429 / ⏳ TIMEOUT / ❌ ERR / 🔑 NO KEY  ${hint('Sort:')} ${key('H')}`)
@@ -931,6 +941,7 @@ export function createOverlayRenderers(state, deps) {
     lines.push(`  ${key('W')}  Toggle ping mode  ${hint('(speed 2s → normal 10s → slow 30s → forced 4s)')}`)
     lines.push(`  ${key('Ctrl+P')}  Open ⚡️ command palette  ${hint('(search and run actions quickly)')}`)
     lines.push(`  ${key('Ctrl+A')}  AI Speed Test  ${hint('(benchmark selected model → time + TPS)')}`)
+    lines.push(`  ${key('Ctrl+U')}  Global AI Speed Test  ${hint('(benchmark all models; Settings can auto-run it on startup)')}`)
     lines.push(`  ${key('E')}  Cycle filter mode  ${hint('(Normal → Configured only → Usable only)')}`)
     lines.push(`  ${key('Z')}  Cycle tool mode  ${hint('(📦 OpenCode → π Pi → 🪼 jcode → 📦 Desktop → 🦞 OpenClaw → 💘 Crush → 🪿 Goose → 🛠 Aider → 🐉 Qwen → 🤲 OpenHands → ⚡ Amp → 🦘 Rovo → ♊ Gemini)')}`)
     lines.push(`  ${key('F')}  Toggle favorite on selected row  ${hint('(1️⃣2️⃣3️⃣ = router fallback order, capped at 🔟)')}`)

package/src/ping.js CHANGED

@@ -16,6 +16,9 @@
  *
  *   → Functions:
  *   - `resolveCloudflareUrl`: Resolve {account_id} placeholder from CLOUDFLARE_ACCOUNT_ID env var
+ *   - `buildChatCompletionPingBody`: Build minimal chat-completion probe payloads with thinking disabled
+ *   - `markDisabledThinkingUnsupported`: Cache strict providers that reject the optional thinking control
+ *   - `shouldUseDisabledThinkingForProvider`: Decide whether a provider should receive disabled-thinking probes
  *   - `buildPingRequest`: Build provider-specific HTTP request for pinging
  *   - `ping`: Send async ping request with timeout; returns { code, ms, quotaPercent }
  *   - `getHeaderValue`: Helper to extract header value from Headers object or plain object
@@ -41,6 +44,9 @@ import { PING_TIMEOUT } from './constants.js'
 import { fetchProviderQuota as _fetchProviderQuotaFromModule } from './provider-quota-fetchers.js'
 import { supportsUsagePercent } from './quota-capabilities.js'
+const DISABLED_THINKING_RETRY_STATUSES = new Set([400, 422])
+const disabledThinkingUnsupportedProviders = new Set()
 // 📖 resolveCloudflareUrl: Cloudflare's OpenAI-compatible endpoint is account-scoped.
 // 📖 We resolve {account_id} from env so provider setup can stay simple in config.
 export function resolveCloudflareUrl(url) {
@@ -50,10 +56,37 @@ export function resolveCloudflareUrl(url) {
   return url.replace('{account_id}', encodeURIComponent(accountId))
 }
+// 📖 buildChatCompletionPingBody: Use the smallest useful chat-completion probe.
+// 📖 The explicit thinking toggle prevents reasoning-capable endpoints from spending
+// 📖 hidden tokens or adding thinking latency when we only need availability + RTT.
+export function buildChatCompletionPingBody(modelId, overrides = {}, options = {}) {
+  const body = {
+    model: modelId,
+    messages: [{ role: 'user', content: 'hi' }],
+    max_tokens: 1,
+    thinking: { type: 'disabled' },
+    ...overrides,
+  }
+  if (options.disableThinking === false) delete body.thinking
+  return body
+}
+// 📖 markDisabledThinkingUnsupported: remember strict providers that reject the
+// 📖 optional `thinking` field so future pings avoid repeated 400/422 retries.
+export function markDisabledThinkingUnsupported(providerKey) {
+  if (providerKey) disabledThinkingUnsupportedProviders.add(providerKey)
+}
+// 📖 shouldUseDisabledThinkingForProvider: central policy for OpenAI-compatible
+// 📖 probes, shared by regular pings and router health probes.
+export function shouldUseDisabledThinkingForProvider(providerKey) {
+  return !disabledThinkingUnsupportedProviders.has(providerKey)
+}
 // 📖 buildPingRequest: Build provider-specific ping request.
 // 📖 Handles Replicate's /v1/predictions format, Cloudflare's account_id in URL,
 // 📖 and standard OpenAI-compliant chat completions with provider-specific headers.
-export function buildPingRequest(apiKey, modelId, providerKey, url) {
+export function buildPingRequest(apiKey, modelId, providerKey, url, options = {}) {
   // 📖 ZAI models are stored as "zai/glm-..." in sources.js but the API expects just "glm-..."
   const apiModelId = providerKey === 'zai' ? modelId.replace(/^zai\//, '') : modelId
@@ -75,7 +108,9 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
     return {
       url: resolveCloudflareUrl(url),
       headers,
-      body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
+      body: buildChatCompletionPingBody(apiModelId, {}, {
+        disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
+      }),
     }
   }
@@ -90,7 +125,31 @@ export function buildPingRequest(apiKey, modelId, providerKey, url) {
   return {
     url,
     headers,
-    body: { model: apiModelId, messages: [{ role: 'user', content: 'hi' }], max_tokens: 1 },
+    body: buildChatCompletionPingBody(apiModelId, {}, {
+      disableThinking: options.disableThinking ?? shouldUseDisabledThinkingForProvider(providerKey),
+    }),
+  }
+}
+// 📖 sendPingFetch: keep retry code tiny and ensure both attempts use the same abort signal.
+async function sendPingFetch(req, signal) {
+  return fetch(req.url, {
+    method: 'POST', signal,
+    headers: req.headers,
+    body: JSON.stringify(req.body),
+  })
+}
+// 📖 isDisabledThinkingRejected: strict OpenAI-compatible gateways may reject
+// 📖 unknown root fields. We only retry when the status and error text names
+// 📖 the optional `thinking` control, avoiding retries for real model failures.
+async function isDisabledThinkingRejected(resp, req) {
+  if (!req?.body?.thinking || !DISABLED_THINKING_RETRY_STATUSES.has(resp.status)) return false
+  try {
+    const text = await resp.clone().text()
+    return /thinking/i.test(text)
+  } catch {
+    return false
   }
 }
@@ -104,12 +163,13 @@ export async function ping(apiKey, modelId, providerKey, url) {
   const timer = setTimeout(() => ctrl.abort(), PING_TIMEOUT)
   const t0    = performance.now()
   try {
-    const req = buildPingRequest(apiKey, modelId, providerKey, url)
-    const resp = await fetch(req.url, {
-      method: 'POST', signal: ctrl.signal,
-      headers: req.headers,
-      body: JSON.stringify(req.body),
-    })
+    let req = buildPingRequest(apiKey, modelId, providerKey, url)
+    let resp = await sendPingFetch(req, ctrl.signal)
+    if (await isDisabledThinkingRejected(resp, req)) {
+      markDisabledThinkingUnsupported(providerKey)
+      req = buildPingRequest(apiKey, modelId, providerKey, url, { disableThinking: false })
+      resp = await sendPingFetch(req, ctrl.signal)
+    }
     // 📖 Normalize all HTTP 2xx statuses to "200" so existing verdict/avg logic still works.
     const code = resp.status >= 200 && resp.status < 300 ? '200' : String(resp.status)
     return {

package/src/render-helpers.js CHANGED

@@ -202,7 +202,7 @@ export function calculateViewport(terminalRows, scrollOffset, totalModels, lineB
 // 📖 Non-favorites: active sort column/direction.
 // 📖 Models that are both recommended AND favorite — show in recommended section.
 // 📖 pinFavorites=false keeps favorites highlighted but lets normal sort/filter order apply.
-export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirection, { pinFavorites = true } = {}) {
+export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirection, { pinFavorites = true, benchmarkResults = {} } = {}) {
   if (!pinFavorites) {
     const recommendedRows = results
       .filter((r) => r.isRecommended)
@@ -210,7 +210,8 @@ export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirectio
     const nonRecommendedRows = sortResults(
       results.filter((r) => !r.isRecommended),
       sortColumn,
-      sortDirection
+      sortDirection,
+      { benchmarkResults }
     )
     return [...recommendedRows, ...nonRecommendedRows]
   }
@@ -224,7 +225,7 @@ export function sortResultsWithPinnedFavorites(results, sortColumn, sortDirectio
   const bothRows = results
     .filter((r) => r.isRecommended && r.isFavorite)
     .sort((a, b) => (b.recommendScore || 0) - (a.recommendScore || 0))
-  const nonSpecialRows = sortResults(results.filter((r) => !r.isFavorite && !r.isRecommended), sortColumn, sortDirection)
+  const nonSpecialRows = sortResults(results.filter((r) => !r.isFavorite && !r.isRecommended), sortColumn, sortDirection, { benchmarkResults })
   return [...bothRows, ...recommendedRows, ...favoriteRows, ...nonSpecialRows]
 }