@khanglvm/llm-router 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
10
10
  ### Fixed
11
11
  - Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
12
12
 
13
+ ## [2.0.1] - 2026-03-15
14
+
15
+ ### Fixed
16
+ - Fixed alias-route failover after transient upstream failures. When every candidate on a route was only in cooldown, the balancer now retries the earliest-recovering candidate instead of returning `No eligible providers remain for route ...`.
17
+
13
18
  ## [2.0.0] - 2026-03-15
14
19
 
15
20
  ### Changed
package/README.md CHANGED
@@ -14,7 +14,7 @@ The primary CLI command is now:
14
14
  llr
15
15
  ```
16
16
 
17
- `2.0.0` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
17
+ `2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
18
18
 
19
19
  ## Install
20
20
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.0.0",
3
+ "version": "2.0.1",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -186,6 +186,19 @@ function sortEntriesByOriginalOrder(left, right) {
186
186
  return left.originalIndex - right.originalIndex;
187
187
  }
188
188
 
189
/**
 * Comparator for `Array.prototype.sort` that orders entries by ascending
 * `openUntil`, so the entry with the smallest `openUntil` sorts first.
 * Entries with the same `openUntil` are ordered by
 * `sortEntriesByOriginalOrder`.
 *
 * An `openUntil` that does not coerce to a number (missing, `undefined`,
 * non-numeric) is ranked after every numeric value. A plain subtraction would
 * return `NaN` for such a pair, which `sort` treats as "equal" and which makes
 * the comparator inconsistent across pairs.
 *
 * NOTE(review): `openUntil` is presumably the timestamp at which a candidate's
 * cooldown ends — confirm against the code that populates the entries.
 *
 * @param {{ openUntil?: number }} left - First entry to compare.
 * @param {{ openUntil?: number }} right - Second entry to compare.
 * @returns {number} Negative when `left` sorts first, positive when `right`
 *   sorts first, otherwise the result of `sortEntriesByOriginalOrder`.
 */
function sortCooldownEntries(left, right) {
  // Map anything that is not a number to +Infinity so it sorts last instead
  // of producing NaN in the subtraction below.
  const toSortKey = (value) => {
    const numeric = Number(value);
    return Number.isNaN(numeric) ? Number.POSITIVE_INFINITY : numeric;
  };

  const leftKey = toSortKey(left.openUntil);
  const rightKey = toSortKey(right.openUntil);

  // Equal keys (including two unknown ones) fall back to the original order;
  // checking equality first also avoids `Infinity - Infinity`.
  if (leftKey === rightKey) {
    return sortEntriesByOriginalOrder(left, right);
  }
  return leftKey - rightKey;
}
195
+
196
/**
 * Reports whether an entry is ineligible solely because of cooldown.
 *
 * The result is `true` only when all of the following hold:
 *   - `entry` is truthy and `entry.eligible` is falsy;
 *   - `entry.skipReasons` is a non-empty array;
 *   - every reason in that array is exactly the string "cooldown".
 *
 * @param {{ eligible?: boolean, skipReasons?: unknown } | null | undefined} entry
 *   Entry to inspect.
 * @returns {boolean} `true` when cooldown is the only recorded skip reason.
 */
function isCooldownOnlyEntry(entry) {
  if (!entry) {
    return false;
  }
  if (entry.eligible) {
    return false;
  }

  const recordedReasons = entry.skipReasons;
  // A missing, non-array, or empty reason list does not count as cooldown-only.
  if (!Array.isArray(recordedReasons) || recordedReasons.length === 0) {
    return false;
  }

  const hasOtherReason = recordedReasons.some((reason) => reason !== "cooldown");
  return !hasOtherReason;
}
201
+
189
202
  async function buildCandidateEntries({
190
203
  candidates,
191
204
  stateStore,
@@ -315,6 +328,19 @@ export async function rankRouteCandidates({
315
328
  const ineligibleEntries = entries
316
329
  .filter((entry) => !entry.eligible)
317
330
  .sort(sortEntriesByOriginalOrder);
331
+ const fallbackCooldownEntries = eligibleEntries.length === 0
332
+ ? ineligibleEntries
333
+ .filter((entry) => isCooldownOnlyEntry(entry))
334
+ .sort(sortCooldownEntries)
335
+ .map((entry) => ({
336
+ ...entry,
337
+ eligible: true,
338
+ skipReasons: [...entry.skipReasons, "cooldown-overridden"]
339
+ }))
340
+ : [];
341
+ const skippedIneligibleEntries = fallbackCooldownEntries.length > 0
342
+ ? ineligibleEntries.filter((entry) => !isCooldownOnlyEntry(entry))
343
+ : ineligibleEntries;
318
344
  const estimatedRequiredTokens = normalizeNonNegativeInteger(
319
345
  requestContext?.estimatedRequiredTokens ??
320
346
  requestContext?.requiredTokens ??
@@ -324,10 +350,13 @@ export async function rankRouteCandidates({
324
350
  const routeCursor = stateStore
325
351
  ? await stateStore.getRouteCursor(resolvedRouteKey)
326
352
  : 0;
353
+ const rankableEntries = fallbackCooldownEntries.length > 0
354
+ ? fallbackCooldownEntries
355
+ : eligibleEntries;
327
356
  const contextAwareGroups = shouldApplyContextAwareOrdering(route, estimatedRequiredTokens)
328
- ? partitionEligibleEntriesByContextWindow(eligibleEntries, estimatedRequiredTokens)
357
+ ? partitionEligibleEntriesByContextWindow(rankableEntries, estimatedRequiredTokens)
329
358
  : {
330
- prioritizedEntries: eligibleEntries,
359
+ prioritizedEntries: rankableEntries,
331
360
  deferredEntries: []
332
361
  };
333
362
  const ranking = rankEligibleEntries(
@@ -339,7 +368,7 @@ export async function rankRouteCandidates({
339
368
  const rankedEntries = [
340
369
  ...ranking.orderedEligible,
341
370
  ...contextAwareGroups.deferredEntries,
342
- ...ineligibleEntries
371
+ ...skippedIneligibleEntries
343
372
  ];
344
373
 
345
374
  return {
@@ -351,7 +380,7 @@ export async function rankRouteCandidates({
351
380
  shouldAdvanceCursor: ranking.shouldAdvanceCursor,
352
381
  entries: rankedEntries,
353
382
  selectedEntry: ranking.orderedEligible[0] || null,
354
- skippedEntries: ineligibleEntries,
383
+ skippedEntries: skippedIneligibleEntries,
355
384
  rankedCandidates: rankedEntries.map((entry) => entry.candidate)
356
385
  };
357
386
  }