@khanglvm/llm-router 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/runtime/balancer.js +33 -4
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
10
10
|
### Fixed
|
|
11
11
|
- Raised the default inbound JSON body limit for OpenAI `/responses` requests from `1 MiB` to `8 MiB` while keeping other JSON routes at `1 MiB`. This prevents local `413 Request body too large` failures for Codex CLI and other Responses API clients carrying larger conversation state.
|
|
12
12
|
|
|
13
|
+
## [2.0.1] - 2026-03-15
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
- Fixed alias-route failover after transient upstream failures. When every candidate on a route was only in cooldown, the balancer now retries the earliest-recovering candidate instead of returning `No eligible providers remain for route ...`.
|
|
17
|
+
|
|
13
18
|
## [2.0.0] - 2026-03-15
|
|
14
19
|
|
|
15
20
|
### Changed
|
package/README.md
CHANGED
|
@@ -14,7 +14,7 @@ The primary CLI command is now:
|
|
|
14
14
|
llr
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
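`2.0.0` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.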
|
|
17
|
+
`2.0.1` is the current public release. It includes the Web UI, AMP routing, and coding-tool integrations introduced in the 2.x line.
|
|
18
18
|
|
|
19
19
|
## Install
|
|
20
20
|
|
package/package.json
CHANGED
package/src/runtime/balancer.js
CHANGED
|
@@ -186,6 +186,19 @@ function sortEntriesByOriginalOrder(left, right) {
|
|
|
186
186
|
return left.originalIndex - right.originalIndex;
|
|
187
187
|
}
|
|
188
188
|
|
|
189
|
+
function sortCooldownEntries(left, right) {
|
|
190
|
+
if (left.openUntil !== right.openUntil) {
|
|
191
|
+
return left.openUntil - right.openUntil;
|
|
192
|
+
}
|
|
193
|
+
return sortEntriesByOriginalOrder(left, right);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
function isCooldownOnlyEntry(entry) {
|
|
197
|
+
if (!entry || entry.eligible) return false;
|
|
198
|
+
const reasons = Array.isArray(entry.skipReasons) ? entry.skipReasons : [];
|
|
199
|
+
return reasons.length > 0 && reasons.every((reason) => reason === "cooldown");
|
|
200
|
+
}
|
|
201
|
+
|
|
189
202
|
async function buildCandidateEntries({
|
|
190
203
|
candidates,
|
|
191
204
|
stateStore,
|
|
@@ -315,6 +328,19 @@ export async function rankRouteCandidates({
|
|
|
315
328
|
const ineligibleEntries = entries
|
|
316
329
|
.filter((entry) => !entry.eligible)
|
|
317
330
|
.sort(sortEntriesByOriginalOrder);
|
|
331
|
+
const fallbackCooldownEntries = eligibleEntries.length === 0
|
|
332
|
+
? ineligibleEntries
|
|
333
|
+
.filter((entry) => isCooldownOnlyEntry(entry))
|
|
334
|
+
.sort(sortCooldownEntries)
|
|
335
|
+
.map((entry) => ({
|
|
336
|
+
...entry,
|
|
337
|
+
eligible: true,
|
|
338
|
+
skipReasons: [...entry.skipReasons, "cooldown-overridden"]
|
|
339
|
+
}))
|
|
340
|
+
: [];
|
|
341
|
+
const skippedIneligibleEntries = fallbackCooldownEntries.length > 0
|
|
342
|
+
? ineligibleEntries.filter((entry) => !isCooldownOnlyEntry(entry))
|
|
343
|
+
: ineligibleEntries;
|
|
318
344
|
const estimatedRequiredTokens = normalizeNonNegativeInteger(
|
|
319
345
|
requestContext?.estimatedRequiredTokens ??
|
|
320
346
|
requestContext?.requiredTokens ??
|
|
@@ -324,10 +350,13 @@ export async function rankRouteCandidates({
|
|
|
324
350
|
const routeCursor = stateStore
|
|
325
351
|
? await stateStore.getRouteCursor(resolvedRouteKey)
|
|
326
352
|
: 0;
|
|
353
|
+
const rankableEntries = fallbackCooldownEntries.length > 0
|
|
354
|
+
? fallbackCooldownEntries
|
|
355
|
+
: eligibleEntries;
|
|
327
356
|
const contextAwareGroups = shouldApplyContextAwareOrdering(route, estimatedRequiredTokens)
|
|
328
|
-
? partitionEligibleEntriesByContextWindow(eligibleEntries, estimatedRequiredTokens)
|
|
357
|
+
? partitionEligibleEntriesByContextWindow(rankableEntries, estimatedRequiredTokens)
|
|
329
358
|
: {
|
|
330
|
-
prioritizedEntries: eligibleEntries,
|
|
359
|
+
prioritizedEntries: rankableEntries,
|
|
331
360
|
deferredEntries: []
|
|
332
361
|
};
|
|
333
362
|
const ranking = rankEligibleEntries(
|
|
@@ -339,7 +368,7 @@ export async function rankRouteCandidates({
|
|
|
339
368
|
const rankedEntries = [
|
|
340
369
|
...ranking.orderedEligible,
|
|
341
370
|
...contextAwareGroups.deferredEntries,
|
|
342
|
-
...ineligibleEntries
|
|
371
|
+
...skippedIneligibleEntries
|
|
343
372
|
];
|
|
344
373
|
|
|
345
374
|
return {
|
|
@@ -351,7 +380,7 @@ export async function rankRouteCandidates({
|
|
|
351
380
|
shouldAdvanceCursor: ranking.shouldAdvanceCursor,
|
|
352
381
|
entries: rankedEntries,
|
|
353
382
|
selectedEntry: ranking.orderedEligible[0] || null,
|
|
354
|
-
skippedEntries: ineligibleEntries,
|
|
383
|
+
skippedEntries: skippedIneligibleEntries,
|
|
355
384
|
rankedCandidates: rankedEntries.map((entry) => entry.candidate)
|
|
356
385
|
};
|
|
357
386
|
}
|