@yadimon/prio-llm-router 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -2
- package/dist/index.cjs +74 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +74 -20
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -18,6 +18,7 @@ The package keeps the routing logic intentionally small and predictable while re
|
|
|
18
18
|
- Optional source builders for source-centric setup and strict free policies
|
|
19
19
|
- Non-streaming text generation and optional streaming
|
|
20
20
|
- Optional debug mode that mirrors attempt hooks to the console
|
|
21
|
+
- Per-request and router-level attempt timeouts for clean fallback
|
|
21
22
|
- Built-in support for `google`, `openrouter`, `groq`, `mistral`, `cohere`, `perplexity`, `xai`, `togetherai`, `openai`, `anthropic`, `deepseek`, and generic `openai-compatible`
|
|
22
23
|
- Strict TypeScript types
|
|
23
24
|
- Hook points for attempt-level logging and telemetry
|
|
@@ -118,6 +119,7 @@ const router = createLlmRouter({
|
|
|
118
119
|
|
|
119
120
|
const result = await router.generateText({
|
|
120
121
|
prompt: 'Summarize the advantages of priority-based model routing in 3 bullets.',
|
|
122
|
+
attemptTimeoutMs: 12000,
|
|
121
123
|
});
|
|
122
124
|
|
|
123
125
|
console.log(result.text);
|
|
@@ -243,6 +245,8 @@ console.log(final.target.name);
|
|
|
243
245
|
|
|
244
246
|
Use `firstChunkTimeoutMs` when you want "switch if nothing starts quickly enough" behavior. If you omit it, the router waits indefinitely for the first chunk of the current target.
|
|
245
247
|
|
|
248
|
+
You can also use `attemptTimeoutMs` as the shared timeout for normal requests and streaming first-chunk fallback.
|
|
249
|
+
|
|
246
250
|
This makes the behavior safe for chat UIs:
|
|
247
251
|
|
|
248
252
|
- no silent model switch after the answer has already started
|
|
@@ -307,6 +311,29 @@ Common model-level fields:
|
|
|
307
311
|
- `tier`
|
|
308
312
|
- `metadata`
|
|
309
313
|
|
|
314
|
+
## Attempt Timeouts
|
|
315
|
+
|
|
316
|
+
Use `attemptTimeoutMs` on a request when a single model attempt should fail and fall through after a fixed time:
|
|
317
|
+
|
|
318
|
+
```ts
|
|
319
|
+
const result = await router.generateText({
|
|
320
|
+
prompt: 'Write a short answer.',
|
|
321
|
+
attemptTimeoutMs: 8000,
|
|
322
|
+
});
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Or set a router-level default:
|
|
326
|
+
|
|
327
|
+
```ts
|
|
328
|
+
const router = createLlmRouter({
|
|
329
|
+
defaultAttemptTimeoutMs: 12000,
|
|
330
|
+
providers,
|
|
331
|
+
models,
|
|
332
|
+
});
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
Timeouts become normal failed attempts with `error.name === 'AttemptTimeoutError'`, so they appear in `attempts` and fire `onAttemptFailure(...)` like other execution failures.
|
|
336
|
+
|
|
310
337
|
## Debug Mode And Hooks
|
|
311
338
|
|
|
312
339
|
Use `debug: true` when you want the router to mirror attempt hooks to the console during development.
|
|
@@ -359,6 +386,8 @@ Use `openai-compatible` when you have an OpenAI-style endpoint that is not cover
|
|
|
359
386
|
}
|
|
360
387
|
```
|
|
361
388
|
|
|
389
|
+
`openai-compatible` is also the one built-in provider type that may use an empty API key for local or internal backends. When the key is empty, the router allows the config and creates the adapter without an `Authorization` header.
|
|
390
|
+
|
|
362
391
|
If you prefer typed helpers over raw provider objects, use:
|
|
363
392
|
|
|
364
393
|
```ts
|
|
@@ -389,7 +418,7 @@ const router = createLlmRouter({
|
|
|
389
418
|
providerLabel: 'lm-studio',
|
|
390
419
|
auth: {
|
|
391
420
|
mode: 'single',
|
|
392
|
-
apiKey: '
|
|
421
|
+
apiKey: '',
|
|
393
422
|
},
|
|
394
423
|
}).provider,
|
|
395
424
|
],
|
|
@@ -414,7 +443,7 @@ Notes:
|
|
|
414
443
|
|
|
415
444
|
- for LM Studio, enable the OpenAI-compatible local API before using this config
|
|
416
445
|
- the local server still needs to expose an OpenAI-compatible HTTP API
|
|
417
|
-
- the package
|
|
446
|
+
- the package allows an empty `apiKey` for `openai-compatible`, so local runtimes can use `''` when they do not require auth
|
|
418
447
|
- the `model` value must match the local model name exposed by your runtime
|
|
419
448
|
|
|
420
449
|
For a focused local-setup guide, see [Local Providers](./docs/local-providers.md).
|
|
@@ -443,6 +472,7 @@ Main exports:
|
|
|
443
472
|
- `createLlmRouter`
|
|
444
473
|
- `PrioLlmRouter`
|
|
445
474
|
- `createDefaultTextGenerationExecutor`
|
|
475
|
+
- `AttemptTimeoutError`
|
|
446
476
|
- `createOpenRouterConnection`
|
|
447
477
|
- `createOpenRouterFreeSource`
|
|
448
478
|
- `createOpenAICompatibleConnection`
|
package/dist/index.cjs
CHANGED
|
@@ -23,6 +23,13 @@ var PrioLlmRouterError = class extends Error {
|
|
|
23
23
|
};
|
|
24
24
|
var RouterConfigurationError = class extends PrioLlmRouterError {
|
|
25
25
|
};
|
|
26
|
+
var AttemptTimeoutError = class extends PrioLlmRouterError {
|
|
27
|
+
timeoutMs;
|
|
28
|
+
constructor(timeoutMs) {
|
|
29
|
+
super(`Model attempt timed out after ${timeoutMs}ms`);
|
|
30
|
+
this.timeoutMs = timeoutMs;
|
|
31
|
+
}
|
|
32
|
+
};
|
|
26
33
|
var AllModelsFailedError = class extends PrioLlmRouterError {
|
|
27
34
|
attempts;
|
|
28
35
|
constructor(attempts, cause) {
|
|
@@ -171,7 +178,7 @@ function buildBaseTextCallOptions({
|
|
|
171
178
|
}
|
|
172
179
|
function createProviderHandle(provider) {
|
|
173
180
|
const apiKey = provider.auth.apiKey.trim();
|
|
174
|
-
if (!apiKey) {
|
|
181
|
+
if (!apiKey && provider.type !== "openai-compatible") {
|
|
175
182
|
throw new RouterConfigurationError(
|
|
176
183
|
`Provider "${provider.name}" is missing an API key.`
|
|
177
184
|
);
|
|
@@ -252,9 +259,11 @@ function createProviderHandle(provider) {
|
|
|
252
259
|
case "openai-compatible": {
|
|
253
260
|
const options = {
|
|
254
261
|
name: provider.providerLabel ?? provider.name,
|
|
255
|
-
apiKey,
|
|
256
262
|
baseURL: provider.baseURL
|
|
257
263
|
};
|
|
264
|
+
if (apiKey) {
|
|
265
|
+
options.apiKey = apiKey;
|
|
266
|
+
}
|
|
258
267
|
if (provider.headers) {
|
|
259
268
|
options.headers = provider.headers;
|
|
260
269
|
}
|
|
@@ -422,6 +431,7 @@ var PrioLlmRouter = class {
|
|
|
422
431
|
providersByName = /* @__PURE__ */ new Map();
|
|
423
432
|
modelsByName = /* @__PURE__ */ new Map();
|
|
424
433
|
defaultChain;
|
|
434
|
+
defaultAttemptTimeoutMs;
|
|
425
435
|
executor;
|
|
426
436
|
hooks;
|
|
427
437
|
constructor(options) {
|
|
@@ -437,6 +447,7 @@ var PrioLlmRouter = class {
|
|
|
437
447
|
);
|
|
438
448
|
}
|
|
439
449
|
this.defaultChain = normalized.defaultChain;
|
|
450
|
+
this.defaultAttemptTimeoutMs = normalized.defaultAttemptTimeoutMs;
|
|
440
451
|
this.hooks = createRouterHooks(options.hooks, options.debug === true);
|
|
441
452
|
this.executor = options.executor ?? (options.defaultProviderMaxRetries === void 0 ? createDefaultTextGenerationExecutor() : createDefaultTextGenerationExecutor({
|
|
442
453
|
defaultProviderMaxRetries: options.defaultProviderMaxRetries
|
|
@@ -495,12 +506,24 @@ var PrioLlmRouter = class {
|
|
|
495
506
|
pendingAttempt.tier = model.tier;
|
|
496
507
|
}
|
|
497
508
|
this.hooks?.onAttemptStart?.(pendingAttempt);
|
|
509
|
+
const { controller, cleanup, parentAborted } = createLinkedAbortController(
|
|
510
|
+
request.abortSignal
|
|
511
|
+
);
|
|
498
512
|
try {
|
|
499
|
-
const result = await this.executor.execute({
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
513
|
+
const result = await this.executeAttemptWithTimeout({
|
|
514
|
+
execute: () => this.executor.execute({
|
|
515
|
+
provider,
|
|
516
|
+
model,
|
|
517
|
+
request: {
|
|
518
|
+
...request,
|
|
519
|
+
abortSignal: controller.signal
|
|
520
|
+
}
|
|
521
|
+
}),
|
|
522
|
+
timeoutMs: request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
|
|
523
|
+
abortController: controller,
|
|
524
|
+
parentAborted
|
|
503
525
|
});
|
|
526
|
+
cleanup();
|
|
504
527
|
const finishedAt = /* @__PURE__ */ new Date();
|
|
505
528
|
const attemptRecord = {
|
|
506
529
|
...pendingAttempt,
|
|
@@ -525,7 +548,8 @@ var PrioLlmRouter = class {
|
|
|
525
548
|
}
|
|
526
549
|
return response;
|
|
527
550
|
} catch (error) {
|
|
528
|
-
|
|
551
|
+
cleanup();
|
|
552
|
+
if (isAbortError(error) && parentAborted()) {
|
|
529
553
|
throw error;
|
|
530
554
|
}
|
|
531
555
|
const finishedAt = /* @__PURE__ */ new Date();
|
|
@@ -571,7 +595,7 @@ var PrioLlmRouter = class {
|
|
|
571
595
|
const iterator = streamResult.textStream[Symbol.asyncIterator]();
|
|
572
596
|
const firstChunk = await this.waitForFirstChunk({
|
|
573
597
|
iterator,
|
|
574
|
-
timeoutMs: request.firstChunkTimeoutMs,
|
|
598
|
+
timeoutMs: request.firstChunkTimeoutMs ?? request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
|
|
575
599
|
abortController: controller,
|
|
576
600
|
parentAborted
|
|
577
601
|
});
|
|
@@ -702,7 +726,7 @@ var PrioLlmRouter = class {
|
|
|
702
726
|
if (timeoutMs === void 0) {
|
|
703
727
|
return nextPromise;
|
|
704
728
|
}
|
|
705
|
-
const timeoutError = createFirstChunkTimeoutError(timeoutMs);
|
|
729
|
+
const timeoutError = new AttemptTimeoutError(timeoutMs);
|
|
706
730
|
const timedRace = await Promise.race([
|
|
707
731
|
nextPromise.then(
|
|
708
732
|
(value) => ({ kind: "value", value }),
|
|
@@ -723,6 +747,33 @@ var PrioLlmRouter = class {
|
|
|
723
747
|
}
|
|
724
748
|
throw timedRace.error;
|
|
725
749
|
}
|
|
750
|
+
async executeAttemptWithTimeout(options) {
|
|
751
|
+
const { execute, timeoutMs, abortController, parentAborted } = options;
|
|
752
|
+
const executionPromise = execute();
|
|
753
|
+
if (timeoutMs === void 0) {
|
|
754
|
+
return executionPromise;
|
|
755
|
+
}
|
|
756
|
+
const timeoutError = new AttemptTimeoutError(timeoutMs);
|
|
757
|
+
const timedRace = await Promise.race([
|
|
758
|
+
executionPromise.then(
|
|
759
|
+
(value) => ({ kind: "value", value }),
|
|
760
|
+
(error) => ({ kind: "error", error })
|
|
761
|
+
),
|
|
762
|
+
delay(timeoutMs).then(() => ({ kind: "timeout" }))
|
|
763
|
+
]);
|
|
764
|
+
if (timedRace.kind === "value") {
|
|
765
|
+
return timedRace.value;
|
|
766
|
+
}
|
|
767
|
+
if (timedRace.kind === "timeout") {
|
|
768
|
+
abortController.abort(timeoutError);
|
|
769
|
+
void executionPromise.catch(() => void 0);
|
|
770
|
+
throw timeoutError;
|
|
771
|
+
}
|
|
772
|
+
if (isAbortError(timedRace.error) && parentAborted()) {
|
|
773
|
+
throw timedRace.error;
|
|
774
|
+
}
|
|
775
|
+
throw timedRace.error;
|
|
776
|
+
}
|
|
726
777
|
resolveExecutionChain(chain) {
|
|
727
778
|
if (chain?.length) {
|
|
728
779
|
return this.resolveNamedChain(chain);
|
|
@@ -803,7 +854,7 @@ var PrioLlmRouter = class {
|
|
|
803
854
|
"Provider configuration names must be non-empty."
|
|
804
855
|
);
|
|
805
856
|
}
|
|
806
|
-
if (!provider.auth.apiKey.trim()) {
|
|
857
|
+
if (!provider.auth.apiKey.trim() && provider.type !== "openai-compatible") {
|
|
807
858
|
throw new RouterConfigurationError(
|
|
808
859
|
`Provider "${provider.name}" requires a non-empty API key.`
|
|
809
860
|
);
|
|
@@ -825,7 +876,11 @@ function createLlmRouter(options) {
|
|
|
825
876
|
}
|
|
826
877
|
function resolveRouterConfig(options) {
|
|
827
878
|
if ("sources" in options) {
|
|
828
|
-
return compileSources(options.sources, options.defaultChain);
|
|
879
|
+
return compileSources(
|
|
880
|
+
options.sources,
|
|
881
|
+
options.defaultChain,
|
|
882
|
+
options.defaultAttemptTimeoutMs
|
|
883
|
+
);
|
|
829
884
|
}
|
|
830
885
|
const normalized = {
|
|
831
886
|
providers: options.providers,
|
|
@@ -834,6 +889,9 @@ function resolveRouterConfig(options) {
|
|
|
834
889
|
if (options.defaultChain !== void 0) {
|
|
835
890
|
normalized.defaultChain = options.defaultChain;
|
|
836
891
|
}
|
|
892
|
+
if (options.defaultAttemptTimeoutMs !== void 0) {
|
|
893
|
+
normalized.defaultAttemptTimeoutMs = options.defaultAttemptTimeoutMs;
|
|
894
|
+
}
|
|
837
895
|
return normalized;
|
|
838
896
|
}
|
|
839
897
|
function compareModels(left, right) {
|
|
@@ -844,7 +902,7 @@ function compareModels(left, right) {
|
|
|
844
902
|
}
|
|
845
903
|
return left.__index - right.__index;
|
|
846
904
|
}
|
|
847
|
-
function compileSources(sources, defaultChain) {
|
|
905
|
+
function compileSources(sources, defaultChain, defaultAttemptTimeoutMs) {
|
|
848
906
|
const providersByName = /* @__PURE__ */ new Map();
|
|
849
907
|
const models = [];
|
|
850
908
|
for (const source of sources) {
|
|
@@ -904,6 +962,9 @@ function compileSources(sources, defaultChain) {
|
|
|
904
962
|
if (defaultChain !== void 0) {
|
|
905
963
|
normalized.defaultChain = defaultChain;
|
|
906
964
|
}
|
|
965
|
+
if (defaultAttemptTimeoutMs !== void 0) {
|
|
966
|
+
normalized.defaultAttemptTimeoutMs = defaultAttemptTimeoutMs;
|
|
967
|
+
}
|
|
907
968
|
return normalized;
|
|
908
969
|
}
|
|
909
970
|
function assertMatchingSourceProvider(existingProvider, nextProvider) {
|
|
@@ -1037,13 +1098,6 @@ function createLinkedAbortController(parentSignal) {
|
|
|
1037
1098
|
parentAborted: () => abortedByParent
|
|
1038
1099
|
};
|
|
1039
1100
|
}
|
|
1040
|
-
function createFirstChunkTimeoutError(timeoutMs) {
|
|
1041
|
-
const error = new Error(
|
|
1042
|
-
`The first stream chunk did not arrive within ${timeoutMs}ms.`
|
|
1043
|
-
);
|
|
1044
|
-
error.name = "FirstChunkTimeoutError";
|
|
1045
|
-
return error;
|
|
1046
|
-
}
|
|
1047
1101
|
function createEmptyFirstChunkError(targetName) {
|
|
1048
1102
|
const error = new Error(
|
|
1049
1103
|
`Stream for target "${targetName}" completed before the first text chunk.`
|
|
@@ -1082,6 +1136,7 @@ function createRouterHooks(hooks, debug) {
|
|
|
1082
1136
|
}
|
|
1083
1137
|
|
|
1084
1138
|
exports.AllModelsFailedError = AllModelsFailedError;
|
|
1139
|
+
exports.AttemptTimeoutError = AttemptTimeoutError;
|
|
1085
1140
|
exports.PrioLlmRouter = PrioLlmRouter;
|
|
1086
1141
|
exports.PrioLlmRouterError = PrioLlmRouterError;
|
|
1087
1142
|
exports.RouterConfigurationError = RouterConfigurationError;
|