@yadimon/prio-llm-router 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -2
- package/dist/index.cjs +94 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +10 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +94 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,6 +17,8 @@ The package keeps the routing logic intentionally small and predictable while re
|
|
|
17
17
|
- Separate provider config and model target config
|
|
18
18
|
- Optional source builders for source-centric setup and strict free policies
|
|
19
19
|
- Non-streaming text generation and optional streaming
|
|
20
|
+
- Optional debug mode that mirrors attempt hooks to the console
|
|
21
|
+
- Per-request and router-level attempt timeouts for clean fallback
|
|
20
22
|
- Built-in support for `google`, `openrouter`, `groq`, `mistral`, `cohere`, `perplexity`, `xai`, `togetherai`, `openai`, `anthropic`, `deepseek`, and generic `openai-compatible`
|
|
21
23
|
- Strict TypeScript types
|
|
22
24
|
- Hook points for attempt-level logging and telemetry
|
|
@@ -107,6 +109,7 @@ const router = createLlmRouter({
|
|
|
107
109
|
tier: 'paid',
|
|
108
110
|
},
|
|
109
111
|
],
|
|
112
|
+
debug: true,
|
|
110
113
|
hooks: {
|
|
111
114
|
onAttemptFailure(attempt) {
|
|
112
115
|
console.warn('LLM attempt failed:', attempt);
|
|
@@ -116,6 +119,7 @@ const router = createLlmRouter({
|
|
|
116
119
|
|
|
117
120
|
const result = await router.generateText({
|
|
118
121
|
prompt: 'Summarize the advantages of priority-based model routing in 3 bullets.',
|
|
122
|
+
attemptTimeoutMs: 12000,
|
|
119
123
|
});
|
|
120
124
|
|
|
121
125
|
console.log(result.text);
|
|
@@ -123,6 +127,8 @@ console.log(result.target);
|
|
|
123
127
|
console.log(result.attempts);
|
|
124
128
|
```
|
|
125
129
|
|
|
130
|
+
With `debug: true`, the router writes `attempt:start`, `attempt:success`, and `attempt:failure` events to the console while still calling your custom hooks.
|
|
131
|
+
|
|
126
132
|
## Basic Mental Model
|
|
127
133
|
|
|
128
134
|
There are two separate layers:
|
|
@@ -239,6 +245,8 @@ console.log(final.target.name);
|
|
|
239
245
|
|
|
240
246
|
Use `firstChunkTimeoutMs` when you want "switch if nothing starts quickly enough" behavior. If you omit it, the router waits indefinitely for the first chunk of the current target.
|
|
241
247
|
|
|
248
|
+
You can also use `attemptTimeoutMs` as the shared timeout for normal requests and streaming first-chunk fallback.
|
|
249
|
+
|
|
242
250
|
This makes the behavior safe for chat UIs:
|
|
243
251
|
|
|
244
252
|
- no silent model switch after the answer has already started
|
|
@@ -303,6 +311,49 @@ Common model-level fields:
|
|
|
303
311
|
- `tier`
|
|
304
312
|
- `metadata`
|
|
305
313
|
|
|
314
|
+
## Attempt Timeouts
|
|
315
|
+
|
|
316
|
+
Use `attemptTimeoutMs` on a request when a single model attempt should fail and fall through after a fixed time:
|
|
317
|
+
|
|
318
|
+
```ts
|
|
319
|
+
const result = await router.generateText({
|
|
320
|
+
prompt: 'Write a short answer.',
|
|
321
|
+
attemptTimeoutMs: 8000,
|
|
322
|
+
});
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Or set a router-level default:
|
|
326
|
+
|
|
327
|
+
```ts
|
|
328
|
+
const router = createLlmRouter({
|
|
329
|
+
defaultAttemptTimeoutMs: 12000,
|
|
330
|
+
providers,
|
|
331
|
+
models,
|
|
332
|
+
});
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
Timeouts become normal failed attempts with `error.name === 'AttemptTimeoutError'`, so they appear in `attempts` and fire `onAttemptFailure(...)` like other execution failures.
|
|
336
|
+
|
|
337
|
+
## Debug Mode And Hooks
|
|
338
|
+
|
|
339
|
+
Use `debug: true` when you want the router to mirror attempt hooks to the console during development.
|
|
340
|
+
|
|
341
|
+
```ts
|
|
342
|
+
const router = createLlmRouter({
|
|
343
|
+
debug: true,
|
|
344
|
+
providers,
|
|
345
|
+
models,
|
|
346
|
+
});
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
That debug mode is intentionally small:
|
|
350
|
+
|
|
351
|
+
- `console.log('[prio-llm-router] attempt:start', attempt)`
|
|
352
|
+
- `console.log('[prio-llm-router] attempt:success', attempt)`
|
|
353
|
+
- `console.error('[prio-llm-router] attempt:failure', attempt)`
|
|
354
|
+
|
|
355
|
+
If you also pass `hooks`, both stay active. Debug mode does not replace custom telemetry.
|
|
356
|
+
|
|
306
357
|
## Supported Providers
|
|
307
358
|
|
|
308
359
|
- `google`
|
|
@@ -335,6 +386,8 @@ Use `openai-compatible` when you have an OpenAI-style endpoint that is not cover
|
|
|
335
386
|
}
|
|
336
387
|
```
|
|
337
388
|
|
|
389
|
+
`openai-compatible` is also the one built-in provider type that may use an empty API key for local or internal backends. When the key is empty, the router allows the config and creates the adapter without an `Authorization` header.
|
|
390
|
+
|
|
338
391
|
If you prefer typed helpers over raw provider objects, use:
|
|
339
392
|
|
|
340
393
|
```ts
|
|
@@ -365,7 +418,7 @@ const router = createLlmRouter({
|
|
|
365
418
|
providerLabel: 'lm-studio',
|
|
366
419
|
auth: {
|
|
367
420
|
mode: 'single',
|
|
368
|
-
apiKey: '
|
|
421
|
+
apiKey: '',
|
|
369
422
|
},
|
|
370
423
|
}).provider,
|
|
371
424
|
],
|
|
@@ -390,7 +443,7 @@ Notes:
|
|
|
390
443
|
|
|
391
444
|
- for LM Studio, enable the OpenAI-compatible local API before using this config
|
|
392
445
|
- the local server still needs to expose an OpenAI-compatible HTTP API
|
|
393
|
-
- the package
|
|
446
|
+
- the package allows an empty `apiKey` for `openai-compatible`, so local runtimes can use `''` when they do not require auth
|
|
394
447
|
- the `model` value must match the local model name exposed by your runtime
|
|
395
448
|
|
|
396
449
|
For a focused local-setup guide, see [Local Providers](./docs/local-providers.md).
|
|
@@ -419,6 +472,7 @@ Main exports:
|
|
|
419
472
|
- `createLlmRouter`
|
|
420
473
|
- `PrioLlmRouter`
|
|
421
474
|
- `createDefaultTextGenerationExecutor`
|
|
475
|
+
- `AttemptTimeoutError`
|
|
422
476
|
- `createOpenRouterConnection`
|
|
423
477
|
- `createOpenRouterFreeSource`
|
|
424
478
|
- `createOpenAICompatibleConnection`
|
package/dist/index.cjs
CHANGED
|
@@ -23,6 +23,13 @@ var PrioLlmRouterError = class extends Error {
|
|
|
23
23
|
};
|
|
24
24
|
var RouterConfigurationError = class extends PrioLlmRouterError {
|
|
25
25
|
};
|
|
26
|
+
var AttemptTimeoutError = class extends PrioLlmRouterError {
|
|
27
|
+
timeoutMs;
|
|
28
|
+
constructor(timeoutMs) {
|
|
29
|
+
super(`Model attempt timed out after ${timeoutMs}ms`);
|
|
30
|
+
this.timeoutMs = timeoutMs;
|
|
31
|
+
}
|
|
32
|
+
};
|
|
26
33
|
var AllModelsFailedError = class extends PrioLlmRouterError {
|
|
27
34
|
attempts;
|
|
28
35
|
constructor(attempts, cause) {
|
|
@@ -171,7 +178,7 @@ function buildBaseTextCallOptions({
|
|
|
171
178
|
}
|
|
172
179
|
function createProviderHandle(provider) {
|
|
173
180
|
const apiKey = provider.auth.apiKey.trim();
|
|
174
|
-
if (!apiKey) {
|
|
181
|
+
if (!apiKey && provider.type !== "openai-compatible") {
|
|
175
182
|
throw new RouterConfigurationError(
|
|
176
183
|
`Provider "${provider.name}" is missing an API key.`
|
|
177
184
|
);
|
|
@@ -252,9 +259,11 @@ function createProviderHandle(provider) {
|
|
|
252
259
|
case "openai-compatible": {
|
|
253
260
|
const options = {
|
|
254
261
|
name: provider.providerLabel ?? provider.name,
|
|
255
|
-
apiKey,
|
|
256
262
|
baseURL: provider.baseURL
|
|
257
263
|
};
|
|
264
|
+
if (apiKey) {
|
|
265
|
+
options.apiKey = apiKey;
|
|
266
|
+
}
|
|
258
267
|
if (provider.headers) {
|
|
259
268
|
options.headers = provider.headers;
|
|
260
269
|
}
|
|
@@ -422,6 +431,7 @@ var PrioLlmRouter = class {
|
|
|
422
431
|
providersByName = /* @__PURE__ */ new Map();
|
|
423
432
|
modelsByName = /* @__PURE__ */ new Map();
|
|
424
433
|
defaultChain;
|
|
434
|
+
defaultAttemptTimeoutMs;
|
|
425
435
|
executor;
|
|
426
436
|
hooks;
|
|
427
437
|
constructor(options) {
|
|
@@ -437,7 +447,8 @@ var PrioLlmRouter = class {
|
|
|
437
447
|
);
|
|
438
448
|
}
|
|
439
449
|
this.defaultChain = normalized.defaultChain;
|
|
440
|
-
this.hooks = options.hooks;
|
|
450
|
+
this.defaultAttemptTimeoutMs = normalized.defaultAttemptTimeoutMs;
|
|
451
|
+
this.hooks = createRouterHooks(options.hooks, options.debug === true);
|
|
441
452
|
this.executor = options.executor ?? (options.defaultProviderMaxRetries === void 0 ? createDefaultTextGenerationExecutor() : createDefaultTextGenerationExecutor({
|
|
442
453
|
defaultProviderMaxRetries: options.defaultProviderMaxRetries
|
|
443
454
|
}));
|
|
@@ -495,12 +506,24 @@ var PrioLlmRouter = class {
|
|
|
495
506
|
pendingAttempt.tier = model.tier;
|
|
496
507
|
}
|
|
497
508
|
this.hooks?.onAttemptStart?.(pendingAttempt);
|
|
509
|
+
const { controller, cleanup, parentAborted } = createLinkedAbortController(
|
|
510
|
+
request.abortSignal
|
|
511
|
+
);
|
|
498
512
|
try {
|
|
499
|
-
const result = await this.executor.execute({
|
|
500
|
-
provider,
|
|
501
|
-
model,
|
|
502
|
-
request
|
|
513
|
+
const result = await this.executeAttemptWithTimeout({
|
|
514
|
+
execute: () => this.executor.execute({
|
|
515
|
+
provider,
|
|
516
|
+
model,
|
|
517
|
+
request: {
|
|
518
|
+
...request,
|
|
519
|
+
abortSignal: controller.signal
|
|
520
|
+
}
|
|
521
|
+
}),
|
|
522
|
+
timeoutMs: request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
|
|
523
|
+
abortController: controller,
|
|
524
|
+
parentAborted
|
|
503
525
|
});
|
|
526
|
+
cleanup();
|
|
504
527
|
const finishedAt = /* @__PURE__ */ new Date();
|
|
505
528
|
const attemptRecord = {
|
|
506
529
|
...pendingAttempt,
|
|
@@ -525,7 +548,8 @@ var PrioLlmRouter = class {
|
|
|
525
548
|
}
|
|
526
549
|
return response;
|
|
527
550
|
} catch (error) {
|
|
528
|
-
if (isAbortError(error)) {
|
|
551
|
+
cleanup();
|
|
552
|
+
if (isAbortError(error) && parentAborted()) {
|
|
529
553
|
throw error;
|
|
530
554
|
}
|
|
531
555
|
const finishedAt = /* @__PURE__ */ new Date();
|
|
@@ -571,7 +595,7 @@ var PrioLlmRouter = class {
|
|
|
571
595
|
const iterator = streamResult.textStream[Symbol.asyncIterator]();
|
|
572
596
|
const firstChunk = await this.waitForFirstChunk({
|
|
573
597
|
iterator,
|
|
574
|
-
timeoutMs: request.firstChunkTimeoutMs,
|
|
598
|
+
timeoutMs: request.firstChunkTimeoutMs ?? request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
|
|
575
599
|
abortController: controller,
|
|
576
600
|
parentAborted
|
|
577
601
|
});
|
|
@@ -702,7 +726,7 @@ var PrioLlmRouter = class {
|
|
|
702
726
|
if (timeoutMs === void 0) {
|
|
703
727
|
return nextPromise;
|
|
704
728
|
}
|
|
705
|
-
const timeoutError = createFirstChunkTimeoutError(timeoutMs);
|
|
729
|
+
const timeoutError = new AttemptTimeoutError(timeoutMs);
|
|
706
730
|
const timedRace = await Promise.race([
|
|
707
731
|
nextPromise.then(
|
|
708
732
|
(value) => ({ kind: "value", value }),
|
|
@@ -723,6 +747,33 @@ var PrioLlmRouter = class {
|
|
|
723
747
|
}
|
|
724
748
|
throw timedRace.error;
|
|
725
749
|
}
|
|
750
|
+
async executeAttemptWithTimeout(options) {
|
|
751
|
+
const { execute, timeoutMs, abortController, parentAborted } = options;
|
|
752
|
+
const executionPromise = execute();
|
|
753
|
+
if (timeoutMs === void 0) {
|
|
754
|
+
return executionPromise;
|
|
755
|
+
}
|
|
756
|
+
const timeoutError = new AttemptTimeoutError(timeoutMs);
|
|
757
|
+
const timedRace = await Promise.race([
|
|
758
|
+
executionPromise.then(
|
|
759
|
+
(value) => ({ kind: "value", value }),
|
|
760
|
+
(error) => ({ kind: "error", error })
|
|
761
|
+
),
|
|
762
|
+
delay(timeoutMs).then(() => ({ kind: "timeout" }))
|
|
763
|
+
]);
|
|
764
|
+
if (timedRace.kind === "value") {
|
|
765
|
+
return timedRace.value;
|
|
766
|
+
}
|
|
767
|
+
if (timedRace.kind === "timeout") {
|
|
768
|
+
abortController.abort(timeoutError);
|
|
769
|
+
void executionPromise.catch(() => void 0);
|
|
770
|
+
throw timeoutError;
|
|
771
|
+
}
|
|
772
|
+
if (isAbortError(timedRace.error) && parentAborted()) {
|
|
773
|
+
throw timedRace.error;
|
|
774
|
+
}
|
|
775
|
+
throw timedRace.error;
|
|
776
|
+
}
|
|
726
777
|
resolveExecutionChain(chain) {
|
|
727
778
|
if (chain?.length) {
|
|
728
779
|
return this.resolveNamedChain(chain);
|
|
@@ -803,7 +854,7 @@ var PrioLlmRouter = class {
|
|
|
803
854
|
"Provider configuration names must be non-empty."
|
|
804
855
|
);
|
|
805
856
|
}
|
|
806
|
-
if (!provider.auth.apiKey.trim()) {
|
|
857
|
+
if (!provider.auth.apiKey.trim() && provider.type !== "openai-compatible") {
|
|
807
858
|
throw new RouterConfigurationError(
|
|
808
859
|
`Provider "${provider.name}" requires a non-empty API key.`
|
|
809
860
|
);
|
|
@@ -825,7 +876,11 @@ function createLlmRouter(options) {
|
|
|
825
876
|
}
|
|
826
877
|
function resolveRouterConfig(options) {
|
|
827
878
|
if ("sources" in options) {
|
|
828
|
-
return compileSources(options.sources, options.defaultChain);
|
|
879
|
+
return compileSources(
|
|
880
|
+
options.sources,
|
|
881
|
+
options.defaultChain,
|
|
882
|
+
options.defaultAttemptTimeoutMs
|
|
883
|
+
);
|
|
829
884
|
}
|
|
830
885
|
const normalized = {
|
|
831
886
|
providers: options.providers,
|
|
@@ -834,6 +889,9 @@ function resolveRouterConfig(options) {
|
|
|
834
889
|
if (options.defaultChain !== void 0) {
|
|
835
890
|
normalized.defaultChain = options.defaultChain;
|
|
836
891
|
}
|
|
892
|
+
if (options.defaultAttemptTimeoutMs !== void 0) {
|
|
893
|
+
normalized.defaultAttemptTimeoutMs = options.defaultAttemptTimeoutMs;
|
|
894
|
+
}
|
|
837
895
|
return normalized;
|
|
838
896
|
}
|
|
839
897
|
function compareModels(left, right) {
|
|
@@ -844,7 +902,7 @@ function compareModels(left, right) {
|
|
|
844
902
|
}
|
|
845
903
|
return left.__index - right.__index;
|
|
846
904
|
}
|
|
847
|
-
function compileSources(sources, defaultChain) {
|
|
905
|
+
function compileSources(sources, defaultChain, defaultAttemptTimeoutMs) {
|
|
848
906
|
const providersByName = /* @__PURE__ */ new Map();
|
|
849
907
|
const models = [];
|
|
850
908
|
for (const source of sources) {
|
|
@@ -904,6 +962,9 @@ function compileSources(sources, defaultChain) {
|
|
|
904
962
|
if (defaultChain !== void 0) {
|
|
905
963
|
normalized.defaultChain = defaultChain;
|
|
906
964
|
}
|
|
965
|
+
if (defaultAttemptTimeoutMs !== void 0) {
|
|
966
|
+
normalized.defaultAttemptTimeoutMs = defaultAttemptTimeoutMs;
|
|
967
|
+
}
|
|
907
968
|
return normalized;
|
|
908
969
|
}
|
|
909
970
|
function assertMatchingSourceProvider(existingProvider, nextProvider) {
|
|
@@ -1037,13 +1098,6 @@ function createLinkedAbortController(parentSignal) {
|
|
|
1037
1098
|
parentAborted: () => abortedByParent
|
|
1038
1099
|
};
|
|
1039
1100
|
}
|
|
1040
|
-
function createFirstChunkTimeoutError(timeoutMs) {
|
|
1041
|
-
const error = new Error(
|
|
1042
|
-
`The first stream chunk did not arrive within ${timeoutMs}ms.`
|
|
1043
|
-
);
|
|
1044
|
-
error.name = "FirstChunkTimeoutError";
|
|
1045
|
-
return error;
|
|
1046
|
-
}
|
|
1047
1101
|
function createEmptyFirstChunkError(targetName) {
|
|
1048
1102
|
const error = new Error(
|
|
1049
1103
|
`Stream for target "${targetName}" completed before the first text chunk.`
|
|
@@ -1061,8 +1115,28 @@ function delay(ms) {
|
|
|
1061
1115
|
setTimeout(resolve, ms);
|
|
1062
1116
|
});
|
|
1063
1117
|
}
|
|
1118
|
+
function createRouterHooks(hooks, debug) {
|
|
1119
|
+
if (!debug) {
|
|
1120
|
+
return hooks;
|
|
1121
|
+
}
|
|
1122
|
+
return {
|
|
1123
|
+
onAttemptStart: (attempt) => {
|
|
1124
|
+
console.log("[prio-llm-router] attempt:start", attempt);
|
|
1125
|
+
hooks?.onAttemptStart?.(attempt);
|
|
1126
|
+
},
|
|
1127
|
+
onAttemptSuccess: (attempt) => {
|
|
1128
|
+
console.log("[prio-llm-router] attempt:success", attempt);
|
|
1129
|
+
hooks?.onAttemptSuccess?.(attempt);
|
|
1130
|
+
},
|
|
1131
|
+
onAttemptFailure: (attempt) => {
|
|
1132
|
+
console.error("[prio-llm-router] attempt:failure", attempt);
|
|
1133
|
+
hooks?.onAttemptFailure?.(attempt);
|
|
1134
|
+
}
|
|
1135
|
+
};
|
|
1136
|
+
}
|
|
1064
1137
|
|
|
1065
1138
|
exports.AllModelsFailedError = AllModelsFailedError;
|
|
1139
|
+
exports.AttemptTimeoutError = AttemptTimeoutError;
|
|
1066
1140
|
exports.PrioLlmRouter = PrioLlmRouter;
|
|
1067
1141
|
exports.PrioLlmRouterError = PrioLlmRouterError;
|
|
1068
1142
|
exports.RouterConfigurationError = RouterConfigurationError;
|