@yadimon/prio-llm-router 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,6 +18,7 @@ The package keeps the routing logic intentionally small and predictable while re
18
18
  - Optional source builders for source-centric setup and strict free policies
19
19
  - Non-streaming text generation and optional streaming
20
20
  - Optional debug mode that mirrors attempt hooks to the console
21
+ - Per-request and router-level attempt timeouts for clean fallback
21
22
  - Built-in support for `google`, `openrouter`, `groq`, `mistral`, `cohere`, `perplexity`, `xai`, `togetherai`, `openai`, `anthropic`, `deepseek`, and generic `openai-compatible`
22
23
  - Strict TypeScript types
23
24
  - Hook points for attempt-level logging and telemetry
@@ -118,6 +119,7 @@ const router = createLlmRouter({
118
119
 
119
120
  const result = await router.generateText({
120
121
  prompt: 'Summarize the advantages of priority-based model routing in 3 bullets.',
122
+ attemptTimeoutMs: 12000,
121
123
  });
122
124
 
123
125
  console.log(result.text);
@@ -243,6 +245,8 @@ console.log(final.target.name);
243
245
 
244
246
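  Use `firstChunkTimeoutMs` when you want "switch if nothing starts quickly enough" behavior. If you omit it, the router falls back to the request's `attemptTimeoutMs` and then to the router-level `defaultAttemptTimeoutMs`; only when none of these is set does it wait indefinitely for the first chunk of the current target.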
245
247
 
248
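+ You can also use `attemptTimeoutMs` as a shared timeout: it limits each model attempt of a normal request and, for streaming, acts as the first-chunk timeout whenever `firstChunkTimeoutMs` is not set. If neither is given on the request, the router-level `defaultAttemptTimeoutMs` applies.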
249
+
246
250
  This makes the behavior safe for chat UIs:
247
251
 
248
252
  - no silent model switch after the answer has already started
@@ -307,6 +311,29 @@ Common model-level fields:
307
311
  - `tier`
308
312
  - `metadata`
309
313
 
314
+ ## Attempt Timeouts
315
+
316
+ Use `attemptTimeoutMs` on a request when a single model attempt should fail and fall through after a fixed time:
317
+
318
+ ```ts
319
+ const result = await router.generateText({
320
+ prompt: 'Write a short answer.',
321
+ attemptTimeoutMs: 8000,
322
+ });
323
+ ```
324
+
325
+ Or set a router-level default:
326
+
327
+ ```ts
328
+ const router = createLlmRouter({
329
+ defaultAttemptTimeoutMs: 12000,
330
+ providers,
331
+ models,
332
+ });
333
+ ```
334
+
335
+ Timeouts become normal failed attempts with `error.name === 'AttemptTimeoutError'`, so they appear in `attempts` and fire `onAttemptFailure(...)` like other execution failures.
336
+
310
337
  ## Debug Mode And Hooks
311
338
 
312
339
  Use `debug: true` when you want the router to mirror attempt hooks to the console during development.
@@ -359,6 +386,8 @@ Use `openai-compatible` when you have an OpenAI-style endpoint that is not cover
359
386
  }
360
387
  ```
361
388
 
389
+ `openai-compatible` is also the only built-in provider type that accepts an empty API key, which is useful for local or internal backends. When the key is empty (or contains only whitespace), the router accepts the config and creates the adapter without an `Authorization` header.
390
+
362
391
  If you prefer typed helpers over raw provider objects, use:
363
392
 
364
393
  ```ts
@@ -389,7 +418,7 @@ const router = createLlmRouter({
389
418
  providerLabel: 'lm-studio',
390
419
  auth: {
391
420
  mode: 'single',
392
- apiKey: 'lm-studio',
421
+ apiKey: '',
393
422
  },
394
423
  }).provider,
395
424
  ],
@@ -414,7 +443,7 @@ Notes:
414
443
 
415
444
  - for LM Studio, enable the OpenAI-compatible local API before using this config
416
445
  - the local server still needs to expose an OpenAI-compatible HTTP API
417
- - the package currently requires a non-empty `apiKey`, so local runtimes that ignore auth should use a dummy value such as `'lm-studio'`
446
+ - the package allows an empty `apiKey` for `openai-compatible`, so local runtimes can use `''` when they do not require auth
418
447
  - the `model` value must match the local model name exposed by your runtime
419
448
 
420
449
  For a focused local-setup guide, see [Local Providers](./docs/local-providers.md).
@@ -443,6 +472,7 @@ Main exports:
443
472
  - `createLlmRouter`
444
473
  - `PrioLlmRouter`
445
474
  - `createDefaultTextGenerationExecutor`
475
+ - `AttemptTimeoutError`
446
476
  - `createOpenRouterConnection`
447
477
  - `createOpenRouterFreeSource`
448
478
  - `createOpenAICompatibleConnection`
package/dist/index.cjs CHANGED
@@ -23,6 +23,13 @@ var PrioLlmRouterError = class extends Error {
23
23
  };
24
24
  var RouterConfigurationError = class extends PrioLlmRouterError {
25
25
  };
26
+ var AttemptTimeoutError = class extends PrioLlmRouterError {
27
+ timeoutMs;
28
+ constructor(timeoutMs) {
29
+ super(`Model attempt timed out after ${timeoutMs}ms`);
30
+ this.timeoutMs = timeoutMs;
31
+ }
32
+ };
26
33
  var AllModelsFailedError = class extends PrioLlmRouterError {
27
34
  attempts;
28
35
  constructor(attempts, cause) {
@@ -171,7 +178,7 @@ function buildBaseTextCallOptions({
171
178
  }
172
179
  function createProviderHandle(provider) {
173
180
  const apiKey = provider.auth.apiKey.trim();
174
- if (!apiKey) {
181
+ if (!apiKey && provider.type !== "openai-compatible") {
175
182
  throw new RouterConfigurationError(
176
183
  `Provider "${provider.name}" is missing an API key.`
177
184
  );
@@ -252,9 +259,11 @@ function createProviderHandle(provider) {
252
259
  case "openai-compatible": {
253
260
  const options = {
254
261
  name: provider.providerLabel ?? provider.name,
255
- apiKey,
256
262
  baseURL: provider.baseURL
257
263
  };
264
+ if (apiKey) {
265
+ options.apiKey = apiKey;
266
+ }
258
267
  if (provider.headers) {
259
268
  options.headers = provider.headers;
260
269
  }
@@ -422,6 +431,7 @@ var PrioLlmRouter = class {
422
431
  providersByName = /* @__PURE__ */ new Map();
423
432
  modelsByName = /* @__PURE__ */ new Map();
424
433
  defaultChain;
434
+ defaultAttemptTimeoutMs;
425
435
  executor;
426
436
  hooks;
427
437
  constructor(options) {
@@ -437,6 +447,7 @@ var PrioLlmRouter = class {
437
447
  );
438
448
  }
439
449
  this.defaultChain = normalized.defaultChain;
450
+ this.defaultAttemptTimeoutMs = normalized.defaultAttemptTimeoutMs;
440
451
  this.hooks = createRouterHooks(options.hooks, options.debug === true);
441
452
  this.executor = options.executor ?? (options.defaultProviderMaxRetries === void 0 ? createDefaultTextGenerationExecutor() : createDefaultTextGenerationExecutor({
442
453
  defaultProviderMaxRetries: options.defaultProviderMaxRetries
@@ -495,12 +506,24 @@ var PrioLlmRouter = class {
495
506
  pendingAttempt.tier = model.tier;
496
507
  }
497
508
  this.hooks?.onAttemptStart?.(pendingAttempt);
509
+ const { controller, cleanup, parentAborted } = createLinkedAbortController(
510
+ request.abortSignal
511
+ );
498
512
  try {
499
- const result = await this.executor.execute({
500
- provider,
501
- model,
502
- request
513
+ const result = await this.executeAttemptWithTimeout({
514
+ execute: () => this.executor.execute({
515
+ provider,
516
+ model,
517
+ request: {
518
+ ...request,
519
+ abortSignal: controller.signal
520
+ }
521
+ }),
522
+ timeoutMs: request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
523
+ abortController: controller,
524
+ parentAborted
503
525
  });
526
+ cleanup();
504
527
  const finishedAt = /* @__PURE__ */ new Date();
505
528
  const attemptRecord = {
506
529
  ...pendingAttempt,
@@ -525,7 +548,8 @@ var PrioLlmRouter = class {
525
548
  }
526
549
  return response;
527
550
  } catch (error) {
528
- if (isAbortError(error)) {
551
+ cleanup();
552
+ if (isAbortError(error) && parentAborted()) {
529
553
  throw error;
530
554
  }
531
555
  const finishedAt = /* @__PURE__ */ new Date();
@@ -571,7 +595,7 @@ var PrioLlmRouter = class {
571
595
  const iterator = streamResult.textStream[Symbol.asyncIterator]();
572
596
  const firstChunk = await this.waitForFirstChunk({
573
597
  iterator,
574
- timeoutMs: request.firstChunkTimeoutMs,
598
+ timeoutMs: request.firstChunkTimeoutMs ?? request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
575
599
  abortController: controller,
576
600
  parentAborted
577
601
  });
@@ -702,7 +726,7 @@ var PrioLlmRouter = class {
702
726
  if (timeoutMs === void 0) {
703
727
  return nextPromise;
704
728
  }
705
- const timeoutError = createFirstChunkTimeoutError(timeoutMs);
729
+ const timeoutError = new AttemptTimeoutError(timeoutMs);
706
730
  const timedRace = await Promise.race([
707
731
  nextPromise.then(
708
732
  (value) => ({ kind: "value", value }),
@@ -723,6 +747,33 @@ var PrioLlmRouter = class {
723
747
  }
724
748
  throw timedRace.error;
725
749
  }
750
+ async executeAttemptWithTimeout(options) {
751
+ const { execute, timeoutMs, abortController, parentAborted } = options;
752
+ const executionPromise = execute();
753
+ if (timeoutMs === void 0) {
754
+ return executionPromise;
755
+ }
756
+ const timeoutError = new AttemptTimeoutError(timeoutMs);
757
+ const timedRace = await Promise.race([
758
+ executionPromise.then(
759
+ (value) => ({ kind: "value", value }),
760
+ (error) => ({ kind: "error", error })
761
+ ),
762
+ delay(timeoutMs).then(() => ({ kind: "timeout" }))
763
+ ]);
764
+ if (timedRace.kind === "value") {
765
+ return timedRace.value;
766
+ }
767
+ if (timedRace.kind === "timeout") {
768
+ abortController.abort(timeoutError);
769
+ void executionPromise.catch(() => void 0);
770
+ throw timeoutError;
771
+ }
772
+ if (isAbortError(timedRace.error) && parentAborted()) {
773
+ throw timedRace.error;
774
+ }
775
+ throw timedRace.error;
776
+ }
726
777
  resolveExecutionChain(chain) {
727
778
  if (chain?.length) {
728
779
  return this.resolveNamedChain(chain);
@@ -803,7 +854,7 @@ var PrioLlmRouter = class {
803
854
  "Provider configuration names must be non-empty."
804
855
  );
805
856
  }
806
- if (!provider.auth.apiKey.trim()) {
857
+ if (!provider.auth.apiKey.trim() && provider.type !== "openai-compatible") {
807
858
  throw new RouterConfigurationError(
808
859
  `Provider "${provider.name}" requires a non-empty API key.`
809
860
  );
@@ -825,7 +876,11 @@ function createLlmRouter(options) {
825
876
  }
826
877
  function resolveRouterConfig(options) {
827
878
  if ("sources" in options) {
828
- return compileSources(options.sources, options.defaultChain);
879
+ return compileSources(
880
+ options.sources,
881
+ options.defaultChain,
882
+ options.defaultAttemptTimeoutMs
883
+ );
829
884
  }
830
885
  const normalized = {
831
886
  providers: options.providers,
@@ -834,6 +889,9 @@ function resolveRouterConfig(options) {
834
889
  if (options.defaultChain !== void 0) {
835
890
  normalized.defaultChain = options.defaultChain;
836
891
  }
892
+ if (options.defaultAttemptTimeoutMs !== void 0) {
893
+ normalized.defaultAttemptTimeoutMs = options.defaultAttemptTimeoutMs;
894
+ }
837
895
  return normalized;
838
896
  }
839
897
  function compareModels(left, right) {
@@ -844,7 +902,7 @@ function compareModels(left, right) {
844
902
  }
845
903
  return left.__index - right.__index;
846
904
  }
847
- function compileSources(sources, defaultChain) {
905
+ function compileSources(sources, defaultChain, defaultAttemptTimeoutMs) {
848
906
  const providersByName = /* @__PURE__ */ new Map();
849
907
  const models = [];
850
908
  for (const source of sources) {
@@ -904,6 +962,9 @@ function compileSources(sources, defaultChain) {
904
962
  if (defaultChain !== void 0) {
905
963
  normalized.defaultChain = defaultChain;
906
964
  }
965
+ if (defaultAttemptTimeoutMs !== void 0) {
966
+ normalized.defaultAttemptTimeoutMs = defaultAttemptTimeoutMs;
967
+ }
907
968
  return normalized;
908
969
  }
909
970
  function assertMatchingSourceProvider(existingProvider, nextProvider) {
@@ -1037,13 +1098,6 @@ function createLinkedAbortController(parentSignal) {
1037
1098
  parentAborted: () => abortedByParent
1038
1099
  };
1039
1100
  }
1040
- function createFirstChunkTimeoutError(timeoutMs) {
1041
- const error = new Error(
1042
- `The first stream chunk did not arrive within ${timeoutMs}ms.`
1043
- );
1044
- error.name = "FirstChunkTimeoutError";
1045
- return error;
1046
- }
1047
1101
  function createEmptyFirstChunkError(targetName) {
1048
1102
  const error = new Error(
1049
1103
  `Stream for target "${targetName}" completed before the first text chunk.`
@@ -1082,6 +1136,7 @@ function createRouterHooks(hooks, debug) {
1082
1136
  }
1083
1137
 
1084
1138
  exports.AllModelsFailedError = AllModelsFailedError;
1139
+ exports.AttemptTimeoutError = AttemptTimeoutError;
1085
1140
  exports.PrioLlmRouter = PrioLlmRouter;
1086
1141
  exports.PrioLlmRouterError = PrioLlmRouterError;
1087
1142
  exports.RouterConfigurationError = RouterConfigurationError;