@yadimon/prio-llm-router 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,6 +17,8 @@ The package keeps the routing logic intentionally small and predictable while re
17
17
  - Separate provider config and model target config
18
18
  - Optional source builders for source-centric setup and strict free policies
19
19
  - Non-streaming text generation and optional streaming
20
+ - Optional debug mode that mirrors attempt hooks to the console
21
+ - Per-request and router-level attempt timeouts for clean fallback
20
22
  - Built-in support for `google`, `openrouter`, `groq`, `mistral`, `cohere`, `perplexity`, `xai`, `togetherai`, `openai`, `anthropic`, `deepseek`, and generic `openai-compatible`
21
23
  - Strict TypeScript types
22
24
  - Hook points for attempt-level logging and telemetry
@@ -107,6 +109,7 @@ const router = createLlmRouter({
107
109
  tier: 'paid',
108
110
  },
109
111
  ],
112
+ debug: true,
110
113
  hooks: {
111
114
  onAttemptFailure(attempt) {
112
115
  console.warn('LLM attempt failed:', attempt);
@@ -116,6 +119,7 @@ const router = createLlmRouter({
116
119
 
117
120
  const result = await router.generateText({
118
121
  prompt: 'Summarize the advantages of priority-based model routing in 3 bullets.',
122
+ attemptTimeoutMs: 12000,
119
123
  });
120
124
 
121
125
  console.log(result.text);
@@ -123,6 +127,8 @@ console.log(result.target);
123
127
  console.log(result.attempts);
124
128
  ```
125
129
 
130
+ With `debug: true`, the router writes `attempt:start`, `attempt:success`, and `attempt:failure` events to the console while still calling your custom hooks.
131
+
126
132
  ## Basic Mental Model
127
133
 
128
134
  There are two separate layers:
@@ -239,6 +245,8 @@ console.log(final.target.name);
239
245
 
240
246
  Use `firstChunkTimeoutMs` when you want "switch if nothing starts quickly enough" behavior. If you omit it, the router waits indefinitely for the first chunk of the current target.
241
247
 
248
+ You can also use `attemptTimeoutMs` as a shared timeout: it applies to normal requests, and for streaming it serves as the first-chunk fallback whenever `firstChunkTimeoutMs` is not set (an explicit `firstChunkTimeoutMs` takes precedence).
249
+
242
250
  This makes the behavior safe for chat UIs:
243
251
 
244
252
  - no silent model switch after the answer has already started
@@ -303,6 +311,49 @@ Common model-level fields:
303
311
  - `tier`
304
312
  - `metadata`
305
313
 
314
+ ## Attempt Timeouts
315
+
316
+ Use `attemptTimeoutMs` on a request when a single model attempt should fail and fall through after a fixed time:
317
+
318
+ ```ts
319
+ const result = await router.generateText({
320
+ prompt: 'Write a short answer.',
321
+ attemptTimeoutMs: 8000,
322
+ });
323
+ ```
324
+
325
+ Or set a router-level default:
326
+
327
+ ```ts
328
+ const router = createLlmRouter({
329
+ defaultAttemptTimeoutMs: 12000,
330
+ providers,
331
+ models,
332
+ });
333
+ ```
334
+
335
+ Timeouts become normal failed attempts with `error.name === 'AttemptTimeoutError'`, so they appear in `attempts` and fire `onAttemptFailure(...)` like other execution failures.
336
+
337
+ ## Debug Mode And Hooks
338
+
339
+ Use `debug: true` when you want the router to mirror attempt hooks to the console during development.
340
+
341
+ ```ts
342
+ const router = createLlmRouter({
343
+ debug: true,
344
+ providers,
345
+ models,
346
+ });
347
+ ```
348
+
349
+ The debug output is intentionally minimal — three console calls, one per attempt event:
350
+
351
+ - `console.log('[prio-llm-router] attempt:start', attempt)`
352
+ - `console.log('[prio-llm-router] attempt:success', attempt)`
353
+ - `console.error('[prio-llm-router] attempt:failure', attempt)`
354
+
355
+ If you also pass `hooks`, both stay active. Debug mode does not replace custom telemetry.
356
+
306
357
  ## Supported Providers
307
358
 
308
359
  - `google`
@@ -335,6 +386,8 @@ Use `openai-compatible` when you have an OpenAI-style endpoint that is not cover
335
386
  }
336
387
  ```
337
388
 
389
+ `openai-compatible` is also the one built-in provider type that may use an empty API key for local or internal backends. When the key is empty, the router allows the config and creates the adapter without an `Authorization` header.
390
+
338
391
  If you prefer typed helpers over raw provider objects, use:
339
392
 
340
393
  ```ts
@@ -365,7 +418,7 @@ const router = createLlmRouter({
365
418
  providerLabel: 'lm-studio',
366
419
  auth: {
367
420
  mode: 'single',
368
- apiKey: 'lm-studio',
421
+ apiKey: '',
369
422
  },
370
423
  }).provider,
371
424
  ],
@@ -390,7 +443,7 @@ Notes:
390
443
 
391
444
  - for LM Studio, enable the OpenAI-compatible local API before using this config
392
445
  - the local server still needs to expose an OpenAI-compatible HTTP API
393
- - the package currently requires a non-empty `apiKey`, so local runtimes that ignore auth should use a dummy value such as `'lm-studio'`
446
+ - the package allows an empty `apiKey` for `openai-compatible`, so local runtimes can use `''` when they do not require auth
394
447
  - the `model` value must match the local model name exposed by your runtime
395
448
 
396
449
  For a focused local-setup guide, see [Local Providers](./docs/local-providers.md).
@@ -419,6 +472,7 @@ Main exports:
419
472
  - `createLlmRouter`
420
473
  - `PrioLlmRouter`
421
474
  - `createDefaultTextGenerationExecutor`
475
+ - `AttemptTimeoutError`
422
476
  - `createOpenRouterConnection`
423
477
  - `createOpenRouterFreeSource`
424
478
  - `createOpenAICompatibleConnection`
package/dist/index.cjs CHANGED
@@ -23,6 +23,13 @@ var PrioLlmRouterError = class extends Error {
23
23
  };
24
24
  var RouterConfigurationError = class extends PrioLlmRouterError {
25
25
  };
26
+ var AttemptTimeoutError = class extends PrioLlmRouterError {
27
+ timeoutMs;
28
+ constructor(timeoutMs) {
29
+ super(`Model attempt timed out after ${timeoutMs}ms`);
30
+ this.timeoutMs = timeoutMs;
31
+ }
32
+ };
26
33
  var AllModelsFailedError = class extends PrioLlmRouterError {
27
34
  attempts;
28
35
  constructor(attempts, cause) {
@@ -171,7 +178,7 @@ function buildBaseTextCallOptions({
171
178
  }
172
179
  function createProviderHandle(provider) {
173
180
  const apiKey = provider.auth.apiKey.trim();
174
- if (!apiKey) {
181
+ if (!apiKey && provider.type !== "openai-compatible") {
175
182
  throw new RouterConfigurationError(
176
183
  `Provider "${provider.name}" is missing an API key.`
177
184
  );
@@ -252,9 +259,11 @@ function createProviderHandle(provider) {
252
259
  case "openai-compatible": {
253
260
  const options = {
254
261
  name: provider.providerLabel ?? provider.name,
255
- apiKey,
256
262
  baseURL: provider.baseURL
257
263
  };
264
+ if (apiKey) {
265
+ options.apiKey = apiKey;
266
+ }
258
267
  if (provider.headers) {
259
268
  options.headers = provider.headers;
260
269
  }
@@ -422,6 +431,7 @@ var PrioLlmRouter = class {
422
431
  providersByName = /* @__PURE__ */ new Map();
423
432
  modelsByName = /* @__PURE__ */ new Map();
424
433
  defaultChain;
434
+ defaultAttemptTimeoutMs;
425
435
  executor;
426
436
  hooks;
427
437
  constructor(options) {
@@ -437,7 +447,8 @@ var PrioLlmRouter = class {
437
447
  );
438
448
  }
439
449
  this.defaultChain = normalized.defaultChain;
440
- this.hooks = options.hooks;
450
+ this.defaultAttemptTimeoutMs = normalized.defaultAttemptTimeoutMs;
451
+ this.hooks = createRouterHooks(options.hooks, options.debug === true);
441
452
  this.executor = options.executor ?? (options.defaultProviderMaxRetries === void 0 ? createDefaultTextGenerationExecutor() : createDefaultTextGenerationExecutor({
442
453
  defaultProviderMaxRetries: options.defaultProviderMaxRetries
443
454
  }));
@@ -495,12 +506,24 @@ var PrioLlmRouter = class {
495
506
  pendingAttempt.tier = model.tier;
496
507
  }
497
508
  this.hooks?.onAttemptStart?.(pendingAttempt);
509
+ const { controller, cleanup, parentAborted } = createLinkedAbortController(
510
+ request.abortSignal
511
+ );
498
512
  try {
499
- const result = await this.executor.execute({
500
- provider,
501
- model,
502
- request
513
+ const result = await this.executeAttemptWithTimeout({
514
+ execute: () => this.executor.execute({
515
+ provider,
516
+ model,
517
+ request: {
518
+ ...request,
519
+ abortSignal: controller.signal
520
+ }
521
+ }),
522
+ timeoutMs: request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
523
+ abortController: controller,
524
+ parentAborted
503
525
  });
526
+ cleanup();
504
527
  const finishedAt = /* @__PURE__ */ new Date();
505
528
  const attemptRecord = {
506
529
  ...pendingAttempt,
@@ -525,7 +548,8 @@ var PrioLlmRouter = class {
525
548
  }
526
549
  return response;
527
550
  } catch (error) {
528
- if (isAbortError(error)) {
551
+ cleanup();
552
+ if (isAbortError(error) && parentAborted()) {
529
553
  throw error;
530
554
  }
531
555
  const finishedAt = /* @__PURE__ */ new Date();
@@ -571,7 +595,7 @@ var PrioLlmRouter = class {
571
595
  const iterator = streamResult.textStream[Symbol.asyncIterator]();
572
596
  const firstChunk = await this.waitForFirstChunk({
573
597
  iterator,
574
- timeoutMs: request.firstChunkTimeoutMs,
598
+ timeoutMs: request.firstChunkTimeoutMs ?? request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
575
599
  abortController: controller,
576
600
  parentAborted
577
601
  });
@@ -702,7 +726,7 @@ var PrioLlmRouter = class {
702
726
  if (timeoutMs === void 0) {
703
727
  return nextPromise;
704
728
  }
705
- const timeoutError = createFirstChunkTimeoutError(timeoutMs);
729
+ const timeoutError = new AttemptTimeoutError(timeoutMs);
706
730
  const timedRace = await Promise.race([
707
731
  nextPromise.then(
708
732
  (value) => ({ kind: "value", value }),
@@ -723,6 +747,33 @@ var PrioLlmRouter = class {
723
747
  }
724
748
  throw timedRace.error;
725
749
  }
750
+ async executeAttemptWithTimeout(options) {
751
+ const { execute, timeoutMs, abortController, parentAborted } = options;
752
+ const executionPromise = execute();
753
+ if (timeoutMs === void 0) {
754
+ return executionPromise;
755
+ }
756
+ const timeoutError = new AttemptTimeoutError(timeoutMs);
757
+ const timedRace = await Promise.race([
758
+ executionPromise.then(
759
+ (value) => ({ kind: "value", value }),
760
+ (error) => ({ kind: "error", error })
761
+ ),
762
+ delay(timeoutMs).then(() => ({ kind: "timeout" }))
763
+ ]);
764
+ if (timedRace.kind === "value") {
765
+ return timedRace.value;
766
+ }
767
+ if (timedRace.kind === "timeout") {
768
+ abortController.abort(timeoutError);
769
+ void executionPromise.catch(() => void 0);
770
+ throw timeoutError;
771
+ }
772
+ if (isAbortError(timedRace.error) && parentAborted()) {
773
+ throw timedRace.error;
774
+ }
775
+ throw timedRace.error;
776
+ }
726
777
  resolveExecutionChain(chain) {
727
778
  if (chain?.length) {
728
779
  return this.resolveNamedChain(chain);
@@ -803,7 +854,7 @@ var PrioLlmRouter = class {
803
854
  "Provider configuration names must be non-empty."
804
855
  );
805
856
  }
806
- if (!provider.auth.apiKey.trim()) {
857
+ if (!provider.auth.apiKey.trim() && provider.type !== "openai-compatible") {
807
858
  throw new RouterConfigurationError(
808
859
  `Provider "${provider.name}" requires a non-empty API key.`
809
860
  );
@@ -825,7 +876,11 @@ function createLlmRouter(options) {
825
876
  }
826
877
  function resolveRouterConfig(options) {
827
878
  if ("sources" in options) {
828
- return compileSources(options.sources, options.defaultChain);
879
+ return compileSources(
880
+ options.sources,
881
+ options.defaultChain,
882
+ options.defaultAttemptTimeoutMs
883
+ );
829
884
  }
830
885
  const normalized = {
831
886
  providers: options.providers,
@@ -834,6 +889,9 @@ function resolveRouterConfig(options) {
834
889
  if (options.defaultChain !== void 0) {
835
890
  normalized.defaultChain = options.defaultChain;
836
891
  }
892
+ if (options.defaultAttemptTimeoutMs !== void 0) {
893
+ normalized.defaultAttemptTimeoutMs = options.defaultAttemptTimeoutMs;
894
+ }
837
895
  return normalized;
838
896
  }
839
897
  function compareModels(left, right) {
@@ -844,7 +902,7 @@ function compareModels(left, right) {
844
902
  }
845
903
  return left.__index - right.__index;
846
904
  }
847
- function compileSources(sources, defaultChain) {
905
+ function compileSources(sources, defaultChain, defaultAttemptTimeoutMs) {
848
906
  const providersByName = /* @__PURE__ */ new Map();
849
907
  const models = [];
850
908
  for (const source of sources) {
@@ -904,6 +962,9 @@ function compileSources(sources, defaultChain) {
904
962
  if (defaultChain !== void 0) {
905
963
  normalized.defaultChain = defaultChain;
906
964
  }
965
+ if (defaultAttemptTimeoutMs !== void 0) {
966
+ normalized.defaultAttemptTimeoutMs = defaultAttemptTimeoutMs;
967
+ }
907
968
  return normalized;
908
969
  }
909
970
  function assertMatchingSourceProvider(existingProvider, nextProvider) {
@@ -1037,13 +1098,6 @@ function createLinkedAbortController(parentSignal) {
1037
1098
  parentAborted: () => abortedByParent
1038
1099
  };
1039
1100
  }
1040
- function createFirstChunkTimeoutError(timeoutMs) {
1041
- const error = new Error(
1042
- `The first stream chunk did not arrive within ${timeoutMs}ms.`
1043
- );
1044
- error.name = "FirstChunkTimeoutError";
1045
- return error;
1046
- }
1047
1101
  function createEmptyFirstChunkError(targetName) {
1048
1102
  const error = new Error(
1049
1103
  `Stream for target "${targetName}" completed before the first text chunk.`
@@ -1061,8 +1115,28 @@ function delay(ms) {
1061
1115
  setTimeout(resolve, ms);
1062
1116
  });
1063
1117
  }
1118
+ function createRouterHooks(hooks, debug) {
1119
+ if (!debug) {
1120
+ return hooks;
1121
+ }
1122
+ return {
1123
+ onAttemptStart: (attempt) => {
1124
+ console.log("[prio-llm-router] attempt:start", attempt);
1125
+ hooks?.onAttemptStart?.(attempt);
1126
+ },
1127
+ onAttemptSuccess: (attempt) => {
1128
+ console.log("[prio-llm-router] attempt:success", attempt);
1129
+ hooks?.onAttemptSuccess?.(attempt);
1130
+ },
1131
+ onAttemptFailure: (attempt) => {
1132
+ console.error("[prio-llm-router] attempt:failure", attempt);
1133
+ hooks?.onAttemptFailure?.(attempt);
1134
+ }
1135
+ };
1136
+ }
1064
1137
 
1065
1138
  exports.AllModelsFailedError = AllModelsFailedError;
1139
+ exports.AttemptTimeoutError = AttemptTimeoutError;
1066
1140
  exports.PrioLlmRouter = PrioLlmRouter;
1067
1141
  exports.PrioLlmRouterError = PrioLlmRouterError;
1068
1142
  exports.RouterConfigurationError = RouterConfigurationError;