npm - @yadimon/prio-llm-router - Versions diffs - 0.3.0 → 0.4.0 - Mend

@yadimon/prio-llm-router 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -18,6 +18,7 @@ The package keeps the routing logic intentionally small and predictable while re
 - Optional source builders for source-centric setup and strict free policies
 - Non-streaming text generation and optional streaming
 - Optional debug mode that mirrors attempt hooks to the console
+- Per-request and router-level attempt timeouts for clean fallback
 - Built-in support for `google`, `openrouter`, `groq`, `mistral`, `cohere`, `perplexity`, `xai`, `togetherai`, `openai`, `anthropic`, `deepseek`, and generic `openai-compatible`
 - Strict TypeScript types
 - Hook points for attempt-level logging and telemetry
@@ -118,6 +119,7 @@ const router = createLlmRouter({
 const result = await router.generateText({
   prompt: 'Summarize the advantages of priority-based model routing in 3 bullets.',
+  attemptTimeoutMs: 12000,
 });
 console.log(result.text);
@@ -243,6 +245,8 @@ console.log(final.target.name);
 Use `firstChunkTimeoutMs` when you want "switch if nothing starts quickly enough" behavior. If you omit it, the router waits indefinitely for the first chunk of the current target.
+You can also use `attemptTimeoutMs` as the shared timeout for normal requests and streaming first-chunk fallback.
 This makes the behavior safe for chat UIs:
 - no silent model switch after the answer has already started
@@ -307,6 +311,29 @@ Common model-level fields:
 - `tier`
 - `metadata`
+## Attempt Timeouts
+Use `attemptTimeoutMs` on a request when a single model attempt should fail and fall through after a fixed time:
+```ts
+const result = await router.generateText({
+  prompt: 'Write a short answer.',
+  attemptTimeoutMs: 8000,
+});
+```
+Or set a router-level default:
+```ts
+const router = createLlmRouter({
+  defaultAttemptTimeoutMs: 12000,
+  providers,
+  models,
+});
+```
+Timeouts become normal failed attempts with `error.name === 'AttemptTimeoutError'`, so they appear in `attempts` and fire `onAttemptFailure(...)` like other execution failures.
 ## Debug Mode And Hooks
 Use `debug: true` when you want the router to mirror attempt hooks to the console during development.
@@ -359,6 +386,8 @@ Use `openai-compatible` when you have an OpenAI-style endpoint that is not cover
 }
 ```
+`openai-compatible` is also the one built-in provider type that may use an empty API key for local or internal backends. When the key is empty, the router allows the config and creates the adapter without an `Authorization` header.
 If you prefer typed helpers over raw provider objects, use:
 ```ts
@@ -389,7 +418,7 @@ const router = createLlmRouter({
       providerLabel: 'lm-studio',
       auth: {
         mode: 'single',
-        apiKey: 'lm-studio',
+        apiKey: '',
       },
     }).provider,
   ],
@@ -414,7 +443,7 @@ Notes:
 - for LM Studio, enable the OpenAI-compatible local API before using this config
 - the local server still needs to expose an OpenAI-compatible HTTP API
-- the package currently requires a non-empty `apiKey`, so local runtimes that ignore auth should use a dummy value such as `'lm-studio'`
+- the package allows an empty `apiKey` for `openai-compatible`, so local runtimes can use `''` when they do not require auth
 - the `model` value must match the local model name exposed by your runtime
 For a focused local-setup guide, see [Local Providers](./docs/local-providers.md).
@@ -443,6 +472,7 @@ Main exports:
 - `createLlmRouter`
 - `PrioLlmRouter`
 - `createDefaultTextGenerationExecutor`
+- `AttemptTimeoutError`
 - `createOpenRouterConnection`
 - `createOpenRouterFreeSource`
 - `createOpenAICompatibleConnection`

package/dist/index.cjs CHANGED Viewed

@@ -23,6 +23,13 @@ var PrioLlmRouterError = class extends Error {
 };
 var RouterConfigurationError = class extends PrioLlmRouterError {
 };
+var AttemptTimeoutError = class extends PrioLlmRouterError { // Error used when one model attempt (or the wait for a first stream chunk) exceeds its timeout.
+  timeoutMs; // The timeout, in milliseconds, that was exceeded.
+  constructor(timeoutMs) { // timeoutMs: the limit that elapsed; it is interpolated into the error message.
+    super(`Model attempt timed out after ${timeoutMs}ms`);
+    this.timeoutMs = timeoutMs; // Exposed on the instance alongside the message.
+  }
+};
 var AllModelsFailedError = class extends PrioLlmRouterError {
   attempts;
   constructor(attempts, cause) {
@@ -171,7 +178,7 @@ function buildBaseTextCallOptions({
 }
 function createProviderHandle(provider) {
   const apiKey = provider.auth.apiKey.trim();
-  if (!apiKey) {
+  if (!apiKey && provider.type !== "openai-compatible") {
     throw new RouterConfigurationError(
       `Provider "${provider.name}" is missing an API key.`
     );
@@ -252,9 +259,11 @@ function createProviderHandle(provider) {
     case "openai-compatible": {
       const options = {
         name: provider.providerLabel ?? provider.name,
-        apiKey,
         baseURL: provider.baseURL
       };
+      if (apiKey) {
+        options.apiKey = apiKey;
+      }
       if (provider.headers) {
         options.headers = provider.headers;
       }
@@ -422,6 +431,7 @@ var PrioLlmRouter = class {
   providersByName = /* @__PURE__ */ new Map();
   modelsByName = /* @__PURE__ */ new Map();
   defaultChain;
+  defaultAttemptTimeoutMs;
   executor;
   hooks;
   constructor(options) {
@@ -437,6 +447,7 @@ var PrioLlmRouter = class {
       );
     }
     this.defaultChain = normalized.defaultChain;
+    this.defaultAttemptTimeoutMs = normalized.defaultAttemptTimeoutMs;
     this.hooks = createRouterHooks(options.hooks, options.debug === true);
     this.executor = options.executor ?? (options.defaultProviderMaxRetries === void 0 ? createDefaultTextGenerationExecutor() : createDefaultTextGenerationExecutor({
       defaultProviderMaxRetries: options.defaultProviderMaxRetries
@@ -495,12 +506,24 @@ var PrioLlmRouter = class {
         pendingAttempt.tier = model.tier;
       }
       this.hooks?.onAttemptStart?.(pendingAttempt);
+      const { controller, cleanup, parentAborted } = createLinkedAbortController(
+        request.abortSignal
+      );
       try {
-        const result = await this.executor.execute({
-          provider,
-          model,
-          request
+        const result = await this.executeAttemptWithTimeout({
+          execute: () => this.executor.execute({
+            provider,
+            model,
+            request: {
+              ...request,
+              abortSignal: controller.signal
+            }
+          }),
+          timeoutMs: request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
+          abortController: controller,
+          parentAborted
         });
+        cleanup();
         const finishedAt = /* @__PURE__ */ new Date();
         const attemptRecord = {
           ...pendingAttempt,
@@ -525,7 +548,8 @@ var PrioLlmRouter = class {
         }
         return response;
       } catch (error) {
-        if (isAbortError(error)) {
+        cleanup();
+        if (isAbortError(error) && parentAborted()) {
           throw error;
         }
         const finishedAt = /* @__PURE__ */ new Date();
@@ -571,7 +595,7 @@ var PrioLlmRouter = class {
         const iterator = streamResult.textStream[Symbol.asyncIterator]();
         const firstChunk = await this.waitForFirstChunk({
           iterator,
-          timeoutMs: request.firstChunkTimeoutMs,
+          timeoutMs: request.firstChunkTimeoutMs ?? request.attemptTimeoutMs ?? this.defaultAttemptTimeoutMs,
           abortController: controller,
           parentAborted
         });
@@ -702,7 +726,7 @@ var PrioLlmRouter = class {
     if (timeoutMs === void 0) {
       return nextPromise;
     }
-    const timeoutError = createFirstChunkTimeoutError(timeoutMs);
+    const timeoutError = new AttemptTimeoutError(timeoutMs);
     const timedRace = await Promise.race([
       nextPromise.then(
         (value) => ({ kind: "value", value }),
@@ -723,6 +747,33 @@ var PrioLlmRouter = class {
     }
     throw timedRace.error;
   }
+  async executeAttemptWithTimeout(options) { // Runs one attempt and throws AttemptTimeoutError if it has not settled within options.timeoutMs.
+    const { execute, timeoutMs, abortController, parentAborted } = options; // execute: starts the attempt and returns a promise; timeoutMs: limit in ms or undefined; abortController: aborted on timeout; parentAborted: callable flag check.
+    const executionPromise = execute(); // Started before the timeout check, so the attempt runs either way.
+    if (timeoutMs === void 0) { // No timeout configured: hand back the attempt's own promise.
+      return executionPromise;
+    }
+    const timeoutError = new AttemptTimeoutError(timeoutMs);
+    const timedRace = await Promise.race([ // Outcomes are tagged by `kind`: "value", "error" or "timeout".
+      executionPromise.then(
+        (value) => ({ kind: "value", value }),
+        (error) => ({ kind: "error", error }) // A rejection is captured as data and rethrown further down.
+      ),
+      delay(timeoutMs).then(() => ({ kind: "timeout" })) // NOTE(review): `delay` is defined outside this view; confirm its timer is released when the attempt settles first.
+    ]);
+    if (timedRace.kind === "value") {
+      return timedRace.value;
+    }
+    if (timedRace.kind === "timeout") {
+      abortController.abort(timeoutError); // Abort the attempt's controller, passing the timeout error as the reason.
+      void executionPromise.catch(() => void 0); // Ignore whatever the aborted attempt rejects with later.
+      throw timeoutError;
+    }
+    if (isAbortError(timedRace.error) && parentAborted()) { // NOTE(review): this branch and the fallthrough below throw the same error, so the check is redundant.
+      throw timedRace.error;
+    }
+    throw timedRace.error;
+  }
   resolveExecutionChain(chain) {
     if (chain?.length) {
       return this.resolveNamedChain(chain);
@@ -803,7 +854,7 @@ var PrioLlmRouter = class {
         "Provider configuration names must be non-empty."
       );
     }
-    if (!provider.auth.apiKey.trim()) {
+    if (!provider.auth.apiKey.trim() && provider.type !== "openai-compatible") {
       throw new RouterConfigurationError(
         `Provider "${provider.name}" requires a non-empty API key.`
       );
@@ -825,7 +876,11 @@ function createLlmRouter(options) {
 }
 function resolveRouterConfig(options) {
   if ("sources" in options) {
-    return compileSources(options.sources, options.defaultChain);
+    return compileSources(
+      options.sources,
+      options.defaultChain,
+      options.defaultAttemptTimeoutMs
+    );
   }
   const normalized = {
     providers: options.providers,
@@ -834,6 +889,9 @@ function resolveRouterConfig(options) {
   if (options.defaultChain !== void 0) {
     normalized.defaultChain = options.defaultChain;
   }
+  if (options.defaultAttemptTimeoutMs !== void 0) {
+    normalized.defaultAttemptTimeoutMs = options.defaultAttemptTimeoutMs;
+  }
   return normalized;
 }
 function compareModels(left, right) {
@@ -844,7 +902,7 @@ function compareModels(left, right) {
   }
   return left.__index - right.__index;
 }
-function compileSources(sources, defaultChain) {
+function compileSources(sources, defaultChain, defaultAttemptTimeoutMs) {
   const providersByName = /* @__PURE__ */ new Map();
   const models = [];
   for (const source of sources) {
@@ -904,6 +962,9 @@ function compileSources(sources, defaultChain) {
   if (defaultChain !== void 0) {
     normalized.defaultChain = defaultChain;
   }
+  if (defaultAttemptTimeoutMs !== void 0) {
+    normalized.defaultAttemptTimeoutMs = defaultAttemptTimeoutMs;
+  }
   return normalized;
 }
 function assertMatchingSourceProvider(existingProvider, nextProvider) {
@@ -1037,13 +1098,6 @@ function createLinkedAbortController(parentSignal) {
     parentAborted: () => abortedByParent
   };
 }
-function createFirstChunkTimeoutError(timeoutMs) {
-  const error = new Error(
-    `The first stream chunk did not arrive within ${timeoutMs}ms.`
-  );
-  error.name = "FirstChunkTimeoutError";
-  return error;
-}
 function createEmptyFirstChunkError(targetName) {
   const error = new Error(
     `Stream for target "${targetName}" completed before the first text chunk.`
@@ -1082,6 +1136,7 @@ function createRouterHooks(hooks, debug) {
 }
 exports.AllModelsFailedError = AllModelsFailedError;
+exports.AttemptTimeoutError = AttemptTimeoutError;
 exports.PrioLlmRouter = PrioLlmRouter;
 exports.PrioLlmRouterError = PrioLlmRouterError;
 exports.RouterConfigurationError = RouterConfigurationError;