npm - ai-sdk-rate-limiter - Versions diffs - 0.2.0 → 0.3.0 - Mend

ai-sdk-rate-limiter 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md — CHANGED

@@ -77,7 +77,7 @@ const limiter = createRateLimiter({
       daily:   50,
       monthly: 500,
     },
-    onExceeded: 'throw', // or 'queue' — wait until the period resets
+    onExceeded: 'throw', // 'throw' | 'queue' | 'fallback'
   },
   // Queue behavior
@@ -169,6 +169,49 @@ Costs are based on **actual token counts** from API responses — not estimates.
 ---
+## Budget fallback routing
+When a budget limit is hit, you can transparently reroute to a cheaper model instead of throwing an error. Pass a `fallback` option to `wrap()`:
+```typescript
+const limiter = createRateLimiter({
+  cost: {
+    budget: { daily: 10 },
+    onExceeded: 'fallback',  // reroute to fallback instead of throwing
+  },
+  on: {
+    budgetHit: ({ model, currentCostUsd, limitUsd, period }) =>
+      console.warn(`${model} ${period} budget hit ($${currentCostUsd} of $${limitUsd})`),
+  },
+})
+const model = limiter.wrap(
+  openai('gpt-4o'),                     // primary model
+  { fallback: openai('gpt-4o-mini') },  // used when budget is exceeded
+)
+// Under budget  → uses gpt-4o normally
+// Over $10/day  → silently switches to gpt-4o-mini, no code changes needed
+const result = await generateText({ model, prompt })
+```
+**How it works:**
+1. The budget is checked before every request against total rolling spend
+2. When exceeded, `BudgetExceededError` is caught inside `wrap()` before it reaches your code
+3. The request is re-executed against the fallback model, bypassing the budget pre-check
+4. Fallback usage is tracked under the fallback model's ID in `getCostReport()`
+**Behavior matrix:**
+| `onExceeded` | `fallback` configured | Outcome |
+|---|---|---|
+| `'throw'` | any | Throws `BudgetExceededError` |
+| `'fallback'` | yes | Transparently uses fallback model |
+| `'fallback'` | no | Throws `BudgetExceededError` |
+| `'queue'` | any | Queues until period resets |
+---
 ## Backpressure — know before you send
 Check estimated wait time before committing to a request. Useful for showing loading states or shedding load gracefully.
@@ -209,7 +252,7 @@ limiter.off('queued', handler)
 | `dequeued` | Request leaves the queue | `model`, `waitedMs`, `priority` |
 | `retrying` | A failed request is about to retry | `model`, `attempt`, `maxAttempts`, `delayMs`, `error` |
 | `rateLimited` | Limit hit (local or remote 429) | `model`, `source`, `limitType`, `resetAt` |
-| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period` |
+| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period`, `usingFallback` |
 | `dropped` | Request rejected (queue full or timeout) | `model`, `reason` |
 | `completed` | Request finished successfully | `model`, `inputTokens`, `outputTokens`, `costUsd`, `latencyMs` |

package/dist/index.cjs — CHANGED

@@ -317,7 +317,7 @@ var CostTracker = class {
     ];
     for (const { limit, current, period } of checks) {
       if (limit !== void 0 && current + estimatedCostUsd > limit) {
-        if (onExceeded === "throw") {
+        if (onExceeded === "throw" || onExceeded === "fallback") {
           throw new BudgetExceededError(model, current, limit, period);
         }
         return false;
@@ -1320,7 +1320,7 @@ var Pipeline = class {
     const estimatedInput = estimateInputTokens(prompt);
     const startMs = Date.now();
     const key = `${provider}:${modelId}`;
-    if (this.config.cost?.budget) {
+    if (this.config.cost?.budget && !opts.skipBudgetCheck) {
       const estimatedCost = this.costTracker.estimateCost(
         estimatedInput,
         500,
@@ -1328,12 +1328,26 @@ var Pipeline = class {
         limits.inputPricePerMillion,
         limits.outputPricePerMillion
       );
-      this.costTracker.checkBudget(
-        modelId,
-        estimatedCost,
-        this.config.cost.budget,
-        this.config.cost.onExceeded ?? "throw"
-      );
+      try {
+        this.costTracker.checkBudget(
+          modelId,
+          estimatedCost,
+          this.config.cost.budget,
+          this.config.cost.onExceeded ?? "throw"
+        );
+      } catch (err) {
+        if (err instanceof BudgetExceededError) {
+          this.emitter.emit("budgetHit", {
+            model: err.model,
+            provider,
+            currentCostUsd: err.currentCostUsd,
+            limitUsd: err.limitUsd,
+            period: err.period,
+            usingFallback: false
+          });
+        }
+        throw err;
+      }
     }
     await this.engine.acquire(key, {
       limits,
@@ -1472,7 +1486,8 @@ function getPerRequestOptions(params, queueTimeout) {
   return {
     priority: raw?.priority ?? "normal",
     timeoutMs: raw?.timeout ?? queueTimeout,
-    metadata: raw?.metadata ?? {}
+    metadata: raw?.metadata ?? {},
+    skipBudgetCheck: raw?._skipBudgetCheck ?? false
   };
 }
 function extractTokenUsage(usage) {
@@ -1488,7 +1503,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapGenerate — non-streaming
     // -----------------------------------------------------------------------
     async wrapGenerate({ doGenerate, params, model }) {
-      const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -1501,6 +1516,7 @@ function createMiddleware(pipeline, queueTimeout) {
           streaming: false,
           priority,
           timeoutMs,
+          skipBudgetCheck,
           onUsage: () => {
           }
         }
@@ -1515,7 +1531,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapStream — streaming
     // -----------------------------------------------------------------------
     async wrapStream({ doStream, params, model }) {
-      const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -1528,6 +1544,7 @@ function createMiddleware(pipeline, queueTimeout) {
           streaming: true,
           priority,
           timeoutMs,
+          skipBudgetCheck,
           onUsage: () => {
           }
         }
@@ -1554,26 +1571,71 @@ function createMiddleware(pipeline, queueTimeout) {
 function wrapModel(model, middleware, overrides) {
   const providerId = overrides?.providerId ?? model.provider;
   const modelId = overrides?.modelId ?? model.modelId;
+  const fallbackModel = overrides?.fallback;
   return {
     specificationVersion: "v4",
     provider: providerId,
     modelId,
     supportedUrls: model["supportedUrls"],
     async doGenerate(params) {
-      return middleware.wrapGenerate({
-        doGenerate: () => model.doGenerate(params),
-        doStream: () => model.doStream(params),
-        params,
-        model
-      });
+      try {
+        return await middleware.wrapGenerate({
+          doGenerate: () => model.doGenerate(params),
+          doStream: () => model.doStream(params),
+          params,
+          model
+        });
+      } catch (err) {
+        if (err instanceof BudgetExceededError && fallbackModel) {
+          const fallbackParams = {
+            ...params,
+            providerOptions: {
+              ...params.providerOptions,
+              rateLimiter: {
+                ...params.providerOptions?.["rateLimiter"] ?? {},
+                _skipBudgetCheck: true
+              }
+            }
+          };
+          return middleware.wrapGenerate({
+            doGenerate: () => fallbackModel.doGenerate(fallbackParams),
+            doStream: () => fallbackModel.doStream(fallbackParams),
+            params: fallbackParams,
+            model: fallbackModel
+          });
+        }
+        throw err;
+      }
     },
     async doStream(params) {
-      return middleware.wrapStream({
-        doGenerate: () => model.doGenerate(params),
-        doStream: () => model.doStream(params),
-        params,
-        model
-      });
+      try {
+        return await middleware.wrapStream({
+          doGenerate: () => model.doGenerate(params),
+          doStream: () => model.doStream(params),
+          params,
+          model
+        });
+      } catch (err) {
+        if (err instanceof BudgetExceededError && fallbackModel) {
+          const fallbackParams = {
+            ...params,
+            providerOptions: {
+              ...params.providerOptions,
+              rateLimiter: {
+                ...params.providerOptions?.["rateLimiter"] ?? {},
+                _skipBudgetCheck: true
+              }
+            }
+          };
+          return middleware.wrapStream({
+            doGenerate: () => fallbackModel.doGenerate(fallbackParams),
+            doStream: () => fallbackModel.doStream(fallbackParams),
+            params: fallbackParams,
+            model: fallbackModel
+          });
+        }
+        throw err;
+      }
     }
   };
 }