npm - ai-sdk-rate-limiter - Versions diffs - 0.11.0 → 0.12.0 - Mend

ai-sdk-rate-limiter 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +72 -0
package/dist/index.cjs +84 -3
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +38 -3
package/dist/index.d.ts +38 -3
package/dist/index.js +84 -4
package/dist/index.js.map +1 -1
package/dist/middleware.d.cts +1 -1
package/dist/middleware.d.ts +1 -1
package/dist/otel.d.cts +1 -1
package/dist/otel.d.ts +1 -1
package/dist/prometheus.d.cts +1 -1
package/dist/prometheus.d.ts +1 -1
package/dist/redis.d.cts +1 -1
package/dist/redis.d.ts +1 -1
package/dist/statsd.d.cts +1 -1
package/dist/statsd.d.ts +1 -1
package/dist/testing.cjs +51 -3
package/dist/testing.cjs.map +1 -1
package/dist/testing.d.cts +1 -1
package/dist/testing.d.ts +1 -1
package/dist/testing.js +51 -3
package/dist/testing.js.map +1 -1
package/dist/{types-CMevWGWK.d.cts → types-DKIz0MLZ.d.cts} +38 -1
package/dist/{types-CMevWGWK.d.ts → types-DKIz0MLZ.d.ts} +38 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -557,6 +557,27 @@ Costs are based on **actual token counts** from API responses — not estimates.
 `byScope` is populated automatically when requests carry a `scope` (either set on `limiter.wrap()` or via `providerOptions.rateLimiter.scope`). Unscoped requests don't appear in `byScope`.
+### Cost forecasting
+`getCostForecast()` projects what each period (hour, day, month) would cost if spending continued at the current hourly rate. Useful for alerting before a budget cap is hit:
+```typescript
+const forecast = limiter.getCostForecast()
+console.log(forecast)
+// {
+//   hour:  { spentUsd: 1.20, projectedUsd: 1.20,  ratePerHourUsd: 1.20 },
+//   day:   { spentUsd: 3.50, projectedUsd: 28.80, ratePerHourUsd: 1.20 },
+//   month: { spentUsd: 8.10, projectedUsd: 864,   ratePerHourUsd: 1.20 },
+// }
+if (forecast.day.projectedUsd > 40) {
+  console.warn(`Heads up — on track to spend $${forecast.day.projectedUsd.toFixed(2)} today`)
+}
+```
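+`projectedUsd` = current hourly rate × hours in the period (24 for a day, 24 × 30 for a month). For `day` and `month` it is a run-rate figure computed independently of `spentUsd`. The rate is the total spend over the **last 60 minutes**, so it responds quickly to usage spikes.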
 ---
 ## Budget fallback routing
@@ -1456,6 +1477,57 @@ class MyStore implements RateLimitStore {
 const limiter = createRateLimiter({ store: new MyStore() })
 ```
+### Load balancing across API keys
+`createModelPool()` distributes requests round-robin across multiple model instances — useful when you have more than one API key:
+```typescript
+import { createRateLimiter, createModelPool } from 'ai-sdk-rate-limiter'
+import { createOpenAI } from '@ai-sdk/openai'
+// Two API keys, each with their own limiter tracking separate RPM limits
+const limiter1 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
+const limiter2 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
+const openai1 = createOpenAI({ apiKey: process.env.OPENAI_KEY_1 })
+const openai2 = createOpenAI({ apiKey: process.env.OPENAI_KEY_2 })
+const pool = createModelPool([
+  limiter1.wrap(openai1('gpt-4o')),
+  limiter2.wrap(openai2('gpt-4o')),
+])
+// Use exactly like a regular model — calls alternate between the two keys
+const { text } = await generateText({ model: pool, prompt: 'Hello!' })
+```
+Pass `{ strategy: 'random' }` as the second argument to `createModelPool()` for random selection instead of round-robin.
+### Request deduplication
+When multiple concurrent requests carry the same `dedupKey`, only one API call is made and all callers receive the same result. Useful for FAQ-style workloads where many users ask the same question simultaneously:
+```typescript
+const model = limiter.wrap(openai('gpt-4o'))
+// Server handler — two simultaneous identical requests share one API call
+async function handleRequest(questionId: string) {
+  const { text } = await generateText({
+    model,
+    prompt: questions[questionId],
+    providerOptions: {
+      rateLimiter: { dedupKey: `faq:${questionId}` },
+    },
+  })
+  return text
+}
+// If 50 users hit the same FAQ item at the same time → 1 API call, not 50
+const results = await Promise.all(users.map(() => handleRequest('faq-42')))
+```
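+The dedup entry is removed once the request completes (success or error), so subsequent requests always make a fresh call. Requests are matched on `dedupKey` alone — the prompt and model are not compared — so make sure each key identifies exactly one request.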
 ---
 ## How it works

package/dist/index.cjs CHANGED Viewed

@@ -668,6 +668,30 @@ var CostTracker = class {
   estimateCost(inputTokens, outputTokens, inputPricePerMillion, outputPricePerMillion) { // Prices a call from its token counts and per-million-token input/output prices.
     return inputTokens / 1e6 * inputPricePerMillion + outputTokens / 1e6 * outputPricePerMillion; // each token count is scaled to millions before its price is applied
   }
+  getForecast() { // Sums recorded cost over trailing hour/day/month windows and extrapolates the last hour's total to each period.
+    const now = Date.now();
+    this.evict(now); // evict() is defined outside this hunk; presumably prunes expired entries — confirm
+    const hourlyRate = this.entries.filter((e) => e.timestamp > now - HOUR_MS).reduce((s, e) => s + e.costUsd, 0); // total cost of entries newer than now - HOUR_MS; doubles as the per-hour rate
+    const daySpent = this.entries.filter((e) => e.timestamp > now - DAY_MS2).reduce((s, e) => s + e.costUsd, 0); // total cost of entries newer than now - DAY_MS2
+    const monthSpent = this.entries.filter((e) => e.timestamp > now - MONTH_MS).reduce((s, e) => s + e.costUsd, 0); // total cost of entries newer than now - MONTH_MS; NOTE(review): third full scan of entries, could be folded into one pass
+    return {
+      hour: {
+        spentUsd: hourlyRate,
+        projectedUsd: hourlyRate, // for the hour window the projection equals the amount spent
+        ratePerHourUsd: hourlyRate
+      },
+      day: {
+        spentUsd: daySpent,
+        projectedUsd: hourlyRate * 24, // full-day run rate; daySpent is not part of the formula
+        ratePerHourUsd: hourlyRate
+      },
+      month: {
+        spentUsd: monthSpent,
+        projectedUsd: hourlyRate * 24 * 30, // run rate over a 30-day month; monthSpent is not part of the formula
+        ratePerHourUsd: hourlyRate
+      }
+    };
+  }
   getReport() {
     const now = Date.now();
     this.evict(now);
@@ -1734,6 +1758,8 @@ var Pipeline = class {
     this.circuits = /* @__PURE__ */ new Map();
     /** Limits detected from provider response headers (lower priority than user config) */
     this.detectedLimits = /* @__PURE__ */ new Map();
+    /** In-flight promises indexed by dedup key, shared across identical concurrent requests */
+    this.dedupMap = /* @__PURE__ */ new Map();
     /** Set to true after shutdown() is called */
     this.shutdownRequested = false;
     this.config = config;
@@ -1780,6 +1806,19 @@ var Pipeline = class {
    */
   async execute(modelId, provider, prompt, fn, opts) {
     this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
+    if (opts.dedupKey !== void 0) {
+      const existing = this.dedupMap.get(opts.dedupKey);
+      if (existing !== void 0) {
+        this.log.log(modelId, "dedup hit", { dedupKey: opts.dedupKey });
+        return existing;
+      }
+      const { dedupKey, ...optsWithoutDedup } = opts;
+      const promise = this.execute(modelId, provider, prompt, fn, optsWithoutDedup).finally(() => {
+        this.dedupMap.delete(dedupKey);
+      });
+      this.dedupMap.set(dedupKey, promise);
+      return promise;
+    }
     if (this.shutdownRequested) {
       this.emitter.emit("dropped", {
         model: modelId,
@@ -1994,6 +2033,9 @@ var Pipeline = class {
   getCostReport() { // Returns the cost tracker's report unchanged.
     return this.costTracker.getReport();
   }
+  getCostForecast() { // Returns the cost tracker's forecast unchanged.
+    return this.costTracker.getForecast();
+  }
   getStatus() {
     const models = [];
     let totalQueueDepth = 0;
@@ -2122,7 +2164,8 @@ function getPerRequestOptions(params, queueTimeout) {
     metadata: raw?.metadata ?? {},
     skipBudgetCheck: raw?._skipBudgetCheck ?? false,
     scope: raw?.scope,
-    callTimeout: raw?.callTimeout
+    callTimeout: raw?.callTimeout,
+    dedupKey: raw?.dedupKey
   };
 }
 function extractTokenUsage(usage) {
@@ -2138,7 +2181,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapGenerate — non-streaming
     // -----------------------------------------------------------------------
     async wrapGenerate({ doGenerate, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -2154,6 +2197,7 @@ function createMiddleware(pipeline, queueTimeout) {
           skipBudgetCheck,
           ...scope !== void 0 && { scope },
           ...callTimeout !== void 0 && { callTimeout },
+          ...dedupKey !== void 0 && { dedupKey },
           ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
@@ -2167,7 +2211,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapStream — streaming
     // -----------------------------------------------------------------------
     async wrapStream({ doStream, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -2183,6 +2227,7 @@ function createMiddleware(pipeline, queueTimeout) {
           skipBudgetCheck,
           ...scope !== void 0 && { scope },
           ...callTimeout !== void 0 && { callTimeout },
+          ...dedupKey !== void 0 && { dedupKey },
           ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
@@ -2508,6 +2553,9 @@ function createRateLimiter(config = {}) {
     getCostReport() { // Limiter-level accessor; forwards to the pipeline's getCostReport().
       return pipeline.getCostReport();
     },
+    getCostForecast() { // Limiter-level accessor; forwards to the pipeline's getCostForecast().
+      return pipeline.getCostForecast();
+    },
     getStatus() {
       return pipeline.getStatus();
     },
@@ -2532,6 +2580,38 @@ function createRateLimiter(config = {}) {
   };
 }
+// src/adapters/model-pool.ts
+function createModelPool(models, options) { // Wraps several models behind one model-shaped object; each doGenerate/doStream call is routed to one member.
+  if (models.length === 0) {
+    throw new Error("createModelPool: at least one model is required"); // checked once at creation only
+  }
+  const strategy = options?.strategy ?? "round-robin"; // any value other than "random" falls through to round-robin
+  let index = 0; // next round-robin slot, shared by doGenerate and doStream
+  function pick() { // Chooses the member that serves the next call.
+    if (strategy === "random") {
+      return models[Math.floor(Math.random() * models.length)]; // uniform pick; does not touch the round-robin index
+    }
+    const model = models[index % models.length]; // reads the caller's array live; NOTE(review): if the array is emptied after creation this yields undefined — consider copying it up front
+    index = (index + 1) % models.length; // advance and wrap so the counter stays within the array bounds
+    return model;
+  }
+  const primary = models[0]; // identity (modelId/provider) always comes from the first model, whichever member serves the call
+  return { // NOTE(review): only modelId, provider, doGenerate and doStream are exposed — confirm the AI SDK needs no other model fields (e.g. specificationVersion)
+    get modelId() {
+      return primary.modelId;
+    },
+    get provider() {
+      return primary.provider;
+    },
+    doGenerate(params) {
+      return pick().doGenerate(params); // params are passed through untouched to the chosen member
+    },
+    doStream(params) {
+      return pick().doStream(params); // params are passed through untouched to the chosen member
+    }
+  };
+}
 exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
 exports.BudgetExceededError = BudgetExceededError;
 exports.COHERE_MODELS = COHERE_MODELS;
@@ -2546,6 +2626,7 @@ exports.RateLimitExceededError = RateLimitExceededError;
 exports.RateLimiterError = RateLimiterError;
 exports.RetryExhaustedError = RetryExhaustedError;
 exports.ShutdownError = ShutdownError;
+exports.createModelPool = createModelPool;
 exports.createRateLimiter = createRateLimiter;
 exports.isKnownModel = isKnownModel;
 exports.rateLimited = rateLimited;