npm - ai-sdk-rate-limiter - Versions diffs - 0.4.0 → 0.5.0 - Mend

ai-sdk-rate-limiter 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +47 -0
package/dist/index.cjs +206 -142
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +3 -320
package/dist/index.d.ts +3 -320
package/dist/index.js +206 -142
package/dist/index.js.map +1 -1
package/dist/redis.cjs +209 -0
package/dist/redis.cjs.map +1 -0
package/dist/redis.d.cts +54 -0
package/dist/redis.d.ts +54 -0
package/dist/redis.js +207 -0
package/dist/redis.js.map +1 -0
package/dist/types-CgePLtmQ.d.cts +385 -0
package/dist/types-CgePLtmQ.d.ts +385 -0
package/package.json +16 -2

package/README.md CHANGED Viewed

@@ -214,6 +214,53 @@ const result = await generateText({ model, prompt })
 ---
+## Multi-instance Redis store
+By default, rate-limit state is held in memory, per process. In multi-instance deployments — serverless functions, multiple pods, workers — each instance therefore keeps its own counters. To share state across all instances, install a Redis client and pass a `RedisStore` to the limiter:
+```
+npm install ioredis
+```
+```typescript
+import { createRateLimiter } from 'ai-sdk-rate-limiter'
+import { RedisStore } from 'ai-sdk-rate-limiter/redis'
+import Redis from 'ioredis'
+const limiter = createRateLimiter({
+  store: new RedisStore(new Redis(process.env.REDIS_URL)),
+  // ... rest of your config
+})
+```
+That's the entire change. All APIs — `wrap()`, `rawProxy()`, events, cost reports — work identically. The Redis store enforces rate limits collectively, so all instances combined cannot exceed the API limits.
+**How it works:**
+Each request atomically runs a Lua script that:
+1. Removes entries that have aged out of the window (60 seconds unless `windowMs` is changed) from a sorted set (`ZREMRANGEBYSCORE`)
+2. Counts remaining requests and sums input tokens
+3. Checks against RPM and ITPM limits
+4. If allowed: reserves the slot (`ZADD`) and returns immediately
+5. If blocked: returns the timestamp when the next slot opens
+The local queue (priority ordering, drain timer, timeout handling) stays in-memory per instance — only the window counters are shared.
+**Options:**
+```typescript
+new RedisStore(redis, {
+  keyPrefix: 'rl:myapp:',  // namespace if multiple apps share Redis
+  windowMs:  60_000,        // window size; match your provider's limit window
+})
+```
+**Compatible clients** — any Redis client with `eval()`, `get()`, and `set()` works: `ioredis`, `node-redis`, Upstash Redis.
+**Single-instance deployments:** the default `InMemoryStore` is more accurate (true sliding window, no network round-trips) and zero-config. Only switch to `RedisStore` when you actually need cross-instance coordination.
+---
 ## Raw SDK proxy
 If you're using the OpenAI, Anthropic, Groq, Mistral, or Cohere SDK directly — without the Vercel AI SDK — use `limiter.rawProxy()` to add rate limiting as a transparent drop-in:

package/dist/index.cjs CHANGED Viewed

@@ -69,6 +69,130 @@ Caused by: ${cause.stack}`;
   }
 };
+// src/store/in-memory-store.ts
+var WINDOW_MS = 6e4;
+var InMemoryStore = class {
+  constructor() {
+    this.windows = /* @__PURE__ */ new Map();
+    this.backoffs = /* @__PURE__ */ new Map();
+  }
+  // -------------------------------------------------------------------------
+  // RateLimitStore implementation
+  // -------------------------------------------------------------------------
+  async checkAndRecord(key, estimatedInputTokens, limits) {
+    const now = Date.now();
+    const backoffUntil = this.backoffs.get(key) ?? 0;
+    if (now < backoffUntil) return backoffUntil;
+    const window = this.getOrCreate(key);
+    this.evict(window, now);
+    if (window.length >= limits.rpm) {
+      return (window[0]?.timestamp ?? now) + WINDOW_MS + 1;
+    }
+    if (limits.itpm !== void 0) {
+      const usedInput = sumInput(window);
+      if (usedInput + estimatedInputTokens > limits.itpm) {
+        return this.itpmNextSlot(window, limits.itpm, estimatedInputTokens, now);
+      }
+    }
+    window.push({ timestamp: now, inputTokens: estimatedInputTokens, outputTokens: 0 });
+    return 0;
+  }
+  async reconcile(key, actualInputTokens, actualOutputTokens) {
+    const window = this.windows.get(key);
+    if (!window) return;
+    for (let i = window.length - 1; i >= 0; i--) {
+      const entry = window[i];
+      if (entry.outputTokens === 0 && entry.inputTokens > 0) {
+        entry.inputTokens = actualInputTokens;
+        entry.outputTokens = actualOutputTokens;
+        return;
+      }
+    }
+  }
+  async setBackoff(key, untilMs) {
+    const current = this.backoffs.get(key) ?? 0;
+    if (untilMs > current) this.backoffs.set(key, untilMs);
+  }
+  async getBackoff(key) {
+    return this.backoffs.get(key) ?? 0;
+  }
+  async nextSlotMs(key, limits, estimatedInputTokens = 0) {
+    const now = Date.now();
+    const backoffUntil = this.backoffs.get(key) ?? 0;
+    if (now < backoffUntil) return backoffUntil;
+    const window = this.windows.get(key) ?? [];
+    this.evict(window, now);
+    if (window.length < limits.rpm) {
+      if (limits.itpm === void 0 || sumInput(window) + estimatedInputTokens <= limits.itpm) {
+        return 0;
+      }
+    }
+    let nextSlot = now;
+    if (window.length >= limits.rpm && window[0]) {
+      nextSlot = Math.max(nextSlot, window[0].timestamp + WINDOW_MS + 1);
+    }
+    if (limits.itpm !== void 0) {
+      let usedInput = sumInput(window);
+      if (usedInput + estimatedInputTokens > limits.itpm) {
+        for (const entry of window) {
+          usedInput -= entry.inputTokens;
+          if (usedInput + estimatedInputTokens <= limits.itpm) {
+            nextSlot = Math.max(nextSlot, entry.timestamp + WINDOW_MS + 1);
+            break;
+          }
+        }
+      }
+    }
+    return nextSlot;
+  }
+  // -------------------------------------------------------------------------
+  // Snapshot helpers (used by engine for status reporting)
+  // -------------------------------------------------------------------------
+  snapshot(key) {
+    const window = this.windows.get(key) ?? [];
+    this.evict(window, Date.now());
+    return {
+      requests: window.length,
+      inputTokens: sumInput(window),
+      outputTokens: window.reduce((s, e) => s + e.outputTokens, 0)
+    };
+  }
+  currentBackoff(key) {
+    const until = this.backoffs.get(key) ?? 0;
+    return Date.now() < until ? until : null;
+  }
+  // -------------------------------------------------------------------------
+  // Private helpers
+  // -------------------------------------------------------------------------
+  getOrCreate(key) {
+    let w = this.windows.get(key);
+    if (!w) {
+      w = [];
+      this.windows.set(key, w);
+    }
+    return w;
+  }
+  evict(window, now) {
+    const cutoff = now - WINDOW_MS;
+    let i = 0;
+    while (i < window.length && (window[i]?.timestamp ?? 0) <= cutoff) i++;
+    if (i > 0) window.splice(0, i);
+  }
+  itpmNextSlot(window, itpmLimit, estimatedInputTokens, now) {
+    let usedInput = sumInput(window);
+    for (const entry of window) {
+      usedInput -= entry.inputTokens;
+      if (usedInput + estimatedInputTokens <= itpmLimit) {
+        return entry.timestamp + WINDOW_MS + 1;
+      }
+    }
+    return now + WINDOW_MS + 1;
+  }
+};
+function sumInput(window) {
+  return window.reduce((s, e) => s + e.inputTokens, 0);
+}
 // src/core/rate-limit-engine.ts
 var PRIORITY_RANK = {
   high: 0,
@@ -90,11 +214,14 @@ function insertWaiter(waiters, waiter) {
   }
   waiters.splice(lo, 0, waiter);
 }
-var WINDOW_MS = 6e4;
 var RateLimitEngine = class {
-  constructor({ maxQueueSize = 500 } = {}) {
-    this.states = /* @__PURE__ */ new Map();
+  constructor({
+    maxQueueSize = 500,
+    store
+  } = {}) {
+    this.localStates = /* @__PURE__ */ new Map();
     this.maxQueueSize = maxQueueSize;
+    this.store = store ?? new InMemoryStore();
   }
   // -------------------------------------------------------------------------
   // Public API
@@ -102,30 +229,30 @@ var RateLimitEngine = class {
   /**
    * Acquire a slot for the given model.
    *
-   * - If capacity is available: records the request in the sliding window and
-   *   resolves immediately.
-   * - If at capacity: enqueues the request (sorted by priority) and resolves
-   *   when a slot opens.
-   * - If the queue is full: throws QueueFullError immediately.
-   * - If the request waits longer than `timeoutMs`: throws QueueTimeoutError.
+   * - If capacity is available: records the request in the window and resolves.
+   * - If at capacity: enqueues (sorted by priority) and resolves when a slot opens.
+   * - If queue is full: throws QueueFullError immediately.
+   * - If waiting exceeds timeoutMs: throws QueueTimeoutError.
    */
   async acquire(key, opts) {
-    const state = this.getOrCreate(key);
-    if (this.canProceed(state, opts.limits, opts.estimatedInputTokens)) {
-      this.record(state, opts.estimatedInputTokens, 0);
-      return;
-    }
-    if (state.waiters.length >= this.maxQueueSize) {
+    const local = this.getOrCreate(key);
+    const nextSlotAtMs = await this.store.checkAndRecord(
+      key,
+      opts.estimatedInputTokens,
+      opts.limits
+    );
+    if (nextSlotAtMs <= Date.now()) return;
+    if (local.waiters.length >= this.maxQueueSize) {
       throw new QueueFullError(key, this.maxQueueSize);
     }
-    const estimatedWaitMs = this.estimatedWaitMs(key, opts.limits, opts.estimatedInputTokens);
-    opts.onQueued?.(state.waiters.length, estimatedWaitMs);
+    const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
+    opts.onQueued?.(local.waiters.length, estimatedWaitMs);
     return new Promise((resolve, reject) => {
       const enqueuedAt = Date.now();
       const timeoutHandle = setTimeout(() => {
-        const idx = state.waiters.indexOf(waiter);
-        if (idx !== -1) state.waiters.splice(idx, 1);
-        reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, state.waiters.length));
+        const idx = local.waiters.indexOf(waiter);
+        if (idx !== -1) local.waiters.splice(idx, 1);
+        reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
       }, opts.timeoutMs);
       const waiter = {
         resolve: () => {
@@ -138,157 +265,93 @@ var RateLimitEngine = class {
         estimatedInputTokens: opts.estimatedInputTokens,
         timeoutHandle
       };
-      insertWaiter(state.waiters, waiter);
-      this.scheduleDrain(key, opts.limits);
+      insertWaiter(local.waiters, waiter);
+      this.scheduleDrain(key, opts.limits, nextSlotAtMs);
     });
   }
   /**
    * Record actual token usage after a request completes.
-   * Replaces the estimated token count with the real values.
+   * Best-effort reconciliation with the estimate recorded during acquire().
    */
   recordActualUsage(key, inputTokens, outputTokens) {
-    const state = this.states.get(key);
-    if (!state) return;
-    for (let i = state.window.length - 1; i >= 0; i--) {
-      const entry = state.window[i];
-      if (entry.outputTokens === 0 && entry.inputTokens > 0) {
-        entry.inputTokens = inputTokens;
-        entry.outputTokens = outputTokens;
-        break;
-      }
-    }
+    void this.store.reconcile(key, inputTokens, outputTokens);
   }
   /**
-   * Apply a backoff delay to a model key.
-   * While a backoff is active, no new requests will be allowed through — they
-   * will queue and wait until backoffUntil, then drain in priority order.
-   *
-   * Called when a remote 429 comes back with a Retry-After header.
+   * Apply a backoff delay from a Retry-After header.
+   * Propagated to the store so all instances respect it (Redis) or
+   * queued requests on this instance wait (in-memory).
    */
   applyBackoff(key, delayMs) {
-    const state = this.getOrCreate(key);
-    const newUntil = Date.now() + delayMs;
-    if (newUntil > state.backoffUntil) {
-      state.backoffUntil = newUntil;
-    }
+    void this.store.setBackoff(key, Date.now() + delayMs);
   }
   /**
-   * Estimated time in ms before the next slot opens for this model/priority.
-   * Returns 0 if a slot is available right now.
+   * Estimated wait time in ms before the next slot opens.
+   * Returns 0 if immediately available. With RedisStore this is async
+   * so we return a Promise; callers that need the value should await it.
    */
-  estimatedWaitMs(key, limits, estimatedTokens = 0) {
-    const state = this.states.get(key);
-    if (!state) return 0;
-    if (this.canProceed(state, limits, estimatedTokens)) return 0;
-    return this.nextSlotAt(state, limits, estimatedTokens) - Date.now();
+  async estimatedWaitMs(key, limits, estimatedTokens = 0) {
+    if (!this.store.nextSlotMs) return 0;
+    const nextSlot = await this.store.nextSlotMs(key, limits, estimatedTokens);
+    return Math.max(0, nextSlot - Date.now());
   }
   /** Current queue depth for a model */
   queueDepth(key) {
-    return this.states.get(key)?.waiters.length ?? 0;
+    return this.localStates.get(key)?.waiters.length ?? 0;
   }
-  /** Snapshot of the current window state for a model */
+  /** Snapshot of the current window (delegates to store where supported) */
   windowSnapshot(key) {
-    const state = this.states.get(key);
-    if (!state) return { requests: 0, inputTokens: 0, outputTokens: 0 };
-    this.evict(state);
-    return {
-      requests: state.window.length,
-      inputTokens: state.window.reduce((s, e) => s + e.inputTokens, 0),
-      outputTokens: state.window.reduce((s, e) => s + e.outputTokens, 0)
-    };
+    if (this.store instanceof InMemoryStore) {
+      return this.store.snapshot(key);
+    }
+    return { requests: 0, inputTokens: 0, outputTokens: 0 };
   }
   backoffUntil(key) {
-    const state = this.states.get(key);
-    if (!state || Date.now() >= state.backoffUntil) return null;
-    return state.backoffUntil;
+    if (this.store instanceof InMemoryStore) {
+      return this.store.currentBackoff(key);
+    }
+    return null;
   }
   // -------------------------------------------------------------------------
-  // Internal helpers
+  // Private helpers
   // -------------------------------------------------------------------------
   getOrCreate(key) {
-    let state = this.states.get(key);
+    let state = this.localStates.get(key);
     if (!state) {
-      state = { window: [], waiters: [], backoffUntil: 0, drainScheduled: false };
-      this.states.set(key, state);
+      state = { waiters: [], drainScheduled: false };
+      this.localStates.set(key, state);
     }
     return state;
   }
-  evict(state) {
-    const cutoff = Date.now() - WINDOW_MS;
-    let i = 0;
-    while (i < state.window.length && (state.window[i]?.timestamp ?? 0) <= cutoff) i++;
-    if (i > 0) state.window.splice(0, i);
-  }
-  canProceed(state, limits, estimatedInputTokens) {
-    const now = Date.now();
-    if (now < state.backoffUntil) return false;
-    this.evict(state);
-    if (state.window.length >= limits.rpm) return false;
-    if (limits.itpm !== void 0) {
-      const usedInput = state.window.reduce((s, e) => s + e.inputTokens, 0);
-      if (usedInput + estimatedInputTokens > limits.itpm) return false;
-    }
-    return true;
-  }
-  record(state, inputTokens, outputTokens) {
-    state.window.push({ timestamp: Date.now(), inputTokens, outputTokens });
-  }
-  /**
-   * Returns the timestamp (ms) at which the next slot will open.
-   */
-  nextSlotAt(state, limits, estimatedInputTokens) {
-    const now = Date.now();
-    if (now < state.backoffUntil) return state.backoffUntil;
-    this.evict(state);
-    let nextSlot = now;
-    if (state.window.length >= limits.rpm && state.window[0]) {
-      nextSlot = Math.max(nextSlot, state.window[0].timestamp + WINDOW_MS + 1);
-    }
-    if (limits.itpm !== void 0) {
-      let usedInput = state.window.reduce((s, e) => s + e.inputTokens, 0);
-      if (usedInput + estimatedInputTokens > limits.itpm) {
-        for (const entry of state.window) {
-          usedInput -= entry.inputTokens;
-          if (usedInput + estimatedInputTokens <= limits.itpm) {
-            nextSlot = Math.max(nextSlot, entry.timestamp + WINDOW_MS + 1);
-            break;
-          }
-        }
-      }
-    }
-    return nextSlot;
-  }
-  /**
-   * Schedule a drain of the waiters queue for the given model.
-   * Only one drain timer is active at a time per model.
-   */
-  scheduleDrain(key, limits) {
-    const state = this.states.get(key);
-    if (!state || state.drainScheduled) return;
-    state.drainScheduled = true;
-    const delay = Math.max(0, this.nextSlotAt(state, limits, 0) - Date.now());
+  scheduleDrain(key, limits, nextSlotAtMs) {
+    const local = this.localStates.get(key);
+    if (!local || local.drainScheduled) return;
+    local.drainScheduled = true;
+    const delay = Math.max(0, nextSlotAtMs - Date.now());
     setTimeout(() => {
-      state.drainScheduled = false;
-      this.drain(key, limits);
+      local.drainScheduled = false;
+      void this.drain(key, limits);
     }, delay);
   }
-  /**
-   * Process as many waiters as possible. Reschedule if there are still waiters
-   * but no capacity yet.
-   */
-  drain(key, limits) {
-    const state = this.states.get(key);
-    if (!state || state.waiters.length === 0) return;
-    while (state.waiters.length > 0) {
-      const next = state.waiters[0];
-      if (!this.canProceed(state, limits, next.estimatedInputTokens)) break;
-      state.waiters.shift();
-      clearTimeout(next.timeoutHandle);
-      this.record(state, next.estimatedInputTokens, 0);
-      next.resolve();
-    }
-    if (state.waiters.length > 0) {
-      this.scheduleDrain(key, limits);
+  async drain(key, limits) {
+    const local = this.localStates.get(key);
+    if (!local) return;
+    while (local.waiters.length > 0) {
+      const waiter = local.waiters[0];
+      const nextSlotAtMs = await this.store.checkAndRecord(
+        key,
+        waiter.estimatedInputTokens,
+        limits
+      );
+      if (nextSlotAtMs > Date.now()) {
+        this.scheduleDrain(key, limits, nextSlotAtMs);
+        return;
+      }
+      if (local.waiters[0] !== waiter) {
+        continue;
+      }
+      local.waiters.shift();
+      clearTimeout(waiter.timeoutHandle);
+      waiter.resolve();
     }
   }
 };
@@ -1292,7 +1355,8 @@ var Pipeline = class {
   constructor(config) {
     this.config = config;
     this.engine = new RateLimitEngine({
-      maxQueueSize: config.queue?.maxSize ?? 500
+      maxQueueSize: config.queue?.maxSize ?? 500,
+      ...config.store !== void 0 && { store: config.store }
     });
     this.costTracker = new CostTracker();
     this.emitter = new Emitter();
@@ -1461,7 +1525,7 @@ var Pipeline = class {
     const models = [];
     return { models, totalQueueDepth: 0 };
   }
-  estimatedWait(modelId, provider, priority = "normal") {
+  async estimatedWait(modelId, provider, priority = "normal") {
     const key = `${provider}:${modelId}`;
     const limits = this.resolveModelLimits(modelId, provider);
     return this.engine.estimatedWaitMs(key, limits);