npm - ai-sdk-rate-limiter - Versions diffs - 0.6.0 → 0.7.0 - Mend

ai-sdk-rate-limiter 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/index.cjs +208 -87
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +208 -87
package/dist/index.js.map +1 -1
package/dist/otel.d.cts +1 -1
package/dist/otel.d.ts +1 -1
package/dist/redis.d.cts +1 -1
package/dist/redis.d.ts +1 -1
package/dist/testing.cjs +2007 -0
package/dist/testing.cjs.map +1 -0
package/dist/testing.d.cts +59 -0
package/dist/testing.d.ts +59 -0
package/dist/testing.js +2005 -0
package/dist/testing.js.map +1 -0
package/dist/{types-CgePLtmQ.d.cts → types-D7qskXNw.d.cts} +54 -1
package/dist/{types-CgePLtmQ.d.ts → types-D7qskXNw.d.ts} +54 -1
package/package.json +12 -2

package/dist/index.cjs CHANGED

@@ -194,6 +194,9 @@ function sumInput(window) {
 }
 // src/core/rate-limit-engine.ts
+function makeAbortError() {
+  return Object.assign(new Error("The operation was aborted"), { name: "AbortError" });
+}
 var PRIORITY_RANK = {
   high: 0,
   normal: 1,
@@ -233,41 +236,84 @@ var RateLimitEngine = class {
    * - If at capacity: enqueues (sorted by priority) and resolves when a slot opens.
    * - If queue is full: throws QueueFullError immediately.
    * - If waiting exceeds timeoutMs: throws QueueTimeoutError.
+   * - If signal is aborted while queued: throws an AbortError.
    */
   async acquire(key, opts) {
+    if (opts.signal?.aborted) throw makeAbortError();
     const local = this.getOrCreate(key);
     const nextSlotAtMs = await this.store.checkAndRecord(
       key,
       opts.estimatedInputTokens,
       opts.limits
     );
-    if (nextSlotAtMs <= Date.now()) return;
-    if (local.waiters.length >= this.maxQueueSize) {
-      throw new QueueFullError(key, this.maxQueueSize);
+    if (nextSlotAtMs > Date.now()) {
+      if (local.waiters.length >= this.maxQueueSize) {
+        throw new QueueFullError(key, this.maxQueueSize);
+      }
+      const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
+      opts.onQueued?.(local.waiters.length, estimatedWaitMs);
+      await new Promise((resolve, reject) => {
+        const enqueuedAt = Date.now();
+        const timeoutHandle = setTimeout(() => {
+          const idx = local.waiters.indexOf(waiter);
+          if (idx !== -1) local.waiters.splice(idx, 1);
+          cleanup();
+          reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
+        }, opts.timeoutMs);
+        const onAbort = () => {
+          const idx = local.waiters.indexOf(waiter);
+          if (idx !== -1) local.waiters.splice(idx, 1);
+          clearTimeout(timeoutHandle);
+          cleanup();
+          reject(makeAbortError());
+        };
+        const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
+        opts.signal?.addEventListener("abort", onAbort, { once: true });
+        const waiter = {
+          resolve: () => {
+            clearTimeout(timeoutHandle);
+            cleanup();
+            opts.onDequeued?.(Date.now() - enqueuedAt);
+            resolve();
+          },
+          reject: (err) => {
+            clearTimeout(timeoutHandle);
+            cleanup();
+            reject(err);
+          },
+          priority: opts.priority,
+          enqueued: enqueuedAt,
+          estimatedInputTokens: opts.estimatedInputTokens,
+          timeoutHandle
+        };
+        insertWaiter(local.waiters, waiter);
+        this.scheduleDrain(key, opts.limits, nextSlotAtMs);
+      });
     }
-    const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
-    opts.onQueued?.(local.waiters.length, estimatedWaitMs);
-    return new Promise((resolve, reject) => {
-      const enqueuedAt = Date.now();
-      const timeoutHandle = setTimeout(() => {
-        const idx = local.waiters.indexOf(waiter);
-        if (idx !== -1) local.waiters.splice(idx, 1);
-        reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
-      }, opts.timeoutMs);
-      const waiter = {
-        resolve: () => {
-          opts.onDequeued?.(Date.now() - enqueuedAt);
+    const maxConcurrent = opts.limits.maxConcurrent;
+    if (maxConcurrent !== void 0 && local.activeCount >= maxConcurrent) {
+      if (opts.signal?.aborted) throw makeAbortError();
+      await new Promise((resolve, reject) => {
+        const onAbort = () => {
+          const idx = local.concurrencyWaiters.findIndex((w) => w.resolve === resolveWrapped);
+          if (idx !== -1) local.concurrencyWaiters.splice(idx, 1);
+          cleanup();
+          reject(makeAbortError());
+        };
+        const resolveWrapped = () => {
+          cleanup();
           resolve();
-        },
-        reject,
-        priority: opts.priority,
-        enqueued: enqueuedAt,
-        estimatedInputTokens: opts.estimatedInputTokens,
-        timeoutHandle
-      };
-      insertWaiter(local.waiters, waiter);
-      this.scheduleDrain(key, opts.limits, nextSlotAtMs);
-    });
+        };
+        const rejectWrapped = (e) => {
+          cleanup();
+          reject(e);
+        };
+        const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
+        opts.signal?.addEventListener("abort", onAbort, { once: true });
+        local.concurrencyWaiters.push({ resolve: resolveWrapped, reject: rejectWrapped });
+      });
+    }
+    if (maxConcurrent !== void 0) local.activeCount++;
   }
   /**
    * Record actual token usage after a request completes.
@@ -311,13 +357,31 @@ var RateLimitEngine = class {
     }
     return null;
   }
+  /** All model keys that have been seen by this engine instance. */
+  knownKeys() {
+    return Array.from(this.localStates.keys());
+  }
+  /**
+   * Signal that a request has completed, decrementing the concurrency counter
+   * and unblocking the next concurrency waiter if one is queued.
+   *
+   * Must be called after every acquire() that succeeded (even on error).
+   * Only has an effect when maxConcurrent is configured for the model.
+   */
+  release(key) {
+    const local = this.localStates.get(key);
+    if (!local || local.activeCount === 0) return;
+    local.activeCount--;
+    const next = local.concurrencyWaiters.shift();
+    if (next) next.resolve();
+  }
   // -------------------------------------------------------------------------
   // Private helpers
   // -------------------------------------------------------------------------
   getOrCreate(key) {
     let state = this.localStates.get(key);
     if (!state) {
-      state = { waiters: [], drainScheduled: false };
+      state = { waiters: [], drainScheduled: false, activeCount: 0, concurrencyWaiters: [] };
       this.localStates.set(key, state);
     }
     return state;
@@ -1339,6 +1403,24 @@ function isKnownModel(modelId, provider) {
 }
 // src/core/pipeline.ts
+function matchScope(pattern, scope) {
+  if (pattern === scope) return true;
+  if (pattern.includes("*")) {
+    const regex = new RegExp(
+      "^" + pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*") + "$"
+    );
+    return regex.test(scope);
+  }
+  return false;
+}
+function mergeScopeLimits(base, scope) {
+  return {
+    ...base,
+    ...scope.rpm !== void 0 && { rpm: scope.rpm },
+    ...scope.itpm !== void 0 && { itpm: scope.itpm },
+    ...scope.maxConcurrent !== void 0 && { maxConcurrent: scope.maxConcurrent }
+  };
+}
 function resolveRetryConfig(config) {
   const r = config.retry ?? {};
   return {
@@ -1375,15 +1457,32 @@ var Pipeline = class {
   // -------------------------------------------------------------------------
   // execute — called by both generate and stream adapters
   // -------------------------------------------------------------------------
+  // -------------------------------------------------------------------------
+  // Scope resolution helpers
+  // -------------------------------------------------------------------------
+  resolveScopedLimits(modelId, provider, scope) {
+    const base = this.resolveModelLimits(modelId, provider);
+    if (!this.config.scopes) return base;
+    for (const [pattern, scopeConfig] of Object.entries(this.config.scopes)) {
+      if (matchScope(pattern, scope)) {
+        return mergeScopeLimits(base, scopeConfig);
+      }
+    }
+    return base;
+  }
   /**
    * Execute an AI request through the full pipeline:
-   *   budget check → acquire slot → retry wrapper → usage recording
+   *   budget check → acquire slot → retry wrapper
+   *
+   * Usage recording (completed event) is NOT emitted here. Callers must call
+   * recordUsage() once they have actual token counts from the API response.
    */
   async execute(modelId, provider, prompt, fn, opts) {
-    const limits = this.resolveModelLimits(modelId, provider);
+    const scope = opts.scope;
+    const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
     const estimatedInput = estimateInputTokens(prompt);
-    const startMs = Date.now();
-    const key = `${provider}:${modelId}`;
+    const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
+    let slotAcquired = false;
     if (this.config.cost?.budget && !opts.skipBudgetCheck) {
       const estimatedCost = this.costTracker.estimateCost(
         estimatedInput,
@@ -1418,6 +1517,7 @@ var Pipeline = class {
       estimatedInputTokens: estimatedInput,
       priority: opts.priority,
       timeoutMs: opts.timeoutMs,
+      ...opts.signal !== void 0 && { signal: opts.signal },
       onQueued: (queueDepth, estimatedWaitMs) => {
         this.emitter.emit("queued", {
           model: modelId,
@@ -1443,9 +1543,9 @@ var Pipeline = class {
         });
       }
     });
-    let result;
+    slotAcquired = true;
     try {
-      result = await withRetry(fn, this.retryConfig, {
+      const result = await withRetry(fn, this.retryConfig, {
         modelId,
         onRetry: ({ attempt, maxAttempts, delayMs, error }) => {
           this.emitter.emit("retrying", {
@@ -1468,6 +1568,7 @@ var Pipeline = class {
           });
         }
       });
+      return result;
     } catch (error) {
       this.emitter.emit("dropped", {
         model: modelId,
@@ -1475,29 +1576,18 @@ var Pipeline = class {
         reason: "queue-timeout"
       });
       throw error;
+    } finally {
+      if (slotAcquired) this.engine.release(key);
     }
-    opts.onUsage({
-      inputTokens: estimatedInput,
-      outputTokens: 0
-    });
-    this.emitter.emit("completed", {
-      model: modelId,
-      provider,
-      inputTokens: estimatedInput,
-      outputTokens: 0,
-      costUsd: 0,
-      latencyMs: Date.now() - startMs,
-      streaming: opts.streaming
-    });
-    return result;
   }
   /**
    * Record actual usage after a request resolves.
-   * Called with real token counts from the API response.
+   * Called with real token counts from the API response. Emits the single
+   * authoritative `completed` event for this request.
    */
-  recordUsage(modelId, provider, usage, latencyMs, streaming) {
-    const key = `${provider}:${modelId}`;
-    const limits = this.resolveModelLimits(modelId, provider);
+  recordUsage(modelId, provider, scope, usage, latencyMs, streaming) {
+    const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
+    const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
     this.engine.recordActualUsage(key, usage.inputTokens, usage.outputTokens);
     const costUsd = this.costTracker.record(
       modelId,
@@ -1523,11 +1613,32 @@ var Pipeline = class {
   }
   getStatus() {
     const models = [];
-    return { models, totalQueueDepth: 0 };
+    let totalQueueDepth = 0;
+    for (const key of this.engine.knownKeys()) {
+      const colonIdx = key.indexOf(":");
+      const provider = colonIdx !== -1 ? key.slice(0, colonIdx) : key;
+      const modelId = colonIdx !== -1 ? key.slice(colonIdx + 1) : key;
+      const snapshot = this.engine.windowSnapshot(key);
+      const queueDepth = this.engine.queueDepth(key);
+      const backoffUntil = this.engine.backoffUntil(key);
+      totalQueueDepth += queueDepth;
+      models.push({
+        modelId,
+        provider,
+        requestsInWindow: snapshot.requests,
+        inputTokensInWindow: snapshot.inputTokens,
+        outputTokensInWindow: snapshot.outputTokens,
+        queueDepth,
+        estimatedWaitMs: 0,
+        // async — use limiter.estimatedWait() for an accurate value
+        backoffUntil
+      });
+    }
+    return { models, totalQueueDepth };
   }
-  async estimatedWait(modelId, provider, priority = "normal") {
-    const key = `${provider}:${modelId}`;
-    const limits = this.resolveModelLimits(modelId, provider);
+  async estimatedWait(modelId, provider, priority = "normal", scope) {
+    const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
+    const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
     return this.engine.estimatedWaitMs(key, limits);
   }
   on(event, handler) {
@@ -1551,7 +1662,8 @@ function getPerRequestOptions(params, queueTimeout) {
     priority: raw?.priority ?? "normal",
     timeoutMs: raw?.timeout ?? queueTimeout,
     metadata: raw?.metadata ?? {},
-    skipBudgetCheck: raw?._skipBudgetCheck ?? false
+    skipBudgetCheck: raw?._skipBudgetCheck ?? false,
+    scope: raw?.scope
   };
 }
 function extractTokenUsage(usage) {
@@ -1567,7 +1679,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapGenerate — non-streaming
     // -----------------------------------------------------------------------
     async wrapGenerate({ doGenerate, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -1581,21 +1693,19 @@ function createMiddleware(pipeline, queueTimeout) {
           priority,
           timeoutMs,
           skipBudgetCheck,
-          onUsage: () => {
-          }
+          ...scope !== void 0 && { scope },
+          ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
-      if (result.usage) {
-        const usage = extractTokenUsage(result.usage);
-        pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
-      }
+      const usage = result.usage ? extractTokenUsage(result.usage) : { inputTokens: 0, outputTokens: 0 };
+      pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, false);
       return result;
     },
     // -----------------------------------------------------------------------
     // wrapStream — streaming
     // -----------------------------------------------------------------------
     async wrapStream({ doStream, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -1609,18 +1719,16 @@ function createMiddleware(pipeline, queueTimeout) {
           priority,
           timeoutMs,
           skipBudgetCheck,
-          onUsage: () => {
-          }
+          ...scope !== void 0 && { scope },
+          ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
       const { stream, ...rest } = streamResult;
       const transformStream = new TransformStream({
         transform(chunk, controller) {
-          if (chunk.type === "finish" && chunk.usage) {
-            const usage = extractTokenUsage(
-              chunk.usage
-            );
-            pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, true);
+          if (chunk.type === "finish") {
+            const usage = chunk.usage ? extractTokenUsage(chunk.usage) : { inputTokens: 0, outputTokens: 0 };
+            pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, true);
           }
           controller.enqueue(chunk);
         }
@@ -1636,27 +1744,41 @@ function wrapModel(model, middleware, overrides) {
   const providerId = overrides?.providerId ?? model.provider;
   const modelId = overrides?.modelId ?? model.modelId;
   const fallbackModel = overrides?.fallback;
+  const staticScope = overrides?.scope;
+  function injectScope(params) {
+    if (!staticScope) return params;
+    const existingRl = params.providerOptions?.["rateLimiter"] ?? {};
+    if (existingRl["scope"]) return params;
+    return {
+      ...params,
+      providerOptions: {
+        ...params.providerOptions,
+        rateLimiter: { ...existingRl, scope: staticScope }
+      }
+    };
+  }
   return {
     specificationVersion: "v4",
     provider: providerId,
     modelId,
     supportedUrls: model["supportedUrls"],
     async doGenerate(params) {
+      const enrichedParams = injectScope(params);
       try {
         return await middleware.wrapGenerate({
-          doGenerate: () => model.doGenerate(params),
-          doStream: () => model.doStream(params),
-          params,
+          doGenerate: () => model.doGenerate(enrichedParams),
+          doStream: () => model.doStream(enrichedParams),
+          params: enrichedParams,
           model
         });
       } catch (err) {
         if (err instanceof BudgetExceededError && fallbackModel) {
           const fallbackParams = {
-            ...params,
+            ...enrichedParams,
             providerOptions: {
-              ...params.providerOptions,
+              ...enrichedParams.providerOptions,
               rateLimiter: {
-                ...params.providerOptions?.["rateLimiter"] ?? {},
+                ...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
                 _skipBudgetCheck: true
               }
             }
@@ -1672,21 +1794,22 @@ function wrapModel(model, middleware, overrides) {
       }
     },
     async doStream(params) {
+      const enrichedParams = injectScope(params);
       try {
         return await middleware.wrapStream({
-          doGenerate: () => model.doGenerate(params),
-          doStream: () => model.doStream(params),
-          params,
+          doGenerate: () => model.doGenerate(enrichedParams),
+          doStream: () => model.doStream(enrichedParams),
+          params: enrichedParams,
           model
         });
       } catch (err) {
         if (err instanceof BudgetExceededError && fallbackModel) {
           const fallbackParams = {
-            ...params,
+            ...enrichedParams,
             providerOptions: {
-              ...params.providerOptions,
+              ...enrichedParams.providerOptions,
               rateLimiter: {
-                ...params.providerOptions?.["rateLimiter"] ?? {},
+                ...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
                 _skipBudgetCheck: true
               }
             }
@@ -1749,7 +1872,7 @@ function wrapAsyncIterableStream(stream, pipeline, modelId, provider, startMs) {
         }
       }
       if (result.done) {
-        pipeline.recordUsage(modelId, provider, { inputTokens, outputTokens }, Date.now() - startMs, true);
+        pipeline.recordUsage(modelId, provider, void 0, { inputTokens, outputTokens }, Date.now() - startMs, true);
       }
       return result;
     },
@@ -1810,16 +1933,14 @@ async function executeViaProxy(fn, args, modelId, provider, params, pipeline, qu
     {
       streaming: isStreaming,
       priority,
-      timeoutMs: queueTimeout,
-      onUsage: () => {
-      }
+      timeoutMs: queueTimeout
     }
   );
   if (isStreaming && result !== null && typeof result === "object" && Symbol.asyncIterator in result) {
     return wrapAsyncIterableStream(result, pipeline, modelId, provider, startMs);
   }
   const usage = extractUsage(result);
-  pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
+  pipeline.recordUsage(modelId, provider, void 0, usage, Date.now() - startMs, false);
   return result;
 }
 function rateLimited(client, options = {}) {