ai-sdk-rate-limiter 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -194,6 +194,9 @@ function sumInput(window) {
194
194
  }
195
195
 
196
196
  // src/core/rate-limit-engine.ts
197
+ function makeAbortError() {
198
+ return Object.assign(new Error("The operation was aborted"), { name: "AbortError" });
199
+ }
197
200
  var PRIORITY_RANK = {
198
201
  high: 0,
199
202
  normal: 1,
@@ -233,41 +236,84 @@ var RateLimitEngine = class {
233
236
  * - If at capacity: enqueues (sorted by priority) and resolves when a slot opens.
234
237
  * - If queue is full: throws QueueFullError immediately.
235
238
  * - If waiting exceeds timeoutMs: throws QueueTimeoutError.
239
+ * - If signal is aborted while queued: throws an AbortError.
236
240
  */
237
241
  async acquire(key, opts) {
242
+ if (opts.signal?.aborted) throw makeAbortError();
238
243
  const local = this.getOrCreate(key);
239
244
  const nextSlotAtMs = await this.store.checkAndRecord(
240
245
  key,
241
246
  opts.estimatedInputTokens,
242
247
  opts.limits
243
248
  );
244
- if (nextSlotAtMs <= Date.now()) return;
245
- if (local.waiters.length >= this.maxQueueSize) {
246
- throw new QueueFullError(key, this.maxQueueSize);
249
+ if (nextSlotAtMs > Date.now()) {
250
+ if (local.waiters.length >= this.maxQueueSize) {
251
+ throw new QueueFullError(key, this.maxQueueSize);
252
+ }
253
+ const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
254
+ opts.onQueued?.(local.waiters.length, estimatedWaitMs);
255
+ await new Promise((resolve, reject) => {
256
+ const enqueuedAt = Date.now();
257
+ const timeoutHandle = setTimeout(() => {
258
+ const idx = local.waiters.indexOf(waiter);
259
+ if (idx !== -1) local.waiters.splice(idx, 1);
260
+ cleanup();
261
+ reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
262
+ }, opts.timeoutMs);
263
+ const onAbort = () => {
264
+ const idx = local.waiters.indexOf(waiter);
265
+ if (idx !== -1) local.waiters.splice(idx, 1);
266
+ clearTimeout(timeoutHandle);
267
+ cleanup();
268
+ reject(makeAbortError());
269
+ };
270
+ const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
271
+ opts.signal?.addEventListener("abort", onAbort, { once: true });
272
+ const waiter = {
273
+ resolve: () => {
274
+ clearTimeout(timeoutHandle);
275
+ cleanup();
276
+ opts.onDequeued?.(Date.now() - enqueuedAt);
277
+ resolve();
278
+ },
279
+ reject: (err) => {
280
+ clearTimeout(timeoutHandle);
281
+ cleanup();
282
+ reject(err);
283
+ },
284
+ priority: opts.priority,
285
+ enqueued: enqueuedAt,
286
+ estimatedInputTokens: opts.estimatedInputTokens,
287
+ timeoutHandle
288
+ };
289
+ insertWaiter(local.waiters, waiter);
290
+ this.scheduleDrain(key, opts.limits, nextSlotAtMs);
291
+ });
247
292
  }
248
- const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
249
- opts.onQueued?.(local.waiters.length, estimatedWaitMs);
250
- return new Promise((resolve, reject) => {
251
- const enqueuedAt = Date.now();
252
- const timeoutHandle = setTimeout(() => {
253
- const idx = local.waiters.indexOf(waiter);
254
- if (idx !== -1) local.waiters.splice(idx, 1);
255
- reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
256
- }, opts.timeoutMs);
257
- const waiter = {
258
- resolve: () => {
259
- opts.onDequeued?.(Date.now() - enqueuedAt);
293
+ const maxConcurrent = opts.limits.maxConcurrent;
294
+ if (maxConcurrent !== void 0 && local.activeCount >= maxConcurrent) {
295
+ if (opts.signal?.aborted) throw makeAbortError();
296
+ await new Promise((resolve, reject) => {
297
+ const onAbort = () => {
298
+ const idx = local.concurrencyWaiters.findIndex((w) => w.resolve === resolveWrapped);
299
+ if (idx !== -1) local.concurrencyWaiters.splice(idx, 1);
300
+ cleanup();
301
+ reject(makeAbortError());
302
+ };
303
+ const resolveWrapped = () => {
304
+ cleanup();
260
305
  resolve();
261
- },
262
- reject,
263
- priority: opts.priority,
264
- enqueued: enqueuedAt,
265
- estimatedInputTokens: opts.estimatedInputTokens,
266
- timeoutHandle
267
- };
268
- insertWaiter(local.waiters, waiter);
269
- this.scheduleDrain(key, opts.limits, nextSlotAtMs);
270
- });
306
+ };
307
+ const rejectWrapped = (e) => {
308
+ cleanup();
309
+ reject(e);
310
+ };
311
+ const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
312
+ opts.signal?.addEventListener("abort", onAbort, { once: true });
313
+ local.concurrencyWaiters.push({ resolve: resolveWrapped, reject: rejectWrapped });
314
+ });
315
+ }
316
+ if (maxConcurrent !== void 0) local.activeCount++;
271
317
  }
272
318
  /**
273
319
  * Record actual token usage after a request completes.
@@ -311,13 +357,31 @@ var RateLimitEngine = class {
311
357
  }
312
358
  return null;
313
359
  }
360
+ /** All model keys that have been seen by this engine instance. */
361
+ knownKeys() {
362
+ return Array.from(this.localStates.keys());
363
+ }
364
+ /**
365
+ * Signal that a request has completed, decrementing the concurrency counter
366
+ * and unblocking the next concurrency waiter if one is queued.
367
+ *
368
+ * Must be called after every acquire() that succeeded (even on error).
369
+ * Only has an effect when maxConcurrent is configured for the model.
370
+ */
371
+ release(key) {
372
+ const local = this.localStates.get(key);
373
+ if (!local || local.activeCount === 0) return;
374
+ local.activeCount--;
375
+ const next = local.concurrencyWaiters.shift();
376
+ if (next) next.resolve();
377
+ }
314
378
  // -------------------------------------------------------------------------
315
379
  // Private helpers
316
380
  // -------------------------------------------------------------------------
317
381
  getOrCreate(key) {
318
382
  let state = this.localStates.get(key);
319
383
  if (!state) {
320
- state = { waiters: [], drainScheduled: false };
384
+ state = { waiters: [], drainScheduled: false, activeCount: 0, concurrencyWaiters: [] };
321
385
  this.localStates.set(key, state);
322
386
  }
323
387
  return state;
@@ -1339,6 +1403,24 @@ function isKnownModel(modelId, provider) {
1339
1403
  }
1340
1404
 
1341
1405
  // src/core/pipeline.ts
1406
+ function matchScope(pattern, scope) {
1407
+ if (pattern === scope) return true;
1408
+ if (pattern.includes("*")) {
1409
+ const regex = new RegExp(
1410
+ "^" + pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*") + "$"
1411
+ );
1412
+ return regex.test(scope);
1413
+ }
1414
+ return false;
1415
+ }
1416
+ function mergeScopeLimits(base, scope) {
1417
+ return {
1418
+ ...base,
1419
+ ...scope.rpm !== void 0 && { rpm: scope.rpm },
1420
+ ...scope.itpm !== void 0 && { itpm: scope.itpm },
1421
+ ...scope.maxConcurrent !== void 0 && { maxConcurrent: scope.maxConcurrent }
1422
+ };
1423
+ }
1342
1424
  function resolveRetryConfig(config) {
1343
1425
  const r = config.retry ?? {};
1344
1426
  return {
@@ -1375,15 +1457,32 @@ var Pipeline = class {
1375
1457
  // -------------------------------------------------------------------------
1376
1458
  // execute — called by both generate and stream adapters
1377
1459
  // -------------------------------------------------------------------------
1460
+ // -------------------------------------------------------------------------
1461
+ // Scope resolution helpers
1462
+ // -------------------------------------------------------------------------
1463
+ resolveScopedLimits(modelId, provider, scope) {
1464
+ const base = this.resolveModelLimits(modelId, provider);
1465
+ if (!this.config.scopes) return base;
1466
+ for (const [pattern, scopeConfig] of Object.entries(this.config.scopes)) {
1467
+ if (matchScope(pattern, scope)) {
1468
+ return mergeScopeLimits(base, scopeConfig);
1469
+ }
1470
+ }
1471
+ return base;
1472
+ }
1378
1473
  /**
1379
1474
  * Execute an AI request through the full pipeline:
1380
- * budget check → acquire slot → retry wrapper → usage recording
1475
+ * budget check → acquire slot → retry wrapper
1476
+ *
1477
+ * Usage recording (completed event) is NOT emitted here. Callers must call
1478
+ * recordUsage() once they have actual token counts from the API response.
1381
1479
  */
1382
1480
  async execute(modelId, provider, prompt, fn, opts) {
1383
- const limits = this.resolveModelLimits(modelId, provider);
1481
+ const scope = opts.scope;
1482
+ const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
1384
1483
  const estimatedInput = estimateInputTokens(prompt);
1385
- const startMs = Date.now();
1386
- const key = `${provider}:${modelId}`;
1484
+ const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
1485
+ let slotAcquired = false;
1387
1486
  if (this.config.cost?.budget && !opts.skipBudgetCheck) {
1388
1487
  const estimatedCost = this.costTracker.estimateCost(
1389
1488
  estimatedInput,
@@ -1418,6 +1517,7 @@ var Pipeline = class {
1418
1517
  estimatedInputTokens: estimatedInput,
1419
1518
  priority: opts.priority,
1420
1519
  timeoutMs: opts.timeoutMs,
1520
+ ...opts.signal !== void 0 && { signal: opts.signal },
1421
1521
  onQueued: (queueDepth, estimatedWaitMs) => {
1422
1522
  this.emitter.emit("queued", {
1423
1523
  model: modelId,
@@ -1443,9 +1543,9 @@ var Pipeline = class {
1443
1543
  });
1444
1544
  }
1445
1545
  });
1446
- let result;
1546
+ slotAcquired = true;
1447
1547
  try {
1448
- result = await withRetry(fn, this.retryConfig, {
1548
+ const result = await withRetry(fn, this.retryConfig, {
1449
1549
  modelId,
1450
1550
  onRetry: ({ attempt, maxAttempts, delayMs, error }) => {
1451
1551
  this.emitter.emit("retrying", {
@@ -1468,6 +1568,7 @@ var Pipeline = class {
1468
1568
  });
1469
1569
  }
1470
1570
  });
1571
+ return result;
1471
1572
  } catch (error) {
1472
1573
  this.emitter.emit("dropped", {
1473
1574
  model: modelId,
@@ -1475,29 +1576,18 @@ var Pipeline = class {
1475
1576
  reason: "queue-timeout"
1476
1577
  });
1477
1578
  throw error;
1579
+ } finally {
1580
+ if (slotAcquired) this.engine.release(key);
1478
1581
  }
1479
- opts.onUsage({
1480
- inputTokens: estimatedInput,
1481
- outputTokens: 0
1482
- });
1483
- this.emitter.emit("completed", {
1484
- model: modelId,
1485
- provider,
1486
- inputTokens: estimatedInput,
1487
- outputTokens: 0,
1488
- costUsd: 0,
1489
- latencyMs: Date.now() - startMs,
1490
- streaming: opts.streaming
1491
- });
1492
- return result;
1493
1582
  }
1494
1583
  /**
1495
1584
  * Record actual usage after a request resolves.
1496
- * Called with real token counts from the API response.
1585
+ * Called with real token counts from the API response. Emits the single
1586
+ * authoritative `completed` event for this request.
1497
1587
  */
1498
- recordUsage(modelId, provider, usage, latencyMs, streaming) {
1499
- const key = `${provider}:${modelId}`;
1500
- const limits = this.resolveModelLimits(modelId, provider);
1588
+ recordUsage(modelId, provider, scope, usage, latencyMs, streaming) {
1589
+ const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
1590
+ const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
1501
1591
  this.engine.recordActualUsage(key, usage.inputTokens, usage.outputTokens);
1502
1592
  const costUsd = this.costTracker.record(
1503
1593
  modelId,
@@ -1523,11 +1613,32 @@ var Pipeline = class {
1523
1613
  }
1524
1614
  getStatus() {
1525
1615
  const models = [];
1526
- return { models, totalQueueDepth: 0 };
1616
+ let totalQueueDepth = 0;
1617
+ for (const key of this.engine.knownKeys()) {
1618
+ const colonIdx = key.indexOf(":");
1619
+ const provider = colonIdx !== -1 ? key.slice(0, colonIdx) : key;
1620
+ const modelId = colonIdx !== -1 ? key.slice(colonIdx + 1) : key;
1621
+ const snapshot = this.engine.windowSnapshot(key);
1622
+ const queueDepth = this.engine.queueDepth(key);
1623
+ const backoffUntil = this.engine.backoffUntil(key);
1624
+ totalQueueDepth += queueDepth;
1625
+ models.push({
1626
+ modelId,
1627
+ provider,
1628
+ requestsInWindow: snapshot.requests,
1629
+ inputTokensInWindow: snapshot.inputTokens,
1630
+ outputTokensInWindow: snapshot.outputTokens,
1631
+ queueDepth,
1632
+ estimatedWaitMs: 0,
1633
+ // async — use limiter.estimatedWait() for an accurate value
1634
+ backoffUntil
1635
+ });
1636
+ }
1637
+ return { models, totalQueueDepth };
1527
1638
  }
1528
- async estimatedWait(modelId, provider, priority = "normal") {
1529
- const key = `${provider}:${modelId}`;
1530
- const limits = this.resolveModelLimits(modelId, provider);
1639
+ async estimatedWait(modelId, provider, priority = "normal", scope) {
1640
+ const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
1641
+ const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
1531
1642
  return this.engine.estimatedWaitMs(key, limits);
1532
1643
  }
1533
1644
  on(event, handler) {
@@ -1551,7 +1662,8 @@ function getPerRequestOptions(params, queueTimeout) {
1551
1662
  priority: raw?.priority ?? "normal",
1552
1663
  timeoutMs: raw?.timeout ?? queueTimeout,
1553
1664
  metadata: raw?.metadata ?? {},
1554
- skipBudgetCheck: raw?._skipBudgetCheck ?? false
1665
+ skipBudgetCheck: raw?._skipBudgetCheck ?? false,
1666
+ scope: raw?.scope
1555
1667
  };
1556
1668
  }
1557
1669
  function extractTokenUsage(usage) {
@@ -1567,7 +1679,7 @@ function createMiddleware(pipeline, queueTimeout) {
1567
1679
  // wrapGenerate — non-streaming
1568
1680
  // -----------------------------------------------------------------------
1569
1681
  async wrapGenerate({ doGenerate, params, model }) {
1570
- const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1682
+ const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
1571
1683
  const modelId = model.modelId;
1572
1684
  const provider = model.provider;
1573
1685
  const startMs = Date.now();
@@ -1581,21 +1693,19 @@ function createMiddleware(pipeline, queueTimeout) {
1581
1693
  priority,
1582
1694
  timeoutMs,
1583
1695
  skipBudgetCheck,
1584
- onUsage: () => {
1585
- }
1696
+ ...scope !== void 0 && { scope },
1697
+ ...params.abortSignal !== void 0 && { signal: params.abortSignal }
1586
1698
  }
1587
1699
  );
1588
- if (result.usage) {
1589
- const usage = extractTokenUsage(result.usage);
1590
- pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
1591
- }
1700
+ const usage = result.usage ? extractTokenUsage(result.usage) : { inputTokens: 0, outputTokens: 0 };
1701
+ pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, false);
1592
1702
  return result;
1593
1703
  },
1594
1704
  // -----------------------------------------------------------------------
1595
1705
  // wrapStream — streaming
1596
1706
  // -----------------------------------------------------------------------
1597
1707
  async wrapStream({ doStream, params, model }) {
1598
- const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1708
+ const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
1599
1709
  const modelId = model.modelId;
1600
1710
  const provider = model.provider;
1601
1711
  const startMs = Date.now();
@@ -1609,18 +1719,16 @@ function createMiddleware(pipeline, queueTimeout) {
1609
1719
  priority,
1610
1720
  timeoutMs,
1611
1721
  skipBudgetCheck,
1612
- onUsage: () => {
1613
- }
1722
+ ...scope !== void 0 && { scope },
1723
+ ...params.abortSignal !== void 0 && { signal: params.abortSignal }
1614
1724
  }
1615
1725
  );
1616
1726
  const { stream, ...rest } = streamResult;
1617
1727
  const transformStream = new TransformStream({
1618
1728
  transform(chunk, controller) {
1619
- if (chunk.type === "finish" && chunk.usage) {
1620
- const usage = extractTokenUsage(
1621
- chunk.usage
1622
- );
1623
- pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, true);
1729
+ if (chunk.type === "finish") {
1730
+ const usage = chunk.usage ? extractTokenUsage(chunk.usage) : { inputTokens: 0, outputTokens: 0 };
1731
+ pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, true);
1624
1732
  }
1625
1733
  controller.enqueue(chunk);
1626
1734
  }
@@ -1636,27 +1744,41 @@ function wrapModel(model, middleware, overrides) {
1636
1744
  const providerId = overrides?.providerId ?? model.provider;
1637
1745
  const modelId = overrides?.modelId ?? model.modelId;
1638
1746
  const fallbackModel = overrides?.fallback;
1747
+ const staticScope = overrides?.scope;
1748
+ function injectScope(params) {
1749
+ if (!staticScope) return params;
1750
+ const existingRl = params.providerOptions?.["rateLimiter"] ?? {};
1751
+ if (existingRl["scope"]) return params;
1752
+ return {
1753
+ ...params,
1754
+ providerOptions: {
1755
+ ...params.providerOptions,
1756
+ rateLimiter: { ...existingRl, scope: staticScope }
1757
+ }
1758
+ };
1759
+ }
1639
1760
  return {
1640
1761
  specificationVersion: "v4",
1641
1762
  provider: providerId,
1642
1763
  modelId,
1643
1764
  supportedUrls: model["supportedUrls"],
1644
1765
  async doGenerate(params) {
1766
+ const enrichedParams = injectScope(params);
1645
1767
  try {
1646
1768
  return await middleware.wrapGenerate({
1647
- doGenerate: () => model.doGenerate(params),
1648
- doStream: () => model.doStream(params),
1649
- params,
1769
+ doGenerate: () => model.doGenerate(enrichedParams),
1770
+ doStream: () => model.doStream(enrichedParams),
1771
+ params: enrichedParams,
1650
1772
  model
1651
1773
  });
1652
1774
  } catch (err) {
1653
1775
  if (err instanceof BudgetExceededError && fallbackModel) {
1654
1776
  const fallbackParams = {
1655
- ...params,
1777
+ ...enrichedParams,
1656
1778
  providerOptions: {
1657
- ...params.providerOptions,
1779
+ ...enrichedParams.providerOptions,
1658
1780
  rateLimiter: {
1659
- ...params.providerOptions?.["rateLimiter"] ?? {},
1781
+ ...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
1660
1782
  _skipBudgetCheck: true
1661
1783
  }
1662
1784
  }
@@ -1672,21 +1794,22 @@ function wrapModel(model, middleware, overrides) {
1672
1794
  }
1673
1795
  },
1674
1796
  async doStream(params) {
1797
+ const enrichedParams = injectScope(params);
1675
1798
  try {
1676
1799
  return await middleware.wrapStream({
1677
- doGenerate: () => model.doGenerate(params),
1678
- doStream: () => model.doStream(params),
1679
- params,
1800
+ doGenerate: () => model.doGenerate(enrichedParams),
1801
+ doStream: () => model.doStream(enrichedParams),
1802
+ params: enrichedParams,
1680
1803
  model
1681
1804
  });
1682
1805
  } catch (err) {
1683
1806
  if (err instanceof BudgetExceededError && fallbackModel) {
1684
1807
  const fallbackParams = {
1685
- ...params,
1808
+ ...enrichedParams,
1686
1809
  providerOptions: {
1687
- ...params.providerOptions,
1810
+ ...enrichedParams.providerOptions,
1688
1811
  rateLimiter: {
1689
- ...params.providerOptions?.["rateLimiter"] ?? {},
1812
+ ...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
1690
1813
  _skipBudgetCheck: true
1691
1814
  }
1692
1815
  }
@@ -1749,7 +1872,7 @@ function wrapAsyncIterableStream(stream, pipeline, modelId, provider, startMs) {
1749
1872
  }
1750
1873
  }
1751
1874
  if (result.done) {
1752
- pipeline.recordUsage(modelId, provider, { inputTokens, outputTokens }, Date.now() - startMs, true);
1875
+ pipeline.recordUsage(modelId, provider, void 0, { inputTokens, outputTokens }, Date.now() - startMs, true);
1753
1876
  }
1754
1877
  return result;
1755
1878
  },
@@ -1810,16 +1933,14 @@ async function executeViaProxy(fn, args, modelId, provider, params, pipeline, qu
1810
1933
  {
1811
1934
  streaming: isStreaming,
1812
1935
  priority,
1813
- timeoutMs: queueTimeout,
1814
- onUsage: () => {
1815
- }
1936
+ timeoutMs: queueTimeout
1816
1937
  }
1817
1938
  );
1818
1939
  if (isStreaming && result !== null && typeof result === "object" && Symbol.asyncIterator in result) {
1819
1940
  return wrapAsyncIterableStream(result, pipeline, modelId, provider, startMs);
1820
1941
  }
1821
1942
  const usage = extractUsage(result);
1822
- pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
1943
+ pipeline.recordUsage(modelId, provider, void 0, usage, Date.now() - startMs, false);
1823
1944
  return result;
1824
1945
  }
1825
1946
  function rateLimited(client, options = {}) {