ai-sdk-rate-limiter 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -557,6 +557,27 @@ Costs are based on **actual token counts** from API responses — not estimates.
557
557
 
558
558
  `byScope` is populated automatically when requests carry a `scope` (either set on `limiter.wrap()` or via `providerOptions.rateLimiter.scope`). Unscoped requests don't appear in `byScope`.
559
559
 
560
+ ### Cost forecasting
561
+
562
+ `getCostForecast()` projects what each period (hour, day, month) would cost if spending continued at the current hourly rate. Useful for alerting before a budget cap is hit:
563
+
564
+ ```typescript
565
+ const forecast = limiter.getCostForecast()
566
+
567
+ console.log(forecast)
568
+ // {
569
+ // hour: { spentUsd: 1.20, projectedUsd: 1.20, ratePerHourUsd: 1.20 },
570
+ // day: { spentUsd: 3.50, projectedUsd: 28.80, ratePerHourUsd: 1.20 },
571
+ // month: { spentUsd: 8.10, projectedUsd: 864, ratePerHourUsd: 1.20 },
572
+ // }
573
+
574
+ if (forecast.day.projectedUsd > 40) {
575
+ console.warn(`Heads up — on track to spend $${forecast.day.projectedUsd.toFixed(2)} today`)
576
+ }
577
+ ```
578
+
579
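+ `projectedUsd` = current hourly rate × hours in the period (1, 24, and 24 × 30 = 720 for hour, day, and month). The hourly rate is the total spend over the **last 60 minutes**, so the projection responds quickly to usage spikes. `spentUsd` is reported alongside for comparison but does not feed into the projection.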
580
+
560
581
  ---
561
582
 
562
583
  ## Budget fallback routing
@@ -1456,6 +1477,57 @@ class MyStore implements RateLimitStore {
1456
1477
  const limiter = createRateLimiter({ store: new MyStore() })
1457
1478
  ```
1458
1479
 
1480
+ ### Load balancing across API keys
1481
+
1482
+ `createModelPool()` distributes requests round-robin across multiple model instances — useful when you have more than one API key:
1483
+
1484
+ ```typescript
1485
+ import { createRateLimiter, createModelPool } from 'ai-sdk-rate-limiter'
1486
+ import { createOpenAI } from '@ai-sdk/openai'
1487
+
1488
+ // Two API keys, each with their own limiter tracking separate RPM limits
1489
+ const limiter1 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
1490
+ const limiter2 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
1491
+
1492
+ const openai1 = createOpenAI({ apiKey: process.env.OPENAI_KEY_1 })
1493
+ const openai2 = createOpenAI({ apiKey: process.env.OPENAI_KEY_2 })
1494
+
1495
+ const pool = createModelPool([
1496
+ limiter1.wrap(openai1('gpt-4o')),
1497
+ limiter2.wrap(openai2('gpt-4o')),
1498
+ ])
1499
+
1500
+ // Use exactly like a regular model — calls alternate between the two keys
1501
+ const { text } = await generateText({ model: pool, prompt: 'Hello!' })
1502
+ ```
1503
+
1504
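+ Pass `{ strategy: 'random' }` as the second argument to `createModelPool()` for random selection instead of round-robin. The pool reports the `modelId` and `provider` of the first model in the list.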
1505
+
1506
+ ### Request deduplication
1507
+
1508
+ When multiple concurrent requests carry the same `dedupKey`, only one API call is made and all callers receive the same result. Useful for FAQ-style workloads where many users ask the same question simultaneously:
1509
+
1510
+ ```typescript
1511
+ const model = limiter.wrap(openai('gpt-4o'))
1512
+
1513
+ // Server handler — two simultaneous identical requests share one API call
1514
+ async function handleRequest(questionId: string) {
1515
+ const { text } = await generateText({
1516
+ model,
1517
+ prompt: questions[questionId],
1518
+ providerOptions: {
1519
+ rateLimiter: { dedupKey: `faq:${questionId}` },
1520
+ },
1521
+ })
1522
+ return text
1523
+ }
1524
+
1525
+ // If 50 users hit the same FAQ item at the same time → 1 API call, not 50
1526
+ const results = await Promise.all(users.map(() => handleRequest('faq-42')))
1527
+ ```
1528
+
1529
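+ The dedup entry is removed once the request completes (success or error), so results are never cached: only requests that overlap in time share a call, and any later request makes a fresh one. Entries are matched on `dedupKey` alone, so use distinct keys for requests that differ in model or prompt.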
1530
+
1459
1531
  ---
1460
1532
 
1461
1533
  ## How it works
package/dist/index.cjs CHANGED
@@ -668,6 +668,30 @@ var CostTracker = class {
668
668
  estimateCost(inputTokens, outputTokens, inputPricePerMillion, outputPricePerMillion) {
669
669
  return inputTokens / 1e6 * inputPricePerMillion + outputTokens / 1e6 * outputPricePerMillion;
670
670
  }
671
+ getForecast() {
672
+ const now = Date.now();
673
+ this.evict(now);
674
+ const hourlyRate = this.entries.filter((e) => e.timestamp > now - HOUR_MS).reduce((s, e) => s + e.costUsd, 0);
675
+ const daySpent = this.entries.filter((e) => e.timestamp > now - DAY_MS2).reduce((s, e) => s + e.costUsd, 0);
676
+ const monthSpent = this.entries.filter((e) => e.timestamp > now - MONTH_MS).reduce((s, e) => s + e.costUsd, 0);
677
+ return {
678
+ hour: {
679
+ spentUsd: hourlyRate,
680
+ projectedUsd: hourlyRate,
681
+ ratePerHourUsd: hourlyRate
682
+ },
683
+ day: {
684
+ spentUsd: daySpent,
685
+ projectedUsd: hourlyRate * 24,
686
+ ratePerHourUsd: hourlyRate
687
+ },
688
+ month: {
689
+ spentUsd: monthSpent,
690
+ projectedUsd: hourlyRate * 24 * 30,
691
+ ratePerHourUsd: hourlyRate
692
+ }
693
+ };
694
+ }
671
695
  getReport() {
672
696
  const now = Date.now();
673
697
  this.evict(now);
@@ -1734,6 +1758,8 @@ var Pipeline = class {
1734
1758
  this.circuits = /* @__PURE__ */ new Map();
1735
1759
  /** Limits detected from provider response headers (lower priority than user config) */
1736
1760
  this.detectedLimits = /* @__PURE__ */ new Map();
1761
+ /** In-flight promises indexed by dedup key, shared across identical concurrent requests */
1762
+ this.dedupMap = /* @__PURE__ */ new Map();
1737
1763
  /** Set to true after shutdown() is called */
1738
1764
  this.shutdownRequested = false;
1739
1765
  this.config = config;
@@ -1780,6 +1806,19 @@ var Pipeline = class {
1780
1806
  */
1781
1807
  async execute(modelId, provider, prompt, fn, opts) {
1782
1808
  this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
1809
+ if (opts.dedupKey !== void 0) {
1810
+ const existing = this.dedupMap.get(opts.dedupKey);
1811
+ if (existing !== void 0) {
1812
+ this.log.log(modelId, "dedup hit", { dedupKey: opts.dedupKey });
1813
+ return existing;
1814
+ }
1815
+ const { dedupKey, ...optsWithoutDedup } = opts;
1816
+ const promise = this.execute(modelId, provider, prompt, fn, optsWithoutDedup).finally(() => {
1817
+ this.dedupMap.delete(dedupKey);
1818
+ });
1819
+ this.dedupMap.set(dedupKey, promise);
1820
+ return promise;
1821
+ }
1783
1822
  if (this.shutdownRequested) {
1784
1823
  this.emitter.emit("dropped", {
1785
1824
  model: modelId,
@@ -1994,6 +2033,9 @@ var Pipeline = class {
1994
2033
  getCostReport() {
1995
2034
  return this.costTracker.getReport();
1996
2035
  }
2036
/** Returns the spend forecast produced by the cost tracker's `getForecast()`. */
+ getCostForecast() {
2037
+ return this.costTracker.getForecast();
2038
+ }
1997
2039
  getStatus() {
1998
2040
  const models = [];
1999
2041
  let totalQueueDepth = 0;
@@ -2122,7 +2164,8 @@ function getPerRequestOptions(params, queueTimeout) {
2122
2164
  metadata: raw?.metadata ?? {},
2123
2165
  skipBudgetCheck: raw?._skipBudgetCheck ?? false,
2124
2166
  scope: raw?.scope,
2125
- callTimeout: raw?.callTimeout
2167
+ callTimeout: raw?.callTimeout,
2168
+ dedupKey: raw?.dedupKey
2126
2169
  };
2127
2170
  }
2128
2171
  function extractTokenUsage(usage) {
@@ -2138,7 +2181,7 @@ function createMiddleware(pipeline, queueTimeout) {
2138
2181
  // wrapGenerate — non-streaming
2139
2182
  // -----------------------------------------------------------------------
2140
2183
  async wrapGenerate({ doGenerate, params, model }) {
2141
- const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
2184
+ const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
2142
2185
  const modelId = model.modelId;
2143
2186
  const provider = model.provider;
2144
2187
  const startMs = Date.now();
@@ -2154,6 +2197,7 @@ function createMiddleware(pipeline, queueTimeout) {
2154
2197
  skipBudgetCheck,
2155
2198
  ...scope !== void 0 && { scope },
2156
2199
  ...callTimeout !== void 0 && { callTimeout },
2200
+ ...dedupKey !== void 0 && { dedupKey },
2157
2201
  ...params.abortSignal !== void 0 && { signal: params.abortSignal }
2158
2202
  }
2159
2203
  );
@@ -2167,7 +2211,7 @@ function createMiddleware(pipeline, queueTimeout) {
2167
2211
  // wrapStream — streaming
2168
2212
  // -----------------------------------------------------------------------
2169
2213
  async wrapStream({ doStream, params, model }) {
2170
- const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
2214
+ const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
2171
2215
  const modelId = model.modelId;
2172
2216
  const provider = model.provider;
2173
2217
  const startMs = Date.now();
@@ -2183,6 +2227,7 @@ function createMiddleware(pipeline, queueTimeout) {
2183
2227
  skipBudgetCheck,
2184
2228
  ...scope !== void 0 && { scope },
2185
2229
  ...callTimeout !== void 0 && { callTimeout },
2230
+ ...dedupKey !== void 0 && { dedupKey },
2186
2231
  ...params.abortSignal !== void 0 && { signal: params.abortSignal }
2187
2232
  }
2188
2233
  );
@@ -2508,6 +2553,9 @@ function createRateLimiter(config = {}) {
2508
2553
  getCostReport() {
2509
2554
  return pipeline.getCostReport();
2510
2555
  },
2556
+ getCostForecast() {
2557
+ return pipeline.getCostForecast();
2558
+ },
2511
2559
  getStatus() {
2512
2560
  return pipeline.getStatus();
2513
2561
  },
@@ -2532,6 +2580,38 @@ function createRateLimiter(config = {}) {
2532
2580
  };
2533
2581
  }
2534
2582
 
2583
+ // src/adapters/model-pool.ts
2584
/**
 * Creates a single model facade that spreads calls across several model
 * instances (for example, one per API key).
 *
 * `doGenerate` / `doStream` are dispatched to a model chosen per call.
 * Every other property (`modelId`, `provider`, and any further metadata such
 * as `specificationVersion` or `supportedUrls`) is read from the first model
 * in the list, so the pool presents itself to the SDK as that model.
 *
 * @param models  Non-empty list of model instances to balance across.
 * @param options Optional settings. `strategy: "random"` picks a model at
 *                random per call; any other value (or none) uses round-robin.
 * @returns A model-shaped object backed by the pool.
 * @throws Error when `models` is missing or empty.
 */
function createModelPool(models, options) {
  // Also rejects null/undefined so callers get the descriptive error
  // rather than a TypeError from reading `.length`.
  if (!(models?.length > 0)) {
    throw new Error("createModelPool: at least one model is required");
  }
  const strategy = options?.strategy ?? "round-robin";
  let index = 0;
  function pick() {
    if (strategy === "random") {
      return models[Math.floor(Math.random() * models.length)];
    }
    const model = models[index % models.length];
    index = (index + 1) % models.length;
    return model;
  }
  const primary = models[0];
  const pool = {
    get modelId() {
      return primary.modelId;
    },
    get provider() {
      return primary.provider;
    },
    doGenerate(params) {
      return pick().doGenerate(params);
    },
    doStream(params) {
      return pick().doStream(params);
    }
  };
  // The SDK inspects more than modelId/provider on a model object
  // (e.g. specificationVersion). Anything the pool does not define itself
  // is answered by the first model so the facade is a complete stand-in.
  return new Proxy(pool, {
    get(target, prop, receiver) {
      if (prop in target) {
        return Reflect.get(target, prop, receiver);
      }
      const value = primary[prop];
      // Bind forwarded methods so they run against the real model instance.
      return typeof value === "function" ? value.bind(primary) : value;
    },
    has(target, prop) {
      return prop in target || prop in primary;
    }
  });
}
2614
+
2535
2615
  exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
2536
2616
  exports.BudgetExceededError = BudgetExceededError;
2537
2617
  exports.COHERE_MODELS = COHERE_MODELS;
@@ -2546,6 +2626,7 @@ exports.RateLimitExceededError = RateLimitExceededError;
2546
2626
  exports.RateLimiterError = RateLimiterError;
2547
2627
  exports.RetryExhaustedError = RetryExhaustedError;
2548
2628
  exports.ShutdownError = ShutdownError;
2629
+ exports.createModelPool = createModelPool;
2549
2630
  exports.createRateLimiter = createRateLimiter;
2550
2631
  exports.isKnownModel = isKnownModel;
2551
2632
  exports.rateLimited = rateLimited;