ai-sdk-rate-limiter 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -0
- package/dist/index.cjs +84 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -3
- package/dist/index.d.ts +38 -3
- package/dist/index.js +84 -4
- package/dist/index.js.map +1 -1
- package/dist/middleware.d.cts +1 -1
- package/dist/middleware.d.ts +1 -1
- package/dist/otel.d.cts +1 -1
- package/dist/otel.d.ts +1 -1
- package/dist/prometheus.d.cts +1 -1
- package/dist/prometheus.d.ts +1 -1
- package/dist/redis.d.cts +1 -1
- package/dist/redis.d.ts +1 -1
- package/dist/statsd.d.cts +1 -1
- package/dist/statsd.d.ts +1 -1
- package/dist/testing.cjs +51 -3
- package/dist/testing.cjs.map +1 -1
- package/dist/testing.d.cts +1 -1
- package/dist/testing.d.ts +1 -1
- package/dist/testing.js +51 -3
- package/dist/testing.js.map +1 -1
- package/dist/{types-CMevWGWK.d.cts → types-DKIz0MLZ.d.cts} +38 -1
- package/dist/{types-CMevWGWK.d.ts → types-DKIz0MLZ.d.ts} +38 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -557,6 +557,27 @@ Costs are based on **actual token counts** from API responses — not estimates.
|
|
|
557
557
|
|
|
558
558
|
`byScope` is populated automatically when requests carry a `scope` (either set on `limiter.wrap()` or via `providerOptions.rateLimiter.scope`). Unscoped requests don't appear in `byScope`.
|
|
559
559
|
|
|
560
|
+
### Cost forecasting
|
|
561
|
+
|
|
562
|
+
`getCostForecast()` projects spend for the hour, day, and month windows by extrapolating the spend rate observed over the last hour across the full length of each window. Useful for alerting before a budget cap is hit:
|
|
563
|
+
|
|
564
|
+
```typescript
|
|
565
|
+
const forecast = limiter.getCostForecast()
|
|
566
|
+
|
|
567
|
+
console.log(forecast)
|
|
568
|
+
// {
|
|
569
|
+
// hour: { spentUsd: 1.20, projectedUsd: 1.20, ratePerHourUsd: 1.20 },
|
|
570
|
+
// day: { spentUsd: 3.50, projectedUsd: 28.80, ratePerHourUsd: 1.20 },
|
|
571
|
+
// month: { spentUsd: 8.10, projectedUsd: 864, ratePerHourUsd: 1.20 },
|
|
572
|
+
// }
|
|
573
|
+
|
|
574
|
+
if (forecast.day.projectedUsd > 40) {
|
|
575
|
+
console.warn(`Heads up — on track to spend $${forecast.day.projectedUsd.toFixed(2)} today`)
|
|
576
|
+
}
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
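`projectedUsd` = current hourly rate × hours in the period (1 for `hour`, 24 for `day`, 720 for `month`, which is treated as a fixed 30 days). The hourly rate is the spend recorded in the **last 60 minutes**, so the projection responds quickly to usage spikes. Because it is a pure rate extrapolation, it is independent of `spentUsd` and can be lower than `spentUsd` when the most recent hour was quiet.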
|
|
580
|
+
|
|
560
581
|
---
|
|
561
582
|
|
|
562
583
|
## Budget fallback routing
|
|
@@ -1456,6 +1477,57 @@ class MyStore implements RateLimitStore {
|
|
|
1456
1477
|
const limiter = createRateLimiter({ store: new MyStore() })
|
|
1457
1478
|
```
|
|
1458
1479
|
|
|
1480
|
+
### Load balancing across API keys
|
|
1481
|
+
|
|
1482
|
+
`createModelPool()` distributes requests round-robin across multiple model instances — useful when you have more than one API key:
|
|
1483
|
+
|
|
1484
|
+
```typescript
|
|
1485
|
+
import { createRateLimiter, createModelPool } from 'ai-sdk-rate-limiter'
|
|
1486
|
+
import { createOpenAI } from '@ai-sdk/openai'
|
|
1487
|
+
|
|
1488
|
+
// Two API keys, each with their own limiter tracking separate RPM limits
|
|
1489
|
+
const limiter1 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
|
|
1490
|
+
const limiter2 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
|
|
1491
|
+
|
|
1492
|
+
const openai1 = createOpenAI({ apiKey: process.env.OPENAI_KEY_1 })
|
|
1493
|
+
const openai2 = createOpenAI({ apiKey: process.env.OPENAI_KEY_2 })
|
|
1494
|
+
|
|
1495
|
+
const pool = createModelPool([
|
|
1496
|
+
limiter1.wrap(openai1('gpt-4o')),
|
|
1497
|
+
limiter2.wrap(openai2('gpt-4o')),
|
|
1498
|
+
])
|
|
1499
|
+
|
|
1500
|
+
// Use exactly like a regular model — calls alternate between the two keys
|
|
1501
|
+
const { text } = await generateText({ model: pool, prompt: 'Hello!' })
|
|
1502
|
+
```
|
|
1503
|
+
|
|
1504
|
+
Pass `{ strategy: 'random' }` for random selection instead of round-robin.
|
|
1505
|
+
|
|
1506
|
+
### Request deduplication
|
|
1507
|
+
|
|
1508
|
+
When multiple concurrent requests carry the same `dedupKey`, only one API call is made and all callers receive the same result. Useful for FAQ-style workloads where many users ask the same question simultaneously:
|
|
1509
|
+
|
|
1510
|
+
```typescript
|
|
1511
|
+
const model = limiter.wrap(openai('gpt-4o'))
|
|
1512
|
+
|
|
1513
|
+
// Server handler — two simultaneous identical requests share one API call
|
|
1514
|
+
async function handleRequest(questionId: string) {
|
|
1515
|
+
const { text } = await generateText({
|
|
1516
|
+
model,
|
|
1517
|
+
prompt: questions[questionId],
|
|
1518
|
+
providerOptions: {
|
|
1519
|
+
rateLimiter: { dedupKey: `faq:${questionId}` },
|
|
1520
|
+
},
|
|
1521
|
+
})
|
|
1522
|
+
return text
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1525
|
+
// If 50 users hit the same FAQ item at the same time → 1 API call, not 50
|
|
1526
|
+
const results = await Promise.all(users.map(() => handleRequest('faq-42')))
|
|
1527
|
+
```
|
|
1528
|
+
|
|
1529
|
+
The dedup entry is removed once the request completes (success or error), so subsequent requests always make a fresh call.
|
|
1530
|
+
|
|
1459
1531
|
---
|
|
1460
1532
|
|
|
1461
1533
|
## How it works
|
package/dist/index.cjs
CHANGED
|
@@ -668,6 +668,30 @@ var CostTracker = class {
|
|
|
668
668
|
estimateCost(inputTokens, outputTokens, inputPricePerMillion, outputPricePerMillion) {
|
|
669
669
|
return inputTokens / 1e6 * inputPricePerMillion + outputTokens / 1e6 * outputPricePerMillion;
|
|
670
670
|
}
|
|
671
|
+
getForecast() {
|
|
672
|
+
const now = Date.now();
|
|
673
|
+
this.evict(now);
|
|
674
|
+
const hourlyRate = this.entries.filter((e) => e.timestamp > now - HOUR_MS).reduce((s, e) => s + e.costUsd, 0);
|
|
675
|
+
const daySpent = this.entries.filter((e) => e.timestamp > now - DAY_MS2).reduce((s, e) => s + e.costUsd, 0);
|
|
676
|
+
const monthSpent = this.entries.filter((e) => e.timestamp > now - MONTH_MS).reduce((s, e) => s + e.costUsd, 0);
|
|
677
|
+
return {
|
|
678
|
+
hour: {
|
|
679
|
+
spentUsd: hourlyRate,
|
|
680
|
+
projectedUsd: hourlyRate,
|
|
681
|
+
ratePerHourUsd: hourlyRate
|
|
682
|
+
},
|
|
683
|
+
day: {
|
|
684
|
+
spentUsd: daySpent,
|
|
685
|
+
projectedUsd: hourlyRate * 24,
|
|
686
|
+
ratePerHourUsd: hourlyRate
|
|
687
|
+
},
|
|
688
|
+
month: {
|
|
689
|
+
spentUsd: monthSpent,
|
|
690
|
+
projectedUsd: hourlyRate * 24 * 30,
|
|
691
|
+
ratePerHourUsd: hourlyRate
|
|
692
|
+
}
|
|
693
|
+
};
|
|
694
|
+
}
|
|
671
695
|
getReport() {
|
|
672
696
|
const now = Date.now();
|
|
673
697
|
this.evict(now);
|
|
@@ -1734,6 +1758,8 @@ var Pipeline = class {
|
|
|
1734
1758
|
this.circuits = /* @__PURE__ */ new Map();
|
|
1735
1759
|
/** Limits detected from provider response headers (lower priority than user config) */
|
|
1736
1760
|
this.detectedLimits = /* @__PURE__ */ new Map();
|
|
1761
|
+
/** In-flight promises indexed by dedup key, shared across identical concurrent requests */
|
|
1762
|
+
this.dedupMap = /* @__PURE__ */ new Map();
|
|
1737
1763
|
/** Set to true after shutdown() is called */
|
|
1738
1764
|
this.shutdownRequested = false;
|
|
1739
1765
|
this.config = config;
|
|
@@ -1780,6 +1806,19 @@ var Pipeline = class {
|
|
|
1780
1806
|
*/
|
|
1781
1807
|
async execute(modelId, provider, prompt, fn, opts) {
|
|
1782
1808
|
this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
|
|
1809
|
+
if (opts.dedupKey !== void 0) {
|
|
1810
|
+
const existing = this.dedupMap.get(opts.dedupKey);
|
|
1811
|
+
if (existing !== void 0) {
|
|
1812
|
+
this.log.log(modelId, "dedup hit", { dedupKey: opts.dedupKey });
|
|
1813
|
+
return existing;
|
|
1814
|
+
}
|
|
1815
|
+
const { dedupKey, ...optsWithoutDedup } = opts;
|
|
1816
|
+
const promise = this.execute(modelId, provider, prompt, fn, optsWithoutDedup).finally(() => {
|
|
1817
|
+
this.dedupMap.delete(dedupKey);
|
|
1818
|
+
});
|
|
1819
|
+
this.dedupMap.set(dedupKey, promise);
|
|
1820
|
+
return promise;
|
|
1821
|
+
}
|
|
1783
1822
|
if (this.shutdownRequested) {
|
|
1784
1823
|
this.emitter.emit("dropped", {
|
|
1785
1824
|
model: modelId,
|
|
@@ -1994,6 +2033,9 @@ var Pipeline = class {
|
|
|
1994
2033
|
/** Returns the cost report produced by this pipeline's cost tracker (`this.costTracker.getReport()`). */
getCostReport() {
|
|
1995
2034
|
return this.costTracker.getReport();
|
|
1996
2035
|
}
|
|
2036
|
+
/** Returns the spend forecast produced by this pipeline's cost tracker (`this.costTracker.getForecast()`). */
getCostForecast() {
|
|
2037
|
+
return this.costTracker.getForecast();
|
|
2038
|
+
}
|
|
1997
2039
|
getStatus() {
|
|
1998
2040
|
const models = [];
|
|
1999
2041
|
let totalQueueDepth = 0;
|
|
@@ -2122,7 +2164,8 @@ function getPerRequestOptions(params, queueTimeout) {
|
|
|
2122
2164
|
metadata: raw?.metadata ?? {},
|
|
2123
2165
|
skipBudgetCheck: raw?._skipBudgetCheck ?? false,
|
|
2124
2166
|
scope: raw?.scope,
|
|
2125
|
-
callTimeout: raw?.callTimeout
|
|
2167
|
+
callTimeout: raw?.callTimeout,
|
|
2168
|
+
dedupKey: raw?.dedupKey
|
|
2126
2169
|
};
|
|
2127
2170
|
}
|
|
2128
2171
|
function extractTokenUsage(usage) {
|
|
@@ -2138,7 +2181,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
2138
2181
|
// wrapGenerate — non-streaming
|
|
2139
2182
|
// -----------------------------------------------------------------------
|
|
2140
2183
|
async wrapGenerate({ doGenerate, params, model }) {
|
|
2141
|
-
const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
|
|
2184
|
+
const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
|
|
2142
2185
|
const modelId = model.modelId;
|
|
2143
2186
|
const provider = model.provider;
|
|
2144
2187
|
const startMs = Date.now();
|
|
@@ -2154,6 +2197,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
2154
2197
|
skipBudgetCheck,
|
|
2155
2198
|
...scope !== void 0 && { scope },
|
|
2156
2199
|
...callTimeout !== void 0 && { callTimeout },
|
|
2200
|
+
...dedupKey !== void 0 && { dedupKey },
|
|
2157
2201
|
...params.abortSignal !== void 0 && { signal: params.abortSignal }
|
|
2158
2202
|
}
|
|
2159
2203
|
);
|
|
@@ -2167,7 +2211,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
2167
2211
|
// wrapStream — streaming
|
|
2168
2212
|
// -----------------------------------------------------------------------
|
|
2169
2213
|
async wrapStream({ doStream, params, model }) {
|
|
2170
|
-
const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
|
|
2214
|
+
const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
|
|
2171
2215
|
const modelId = model.modelId;
|
|
2172
2216
|
const provider = model.provider;
|
|
2173
2217
|
const startMs = Date.now();
|
|
@@ -2183,6 +2227,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
2183
2227
|
skipBudgetCheck,
|
|
2184
2228
|
...scope !== void 0 && { scope },
|
|
2185
2229
|
...callTimeout !== void 0 && { callTimeout },
|
|
2230
|
+
...dedupKey !== void 0 && { dedupKey },
|
|
2186
2231
|
...params.abortSignal !== void 0 && { signal: params.abortSignal }
|
|
2187
2232
|
}
|
|
2188
2233
|
);
|
|
@@ -2508,6 +2553,9 @@ function createRateLimiter(config = {}) {
|
|
|
2508
2553
|
/** Public limiter API: returns the cost report by delegating to `pipeline.getCostReport()`. */
getCostReport() {
|
|
2509
2554
|
return pipeline.getCostReport();
|
|
2510
2555
|
},
|
|
2556
|
+
/** Public limiter API: returns the spend forecast by delegating to `pipeline.getCostForecast()`. */
getCostForecast() {
|
|
2557
|
+
return pipeline.getCostForecast();
|
|
2558
|
+
},
|
|
2511
2559
|
/** Public limiter API: returns the current status by delegating to `pipeline.getStatus()`. */
getStatus() {
|
|
2512
2560
|
return pipeline.getStatus();
|
|
2513
2561
|
},
|
|
@@ -2532,6 +2580,38 @@ function createRateLimiter(config = {}) {
|
|
|
2532
2580
|
};
|
|
2533
2581
|
}
|
|
2534
2582
|
|
|
2583
|
+
// src/adapters/model-pool.ts
|
|
2584
|
+
function createModelPool(models, options) {
|
|
2585
|
+
if (models.length === 0) {
|
|
2586
|
+
throw new Error("createModelPool: at least one model is required");
|
|
2587
|
+
}
|
|
2588
|
+
const strategy = options?.strategy ?? "round-robin";
|
|
2589
|
+
let index = 0;
|
|
2590
|
+
function pick() {
|
|
2591
|
+
if (strategy === "random") {
|
|
2592
|
+
return models[Math.floor(Math.random() * models.length)];
|
|
2593
|
+
}
|
|
2594
|
+
const model = models[index % models.length];
|
|
2595
|
+
index = (index + 1) % models.length;
|
|
2596
|
+
return model;
|
|
2597
|
+
}
|
|
2598
|
+
const primary = models[0];
|
|
2599
|
+
return {
|
|
2600
|
+
get modelId() {
|
|
2601
|
+
return primary.modelId;
|
|
2602
|
+
},
|
|
2603
|
+
get provider() {
|
|
2604
|
+
return primary.provider;
|
|
2605
|
+
},
|
|
2606
|
+
doGenerate(params) {
|
|
2607
|
+
return pick().doGenerate(params);
|
|
2608
|
+
},
|
|
2609
|
+
doStream(params) {
|
|
2610
|
+
return pick().doStream(params);
|
|
2611
|
+
}
|
|
2612
|
+
};
|
|
2613
|
+
}
|
|
2614
|
+
|
|
2535
2615
|
exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
|
|
2536
2616
|
exports.BudgetExceededError = BudgetExceededError;
|
|
2537
2617
|
exports.COHERE_MODELS = COHERE_MODELS;
|
|
@@ -2546,6 +2626,7 @@ exports.RateLimitExceededError = RateLimitExceededError;
|
|
|
2546
2626
|
exports.RateLimiterError = RateLimiterError;
|
|
2547
2627
|
exports.RetryExhaustedError = RetryExhaustedError;
|
|
2548
2628
|
exports.ShutdownError = ShutdownError;
|
|
2629
|
+
exports.createModelPool = createModelPool;
|
|
2549
2630
|
exports.createRateLimiter = createRateLimiter;
|
|
2550
2631
|
exports.isKnownModel = isKnownModel;
|
|
2551
2632
|
exports.rateLimited = rateLimited;
|