ai-sdk-rate-limiter 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -2
- package/dist/index.cjs +85 -23
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +24 -3
- package/dist/index.d.ts +24 -3
- package/dist/index.js +85 -23
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -77,7 +77,7 @@ const limiter = createRateLimiter({
|
|
|
77
77
|
daily: 50,
|
|
78
78
|
monthly: 500,
|
|
79
79
|
},
|
|
80
|
-
onExceeded: 'throw', //
|
|
80
|
+
onExceeded: 'throw', // 'throw' | 'queue' | 'fallback'
|
|
81
81
|
},
|
|
82
82
|
|
|
83
83
|
// Queue behavior
|
|
@@ -169,6 +169,49 @@ Costs are based on **actual token counts** from API responses — not estimates.
|
|
|
169
169
|
|
|
170
170
|
---
|
|
171
171
|
|
|
172
|
+
## Budget fallback routing
|
|
173
|
+
|
|
174
|
+
When a budget limit is hit, you can transparently reroute to a cheaper model instead of throwing an error. Pass a `fallback` option to `wrap()`:
|
|
175
|
+
|
|
176
|
+
```typescript
|
|
177
|
+
const limiter = createRateLimiter({
|
|
178
|
+
cost: {
|
|
179
|
+
budget: { daily: 10 },
|
|
180
|
+
onExceeded: 'fallback', // reroute to fallback instead of throwing
|
|
181
|
+
},
|
|
182
|
+
on: {
|
|
183
|
+
budgetHit: ({ model, currentCostUsd, limitUsd, period }) =>
|
|
184
|
+
console.warn(`${model} ${period} budget hit ($${currentCostUsd} of $${limitUsd})`),
|
|
185
|
+
},
|
|
186
|
+
})
|
|
187
|
+
|
|
188
|
+
const model = limiter.wrap(
|
|
189
|
+
openai('gpt-4o'), // primary model
|
|
190
|
+
{ fallback: openai('gpt-4o-mini') }, // used when budget is exceeded
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
// Under budget → uses gpt-4o normally
|
|
194
|
+
// Over $10/day → transparently switches to gpt-4o-mini (a `budgetHit` event is still emitted), no code changes needed
|
|
195
|
+
const result = await generateText({ model, prompt })
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**How it works:**
|
|
199
|
+
1. Before every request, the request's estimated cost is added to the current rolling spend and the total is checked against each configured budget limit
|
|
200
|
+
2. When exceeded, `BudgetExceededError` is caught inside `wrap()` before it reaches your code
|
|
201
|
+
3. The request is re-executed against the fallback model, bypassing the budget pre-check
|
|
202
|
+
4. Fallback usage is tracked under the fallback model's ID in `getCostReport()`
|
|
203
|
+
|
|
204
|
+
**Behavior matrix:**
|
|
205
|
+
|
|
206
|
+
| `onExceeded` | `fallback` configured | Outcome |
|
|
207
|
+
|---|---|---|
|
|
208
|
+
| `'throw'` | any | Throws `BudgetExceededError` |
|
|
209
|
+
| `'fallback'` | yes | Transparently uses fallback model |
|
|
210
|
+
| `'fallback'` | no | Throws `BudgetExceededError` |
|
|
211
|
+
| `'queue'` | any | Queues until period resets |
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
172
215
|
## Backpressure — know before you send
|
|
173
216
|
|
|
174
217
|
Check estimated wait time before committing to a request. Useful for showing loading states or shedding load gracefully.
|
|
@@ -209,7 +252,7 @@ limiter.off('queued', handler)
|
|
|
209
252
|
| `dequeued` | Request leaves the queue | `model`, `waitedMs`, `priority` |
|
|
210
253
|
| `retrying` | A failed request is about to retry | `model`, `attempt`, `maxAttempts`, `delayMs`, `error` |
|
|
211
254
|
| `rateLimited` | Limit hit (local or remote 429) | `model`, `source`, `limitType`, `resetAt` |
|
|
212
|
-
| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period` |
|
|
255
|
+
| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period`, `usingFallback` |
|
|
213
256
|
| `dropped` | Request rejected (queue full or timeout) | `model`, `reason` |
|
|
214
257
|
| `completed` | Request finished successfully | `model`, `inputTokens`, `outputTokens`, `costUsd`, `latencyMs` |
|
|
215
258
|
|
package/dist/index.cjs
CHANGED
|
@@ -317,7 +317,7 @@ var CostTracker = class {
|
|
|
317
317
|
];
|
|
318
318
|
for (const { limit, current, period } of checks) {
|
|
319
319
|
if (limit !== void 0 && current + estimatedCostUsd > limit) {
|
|
320
|
-
if (onExceeded === "throw") {
|
|
320
|
+
if (onExceeded === "throw" || onExceeded === "fallback") {
|
|
321
321
|
throw new BudgetExceededError(model, current, limit, period);
|
|
322
322
|
}
|
|
323
323
|
return false;
|
|
@@ -1320,7 +1320,7 @@ var Pipeline = class {
|
|
|
1320
1320
|
const estimatedInput = estimateInputTokens(prompt);
|
|
1321
1321
|
const startMs = Date.now();
|
|
1322
1322
|
const key = `${provider}:${modelId}`;
|
|
1323
|
-
if (this.config.cost?.budget) {
|
|
1323
|
+
if (this.config.cost?.budget && !opts.skipBudgetCheck) {
|
|
1324
1324
|
const estimatedCost = this.costTracker.estimateCost(
|
|
1325
1325
|
estimatedInput,
|
|
1326
1326
|
500,
|
|
@@ -1328,12 +1328,26 @@ var Pipeline = class {
|
|
|
1328
1328
|
limits.inputPricePerMillion,
|
|
1329
1329
|
limits.outputPricePerMillion
|
|
1330
1330
|
);
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1331
|
+
try {
|
|
1332
|
+
this.costTracker.checkBudget(
|
|
1333
|
+
modelId,
|
|
1334
|
+
estimatedCost,
|
|
1335
|
+
this.config.cost.budget,
|
|
1336
|
+
this.config.cost.onExceeded ?? "throw"
|
|
1337
|
+
);
|
|
1338
|
+
} catch (err) {
|
|
1339
|
+
if (err instanceof BudgetExceededError) {
|
|
1340
|
+
this.emitter.emit("budgetHit", {
|
|
1341
|
+
model: err.model,
|
|
1342
|
+
provider,
|
|
1343
|
+
currentCostUsd: err.currentCostUsd,
|
|
1344
|
+
limitUsd: err.limitUsd,
|
|
1345
|
+
period: err.period,
|
|
1346
|
+
usingFallback: false
|
|
1347
|
+
});
|
|
1348
|
+
}
|
|
1349
|
+
throw err;
|
|
1350
|
+
}
|
|
1337
1351
|
}
|
|
1338
1352
|
await this.engine.acquire(key, {
|
|
1339
1353
|
limits,
|
|
@@ -1472,7 +1486,8 @@ function getPerRequestOptions(params, queueTimeout) {
|
|
|
1472
1486
|
return {
|
|
1473
1487
|
priority: raw?.priority ?? "normal",
|
|
1474
1488
|
timeoutMs: raw?.timeout ?? queueTimeout,
|
|
1475
|
-
metadata: raw?.metadata ?? {}
|
|
1489
|
+
metadata: raw?.metadata ?? {},
|
|
1490
|
+
skipBudgetCheck: raw?._skipBudgetCheck ?? false
|
|
1476
1491
|
};
|
|
1477
1492
|
}
|
|
1478
1493
|
function extractTokenUsage(usage) {
|
|
@@ -1488,7 +1503,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1488
1503
|
// wrapGenerate — non-streaming
|
|
1489
1504
|
// -----------------------------------------------------------------------
|
|
1490
1505
|
async wrapGenerate({ doGenerate, params, model }) {
|
|
1491
|
-
const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
|
|
1506
|
+
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1492
1507
|
const modelId = model.modelId;
|
|
1493
1508
|
const provider = model.provider;
|
|
1494
1509
|
const startMs = Date.now();
|
|
@@ -1501,6 +1516,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1501
1516
|
streaming: false,
|
|
1502
1517
|
priority,
|
|
1503
1518
|
timeoutMs,
|
|
1519
|
+
skipBudgetCheck,
|
|
1504
1520
|
onUsage: () => {
|
|
1505
1521
|
}
|
|
1506
1522
|
}
|
|
@@ -1515,7 +1531,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1515
1531
|
// wrapStream — streaming
|
|
1516
1532
|
// -----------------------------------------------------------------------
|
|
1517
1533
|
async wrapStream({ doStream, params, model }) {
|
|
1518
|
-
const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
|
|
1534
|
+
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1519
1535
|
const modelId = model.modelId;
|
|
1520
1536
|
const provider = model.provider;
|
|
1521
1537
|
const startMs = Date.now();
|
|
@@ -1528,6 +1544,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1528
1544
|
streaming: true,
|
|
1529
1545
|
priority,
|
|
1530
1546
|
timeoutMs,
|
|
1547
|
+
skipBudgetCheck,
|
|
1531
1548
|
onUsage: () => {
|
|
1532
1549
|
}
|
|
1533
1550
|
}
|
|
@@ -1554,26 +1571,71 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1554
1571
|
/**
 * Wraps a language model so that every `doGenerate` / `doStream` call is
 * routed through the given middleware, with optional budget fallback routing.
 *
 * When the middleware rejects with a `BudgetExceededError` and a fallback
 * model was supplied, the same request is dispatched once more against the
 * fallback model. The retried params carry
 * `providerOptions.rateLimiter._skipBudgetCheck = true`; any other
 * `providerOptions` / `rateLimiter` entries supplied by the caller are kept.
 * All other errors, and budget errors when no fallback is configured, are
 * rethrown unchanged.
 *
 * @param {object} model - Primary model; must expose `provider`, `modelId`,
 *   `doGenerate(params)` and `doStream(params)`.
 * @param {object} middleware - Object exposing `wrapGenerate` and `wrapStream`,
 *   each called with `{ doGenerate, doStream, params, model }`.
 * @param {object} [overrides] - Optional `providerId`, `modelId` and `fallback`.
 * @returns {object} A model-shaped object exposing `specificationVersion`,
 *   `provider`, `modelId`, `supportedUrls`, `doGenerate` and `doStream`.
 */
function wrapModel(model, middleware, overrides) {
  const providerId = overrides?.providerId ?? model.provider;
  const modelId = overrides?.modelId ?? model.modelId;
  const fallbackModel = overrides?.fallback;

  // Returns a copy of `params` flagged to skip the budget pre-check.
  // The caller's object is never mutated.
  const withBudgetCheckSkipped = (params) => ({
    ...params,
    providerOptions: {
      ...params.providerOptions,
      rateLimiter: {
        ...(params.providerOptions?.["rateLimiter"] ?? {}),
        _skipBudgetCheck: true,
      },
    },
  });

  // Dispatches one request for `target` through the named middleware hook.
  // Property access on `middleware` keeps its `this` binding intact.
  const dispatch = (hookName, target, params) =>
    middleware[hookName]({
      doGenerate: () => target.doGenerate(params),
      doStream: () => target.doStream(params),
      params,
      model: target,
    });

  // Runs the primary model and reroutes to the fallback only for budget
  // errors; a failure of the fallback itself propagates to the caller.
  const runWithFallback = async (hookName, params) => {
    try {
      return await dispatch(hookName, model, params);
    } catch (err) {
      if (!(err instanceof BudgetExceededError) || !fallbackModel) {
        throw err;
      }
      return await dispatch(hookName, fallbackModel, withBudgetCheckSkipped(params));
    }
  };

  return {
    specificationVersion: "v4",
    provider: providerId,
    modelId,
    supportedUrls: model["supportedUrls"],
    async doGenerate(params) {
      return runWithFallback("wrapGenerate", params);
    },
    async doStream(params) {
      return runWithFallback("wrapStream", params);
    },
  };
}
|