ai-sdk-rate-limiter 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -77,7 +77,7 @@ const limiter = createRateLimiter({
77
77
  daily: 50,
78
78
  monthly: 500,
79
79
  },
80
- onExceeded: 'throw', // or 'queue' wait until the period resets
80
+ onExceeded: 'throw', // 'throw' | 'queue' | 'fallback'
81
81
  },
82
82
 
83
83
  // Queue behavior
@@ -169,6 +169,49 @@ Costs are based on **actual token counts** from API responses — not estimates.
169
169
 
170
170
  ---
171
171
 
172
+ ## Budget fallback routing
173
+
174
+ When a budget limit is hit, you can transparently reroute to a cheaper model instead of throwing an error. Set `cost.onExceeded` to `'fallback'` and pass a `fallback` option to `wrap()`:
175
+
176
+ ```typescript
177
+ const limiter = createRateLimiter({
178
+ cost: {
179
+ budget: { daily: 10 },
180
+ onExceeded: 'fallback', // reroute to fallback instead of throwing
181
+ },
182
+ on: {
183
+ budgetHit: ({ model, currentCostUsd, limitUsd, period }) =>
184
+ console.warn(`${model} ${period} budget hit ($${currentCostUsd} of $${limitUsd})`),
185
+ },
186
+ })
187
+
188
+ const model = limiter.wrap(
189
+ openai('gpt-4o'), // primary model
190
+ { fallback: openai('gpt-4o-mini') }, // used when budget is exceeded
191
+ )
192
+
193
+ // Under budget → uses gpt-4o normally
194
+ // Over $10/day → silently switches to gpt-4o-mini, no code changes needed
195
+ const result = await generateText({ model, prompt })
196
+ ```
197
+
198
+ **How it works:**
199
+ 1. The budget is checked before every request against total rolling spend
200
+ 2. When the budget is exceeded, `BudgetExceededError` is caught inside `wrap()` before it reaches your code
201
+ 3. The request is re-executed against the fallback model, bypassing the budget pre-check
202
+ 4. Fallback usage is tracked under the fallback model's ID in `getCostReport()`
203
+
204
+ **Behavior matrix:**
205
+
206
+ | `onExceeded` | `fallback` configured | Outcome |
207
+ |---|---|---|
208
+ | `'throw'` | any | Throws `BudgetExceededError` |
209
+ | `'fallback'` | yes | Transparently uses fallback model |
210
+ | `'fallback'` | no | Throws `BudgetExceededError` |
211
+ | `'queue'` | any | Queues until period resets |
212
+
213
+ ---
214
+
172
215
  ## Backpressure — know before you send
173
216
 
174
217
  Check estimated wait time before committing to a request. Useful for showing loading states or shedding load gracefully.
@@ -209,7 +252,7 @@ limiter.off('queued', handler)
209
252
  | `dequeued` | Request leaves the queue | `model`, `waitedMs`, `priority` |
210
253
  | `retrying` | A failed request is about to retry | `model`, `attempt`, `maxAttempts`, `delayMs`, `error` |
211
254
  | `rateLimited` | Limit hit (local or remote 429) | `model`, `source`, `limitType`, `resetAt` |
212
- | `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period` |
255
+ | `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period`, `usingFallback` |
213
256
  | `dropped` | Request rejected (queue full or timeout) | `model`, `reason` |
214
257
  | `completed` | Request finished successfully | `model`, `inputTokens`, `outputTokens`, `costUsd`, `latencyMs` |
215
258
 
package/dist/index.cjs CHANGED
@@ -317,7 +317,7 @@ var CostTracker = class {
317
317
  ];
318
318
  for (const { limit, current, period } of checks) {
319
319
  if (limit !== void 0 && current + estimatedCostUsd > limit) {
320
- if (onExceeded === "throw") {
320
+ if (onExceeded === "throw" || onExceeded === "fallback") {
321
321
  throw new BudgetExceededError(model, current, limit, period);
322
322
  }
323
323
  return false;
@@ -1320,7 +1320,7 @@ var Pipeline = class {
1320
1320
  const estimatedInput = estimateInputTokens(prompt);
1321
1321
  const startMs = Date.now();
1322
1322
  const key = `${provider}:${modelId}`;
1323
- if (this.config.cost?.budget) {
1323
+ if (this.config.cost?.budget && !opts.skipBudgetCheck) {
1324
1324
  const estimatedCost = this.costTracker.estimateCost(
1325
1325
  estimatedInput,
1326
1326
  500,
@@ -1328,12 +1328,26 @@ var Pipeline = class {
1328
1328
  limits.inputPricePerMillion,
1329
1329
  limits.outputPricePerMillion
1330
1330
  );
1331
- this.costTracker.checkBudget(
1332
- modelId,
1333
- estimatedCost,
1334
- this.config.cost.budget,
1335
- this.config.cost.onExceeded ?? "throw"
1336
- );
1331
+ try {
1332
+ this.costTracker.checkBudget(
1333
+ modelId,
1334
+ estimatedCost,
1335
+ this.config.cost.budget,
1336
+ this.config.cost.onExceeded ?? "throw"
1337
+ );
1338
+ } catch (err) {
1339
+ if (err instanceof BudgetExceededError) {
1340
+ this.emitter.emit("budgetHit", {
1341
+ model: err.model,
1342
+ provider,
1343
+ currentCostUsd: err.currentCostUsd,
1344
+ limitUsd: err.limitUsd,
1345
+ period: err.period,
1346
+ usingFallback: false
1347
+ });
1348
+ }
1349
+ throw err;
1350
+ }
1337
1351
  }
1338
1352
  await this.engine.acquire(key, {
1339
1353
  limits,
@@ -1472,7 +1486,8 @@ function getPerRequestOptions(params, queueTimeout) {
1472
1486
  return {
1473
1487
  priority: raw?.priority ?? "normal",
1474
1488
  timeoutMs: raw?.timeout ?? queueTimeout,
1475
- metadata: raw?.metadata ?? {}
1489
+ metadata: raw?.metadata ?? {},
1490
+ skipBudgetCheck: raw?._skipBudgetCheck ?? false
1476
1491
  };
1477
1492
  }
1478
1493
  function extractTokenUsage(usage) {
@@ -1488,7 +1503,7 @@ function createMiddleware(pipeline, queueTimeout) {
1488
1503
  // wrapGenerate — non-streaming
1489
1504
  // -----------------------------------------------------------------------
1490
1505
  async wrapGenerate({ doGenerate, params, model }) {
1491
- const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
1506
+ const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1492
1507
  const modelId = model.modelId;
1493
1508
  const provider = model.provider;
1494
1509
  const startMs = Date.now();
@@ -1501,6 +1516,7 @@ function createMiddleware(pipeline, queueTimeout) {
1501
1516
  streaming: false,
1502
1517
  priority,
1503
1518
  timeoutMs,
1519
+ skipBudgetCheck,
1504
1520
  onUsage: () => {
1505
1521
  }
1506
1522
  }
@@ -1515,7 +1531,7 @@ function createMiddleware(pipeline, queueTimeout) {
1515
1531
  // wrapStream — streaming
1516
1532
  // -----------------------------------------------------------------------
1517
1533
  async wrapStream({ doStream, params, model }) {
1518
- const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
1534
+ const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1519
1535
  const modelId = model.modelId;
1520
1536
  const provider = model.provider;
1521
1537
  const startMs = Date.now();
@@ -1528,6 +1544,7 @@ function createMiddleware(pipeline, queueTimeout) {
1528
1544
  streaming: true,
1529
1545
  priority,
1530
1546
  timeoutMs,
1547
+ skipBudgetCheck,
1531
1548
  onUsage: () => {
1532
1549
  }
1533
1550
  }
@@ -1554,26 +1571,71 @@ function createMiddleware(pipeline, queueTimeout) {
1554
1571
  function wrapModel(model, middleware, overrides) {
1555
1572
  const providerId = overrides?.providerId ?? model.provider;
1556
1573
  const modelId = overrides?.modelId ?? model.modelId;
1574
+ const fallbackModel = overrides?.fallback;
1557
1575
  return {
1558
1576
  specificationVersion: "v4",
1559
1577
  provider: providerId,
1560
1578
  modelId,
1561
1579
  supportedUrls: model["supportedUrls"],
1562
1580
  async doGenerate(params) {
1563
- return middleware.wrapGenerate({
1564
- doGenerate: () => model.doGenerate(params),
1565
- doStream: () => model.doStream(params),
1566
- params,
1567
- model
1568
- });
1581
+ try {
1582
+ return await middleware.wrapGenerate({
1583
+ doGenerate: () => model.doGenerate(params),
1584
+ doStream: () => model.doStream(params),
1585
+ params,
1586
+ model
1587
+ });
1588
+ } catch (err) {
1589
+ if (err instanceof BudgetExceededError && fallbackModel) {
1590
+ const fallbackParams = {
1591
+ ...params,
1592
+ providerOptions: {
1593
+ ...params.providerOptions,
1594
+ rateLimiter: {
1595
+ ...params.providerOptions?.["rateLimiter"] ?? {},
1596
+ _skipBudgetCheck: true
1597
+ }
1598
+ }
1599
+ };
1600
+ return middleware.wrapGenerate({
1601
+ doGenerate: () => fallbackModel.doGenerate(fallbackParams),
1602
+ doStream: () => fallbackModel.doStream(fallbackParams),
1603
+ params: fallbackParams,
1604
+ model: fallbackModel
1605
+ });
1606
+ }
1607
+ throw err;
1608
+ }
1569
1609
  },
1570
1610
  async doStream(params) {
1571
- return middleware.wrapStream({
1572
- doGenerate: () => model.doGenerate(params),
1573
- doStream: () => model.doStream(params),
1574
- params,
1575
- model
1576
- });
1611
+ try {
1612
+ return await middleware.wrapStream({
1613
+ doGenerate: () => model.doGenerate(params),
1614
+ doStream: () => model.doStream(params),
1615
+ params,
1616
+ model
1617
+ });
1618
+ } catch (err) {
1619
+ if (err instanceof BudgetExceededError && fallbackModel) {
1620
+ const fallbackParams = {
1621
+ ...params,
1622
+ providerOptions: {
1623
+ ...params.providerOptions,
1624
+ rateLimiter: {
1625
+ ...params.providerOptions?.["rateLimiter"] ?? {},
1626
+ _skipBudgetCheck: true
1627
+ }
1628
+ }
1629
+ };
1630
+ return middleware.wrapStream({
1631
+ doGenerate: () => fallbackModel.doGenerate(fallbackParams),
1632
+ doStream: () => fallbackModel.doStream(fallbackParams),
1633
+ params: fallbackParams,
1634
+ model: fallbackModel
1635
+ });
1636
+ }
1637
+ throw err;
1638
+ }
1577
1639
  }
1578
1640
  };
1579
1641
  }