ai-sdk-rate-limiter 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +182 -0
- package/dist/index.cjs +78 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +78 -1
- package/dist/index.js.map +1 -1
- package/dist/middleware.d.cts +1 -1
- package/dist/middleware.d.ts +1 -1
- package/dist/otel.d.cts +1 -1
- package/dist/otel.d.ts +1 -1
- package/dist/prometheus.d.cts +1 -1
- package/dist/prometheus.d.ts +1 -1
- package/dist/redis.d.cts +1 -1
- package/dist/redis.d.ts +1 -1
- package/dist/statsd.d.cts +1 -1
- package/dist/statsd.d.ts +1 -1
- package/dist/testing.cjs +78 -1
- package/dist/testing.cjs.map +1 -1
- package/dist/testing.d.cts +1 -1
- package/dist/testing.d.ts +1 -1
- package/dist/testing.js +78 -1
- package/dist/testing.js.map +1 -1
- package/dist/{types-CUPpMRPE.d.cts → types-CMevWGWK.d.cts} +18 -0
- package/dist/{types-CUPpMRPE.d.ts → types-CMevWGWK.d.ts} +18 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
Smart rate limiting, queuing, and cost tracking for AI API calls. Works across providers. Zero required dependencies.
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/ai-sdk-rate-limiter)
|
|
6
|
+
[](https://github.com/piyushgupta344/ai-sdk-rate-limiter/actions/workflows/ci.yml)
|
|
7
|
+
[](https://www.npmjs.com/package/ai-sdk-rate-limiter)
|
|
6
8
|
|
|
7
9
|
```
|
|
8
10
|
npm install ai-sdk-rate-limiter
|
|
@@ -83,6 +85,8 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
|
|
|
83
85
|
|
|
84
86
|
**Fallback chains** — `fallback` now accepts an array of models. On `BudgetExceededError`, the chain is walked in order until one succeeds.
|
|
85
87
|
|
|
88
|
+
**Express / Hono middleware** — `createRateLimiterMiddleware()` (from `ai-sdk-rate-limiter/middleware`) attaches `req.rateLimiter` to every request and converts rate-limiter errors to proper HTTP responses at the middleware layer — no per-route boilerplate.
|
|
89
|
+
|
|
86
90
|
**OpenTelemetry** — Drop-in OTel plugin that emits GenAI-spec spans for every request. Works with any OTel-compatible tracer.
|
|
87
91
|
|
|
88
92
|
**Testing utilities** — `createTestLimiter()` records every completed call so you can assert on model usage, token counts, and costs in unit tests.
|
|
@@ -109,6 +113,7 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
|
|
|
109
113
|
- [Graceful shutdown](#graceful-shutdown)
|
|
110
114
|
- [Prometheus metrics](#prometheus-metrics)
|
|
111
115
|
- [StatsD metrics](#statsd-metrics)
|
|
116
|
+
- [Express / Hono middleware](#express--hono-middleware)
|
|
112
117
|
- [Events](#events)
|
|
113
118
|
- [Backpressure](#backpressure)
|
|
114
119
|
- [Error handling](#error-handling)
|
|
@@ -858,6 +863,121 @@ const client: StatsDClient = {
|
|
|
858
863
|
|
|
859
864
|
---
|
|
860
865
|
|
|
866
|
+
## Express / Hono middleware
|
|
867
|
+
|
|
868
|
+
The `ai-sdk-rate-limiter/middleware` entry point eliminates per-route boilerplate. Scope extraction, priority assignment, and rate-limiter error handling all move to the middleware layer — route handlers just pass `req.rateLimiter` through.
|
|
869
|
+
|
|
870
|
+
### Express
|
|
871
|
+
|
|
872
|
+
```typescript
|
|
873
|
+
import { createRateLimiterMiddleware } from 'ai-sdk-rate-limiter/middleware'
|
|
874
|
+
|
|
875
|
+
const { middleware, errorHandler } = createRateLimiterMiddleware(limiter, {
|
|
876
|
+
// Extract scope from the request — stored in req.rateLimiter.scope
|
|
877
|
+
scope: (req) => {
|
|
878
|
+
const plan = req.headers['x-user-plan'] ?? 'free'
|
|
879
|
+
const id = req.headers['x-user-id']
|
|
880
|
+
return id ? `user:${plan}:${id}` : undefined
|
|
881
|
+
},
|
|
882
|
+
|
|
883
|
+
// Derive queue priority per-request
|
|
884
|
+
priority: (req) => req.headers['x-user-plan'] === 'pro' ? 'normal' : 'low',
|
|
885
|
+
|
|
886
|
+
// Add X-RateLimit-* informational headers to every response
|
|
887
|
+
injectHeaders: 'gpt-4o-mini',
|
|
888
|
+
})
|
|
889
|
+
|
|
890
|
+
app.use(middleware) // BEFORE routes
|
|
891
|
+
|
|
892
|
+
app.post('/chat', async (req, res) => {
|
|
893
|
+
const { text } = await generateText({
|
|
894
|
+
model,
|
|
895
|
+
prompt: req.body.message,
|
|
896
|
+
// req.rateLimiter already has scope + priority — just pass it through
|
|
897
|
+
providerOptions: { rateLimiter: req.rateLimiter },
|
|
898
|
+
})
|
|
899
|
+
res.json({ text })
|
|
900
|
+
})
|
|
901
|
+
|
|
902
|
+
app.use(errorHandler) // AFTER routes
|
|
903
|
+
```
|
|
904
|
+
|
|
905
|
+
The `errorHandler` converts every `RateLimiterError` to a typed HTTP response automatically — no try/catch needed in route handlers:
|
|
906
|
+
|
|
907
|
+
| Error | HTTP status | `code` |
|
|
908
|
+
|---|---|---|
|
|
909
|
+
| `QueueTimeoutError` | 503 | `QUEUE_TIMEOUT` |
|
|
910
|
+
| `QueueFullError` | 503 | `QUEUE_FULL` |
|
|
911
|
+
| `CircuitOpenError` | 503 | `CIRCUIT_OPEN` |
|
|
912
|
+
| `ShutdownError` | 503 | `SHUTDOWN` |
|
|
913
|
+
| `BudgetExceededError` | 402 | `BUDGET_EXCEEDED` |
|
|
914
|
+
| `RateLimiterError` (generic) | 429 | `RATE_LIMITED` |
|
|
915
|
+
|
|
916
|
+
Non-rate-limiter errors are passed to the next error handler unchanged.
|
|
917
|
+
|
|
918
|
+
### Hono
|
|
919
|
+
|
|
920
|
+
```typescript
|
|
921
|
+
import { createHonoMiddleware } from 'ai-sdk-rate-limiter/middleware'
|
|
922
|
+
|
|
923
|
+
app.use(createHonoMiddleware(limiter, {
|
|
924
|
+
scope: (c) => c.req.header('x-user-id'),
|
|
925
|
+
priority: (c) => c.req.header('x-plan') === 'pro' ? 'normal' : 'low',
|
|
926
|
+
}))
|
|
927
|
+
|
|
928
|
+
app.post('/chat', async (c) => {
|
|
929
|
+
const { text } = await generateText({
|
|
930
|
+
model,
|
|
931
|
+
prompt: await c.req.text(),
|
|
932
|
+
providerOptions: { rateLimiter: c.var.rateLimiter },
|
|
933
|
+
})
|
|
934
|
+
return c.json({ text })
|
|
935
|
+
})
|
|
936
|
+
```
|
|
937
|
+
|
|
938
|
+
`createHonoMiddleware` wraps the `next()` call in a try/catch, so any `RateLimiterError` thrown inside a route handler is caught and returned as a JSON response automatically.
|
|
939
|
+
|
|
940
|
+
### Standalone error handler
|
|
941
|
+
|
|
942
|
+
If you only need error handling without scope injection:
|
|
943
|
+
|
|
944
|
+
```typescript
|
|
945
|
+
import { createRateLimiterErrorHandler } from 'ai-sdk-rate-limiter/middleware'
|
|
946
|
+
|
|
947
|
+
app.use(createRateLimiterErrorHandler({
|
|
948
|
+
includeDetails: false, // omit retryAfter, period, limitUsd from response body
|
|
949
|
+
}))
|
|
950
|
+
```
|
|
951
|
+
|
|
952
|
+
### Custom framework (Fastify, etc.)
|
|
953
|
+
|
|
954
|
+
`mapErrorToResponse` is exported for frameworks that don't use the `(req, res, next)` convention:
|
|
955
|
+
|
|
956
|
+
```typescript
|
|
957
|
+
import { mapErrorToResponse } from 'ai-sdk-rate-limiter/middleware'
|
|
958
|
+
import { RateLimiterError } from 'ai-sdk-rate-limiter'
|
|
959
|
+
|
|
960
|
+
// Fastify custom error handler (registered via setErrorHandler)
|
|
961
|
+
fastify.setErrorHandler((err, request, reply) => {
|
|
962
|
+
if (err instanceof RateLimiterError) {
|
|
963
|
+
const { status, body } = mapErrorToResponse(err)
|
|
964
|
+
return reply.status(status).send(body)
|
|
965
|
+
}
|
|
966
|
+
reply.send(err)
|
|
967
|
+
})
|
|
968
|
+
```
|
|
969
|
+
|
|
970
|
+
### `req.rateLimiter` TypeScript type
|
|
971
|
+
|
|
972
|
+
The middleware augments `http.IncomingMessage` so `req.rateLimiter` is typed in Express and Fastify without any additional setup:
|
|
973
|
+
|
|
974
|
+
```typescript
|
|
975
|
+
import type { RateLimiterRequestContext } from 'ai-sdk-rate-limiter/middleware'
|
|
976
|
+
// req.rateLimiter is automatically typed as RateLimiterRequestContext | undefined
|
|
977
|
+
```
|
|
978
|
+
|
|
979
|
+
---
|
|
980
|
+
|
|
861
981
|
## Events
|
|
862
982
|
|
|
863
983
|
All events are typed. Register handlers at creation time or dynamically:
|
|
@@ -1358,6 +1478,42 @@ const limiter = createRateLimiter({ store: new MyStore() })
|
|
|
1358
1478
|
|
|
1359
1479
|
---
|
|
1360
1480
|
|
|
1481
|
+
## Debug mode
|
|
1482
|
+
|
|
1483
|
+
Set `debug: true` to enable structured console logging for every rate-limit decision, queue entry/exit, slot acquisition, circuit breaker transition, and completed call cost:
|
|
1484
|
+
|
|
1485
|
+
```typescript
|
|
1486
|
+
const limiter = createRateLimiter({ debug: true })
|
|
1487
|
+
```
|
|
1488
|
+
|
|
1489
|
+
Sample output:
|
|
1490
|
+
|
|
1491
|
+
```
|
|
1492
|
+
[ai-sdk-rate-limiter] gpt-4o: execute (provider="openai" priority="normal")
|
|
1493
|
+
[ai-sdk-rate-limiter] gpt-4o: queuing (queueDepth=3 estimatedWaitMs=1200 priority="normal")
|
|
1494
|
+
[ai-sdk-rate-limiter] gpt-4o: dequeued (waitedMs=1187 priority="normal")
|
|
1495
|
+
[ai-sdk-rate-limiter] gpt-4o: completed (tokens=342+87 costUsd=0.000021 latencyMs=1343 streaming=false)
|
|
1496
|
+
```
|
|
1497
|
+
|
|
1498
|
+
When debug logging is disabled, the logger returns immediately — no message formatting, no `JSON.stringify`, and no `console.log` call. The only remaining cost is the small details object each call site builds before invoking the logger.
|
|
1499
|
+
|
|
1500
|
+
---
|
|
1501
|
+
|
|
1502
|
+
## Config validation
|
|
1503
|
+
|
|
1504
|
+
`createRateLimiter()` validates your configuration at construction time. If it spots a likely misconfiguration, it logs a `console.warn` (it never throws). It checks for the following:
|
|
1505
|
+
|
|
1506
|
+
| Issue | Warning |
|
|
1507
|
+
|---|---|
|
|
1508
|
+
| `cost.store` set | Reminds you to call `warmUp()` at startup (shown whenever a cost store is configured, since the call cannot be detected at construction time) |
|
|
1509
|
+
| `circuit.failureThreshold < 3` | Too sensitive — risks false trips on transient errors |
|
|
1510
|
+
| `retry.retryOn` excludes 429 | Rate-limit errors won't be retried |
|
|
1511
|
+
| `queue.timeout < 3000ms` | Requests will time out before they can be served |
|
|
1512
|
+
| `cost.budget` set without `onExceeded` | Silent default is `'throw'` — may want `'queue'` or `'fallback'` |
|
|
1513
|
+
| `cost.onExceeded: 'fallback'` | Reminds you to pass a `fallback` model to `limiter.wrap()` |
|
|
1514
|
+
|
|
1515
|
+
---
|
|
1516
|
+
|
|
1361
1517
|
## Comparison
|
|
1362
1518
|
|
|
1363
1519
|
| | ai-sdk-rate-limiter | bottleneck | p-limit | SDK built-in retry | LangChain |
|
|
@@ -1382,6 +1538,7 @@ const limiter = createRateLimiter({ store: new MyStore() })
|
|
|
1382
1538
|
| Backoff propagation | yes | no | no | no | no |
|
|
1383
1539
|
| Prometheus metrics | yes | no | no | no | no |
|
|
1384
1540
|
| StatsD metrics | yes | no | no | no | no |
|
|
1541
|
+
| Express/Hono middleware | yes | no | no | no | no |
|
|
1385
1542
|
| OpenTelemetry | yes | no | no | no | partial |
|
|
1386
1543
|
| Testing utilities | yes | no | no | no | no |
|
|
1387
1544
|
| CLI audit | yes | no | no | no | no |
|
|
@@ -1425,6 +1582,14 @@ import type {
|
|
|
1425
1582
|
} from 'ai-sdk-rate-limiter/redis'
|
|
1426
1583
|
|
|
1427
1584
|
import type { StatsDClient } from 'ai-sdk-rate-limiter/statsd'
|
|
1585
|
+
|
|
1586
|
+
import type {
|
|
1587
|
+
RateLimiterRequestContext,
|
|
1588
|
+
RateLimiterMiddlewareOptions,
|
|
1589
|
+
ErrorHandlerOptions,
|
|
1590
|
+
HonoMiddlewareOptions,
|
|
1591
|
+
HonoContext,
|
|
1592
|
+
} from 'ai-sdk-rate-limiter/middleware'
|
|
1428
1593
|
```
|
|
1429
1594
|
|
|
1430
1595
|
---
|
|
@@ -1442,6 +1607,23 @@ Four runnable examples are included, each with its own README:
|
|
|
1442
1607
|
|
|
1443
1608
|
---
|
|
1444
1609
|
|
|
1610
|
+
## Bundle sizes
|
|
1611
|
+
|
|
1612
|
+
Each entry point is independently tree-shakeable. Importing `ai-sdk-rate-limiter` never pulls in Redis, Prometheus, OTel, or StatsD.
|
|
1613
|
+
|
|
1614
|
+
| Entry point | Size (minified) | Size (gzip) |
|
|
1615
|
+
|---|---|---|
|
|
1616
|
+
| `ai-sdk-rate-limiter` | ~80 KB | ~22 KB |
|
|
1617
|
+
| `ai-sdk-rate-limiter/redis` | ~12 KB | ~4 KB |
|
|
1618
|
+
| `ai-sdk-rate-limiter/middleware` | ~8 KB | ~2.5 KB |
|
|
1619
|
+
| `ai-sdk-rate-limiter/prometheus` | ~8 KB | ~2.5 KB |
|
|
1620
|
+
| `ai-sdk-rate-limiter/otel` | ~4 KB | ~1.5 KB |
|
|
1621
|
+
| `ai-sdk-rate-limiter/statsd` | ~4 KB | ~1.2 KB |
|
|
1622
|
+
|
|
1623
|
+
The core package is self-contained. Optional peer deps (`ioredis`, `@opentelemetry/api`) are only loaded when you import the corresponding entry point.
|
|
1624
|
+
|
|
1625
|
+
---
|
|
1626
|
+
|
|
1445
1627
|
## Requirements
|
|
1446
1628
|
|
|
1447
1629
|
- Node.js 18+ / Bun / Deno
|
package/dist/index.cjs
CHANGED
|
@@ -1678,6 +1678,23 @@ var CircuitBreaker = class {
|
|
|
1678
1678
|
}
|
|
1679
1679
|
};
|
|
1680
1680
|
|
|
1681
|
+
// src/core/debug-logger.ts
|
|
1682
|
+
var PREFIX = "[ai-sdk-rate-limiter]";
|
|
1683
|
+
var DebugLogger = class {
|
|
1684
|
+
constructor(enabled) {
|
|
1685
|
+
this.enabled = enabled;
|
|
1686
|
+
}
|
|
1687
|
+
log(model, message, details) {
|
|
1688
|
+
if (!this.enabled) return;
|
|
1689
|
+
if (details && Object.keys(details).length > 0) {
|
|
1690
|
+
const parts = Object.entries(details).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(" ");
|
|
1691
|
+
console.log(`${PREFIX} ${model}: ${message} (${parts})`);
|
|
1692
|
+
} else {
|
|
1693
|
+
console.log(`${PREFIX} ${model}: ${message}`);
|
|
1694
|
+
}
|
|
1695
|
+
}
|
|
1696
|
+
};
|
|
1697
|
+
|
|
1681
1698
|
// src/core/pipeline.ts
|
|
1682
1699
|
function resolveRetryConfig(config) {
|
|
1683
1700
|
const r = config.retry ?? {};
|
|
@@ -1720,6 +1737,7 @@ var Pipeline = class {
|
|
|
1720
1737
|
/** Set to true after shutdown() is called */
|
|
1721
1738
|
this.shutdownRequested = false;
|
|
1722
1739
|
this.config = config;
|
|
1740
|
+
this.log = new DebugLogger(config.debug === true);
|
|
1723
1741
|
this.engine = new RateLimitEngine({
|
|
1724
1742
|
maxQueueSize: config.queue?.maxSize ?? 500,
|
|
1725
1743
|
...config.store !== void 0 && { store: config.store }
|
|
@@ -1761,6 +1779,7 @@ var Pipeline = class {
|
|
|
1761
1779
|
* recordUsage() once they have actual token counts from the API response.
|
|
1762
1780
|
*/
|
|
1763
1781
|
async execute(modelId, provider, prompt, fn, opts) {
|
|
1782
|
+
this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
|
|
1764
1783
|
if (this.shutdownRequested) {
|
|
1765
1784
|
this.emitter.emit("dropped", {
|
|
1766
1785
|
model: modelId,
|
|
@@ -1850,10 +1869,12 @@ var Pipeline = class {
|
|
|
1850
1869
|
timeoutMs: opts.timeoutMs,
|
|
1851
1870
|
...opts.signal !== void 0 && { signal: opts.signal },
|
|
1852
1871
|
onQueued: (queueDepth, estimatedWaitMs) => {
|
|
1872
|
+
this.log.log(modelId, "queuing", { queueDepth, estimatedWaitMs, priority: opts.priority });
|
|
1853
1873
|
this.emitter.emit("queued", { model: modelId, provider, priority: opts.priority, queueDepth, estimatedWaitMs });
|
|
1854
1874
|
this.emitter.emit("rateLimited", { source: "local", model: modelId, provider, limitType: "rpm", resetAt: Date.now() + estimatedWaitMs });
|
|
1855
1875
|
},
|
|
1856
1876
|
onDequeued: (waitedMs) => {
|
|
1877
|
+
this.log.log(modelId, "dequeued", { waitedMs, priority: opts.priority });
|
|
1857
1878
|
this.emitter.emit("dequeued", { model: modelId, provider, waitedMs, priority: opts.priority });
|
|
1858
1879
|
}
|
|
1859
1880
|
});
|
|
@@ -1905,7 +1926,10 @@ var Pipeline = class {
|
|
|
1905
1926
|
});
|
|
1906
1927
|
if (circuit) {
|
|
1907
1928
|
const justClosed = circuit.recordSuccess();
|
|
1908
|
-
if (justClosed)
|
|
1929
|
+
if (justClosed) {
|
|
1930
|
+
this.log.log(modelId, "circuit closed \u2014 upstream recovered");
|
|
1931
|
+
this.emitter.emit("circuitClosed", { model: modelId, provider });
|
|
1932
|
+
}
|
|
1909
1933
|
}
|
|
1910
1934
|
return result;
|
|
1911
1935
|
} catch (error) {
|
|
@@ -1915,6 +1939,7 @@ var Pipeline = class {
|
|
|
1915
1939
|
if (shouldTrip) {
|
|
1916
1940
|
const justOpened = circuit.recordFailure();
|
|
1917
1941
|
if (justOpened) {
|
|
1942
|
+
this.log.log(modelId, "circuit OPEN", { status, cooldownMs: this.config.circuit?.cooldownMs ?? 6e4 });
|
|
1918
1943
|
this.emitter.emit("circuitOpen", {
|
|
1919
1944
|
model: modelId,
|
|
1920
1945
|
provider,
|
|
@@ -1945,6 +1970,13 @@ var Pipeline = class {
|
|
|
1945
1970
|
limits.outputPricePerMillion,
|
|
1946
1971
|
scope
|
|
1947
1972
|
);
|
|
1973
|
+
this.log.log(modelId, "completed", {
|
|
1974
|
+
tokens: `${usage.inputTokens}+${usage.outputTokens}`,
|
|
1975
|
+
costUsd: costUsd.toFixed(6),
|
|
1976
|
+
latencyMs,
|
|
1977
|
+
streaming,
|
|
1978
|
+
...scope !== void 0 && { scope }
|
|
1979
|
+
});
|
|
1948
1980
|
this.emitter.emit("completed", {
|
|
1949
1981
|
model: modelId,
|
|
1950
1982
|
provider,
|
|
@@ -2416,8 +2448,53 @@ function rateLimited(client, options = {}) {
|
|
|
2416
2448
|
});
|
|
2417
2449
|
}
|
|
2418
2450
|
|
|
2451
|
+
// src/core/config-validator.ts
|
|
2452
|
+
var PREFIX2 = "\x1B[33m\u26A0 ai-sdk-rate-limiter\x1B[0m";
|
|
2453
|
+
var RESET = "\x1B[0m";
|
|
2454
|
+
function validateConfig(config) {
|
|
2455
|
+
const warnings = [];
|
|
2456
|
+
if (config.cost?.store !== void 0) {
|
|
2457
|
+
warnings.push(
|
|
2458
|
+
"cost.store is configured \u2014 call `await limiter.warmUp()` at startup.\n Without it, budget caps won't account for spend from previous process runs."
|
|
2459
|
+
);
|
|
2460
|
+
}
|
|
2461
|
+
const threshold = config.circuit?.failureThreshold;
|
|
2462
|
+
if (threshold !== void 0 && threshold < 3) {
|
|
2463
|
+
warnings.push(
|
|
2464
|
+
`circuit.failureThreshold is ${threshold} \u2014 very low. The circuit will open after nearly every error. Consider a value of 5 or higher for typical production workloads.`
|
|
2465
|
+
);
|
|
2466
|
+
}
|
|
2467
|
+
if (config.retry?.retryOn !== void 0 && !config.retry.retryOn.includes(429)) {
|
|
2468
|
+
warnings.push(
|
|
2469
|
+
"retry.retryOn does not include 429. Rate limit errors from the API will not be retried. Add 429 to retry.retryOn, or remove the override to use the default."
|
|
2470
|
+
);
|
|
2471
|
+
}
|
|
2472
|
+
const queueTimeout = config.queue?.timeout;
|
|
2473
|
+
if (queueTimeout !== void 0 && queueTimeout < 3e3) {
|
|
2474
|
+
warnings.push(
|
|
2475
|
+
`queue.timeout is ${queueTimeout}ms \u2014 less than 3 seconds. Requests may time out before the rate limit window resets (typically 60s). Consider 30_000ms (30s) or higher.`
|
|
2476
|
+
);
|
|
2477
|
+
}
|
|
2478
|
+
if (config.cost?.onExceeded === "fallback") {
|
|
2479
|
+
warnings.push(
|
|
2480
|
+
"cost.onExceeded is 'fallback' but fallback models are configured per-model in limiter.wrap(model, { fallback: cheaperModel }). If no fallback is set on a wrapped model, BudgetExceededError will still be thrown."
|
|
2481
|
+
);
|
|
2482
|
+
}
|
|
2483
|
+
if (config.cost?.budget !== void 0 && config.cost.onExceeded === void 0) {
|
|
2484
|
+
warnings.push(
|
|
2485
|
+
"cost.budget is set but cost.onExceeded is not. Defaulting to 'throw' \u2014 requests will throw BudgetExceededError when the cap is hit. Set onExceeded: 'queue' or 'fallback' to change this behavior."
|
|
2486
|
+
);
|
|
2487
|
+
}
|
|
2488
|
+
for (const warning of warnings) {
|
|
2489
|
+
const formatted = warning.replace(/\n/g, `
|
|
2490
|
+
`);
|
|
2491
|
+
console.warn(`${PREFIX2}: ${formatted}${RESET}`);
|
|
2492
|
+
}
|
|
2493
|
+
}
|
|
2494
|
+
|
|
2419
2495
|
// src/create-rate-limiter.ts
|
|
2420
2496
|
function createRateLimiter(config = {}) {
|
|
2497
|
+
validateConfig(config);
|
|
2421
2498
|
const pipeline = new Pipeline(config);
|
|
2422
2499
|
const queueTimeout = config.queue?.timeout ?? 3e4;
|
|
2423
2500
|
const middleware = createMiddleware(pipeline, queueTimeout);
|