npm - ai-sdk-rate-limiter - Versions diffs - 0.10.0 → 0.12.0 - Mend

ai-sdk-rate-limiter 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +254 -0
package/dist/index.cjs +162 -4
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +38 -3
package/dist/index.d.ts +38 -3
package/dist/index.js +162 -5
package/dist/index.js.map +1 -1
package/dist/middleware.d.cts +1 -1
package/dist/middleware.d.ts +1 -1
package/dist/otel.d.cts +1 -1
package/dist/otel.d.ts +1 -1
package/dist/prometheus.d.cts +1 -1
package/dist/prometheus.d.ts +1 -1
package/dist/redis.d.cts +1 -1
package/dist/redis.d.ts +1 -1
package/dist/statsd.d.cts +1 -1
package/dist/statsd.d.ts +1 -1
package/dist/testing.cjs +129 -4
package/dist/testing.cjs.map +1 -1
package/dist/testing.d.cts +1 -1
package/dist/testing.d.ts +1 -1
package/dist/testing.js +129 -4
package/dist/testing.js.map +1 -1
package/dist/{types-CUPpMRPE.d.cts → types-DKIz0MLZ.d.cts} +56 -1
package/dist/{types-CUPpMRPE.d.ts → types-DKIz0MLZ.d.ts} +56 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -3,6 +3,8 @@
 Smart rate limiting, queuing, and cost tracking for AI API calls. Works across providers. Zero required dependencies.
 [![npm](https://img.shields.io/npm/v/ai-sdk-rate-limiter)](https://www.npmjs.com/package/ai-sdk-rate-limiter)
+[![CI](https://github.com/piyushgupta344/ai-sdk-rate-limiter/actions/workflows/ci.yml/badge.svg)](https://github.com/piyushgupta344/ai-sdk-rate-limiter/actions/workflows/ci.yml)
+[![npm downloads](https://img.shields.io/npm/dm/ai-sdk-rate-limiter)](https://www.npmjs.com/package/ai-sdk-rate-limiter)
 ```
 npm install ai-sdk-rate-limiter
@@ -83,6 +85,8 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
 **Fallback chains** — `fallback` now accepts an array of models. On `BudgetExceededError`, the chain is walked in order until one succeeds.
+**Express / Hono middleware** — `createRateLimiterMiddleware()` (from `ai-sdk-rate-limiter/middleware`) attaches `req.rateLimiter` to every request and converts rate-limiter errors to proper HTTP responses at the middleware layer — no per-route boilerplate.
 **OpenTelemetry** — Drop-in OTel plugin that emits GenAI-spec spans for every request. Works with any OTel-compatible tracer.
 **Testing utilities** — `createTestLimiter()` records every completed call so you can assert on model usage, token counts, and costs in unit tests.
@@ -109,6 +113,7 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
 - [Graceful shutdown](#graceful-shutdown)
 - [Prometheus metrics](#prometheus-metrics)
 - [StatsD metrics](#statsD-metrics)
+- [Express / Hono middleware](#express--hono-middleware)
 - [Events](#events)
 - [Backpressure](#backpressure)
 - [Error handling](#error-handling)
@@ -552,6 +557,27 @@ Costs are based on **actual token counts** from API responses — not estimates.
 `byScope` is populated automatically when requests carry a `scope` (either set on `limiter.wrap()` or via `providerOptions.rateLimiter.scope`). Unscoped requests don't appear in `byScope`.
+### Cost forecasting
+`getCostForecast()` projects your end-of-period spend based on the current hourly rate. Useful for alerting before a budget cap is hit:
+```typescript
+const forecast = limiter.getCostForecast()
+console.log(forecast)
+// {
+//   hour:  { spentUsd: 1.20, projectedUsd: 1.20,  ratePerHourUsd: 1.20 },
+//   day:   { spentUsd: 3.50, projectedUsd: 28.80, ratePerHourUsd: 1.20 },
+//   month: { spentUsd: 8.10, projectedUsd: 864,   ratePerHourUsd: 1.20 },
+// }
+if (forecast.day.projectedUsd > 40) {
+  console.warn(`Heads up — on track to spend $${forecast.day.projectedUsd.toFixed(2)} today`)
+}
+```
+`projectedUsd` = current hourly rate × hours in the period. It is based on the **last 60 minutes** of spend, so it responds quickly to usage spikes.
 ---
 ## Budget fallback routing
@@ -858,6 +884,121 @@ const client: StatsDClient = {
 ---
+## Express / Hono middleware
+The `ai-sdk-rate-limiter/middleware` entry point eliminates per-route boilerplate. Scope extraction, priority assignment, and rate-limiter error handling all move to the middleware layer — route handlers just pass `req.rateLimiter` through.
+### Express
+```typescript
+import { createRateLimiterMiddleware } from 'ai-sdk-rate-limiter/middleware'
+const { middleware, errorHandler } = createRateLimiterMiddleware(limiter, {
+  // Extract scope from the request — stored in req.rateLimiter.scope
+  scope: (req) => {
+    const plan = req.headers['x-user-plan'] ?? 'free'
+    const id   = req.headers['x-user-id']
+    return id ? `user:${plan}:${id}` : undefined
+  },
+  // Derive queue priority per-request
+  priority: (req) => req.headers['x-user-plan'] === 'pro' ? 'normal' : 'low',
+  // Add X-RateLimit-* informational headers to every response
+  injectHeaders: 'gpt-4o-mini',
+})
+app.use(middleware)    // BEFORE routes
+app.post('/chat', async (req, res) => {
+  const { text } = await generateText({
+    model,
+    prompt: req.body.message,
+    // req.rateLimiter already has scope + priority — just pass it through
+    providerOptions: { rateLimiter: req.rateLimiter },
+  })
+  res.json({ text })
+})
+app.use(errorHandler)  // AFTER routes
+```
+The `errorHandler` converts every `RateLimiterError` to a typed HTTP response automatically — no try/catch needed in route handlers:
+| Error | HTTP status | `code` |
+|---|---|---|
+| `QueueTimeoutError` | 503 | `QUEUE_TIMEOUT` |
+| `QueueFullError` | 503 | `QUEUE_FULL` |
+| `CircuitOpenError` | 503 | `CIRCUIT_OPEN` |
+| `ShutdownError` | 503 | `SHUTDOWN` |
+| `BudgetExceededError` | 402 | `BUDGET_EXCEEDED` |
+| `RateLimiterError` (generic) | 429 | `RATE_LIMITED` |
+Non-rate-limiter errors are passed to the next error handler unchanged.
+### Hono
+```typescript
+import { createHonoMiddleware } from 'ai-sdk-rate-limiter/middleware'
+app.use(createHonoMiddleware(limiter, {
+  scope:    (c) => c.req.header('x-user-id'),
+  priority: (c) => c.req.header('x-plan') === 'pro' ? 'normal' : 'low',
+}))
+app.post('/chat', async (c) => {
+  const { text } = await generateText({
+    model,
+    prompt: await c.req.text(),
+    providerOptions: { rateLimiter: c.var.rateLimiter },
+  })
+  return c.json({ text })
+})
+```
+`createHonoMiddleware` wraps the `next()` call in a try/catch, so any `RateLimiterError` thrown inside a route handler is caught and returned as a JSON response automatically.
+### Standalone error handler
+If you only need error handling without scope injection:
+```typescript
+import { createRateLimiterErrorHandler } from 'ai-sdk-rate-limiter/middleware'
+app.use(createRateLimiterErrorHandler({
+  includeDetails: false, // omit retryAfter, period, limitUsd from response body
+}))
+```
+### Custom framework (Fastify, etc.)
+`mapErrorToResponse` is exported for frameworks that don't use the `(req, res, next)` convention:
+```typescript
+import { mapErrorToResponse } from 'ai-sdk-rate-limiter/middleware'
+import { RateLimiterError } from 'ai-sdk-rate-limiter'
+// Fastify error handler (registered via setErrorHandler)
+fastify.setErrorHandler((err, request, reply) => {
+  if (err instanceof RateLimiterError) {
+    const { status, body } = mapErrorToResponse(err)
+    return reply.status(status).send(body)
+  }
+  reply.send(err)
+})
+```
+### `req.rateLimiter` TypeScript type
+The middleware augments `http.IncomingMessage` so `req.rateLimiter` is typed in Express and Fastify without any additional setup:
+```typescript
+import type { RateLimiterRequestContext } from 'ai-sdk-rate-limiter/middleware'
+// req.rateLimiter is automatically typed as RateLimiterRequestContext | undefined
+```
+---
 ## Events
 All events are typed. Register handlers at creation time or dynamically:
@@ -1336,6 +1477,57 @@ class MyStore implements RateLimitStore {
 const limiter = createRateLimiter({ store: new MyStore() })
 ```
+### Load balancing across API keys
+`createModelPool()` distributes requests round-robin across multiple model instances — useful when you have more than one API key:
+```typescript
+import { createRateLimiter, createModelPool } from 'ai-sdk-rate-limiter'
+import { createOpenAI } from '@ai-sdk/openai'
+// Two API keys, each with their own limiter tracking separate RPM limits
+const limiter1 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
+const limiter2 = createRateLimiter({ limits: { 'gpt-4o': { rpm: 500, itpm: 2_000_000 } } })
+const openai1 = createOpenAI({ apiKey: process.env.OPENAI_KEY_1 })
+const openai2 = createOpenAI({ apiKey: process.env.OPENAI_KEY_2 })
+const pool = createModelPool([
+  limiter1.wrap(openai1('gpt-4o')),
+  limiter2.wrap(openai2('gpt-4o')),
+])
+// Use exactly like a regular model — calls alternate between the two keys
+const { text } = await generateText({ model: pool, prompt: 'Hello!' })
+```
+Pass `{ strategy: 'random' }` for random selection instead of round-robin.
+### Request deduplication
+When multiple concurrent requests carry the same `dedupKey`, only one API call is made and all callers receive the same result. Useful for FAQ-style workloads where many users ask the same question simultaneously:
+```typescript
+const model = limiter.wrap(openai('gpt-4o'))
+// Server handler — two simultaneous identical requests share one API call
+async function handleRequest(questionId: string) {
+  const { text } = await generateText({
+    model,
+    prompt: questions[questionId],
+    providerOptions: {
+      rateLimiter: { dedupKey: `faq:${questionId}` },
+    },
+  })
+  return text
+}
+// If 50 users hit the same FAQ item at the same time → 1 API call, not 50
+const results = await Promise.all(users.map(() => handleRequest('faq-42')))
+```
+The dedup entry is removed once the request completes (success or error), so subsequent requests always make a fresh call.
 ---
 ## How it works
@@ -1358,6 +1550,42 @@ const limiter = createRateLimiter({ store: new MyStore() })
 ---
+## Debug mode
+Set `debug: true` to enable structured console logging for every rate-limit decision, queue entry/exit, slot acquisition, circuit breaker transition, and completed call cost:
+```typescript
+const limiter = createRateLimiter({ debug: true })
+```
+Sample output:
+```
+[ai-sdk-rate-limiter] gpt-4o: execute (provider="openai" priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: queuing (queueDepth=3 estimatedWaitMs=1200 priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: dequeued (waitedMs=1187 priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: completed (tokens=342+87 costUsd=0.000021 latencyMs=1343 streaming=false)
+```
+Debug logging is near-zero-overhead when disabled — the logger returns immediately, before any string building, `JSON.stringify`, or property access on the details object.
+---
+## Config validation
+`createRateLimiter()` validates your configuration at construction time. If it spots a likely misconfiguration, it logs a warning via `console.warn` (it never throws). The checks it covers:
+| Issue | Warning |
+|---|---|
+| `cost.store` is set | Reminds you to call `warmUp()` at startup (shown whenever a cost store is configured) |
+| `circuit.failureThreshold < 3` | Too sensitive — risks false trips on transient errors |
+| `retry.retryOn` excludes 429 | Rate-limit errors won't be retried |
+| `queue.timeout < 3000ms` | Requests will time out before they can be served |
+| `cost.budget` set without `onExceeded` | Silent default is `'throw'` — may want `'queue'` or `'fallback'` |
+| `cost.onExceeded: 'fallback'` | Reminds you to pass a `fallback` model to `limiter.wrap()` |
+---
 ## Comparison
 | | ai-sdk-rate-limiter | bottleneck | p-limit | SDK built-in retry | LangChain |
@@ -1382,6 +1610,7 @@ const limiter = createRateLimiter({ store: new MyStore() })
 | Backoff propagation | yes | no | no | no | no |
 | Prometheus metrics | yes | no | no | no | no |
 | StatsD metrics | yes | no | no | no | no |
+| Express/Hono middleware | yes | no | no | no | no |
 | OpenTelemetry | yes | no | no | no | partial |
 | Testing utilities | yes | no | no | no | no |
 | CLI audit | yes | no | no | no | no |
@@ -1425,6 +1654,14 @@ import type {
 } from 'ai-sdk-rate-limiter/redis'
 import type { StatsDClient } from 'ai-sdk-rate-limiter/statsd'
+import type {
+  RateLimiterRequestContext,
+  RateLimiterMiddlewareOptions,
+  ErrorHandlerOptions,
+  HonoMiddlewareOptions,
+  HonoContext,
+} from 'ai-sdk-rate-limiter/middleware'
 ```
 ---
@@ -1442,6 +1679,23 @@ Four runnable examples are included, each with its own README:
 ---
+## Bundle sizes
+Each entry point is independently tree-shakeable. Importing `ai-sdk-rate-limiter` never pulls in Redis, Prometheus, OTel, or StatsD.
+| Entry point | Size (minified) | Size (gzip) |
+|---|---|---|
+| `ai-sdk-rate-limiter` | ~80 KB | ~22 KB |
+| `ai-sdk-rate-limiter/redis` | ~12 KB | ~4 KB |
+| `ai-sdk-rate-limiter/middleware` | ~8 KB | ~2.5 KB |
+| `ai-sdk-rate-limiter/prometheus` | ~8 KB | ~2.5 KB |
+| `ai-sdk-rate-limiter/otel` | ~4 KB | ~1.5 KB |
+| `ai-sdk-rate-limiter/statsd` | ~4 KB | ~1.2 KB |
+The core package is self-contained. Optional peer deps (`ioredis`, `@opentelemetry/api`) are only loaded when you import the corresponding entry point.
+---
 ## Requirements
 - Node.js 18+ / Bun / Deno

package/dist/index.cjs CHANGED Viewed

@@ -668,6 +668,30 @@ var CostTracker = class {
   /**
    * Computes the USD cost of a call from token counts and per-million-token prices.
    *
    * @param inputTokens Number of input tokens.
    * @param outputTokens Number of output tokens.
    * @param inputPricePerMillion Price per 1,000,000 input tokens.
    * @param outputPricePerMillion Price per 1,000,000 output tokens.
    * @returns Sum of the input and output cost.
    */
   estimateCost(inputTokens, outputTokens, inputPricePerMillion, outputPricePerMillion) {
     return inputTokens / 1e6 * inputPricePerMillion + outputTokens / 1e6 * outputPricePerMillion;
   }
+  /**
+   * Builds a spend forecast for the hour, day and month periods.
+   *
+   * The rate used for every projection is the summed `costUsd` of entries whose
+   * timestamp falls within the last `HOUR_MS` milliseconds. `spentUsd` for day
+   * and month is the summed cost within `DAY_MS2` / `MONTH_MS` respectively.
+   * (Those constants are defined outside this view — presumably 1 hour, 1 day
+   * and ~1 month in ms; confirm.)
+   *
+   * @returns An object with `hour`, `day` and `month` keys, each holding
+   *   `spentUsd`, `projectedUsd` and `ratePerHourUsd`.
+   */
+  getForecast() {
+    const now = Date.now();
+    // Calls evict(now) before reading entries (evict is defined elsewhere in the class).
+    this.evict(now);
+    const hourlyRate = this.entries.filter((e) => e.timestamp > now - HOUR_MS).reduce((s, e) => s + e.costUsd, 0);
+    const daySpent = this.entries.filter((e) => e.timestamp > now - DAY_MS2).reduce((s, e) => s + e.costUsd, 0);
+    const monthSpent = this.entries.filter((e) => e.timestamp > now - MONTH_MS).reduce((s, e) => s + e.costUsd, 0);
+    return {
+      hour: {
+        // For the hour period, spent, projected and rate are all the same value.
+        spentUsd: hourlyRate,
+        projectedUsd: hourlyRate,
+        ratePerHourUsd: hourlyRate
+      },
+      day: {
+        spentUsd: daySpent,
+        // Projection is rate × 24 hours; it does not add to daySpent.
+        projectedUsd: hourlyRate * 24,
+        ratePerHourUsd: hourlyRate
+      },
+      month: {
+        spentUsd: monthSpent,
+        // Projection treats a month as 30 days of 24 hours.
+        projectedUsd: hourlyRate * 24 * 30,
+        ratePerHourUsd: hourlyRate
+      }
+    };
+  }
   getReport() {
     const now = Date.now();
     this.evict(now);
@@ -1678,6 +1702,23 @@ var CircuitBreaker = class {
   }
 };
+// src/core/debug-logger.ts
+// Tag prepended to every debug line written by DebugLogger.
+var PREFIX = "[ai-sdk-rate-limiter]";
+/**
+ * Console logger that only emits output when constructed with `enabled` truthy.
+ */
+var DebugLogger = class {
+  /**
+   * @param enabled When falsy, `log()` returns without writing anything.
+   */
+  constructor(enabled) {
+    this.enabled = enabled;
+  }
+  /**
+   * Writes one line to `console.log` in the form
+   * `<PREFIX> <model>: <message>` optionally followed by ` (k=v k=v ...)`.
+   *
+   * @param model Model identifier placed after the prefix.
+   * @param message Short description of the event.
+   * @param details Optional object; each own enumerable entry is rendered as
+   *   `key=JSON.stringify(value)` and the pairs are joined with single spaces.
+   */
+  log(model, message, details) {
+    // Early exit: nothing below runs when logging is disabled.
+    if (!this.enabled) return;
+    // An absent or empty details object produces the short form without parentheses.
+    if (details && Object.keys(details).length > 0) {
+      const parts = Object.entries(details).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(" ");
+      console.log(`${PREFIX} ${model}: ${message} (${parts})`);
+    } else {
+      console.log(`${PREFIX} ${model}: ${message}`);
+    }
+  }
+};
 // src/core/pipeline.ts
 function resolveRetryConfig(config) {
   const r = config.retry ?? {};
@@ -1717,9 +1758,12 @@ var Pipeline = class {
     this.circuits = /* @__PURE__ */ new Map();
     /** Limits detected from provider response headers (lower priority than user config) */
     this.detectedLimits = /* @__PURE__ */ new Map();
+    /** In-flight promises indexed by dedup key, shared across identical concurrent requests */
+    this.dedupMap = /* @__PURE__ */ new Map();
     /** Set to true after shutdown() is called */
     this.shutdownRequested = false;
     this.config = config;
+    this.log = new DebugLogger(config.debug === true);
     this.engine = new RateLimitEngine({
       maxQueueSize: config.queue?.maxSize ?? 500,
       ...config.store !== void 0 && { store: config.store }
@@ -1761,6 +1805,20 @@ var Pipeline = class {
    * recordUsage() once they have actual token counts from the API response.
    */
   async execute(modelId, provider, prompt, fn, opts) {
+    this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
+    if (opts.dedupKey !== void 0) {
+      const existing = this.dedupMap.get(opts.dedupKey);
+      if (existing !== void 0) {
+        this.log.log(modelId, "dedup hit", { dedupKey: opts.dedupKey });
+        return existing;
+      }
+      const { dedupKey, ...optsWithoutDedup } = opts;
+      const promise = this.execute(modelId, provider, prompt, fn, optsWithoutDedup).finally(() => {
+        this.dedupMap.delete(dedupKey);
+      });
+      this.dedupMap.set(dedupKey, promise);
+      return promise;
+    }
     if (this.shutdownRequested) {
       this.emitter.emit("dropped", {
         model: modelId,
@@ -1850,10 +1908,12 @@ var Pipeline = class {
         timeoutMs: opts.timeoutMs,
         ...opts.signal !== void 0 && { signal: opts.signal },
         onQueued: (queueDepth, estimatedWaitMs) => {
+          this.log.log(modelId, "queuing", { queueDepth, estimatedWaitMs, priority: opts.priority });
           this.emitter.emit("queued", { model: modelId, provider, priority: opts.priority, queueDepth, estimatedWaitMs });
           this.emitter.emit("rateLimited", { source: "local", model: modelId, provider, limitType: "rpm", resetAt: Date.now() + estimatedWaitMs });
         },
         onDequeued: (waitedMs) => {
+          this.log.log(modelId, "dequeued", { waitedMs, priority: opts.priority });
           this.emitter.emit("dequeued", { model: modelId, provider, waitedMs, priority: opts.priority });
         }
       });
@@ -1905,7 +1965,10 @@ var Pipeline = class {
       });
       if (circuit) {
         const justClosed = circuit.recordSuccess();
-        if (justClosed) this.emitter.emit("circuitClosed", { model: modelId, provider });
+        if (justClosed) {
+          this.log.log(modelId, "circuit closed \u2014 upstream recovered");
+          this.emitter.emit("circuitClosed", { model: modelId, provider });
+        }
       }
       return result;
     } catch (error) {
@@ -1915,6 +1978,7 @@ var Pipeline = class {
         if (shouldTrip) {
           const justOpened = circuit.recordFailure();
           if (justOpened) {
+            this.log.log(modelId, "circuit OPEN", { status, cooldownMs: this.config.circuit?.cooldownMs ?? 6e4 });
             this.emitter.emit("circuitOpen", {
               model: modelId,
               provider,
@@ -1945,6 +2009,13 @@ var Pipeline = class {
       limits.outputPricePerMillion,
       scope
     );
+    this.log.log(modelId, "completed", {
+      tokens: `${usage.inputTokens}+${usage.outputTokens}`,
+      costUsd: costUsd.toFixed(6),
+      latencyMs,
+      streaming,
+      ...scope !== void 0 && { scope }
+    });
     this.emitter.emit("completed", {
       model: modelId,
       provider,
@@ -1962,6 +2033,9 @@ var Pipeline = class {
   getCostReport() {
     return this.costTracker.getReport();
   }
+  /** Returns the forecast produced by the cost tracker's `getForecast()`. */
+  getCostForecast() {
+    return this.costTracker.getForecast();
+  }
   getStatus() {
     const models = [];
     let totalQueueDepth = 0;
@@ -2090,7 +2164,8 @@ function getPerRequestOptions(params, queueTimeout) {
     metadata: raw?.metadata ?? {},
     skipBudgetCheck: raw?._skipBudgetCheck ?? false,
     scope: raw?.scope,
-    callTimeout: raw?.callTimeout
+    callTimeout: raw?.callTimeout,
+    dedupKey: raw?.dedupKey
   };
 }
 function extractTokenUsage(usage) {
@@ -2106,7 +2181,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapGenerate — non-streaming
     // -----------------------------------------------------------------------
     async wrapGenerate({ doGenerate, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -2122,6 +2197,7 @@ function createMiddleware(pipeline, queueTimeout) {
           skipBudgetCheck,
           ...scope !== void 0 && { scope },
           ...callTimeout !== void 0 && { callTimeout },
+          ...dedupKey !== void 0 && { dedupKey },
           ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
@@ -2135,7 +2211,7 @@ function createMiddleware(pipeline, queueTimeout) {
     // wrapStream — streaming
     // -----------------------------------------------------------------------
     async wrapStream({ doStream, params, model }) {
-      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout } = getPerRequestOptions(params, queueTimeout);
+      const { priority, timeoutMs, skipBudgetCheck, scope, callTimeout, dedupKey } = getPerRequestOptions(params, queueTimeout);
       const modelId = model.modelId;
       const provider = model.provider;
       const startMs = Date.now();
@@ -2151,6 +2227,7 @@ function createMiddleware(pipeline, queueTimeout) {
           skipBudgetCheck,
           ...scope !== void 0 && { scope },
           ...callTimeout !== void 0 && { callTimeout },
+          ...dedupKey !== void 0 && { dedupKey },
           ...params.abortSignal !== void 0 && { signal: params.abortSignal }
         }
       );
@@ -2416,8 +2493,53 @@ function rateLimited(client, options = {}) {
   });
 }
+// src/core/config-validator.ts
+// Warning tag: ANSI yellow (ESC[33m), a warning sign, the package name, then ANSI reset (ESC[0m).
+var PREFIX2 = "\x1B[33m\u26A0 ai-sdk-rate-limiter\x1B[0m";
+// ANSI reset sequence, appended after each warning message.
+var RESET = "\x1B[0m";
+/**
+ * Inspects a rate-limiter config and prints one `console.warn` line per
+ * suspicious setting. It never throws and returns nothing.
+ *
+ * @param config Config object; only `cost`, `circuit`, `retry` and `queue` are read.
+ */
+function validateConfig(config) {
+  const warnings = [];
+  // Fires whenever a cost store is configured; nothing here checks whether warmUp() is called.
+  if (config.cost?.store !== void 0) {
+    warnings.push(
+      "cost.store is configured \u2014 call `await limiter.warmUp()` at startup.\n  Without it, budget caps won't account for spend from previous process runs."
+    );
+  }
+  const threshold = config.circuit?.failureThreshold;
+  // Only an explicitly set threshold below 3 is flagged.
+  if (threshold !== void 0 && threshold < 3) {
+    warnings.push(
+      `circuit.failureThreshold is ${threshold} \u2014 very low. The circuit will open after nearly every error. Consider a value of 5 or higher for typical production workloads.`
+    );
+  }
+  // Only an explicit retryOn override that omits 429 is flagged.
+  if (config.retry?.retryOn !== void 0 && !config.retry.retryOn.includes(429)) {
+    warnings.push(
+      "retry.retryOn does not include 429. Rate limit errors from the API will not be retried. Add 429 to retry.retryOn, or remove the override to use the default."
+    );
+  }
+  const queueTimeout = config.queue?.timeout;
+  // Only an explicitly set timeout under 3000 ms is flagged.
+  if (queueTimeout !== void 0 && queueTimeout < 3e3) {
+    warnings.push(
+      `queue.timeout is ${queueTimeout}ms \u2014 less than 3 seconds. Requests may time out before the rate limit window resets (typically 60s). Consider 30_000ms (30s) or higher.`
+    );
+  }
+  // Always shown for 'fallback': fallback models are passed to limiter.wrap(), not to this config.
+  if (config.cost?.onExceeded === "fallback") {
+    warnings.push(
+      "cost.onExceeded is 'fallback' but fallback models are configured per-model in limiter.wrap(model, { fallback: cheaperModel }). If no fallback is set on a wrapped model, BudgetExceededError will still be thrown."
+    );
+  }
+  if (config.cost?.budget !== void 0 && config.cost.onExceeded === void 0) {
+    warnings.push(
+      "cost.budget is set but cost.onExceeded is not. Defaulting to 'throw' \u2014 requests will throw BudgetExceededError when the cap is hit. Set onExceeded: 'queue' or 'fallback' to change this behavior."
+    );
+  }
+  for (const warning of warnings) {
+    // Each newline in a message is replaced by a newline plus two spaces (continuation indent).
+    const formatted = warning.replace(/\n/g, `
+  `);
+    // NOTE(review): PREFIX2 already ends with the reset code, so RESET here is emitted a second time.
+    console.warn(`${PREFIX2}: ${formatted}${RESET}`);
+  }
+}
 // src/create-rate-limiter.ts
 function createRateLimiter(config = {}) {
+  validateConfig(config);
   const pipeline = new Pipeline(config);
   const queueTimeout = config.queue?.timeout ?? 3e4;
   const middleware = createMiddleware(pipeline, queueTimeout);
@@ -2431,6 +2553,9 @@ function createRateLimiter(config = {}) {
     getCostReport() {
       return pipeline.getCostReport();
     },
+    /** Returns the cost forecast by delegating to `pipeline.getCostForecast()`. */
+    getCostForecast() {
+      return pipeline.getCostForecast();
+    },
     getStatus() {
       return pipeline.getStatus();
     },
@@ -2455,6 +2580,38 @@ function createRateLimiter(config = {}) {
   };
 }
+// src/adapters/model-pool.ts
+/**
+ * Creates a model-like object that forwards each `doGenerate` / `doStream`
+ * call to one model chosen from `models`.
+ *
+ * @param models Non-empty array of model instances.
+ * @param options Optional; `options.strategy` is `"random"` for random
+ *   selection, anything else (default `"round-robin"`) cycles in order.
+ * @returns An object exposing `modelId`, `provider`, `doGenerate` and `doStream`.
+ * @throws Error when `models` is empty.
+ *
+ * NOTE(review): only these four members are exposed; any other properties of
+ * the underlying models are not forwarded — confirm this satisfies the model
+ * interface expected by consumers.
+ */
+function createModelPool(models, options) {
+  if (models.length === 0) {
+    throw new Error("createModelPool: at least one model is required");
+  }
+  const strategy = options?.strategy ?? "round-robin";
+  // Position of the next model to use in round-robin mode.
+  let index = 0;
+  // Selects the model for the next call according to the strategy.
+  function pick() {
+    if (strategy === "random") {
+      return models[Math.floor(Math.random() * models.length)];
+    }
+    const model = models[index % models.length];
+    // Wrap around so the counter never grows beyond the array length.
+    index = (index + 1) % models.length;
+    return model;
+  }
+  // Identity fields are always reported from the first model, whichever one serves the call.
+  const primary = models[0];
+  return {
+    get modelId() {
+      return primary.modelId;
+    },
+    get provider() {
+      return primary.provider;
+    },
+    doGenerate(params) {
+      return pick().doGenerate(params);
+    },
+    doStream(params) {
+      return pick().doStream(params);
+    }
+  };
+}
 exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
 exports.BudgetExceededError = BudgetExceededError;
 exports.COHERE_MODELS = COHERE_MODELS;
@@ -2469,6 +2626,7 @@ exports.RateLimitExceededError = RateLimitExceededError;
 exports.RateLimiterError = RateLimiterError;
 exports.RetryExhaustedError = RetryExhaustedError;
 exports.ShutdownError = ShutdownError;
+exports.createModelPool = createModelPool;
 exports.createRateLimiter = createRateLimiter;
 exports.isKnownModel = isKnownModel;
 exports.rateLimited = rateLimited;