npm - ai-sdk-rate-limiter - Versions diffs - 0.10.0 → 0.11.0 - Mend

ai-sdk-rate-limiter 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +182 -0
package/dist/index.cjs +78 -1
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +78 -1
package/dist/index.js.map +1 -1
package/dist/middleware.d.cts +1 -1
package/dist/middleware.d.ts +1 -1
package/dist/otel.d.cts +1 -1
package/dist/otel.d.ts +1 -1
package/dist/prometheus.d.cts +1 -1
package/dist/prometheus.d.ts +1 -1
package/dist/redis.d.cts +1 -1
package/dist/redis.d.ts +1 -1
package/dist/statsd.d.cts +1 -1
package/dist/statsd.d.ts +1 -1
package/dist/testing.cjs +78 -1
package/dist/testing.cjs.map +1 -1
package/dist/testing.d.cts +1 -1
package/dist/testing.d.ts +1 -1
package/dist/testing.js +78 -1
package/dist/testing.js.map +1 -1
package/dist/{types-CUPpMRPE.d.cts → types-CMevWGWK.d.cts} +18 -0
package/dist/{types-CUPpMRPE.d.ts → types-CMevWGWK.d.ts} +18 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -3,6 +3,8 @@
 Smart rate limiting, queuing, and cost tracking for AI API calls. Works across providers. Zero required dependencies.
 [![npm](https://img.shields.io/npm/v/ai-sdk-rate-limiter)](https://www.npmjs.com/package/ai-sdk-rate-limiter)
+[![CI](https://github.com/piyushgupta344/ai-sdk-rate-limiter/actions/workflows/ci.yml/badge.svg)](https://github.com/piyushgupta344/ai-sdk-rate-limiter/actions/workflows/ci.yml)
+[![npm downloads](https://img.shields.io/npm/dm/ai-sdk-rate-limiter)](https://www.npmjs.com/package/ai-sdk-rate-limiter)
 ```
 npm install ai-sdk-rate-limiter
@@ -83,6 +85,8 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
 **Fallback chains** — `fallback` now accepts an array of models. On `BudgetExceededError`, the chain is walked in order until one succeeds.
+**Express / Hono middleware** — `createRateLimiterMiddleware()` (from `ai-sdk-rate-limiter/middleware`) attaches `req.rateLimiter` to every request and converts rate-limiter errors to proper HTTP responses at the middleware layer — no per-route boilerplate.
 **OpenTelemetry** — Drop-in OTel plugin that emits GenAI-spec spans for every request. Works with any OTel-compatible tracer.
 **Testing utilities** — `createTestLimiter()` records every completed call so you can assert on model usage, token counts, and costs in unit tests.
@@ -109,6 +113,7 @@ The wrapped model is a drop-in replacement. Every Vercel AI SDK feature — stre
 - [Graceful shutdown](#graceful-shutdown)
 - [Prometheus metrics](#prometheus-metrics)
 - [StatsD metrics](#statsd-metrics)
+- [Express / Hono middleware](#express--hono-middleware)
 - [Events](#events)
 - [Backpressure](#backpressure)
 - [Error handling](#error-handling)
@@ -858,6 +863,121 @@ const client: StatsDClient = {
 ---
+## Express / Hono middleware
+The `ai-sdk-rate-limiter/middleware` entry point eliminates per-route boilerplate. Scope extraction, priority assignment, and rate-limiter error handling all move to the middleware layer — route handlers just pass `req.rateLimiter` through.
+### Express
+```typescript
+import { createRateLimiterMiddleware } from 'ai-sdk-rate-limiter/middleware'
+const { middleware, errorHandler } = createRateLimiterMiddleware(limiter, {
+  // Extract scope from the request — stored in req.rateLimiter.scope
+  scope: (req) => {
+    const plan = req.headers['x-user-plan'] ?? 'free'
+    const id   = req.headers['x-user-id']
+    return id ? `user:${plan}:${id}` : undefined
+  },
+  // Derive queue priority per-request
+  priority: (req) => req.headers['x-user-plan'] === 'pro' ? 'normal' : 'low',
+  // Add X-RateLimit-* informational headers to every response
+  injectHeaders: 'gpt-4o-mini',
+})
+app.use(middleware)    // BEFORE routes
+app.post('/chat', async (req, res) => {
+  const { text } = await generateText({
+    model,
+    prompt: req.body.message,
+    // req.rateLimiter already has scope + priority — just pass it through
+    providerOptions: { rateLimiter: req.rateLimiter },
+  })
+  res.json({ text })
+})
+app.use(errorHandler)  // AFTER routes
+```
+The `errorHandler` converts every `RateLimiterError` to a typed HTTP response automatically — no try/catch needed in route handlers:
+| Error | HTTP status | `code` |
+|---|---|---|
+| `QueueTimeoutError` | 503 | `QUEUE_TIMEOUT` |
+| `QueueFullError` | 503 | `QUEUE_FULL` |
+| `CircuitOpenError` | 503 | `CIRCUIT_OPEN` |
+| `ShutdownError` | 503 | `SHUTDOWN` |
+| `BudgetExceededError` | 402 | `BUDGET_EXCEEDED` |
+| `RateLimiterError` (generic) | 429 | `RATE_LIMITED` |
+Non-rate-limiter errors are passed to the next error handler unchanged.
+### Hono
+```typescript
+import { createHonoMiddleware } from 'ai-sdk-rate-limiter/middleware'
+app.use(createHonoMiddleware(limiter, {
+  scope:    (c) => c.req.header('x-user-id'),
+  priority: (c) => c.req.header('x-plan') === 'pro' ? 'normal' : 'low',
+}))
+app.post('/chat', async (c) => {
+  const { text } = await generateText({
+    model,
+    prompt: await c.req.text(),
+    providerOptions: { rateLimiter: c.var.rateLimiter },
+  })
+  return c.json({ text })
+})
+```
+`createHonoMiddleware` wraps the `next()` call in a try/catch, so any `RateLimiterError` thrown inside a route handler is caught and returned as a JSON response automatically.
+### Standalone error handler
+If you only need error handling without scope injection:
+```typescript
+import { createRateLimiterErrorHandler } from 'ai-sdk-rate-limiter/middleware'
+app.use(createRateLimiterErrorHandler({
+  includeDetails: false, // omit retryAfter, period, limitUsd from response body
+}))
+```
+### Custom framework (Fastify, etc.)
+`mapErrorToResponse` is exported for frameworks that don't use the `(req, res, next)` convention:
+```typescript
+import { mapErrorToResponse } from 'ai-sdk-rate-limiter/middleware'
+import { RateLimiterError } from 'ai-sdk-rate-limiter'
+// Fastify custom error handler (setErrorHandler)
+fastify.setErrorHandler((err, request, reply) => {
+  if (err instanceof RateLimiterError) {
+    const { status, body } = mapErrorToResponse(err)
+    return reply.status(status).send(body)
+  }
+  reply.send(err)
+})
+```
+### `req.rateLimiter` TypeScript type
+The middleware augments `http.IncomingMessage` so `req.rateLimiter` is typed in Express and Fastify without any additional setup:
+```typescript
+import type { RateLimiterRequestContext } from 'ai-sdk-rate-limiter/middleware'
+// req.rateLimiter is automatically typed as RateLimiterRequestContext | undefined
+```
+---
 ## Events
 All events are typed. Register handlers at creation time or dynamically:
@@ -1358,6 +1478,42 @@ const limiter = createRateLimiter({ store: new MyStore() })
 ---
+## Debug mode
+Set `debug: true` to enable structured console logging for every rate-limit decision, queue entry/exit, slot acquisition, circuit breaker transition, and completed call cost:
+```typescript
+const limiter = createRateLimiter({ debug: true })
+```
+Sample output:
+```
+[ai-sdk-rate-limiter] gpt-4o: execute (provider="openai" priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: queuing (queueDepth=3 estimatedWaitMs=1200 priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: dequeued (waitedMs=1187 priority="normal")
+[ai-sdk-rate-limiter] gpt-4o: completed (tokens=342+87 costUsd=0.000021 latencyMs=1343 streaming=false)
+```
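+Debug logging is effectively free when disabled: the logger returns immediately, so no log line is formatted and `JSON.stringify` is never called. Call sites still construct the small details object they pass to the logger, so the overhead is negligible rather than strictly zero.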
+---
+## Config validation
+`createRateLimiter()` validates your configuration at construction time. If it spots a likely misconfiguration, it emits a warning via `console.warn` — it never throws. The checks currently covered are:
+| Issue | Warning |
+|---|---|
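+| `cost.store` is set | Reminds you to call `await limiter.warmUp()` at startup (shown whenever a store is configured — validation runs at construction time and cannot know whether `warmUp()` is called later) |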
+| `circuit.failureThreshold < 3` | Too sensitive — risks false trips on transient errors |
+| `retry.retryOn` excludes 429 | Rate-limit errors won't be retried |
+| `queue.timeout < 3000ms` | Requests will time out before they can be served |
+| `cost.budget` set without `onExceeded` | Silent default is `'throw'` — may want `'queue'` or `'fallback'` |
+| `cost.onExceeded: 'fallback'` | Reminds you to pass a `fallback` model to `limiter.wrap()` |
+---
 ## Comparison
 | | ai-sdk-rate-limiter | bottleneck | p-limit | SDK built-in retry | LangChain |
@@ -1382,6 +1538,7 @@ const limiter = createRateLimiter({ store: new MyStore() })
 | Backoff propagation | yes | no | no | no | no |
 | Prometheus metrics | yes | no | no | no | no |
 | StatsD metrics | yes | no | no | no | no |
+| Express/Hono middleware | yes | no | no | no | no |
 | OpenTelemetry | yes | no | no | no | partial |
 | Testing utilities | yes | no | no | no | no |
 | CLI audit | yes | no | no | no | no |
@@ -1425,6 +1582,14 @@ import type {
 } from 'ai-sdk-rate-limiter/redis'
 import type { StatsDClient } from 'ai-sdk-rate-limiter/statsd'
+import type {
+  RateLimiterRequestContext,
+  RateLimiterMiddlewareOptions,
+  ErrorHandlerOptions,
+  HonoMiddlewareOptions,
+  HonoContext,
+} from 'ai-sdk-rate-limiter/middleware'
 ```
 ---
@@ -1442,6 +1607,23 @@ Four runnable examples are included, each with its own README:
 ---
+## Bundle sizes
+Each entry point is independently tree-shakeable. Importing `ai-sdk-rate-limiter` never pulls in Redis, Prometheus, OTel, or StatsD.
+| Entry point | Size (minified) | Size (gzip) |
+|---|---|---|
+| `ai-sdk-rate-limiter` | ~80 KB | ~22 KB |
+| `ai-sdk-rate-limiter/redis` | ~12 KB | ~4 KB |
+| `ai-sdk-rate-limiter/middleware` | ~8 KB | ~2.5 KB |
+| `ai-sdk-rate-limiter/prometheus` | ~8 KB | ~2.5 KB |
+| `ai-sdk-rate-limiter/otel` | ~4 KB | ~1.5 KB |
+| `ai-sdk-rate-limiter/statsd` | ~4 KB | ~1.2 KB |
+The core package is self-contained. Optional peer deps (`ioredis`, `@opentelemetry/api`) are only loaded when you import the corresponding entry point.
+---
 ## Requirements
 - Node.js 18+ / Bun / Deno

package/dist/index.cjs CHANGED Viewed

@@ -1678,6 +1678,23 @@ var CircuitBreaker = class {
   }
 };
+// src/core/debug-logger.ts
+var PREFIX = "[ai-sdk-rate-limiter]";
+var DebugLogger = class {
+  constructor(enabled) {
+    this.enabled = enabled;
+  }
+  log(model, message, details) {
+    if (!this.enabled) return;
+    if (details && Object.keys(details).length > 0) {
+      const parts = Object.entries(details).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(" ");
+      console.log(`${PREFIX} ${model}: ${message} (${parts})`);
+    } else {
+      console.log(`${PREFIX} ${model}: ${message}`);
+    }
+  }
+};
 // src/core/pipeline.ts
 function resolveRetryConfig(config) {
   const r = config.retry ?? {};
@@ -1720,6 +1737,7 @@ var Pipeline = class {
     /** Set to true after shutdown() is called */
     this.shutdownRequested = false;
     this.config = config;
+    this.log = new DebugLogger(config.debug === true);
     this.engine = new RateLimitEngine({
       maxQueueSize: config.queue?.maxSize ?? 500,
       ...config.store !== void 0 && { store: config.store }
@@ -1761,6 +1779,7 @@ var Pipeline = class {
    * recordUsage() once they have actual token counts from the API response.
    */
   async execute(modelId, provider, prompt, fn, opts) {
+    this.log.log(modelId, "execute", { provider, priority: opts.priority, ...opts.scope !== void 0 && { scope: opts.scope } });
     if (this.shutdownRequested) {
       this.emitter.emit("dropped", {
         model: modelId,
@@ -1850,10 +1869,12 @@ var Pipeline = class {
         timeoutMs: opts.timeoutMs,
         ...opts.signal !== void 0 && { signal: opts.signal },
         onQueued: (queueDepth, estimatedWaitMs) => {
+          this.log.log(modelId, "queuing", { queueDepth, estimatedWaitMs, priority: opts.priority });
           this.emitter.emit("queued", { model: modelId, provider, priority: opts.priority, queueDepth, estimatedWaitMs });
           this.emitter.emit("rateLimited", { source: "local", model: modelId, provider, limitType: "rpm", resetAt: Date.now() + estimatedWaitMs });
         },
         onDequeued: (waitedMs) => {
+          this.log.log(modelId, "dequeued", { waitedMs, priority: opts.priority });
           this.emitter.emit("dequeued", { model: modelId, provider, waitedMs, priority: opts.priority });
         }
       });
@@ -1905,7 +1926,10 @@ var Pipeline = class {
       });
       if (circuit) {
         const justClosed = circuit.recordSuccess();
-        if (justClosed) this.emitter.emit("circuitClosed", { model: modelId, provider });
+        if (justClosed) {
+          this.log.log(modelId, "circuit closed \u2014 upstream recovered");
+          this.emitter.emit("circuitClosed", { model: modelId, provider });
+        }
       }
       return result;
     } catch (error) {
@@ -1915,6 +1939,7 @@ var Pipeline = class {
         if (shouldTrip) {
           const justOpened = circuit.recordFailure();
           if (justOpened) {
+            this.log.log(modelId, "circuit OPEN", { status, cooldownMs: this.config.circuit?.cooldownMs ?? 6e4 });
             this.emitter.emit("circuitOpen", {
               model: modelId,
               provider,
@@ -1945,6 +1970,13 @@ var Pipeline = class {
       limits.outputPricePerMillion,
       scope
     );
+    this.log.log(modelId, "completed", {
+      tokens: `${usage.inputTokens}+${usage.outputTokens}`,
+      costUsd: costUsd.toFixed(6),
+      latencyMs,
+      streaming,
+      ...scope !== void 0 && { scope }
+    });
     this.emitter.emit("completed", {
       model: modelId,
       provider,
@@ -2416,8 +2448,53 @@ function rateLimited(client, options = {}) {
   });
 }
+// src/core/config-validator.ts
+var PREFIX2 = "\x1B[33m\u26A0 ai-sdk-rate-limiter\x1B[0m";
+var RESET = "\x1B[0m";
+function validateConfig(config) {
+  const warnings = [];
+  if (config.cost?.store !== void 0) {
+    warnings.push(
+      "cost.store is configured \u2014 call `await limiter.warmUp()` at startup.\n  Without it, budget caps won't account for spend from previous process runs."
+    );
+  }
+  const threshold = config.circuit?.failureThreshold;
+  if (threshold !== void 0 && threshold < 3) {
+    warnings.push(
+      `circuit.failureThreshold is ${threshold} \u2014 very low. The circuit will open after nearly every error. Consider a value of 5 or higher for typical production workloads.`
+    );
+  }
+  if (config.retry?.retryOn !== void 0 && !config.retry.retryOn.includes(429)) {
+    warnings.push(
+      "retry.retryOn does not include 429. Rate limit errors from the API will not be retried. Add 429 to retry.retryOn, or remove the override to use the default."
+    );
+  }
+  const queueTimeout = config.queue?.timeout;
+  if (queueTimeout !== void 0 && queueTimeout < 3e3) {
+    warnings.push(
+      `queue.timeout is ${queueTimeout}ms \u2014 less than 3 seconds. Requests may time out before the rate limit window resets (typically 60s). Consider 30_000ms (30s) or higher.`
+    );
+  }
+  if (config.cost?.onExceeded === "fallback") {
+    warnings.push(
+      "cost.onExceeded is 'fallback' but fallback models are configured per-model in limiter.wrap(model, { fallback: cheaperModel }). If no fallback is set on a wrapped model, BudgetExceededError will still be thrown."
+    );
+  }
+  if (config.cost?.budget !== void 0 && config.cost.onExceeded === void 0) {
+    warnings.push(
+      "cost.budget is set but cost.onExceeded is not. Defaulting to 'throw' \u2014 requests will throw BudgetExceededError when the cap is hit. Set onExceeded: 'queue' or 'fallback' to change this behavior."
+    );
+  }
+  for (const warning of warnings) {
+    const formatted = warning.replace(/\n/g, `
+  `);
+    console.warn(`${PREFIX2}: ${formatted}${RESET}`);
+  }
+}
 // src/create-rate-limiter.ts
 function createRateLimiter(config = {}) {
+  validateConfig(config);
   const pipeline = new Pipeline(config);
   const queueTimeout = config.queue?.timeout ?? 3e4;
   const middleware = createMiddleware(pipeline, queueTimeout);