@crewhaus/rate-limiter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "@crewhaus/rate-limiter",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Multi-dimensional token-bucket / leaky-bucket rate limiter (per-tenant, per-provider, per-tool)",
6
+ "main": "src/index.ts",
7
+ "types": "src/index.ts",
8
+ "exports": {
9
+ ".": "./src/index.ts"
10
+ },
11
+ "scripts": {
12
+ "test": "bun test src"
13
+ },
14
+ "dependencies": {
15
+ "@crewhaus/errors": "0.0.0"
16
+ },
17
+ "license": "Apache-2.0",
18
+ "author": {
19
+ "name": "Max Meier",
20
+ "email": "max@studiomax.io",
21
+ "url": "https://studiomax.io"
22
+ },
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/crewhaus/factory.git",
26
+ "directory": "packages/rate-limiter"
27
+ },
28
+ "homepage": "https://github.com/crewhaus/factory/tree/main/packages/rate-limiter#readme",
29
+ "bugs": {
30
+ "url": "https://github.com/crewhaus/factory/issues"
31
+ },
32
+ "publishConfig": {
33
+ "access": "restricted"
34
+ },
35
+ "files": [
36
+ "src",
37
+ "README.md",
38
+ "LICENSE",
39
+ "NOTICE"
40
+ ]
41
+ }
@@ -0,0 +1,179 @@
1
+ /**
2
+ * Section 27 — `rate-limiter` tests:
3
+ * - T1 per algorithm (token-bucket vs leaky-bucket) edge cases
4
+ * - T7 1000-acquirer load test (concurrency-fair + no starvation)
5
+ * - T8 fail-closed when keys are missing (deny rather than allow)
6
+ */
7
+ import { describe, expect, test } from "bun:test";
8
+ import {
9
+ type AcquireKey,
10
+ type BucketConfig,
11
+ RateLimitError,
12
+ bucketKeyOf,
13
+ createRateLimiter,
14
+ } from "./index";
15
+
/**
 * T1 — token-bucket edge cases: immediate acquisition below capacity, burst
 * tolerance at start-up, blocking until refill, and rejection once
 * `maxWaitMs` has elapsed.
 */
describe("rate-limiter — T1 token-bucket", () => {
  test("acquire below capacity is immediate", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 10, refillPerSec: 1 }],
    ]);
    const rl = createRateLimiter({ buckets });
    const t0 = Date.now();
    await rl.acquire([{ dimension: "tenant", id: "t1" }], 5);
    // 250 ms threshold: "immediate" relative to the bucket's 1-token-per-second
    // refill rate, while tolerating CI scheduler jitter (we saw 51 ms flakes
    // against a 50 ms cap on shared GitHub runners).
    expect(Date.now() - t0).toBeLessThan(250);
    const inspect = rl.inspect({ dimension: "tenant", id: "t1" });
    expect(inspect?.available).toBeCloseTo(5, 1);
  });

  test("burst tolerance: capacity available immediately at start", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 10, refillPerSec: 0.1 }],
    ]);
    const rl = createRateLimiter({ buckets });
    const t0 = Date.now();
    for (let i = 0; i < 10; i++) {
      await rl.acquire([{ dimension: "tenant", id: "t1" }], 1);
    }
    expect(Date.now() - t0).toBeLessThan(100);
  });

  test("blocks until refill when over capacity", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 1, refillPerSec: 10 }],
    ]);
    const rl = createRateLimiter({ buckets });
    const t0 = Date.now();
    await rl.acquire([{ dimension: "tenant", id: "t1" }], 1);
    await rl.acquire([{ dimension: "tenant", id: "t1" }], 1);
    const elapsed = Date.now() - t0;
    // Second call needs to wait for ~100ms refill. Generous lower bound for
    // shared-CI scheduling jitter; upper bound large enough to avoid flake.
    expect(elapsed).toBeGreaterThanOrEqual(50);
    expect(elapsed).toBeLessThan(2_000);
  });

  test("rejects after maxWaitMs when refill rate too slow", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 1, refillPerSec: 0.01 }],
    ]);
    const rl = createRateLimiter({ buckets });
    await rl.acquire([{ dimension: "tenant", id: "t1" }], 1);
    // `.rejects` yields a promise: it must be awaited, otherwise the test can
    // finish (and pass) before the assertion has been evaluated.
    await expect(
      rl.acquire([{ dimension: "tenant", id: "t1" }], 1, { maxWaitMs: 100 }),
    ).rejects.toBeInstanceOf(RateLimitError);
  });
});
70
+
/**
 * T1 — leaky-bucket edge cases: queued acquirers are released at the drain
 * rate, and a queued acquirer is rejected once `maxWaitMs` has elapsed.
 */
describe("rate-limiter — T1 leaky-bucket", () => {
  test("smoothing: requests release at refill rate", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "leaky-bucket", capacity: 5, refillPerSec: 50 }],
    ]);
    const rl = createRateLimiter({ buckets });
    // 5 fit under capacity; the 6th and 7th queue and are released as the
    // bucket drains (~20ms per unit at 50/s). Generous bounds for jitter.
    const promises: Array<Promise<void>> = [];
    const t0 = Date.now();
    for (let i = 0; i < 7; i++) {
      promises.push(rl.acquire([{ dimension: "tenant", id: "t1" }], 1, { maxWaitMs: 30_000 }));
    }
    await Promise.all(promises);
    const elapsed = Date.now() - t0;
    expect(elapsed).toBeGreaterThanOrEqual(15);
    expect(elapsed).toBeLessThan(5_000);
  });

  test("rejects on maxWait when queue stays full", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "leaky-bucket", capacity: 1, refillPerSec: 0.01 }],
    ]);
    const rl = createRateLimiter({ buckets });
    await rl.acquire([{ dimension: "tenant", id: "t1" }], 1);
    // `.rejects` yields a promise: it must be awaited, otherwise the test can
    // finish (and pass) before the assertion has been evaluated.
    await expect(
      rl.acquire([{ dimension: "tenant", id: "t1" }], 1, { maxWaitMs: 50 }),
    ).rejects.toBeInstanceOf(RateLimitError);
  });
});
100
+
/**
 * T8 — fail-closed behaviour: unknown keys deny, the per-dimension `*`
 * default admits otherwise-unknown ids, and a failed multi-key acquire
 * leaves earlier buckets at their previous level.
 */
describe("rate-limiter — T8 fail-closed on missing keys", () => {
  test("acquire on unknown key throws RateLimitError", async () => {
    const rl = createRateLimiter({ buckets: new Map() });
    // `.rejects` yields a promise and must be awaited to be enforced.
    await expect(
      rl.acquire([{ dimension: "tenant", id: "unknown" }], 1),
    ).rejects.toBeInstanceOf(RateLimitError);
  });

  test("acquire passes for unknown id when * default exists", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:*", { kind: "token-bucket", capacity: 5, refillPerSec: 1 }],
    ]);
    const rl = createRateLimiter({ buckets });
    await rl.acquire([{ dimension: "tenant", id: "any" }], 1);
  });

  test("partial failure refunds successful acquisitions", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 10, refillPerSec: 1 }],
      // provider:p1 missing
    ]);
    const rl = createRateLimiter({ buckets });
    await expect(
      rl.acquire(
        [
          { dimension: "tenant", id: "t1" },
          { dimension: "provider", id: "p1" },
        ],
        1,
      ),
    ).rejects.toBeInstanceOf(RateLimitError);
    // The unknown provider key is rejected before any bucket is charged, so
    // the tenant bucket must still report full capacity.
    const inspect = rl.inspect({ dimension: "tenant", id: "t1" });
    expect(inspect?.available).toBeCloseTo(10, 1);
  });

  test("timeout on a later bucket refunds earlier buckets", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 10, refillPerSec: 1 }],
      ["provider:p1", { kind: "token-bucket", capacity: 1, refillPerSec: 0.01 }],
    ]);
    const rl = createRateLimiter({ buckets });
    // Drain the provider bucket so the next acquire has to wait on it.
    await rl.acquire([{ dimension: "provider", id: "p1" }], 1);
    // The tenant bucket is charged first, then the provider bucket times out;
    // this is the path that actually exercises the refund logic.
    await expect(
      rl.acquire(
        [
          { dimension: "tenant", id: "t1" },
          { dimension: "provider", id: "p1" },
        ],
        1,
        { maxWaitMs: 50 },
      ),
    ).rejects.toBeInstanceOf(RateLimitError);
    const inspect = rl.inspect({ dimension: "tenant", id: "t1" });
    expect(inspect?.available).toBeCloseTo(10, 1);
  });
});
137
+
/**
 * Multi-dimensional acquire: a single call with the default cost of 1 must
 * charge every bucket named in `keys`.
 */
describe("rate-limiter — multi-dimensional", () => {
  test("acquire sums against tenant + provider + tool buckets", async () => {
    const buckets = new Map<string, BucketConfig>([
      ["tenant:t1", { kind: "token-bucket", capacity: 10, refillPerSec: 1 }],
      ["provider:p1", { kind: "token-bucket", capacity: 10, refillPerSec: 1 }],
      ["tool:Bash", { kind: "token-bucket", capacity: 5, refillPerSec: 1 }],
    ]);
    const rl = createRateLimiter({ buckets });
    await rl.acquire([
      { dimension: "tenant", id: "t1" },
      { dimension: "provider", id: "p1" },
      { dimension: "tool", id: "Bash" },
    ]);
    expect(rl.inspect({ dimension: "tenant", id: "t1" })?.available).toBeCloseTo(9, 1);
    // The provider bucket is charged too; assert it so a skipped dimension
    // cannot go unnoticed.
    expect(rl.inspect({ dimension: "provider", id: "p1" })?.available).toBeCloseTo(9, 1);
    expect(rl.inspect({ dimension: "tool", id: "Bash" })?.available).toBeCloseTo(4, 1);
  });
});
155
+
/**
 * T7 — load: fire 1000 acquires at one token bucket at once and require that
 * all of them complete within a generous wall-clock budget.
 */
describe("rate-limiter — T7 load: 1000 acquirers, no starvation", () => {
  test("1000 concurrent acquires drain in expected wall-clock time", async () => {
    const rl = createRateLimiter({
      buckets: new Map<string, BucketConfig>([
        ["tenant:t1", { kind: "token-bucket", capacity: 100, refillPerSec: 5000 }],
      ]),
    });
    const startedAt = Date.now();
    const pending: Array<Promise<void>> = [];
    for (let i = 0; i < 1000; i++) {
      pending.push(rl.acquire([{ dimension: "tenant", id: "t1" }], 1, { maxWaitMs: 60_000 }));
    }
    await Promise.all(pending);
    const tookMs = Date.now() - startedAt;
    // 900 tokens beyond the initial 100 must be refilled at 5000/s, i.e. a
    // ~180ms floor. The ceiling is deliberately loose for parallel-CI jitter.
    expect(tookMs).toBeLessThan(15_000);
  });
});
173
+
/** `bucketKeyOf` must render a key as `<dimension>:<id>`. */
describe("rate-limiter — bucketKeyOf", () => {
  test("formats dimension + id stably", () => {
    const providerKey: AcquireKey = { dimension: "provider", id: "anthropic" };
    const rendered = bucketKeyOf(providerKey);
    expect(rendered).toBe("provider:anthropic");
  });
});
package/src/index.ts ADDED
@@ -0,0 +1,337 @@
1
+ /**
2
+ * Section 27 — `rate-limiter`. Multi-dimensional gating between callers
3
+ * and downstream services. Three keyed dimensions:
4
+ * - **per-tenant** (gateway-server pre-handler)
5
+ * - **per-provider** (model-router pre-call)
6
+ * - **per-tool** (runtime-core pre-tool-execute, configured in spec under
7
+ * `tools.<Name>.rateLimit`)
8
+ *
9
+ * Two algorithms; pick per-bucket:
10
+ * - **token-bucket** — burst-tolerant. `capacity` tokens; refill at
11
+ * `refillPerSec`. Acquire blocks until enough tokens are available.
12
+ * - **leaky-bucket** — smoothing. Treat acquires as drops landing in a
13
+ * bucket that drains at `refillPerSec`. New drops queue when the
14
+ * bucket is full; the queue serves drops at the drain rate.
15
+ *
16
+ * `acquire(keys, cost)` first checks that every key maps to a configured
17
+ * bucket (exact match or the `<dimension>:*` default) and denies otherwise,
18
+ * so an unknown key always fails closed. It then charges the buckets one
19
+ * at a time, in order, waiting up to `maxWaitMs` on each; if a later bucket
20
+ * rejects, the cost already taken from earlier buckets is refunded.
21
+ */
22
+ import { CrewhausError } from "@crewhaus/errors";
23
+
/**
 * Error thrown by the rate limiter: raised when a key has no configured
 * bucket and when an acquire gives up after `maxWaitMs`.
 *
 * NOTE(review): the error is reported to `CrewhausError` under the "config"
 * category — confirm that category is intended for runtime throttling too.
 */
export class RateLimitError extends CrewhausError {
  override readonly name = "RateLimitError";

  /**
   * @param message Human-readable description of the denial.
   * @param cause Optional underlying error, forwarded to the base class.
   */
  constructor(message: string, cause?: unknown) {
    super("config", message, cause);
  }
}
30
+
/** Algorithm selector for a bucket; chosen independently per bucket. */
export type BucketKind =
  | "token-bucket"
  | "leaky-bucket";
32
+
/** Static configuration of a single bucket. */
export type BucketConfig = {
  /** Which algorithm governs this bucket. */
  readonly kind: BucketKind;
  /**
   * Upper bound of the bucket: the most tokens a token-bucket can hold, or
   * the highest fill level a leaky-bucket accepts before acquirers queue.
   */
  readonly capacity: number;
  /**
   * Units per second: the refill rate of a token-bucket, or the drain rate
   * of a leaky-bucket.
   */
  readonly refillPerSec: number;
};
40
+
/** Identifies one bucket as a `(dimension, id)` pair. */
export type AcquireKey = {
  /** The limiting dimension the id belongs to. */
  readonly dimension: "tenant" | "provider" | "tool";
  /** Identifier within the dimension (e.g. a tenant, provider or tool name). */
  readonly id: string;
};
45
+
/** Per-call options for `acquire`. */
export type AcquireOptions = {
  /**
   * Longest time, in milliseconds, to wait on a bucket before rejecting.
   * Defaults to 30s. The limit is applied to each key's bucket in turn, so a
   * multi-key acquire can wait longer than this in total.
   */
  readonly maxWaitMs?: number;
  /** Clock override for tests; takes precedence over the limiter-level clock. */
  readonly now?: () => number;
};
52
+
/** Construction options for `createRateLimiter`. */
export type RateLimiterOptions = {
  /**
   * Bucket configuration keyed by `"<dimension>:<id>"`. A key is looked up
   * by exact match first; if absent, the `"<dimension>:*"` entry is used as
   * the per-dimension default when one has been declared. Keys that match
   * neither are denied (fail-closed).
   */
  readonly buckets: ReadonlyMap<string, BucketConfig>;
  /** Clock override for tests; used whenever a call supplies no clock of its own. */
  readonly now?: () => number;
};
64
+
/** Public surface of a rate limiter instance. */
export interface RateLimiter {
  /**
   * Take `cost` units (1 when omitted) from the bucket of every key. The
   * returned promise resolves once all buckets have been charged, and
   * rejects with `RateLimitError` when a key has no bucket or when a bucket
   * could not supply the cost within `maxWaitMs`.
   */
  acquire(keys: ReadonlyArray<AcquireKey>, cost?: number, opts?: AcquireOptions): Promise<void>;
  /**
   * Diagnostic snapshot of one bucket, or `undefined` when the key resolves
   * to no configuration. `waitingCount` reports the leaky-bucket queue
   * length; it is always 0 for token buckets.
   */
  inspect(key: AcquireKey):
    | {
        config: BucketConfig;
        available: number;
        waitingCount: number;
      }
    | undefined;
}
81
+
/**
 * Render a key as the string used to index bucket configuration and state.
 *
 * @param key The dimension/id pair to render.
 * @returns `"<dimension>:<id>"`.
 */
export function bucketKeyOf(key: AcquireKey): string {
  const { dimension, id } = key;
  return `${dimension}:${id}`;
}
86
+
/**
 * Synchronous capacity check for a token bucket (no waiting, no charge).
 *
 * Note that this is not a pure read: `state` is first refilled up to `now`,
 * so its `tokens` and `lastRefillMs` fields are updated as a side effect.
 *
 * @param state Mutable bucket state to refill and test.
 * @param cost Number of tokens the caller would like to take.
 * @param now Current time in milliseconds.
 * @param config Configuration supplying capacity and refill rate.
 * @returns `true` when the refilled bucket holds at least `cost` tokens.
 */
export function tokenBucketAvailable(
  state: TokenBucketState,
  cost: number,
  now: number,
  config: BucketConfig,
): boolean {
  refillTokenBucket(state, now, config);
  const hasEnough = state.tokens >= cost;
  return hasEnough;
}
97
+
/** Mutable runtime state of one token bucket. */
type TokenBucketState = {
  /** Tokens currently available; may be fractional between refills. */
  tokens: number;
  /** Timestamp (ms) up to which refill has already been credited. */
  lastRefillMs: number;
};
102
+
/** Mutable runtime state of one leaky bucket. */
type LeakyBucketState = {
  /** Current fill level: admitted cost that has not yet drained away. */
  level: number;
  /** Timestamp (ms) up to which draining has already been applied. */
  lastDrainMs: number;
  /** Acquirers waiting for room, served first-in first-out. */
  queue: Array<{
    /** Amount this acquirer adds to `level` once admitted. */
    cost: number;
    /** Settles the acquirer's promise successfully. */
    resolve: () => void;
    /** Settles the acquirer's promise with an error. */
    reject: (err: Error) => void;
    /** Pending max-wait timeout, cleared when the acquirer is admitted. */
    timer?: ReturnType<typeof setTimeout>;
  }>;
};
115
+
/**
 * Credit a token bucket with the tokens accrued since its last refill.
 *
 * The credit is `refillPerSec` times the seconds elapsed (never negative if
 * the clock moved backwards), the total is capped at `capacity`, and the
 * refill timestamp is moved to `now`. Mutates `state` in place.
 */
function refillTokenBucket(state: TokenBucketState, now: number, config: BucketConfig): void {
  const secondsSinceRefill = Math.max(0, (now - state.lastRefillMs) / 1000);
  const toppedUp = state.tokens + secondsSinceRefill * config.refillPerSec;
  state.lastRefillMs = now;
  state.tokens = Math.min(config.capacity, toppedUp);
}
122
+
/**
 * Lower a leaky bucket's level by what has drained since the last update.
 *
 * The drained amount is `refillPerSec` times the seconds elapsed (never
 * negative if the clock moved backwards), the level is floored at 0, and
 * the drain timestamp is moved to `now`. Mutates `state` in place.
 */
function drainLeakyBucket(state: LeakyBucketState, now: number, config: BucketConfig): void {
  const secondsSinceDrain = Math.max(0, (now - state.lastDrainMs) / 1000);
  const remaining = state.level - secondsSinceDrain * config.refillPerSec;
  state.lastDrainMs = now;
  state.level = Math.max(0, remaining);
}
129
+
/**
 * Largest delay, in milliseconds, that `setTimeout` honours (2^31 - 1).
 * Larger or infinite delays are coerced by the runtime to ~1 ms, which would
 * turn "wait a very long time" into an immediate callback, so every computed
 * delay below is clamped to this value.
 */
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Create an in-memory rate limiter over a fixed set of bucket configurations.
 *
 * Runtime state is created lazily per concrete `"<dimension>:<id>"` key the
 * first time it is acquired. A key served by the `"<dimension>:*"` default
 * therefore gets its own state, sharing only the default's configuration.
 * State entries are never evicted.
 *
 * @param opts Bucket configurations and an optional clock override.
 * @returns A limiter exposing `acquire` and `inspect`.
 */
export function createRateLimiter(opts: RateLimiterOptions): RateLimiter {
  const buckets = opts.buckets;
  const tokenStates = new Map<string, TokenBucketState>();
  const leakyStates = new Map<string, LeakyBucketState>();
  /** Leaky-bucket keys that currently have a drain loop scheduled. */
  const drainingKeys = new Set<string>();

  /** Current time from the most specific clock available. */
  function getNow(callerNow?: () => number): number {
    return (callerNow ?? opts.now ?? Date.now)();
  }

  /** Exact-match configuration for `key`, else its per-dimension `*` default. */
  function resolveConfig(key: AcquireKey): BucketConfig | undefined {
    return buckets.get(bucketKeyOf(key)) ?? buckets.get(`${key.dimension}:*`);
  }

  /** Fetch token-bucket state, creating it full on first use. */
  function getOrInitTokenState(key: string, config: BucketConfig, now: number): TokenBucketState {
    let s = tokenStates.get(key);
    if (!s) {
      s = { tokens: config.capacity, lastRefillMs: now };
      tokenStates.set(key, s);
    }
    return s;
  }

  /** Fetch leaky-bucket state, creating it empty on first use. */
  function getOrInitLeakyState(key: string, now: number): LeakyBucketState {
    let s = leakyStates.get(key);
    if (!s) {
      s = { level: 0, lastDrainMs: now, queue: [] };
      leakyStates.set(key, s);
    }
    return s;
  }

  /**
   * Make sure exactly one drain loop is running for a leaky bucket. The loop
   * drains the bucket, admits queued acquirers in FIFO order while they fit,
   * and stops itself once the queue is empty. Sharing one loop per bucket
   * avoids one self-rescheduling timer chain per queued acquirer. The loop
   * reads time from the clock of the acquirer that started it.
   */
  function ensureLeakyDrainLoop(
    k: string,
    state: LeakyBucketState,
    config: BucketConfig,
    nowFn: () => number,
  ): void {
    if (drainingKeys.has(k)) return;
    drainingKeys.add(k);
    // Poll roughly once per drained unit, never faster than every 10 ms, and
    // never with a delay the timer API cannot represent (refillPerSec of 0).
    const drainEveryMs = Math.min(MAX_TIMER_DELAY_MS, Math.max(10, 1000 / config.refillPerSec));
    const tick = (): void => {
      drainLeakyBucket(state, nowFn(), config);
      // Strict FIFO: stop at the first waiter that does not fit yet.
      for (
        let head = state.queue[0];
        head !== undefined && state.level + head.cost <= config.capacity;
        head = state.queue[0]
      ) {
        state.queue.shift();
        if (head.timer !== undefined) clearTimeout(head.timer);
        state.level += head.cost;
        head.resolve();
      }
      if (state.queue.length > 0) {
        setTimeout(tick, drainEveryMs);
      } else {
        drainingKeys.delete(k);
      }
    };
    setTimeout(tick, drainEveryMs);
  }

  /**
   * Wait for a single bucket to allow `cost` tokens. Resolves when ready.
   * `maxWaitMs` enforces the cap; rejects with RateLimitError on timeout.
   */
  function acquireOne(
    key: AcquireKey,
    cost: number,
    config: BucketConfig,
    maxWaitMs: number,
    nowFn: () => number,
  ): Promise<void> {
    const k = bucketKeyOf(key);
    const start = nowFn();

    if (config.kind === "token-bucket") {
      return new Promise<void>((resolve, reject) => {
        const tryAcquire = (): void => {
          const now = nowFn();
          const state = getOrInitTokenState(k, config, now);
          refillTokenBucket(state, now, config);
          if (state.tokens >= cost) {
            state.tokens -= cost;
            resolve();
            return;
          }
          const remainingMs = maxWaitMs - (now - start);
          if (remainingMs <= 0) {
            reject(
              new RateLimitError(
                `rate limit exceeded for ${k}: ${cost} tokens needed, ${state.tokens.toFixed(2)} available, max wait ${maxWaitMs}ms reached`,
              ),
            );
            return;
          }
          // Sleep until the deficit should have accrued, but no longer than
          // the remaining budget and no shorter than 10 ms.
          const deficit = cost - state.tokens;
          const msToWait = Math.min(
            remainingMs,
            MAX_TIMER_DELAY_MS,
            Math.max(10, (deficit / config.refillPerSec) * 1000),
          );
          setTimeout(tryAcquire, msToWait);
        };
        tryAcquire();
      });
    }

    // leaky-bucket
    return new Promise<void>((resolve, reject) => {
      const now = nowFn();
      const state = getOrInitLeakyState(k, now);
      drainLeakyBucket(state, now, config);
      if (state.queue.length === 0 && state.level + cost <= config.capacity) {
        // Fast-path: nobody is queued ahead and the cost fits.
        state.level += cost;
        resolve();
        return;
      }
      // Otherwise queue behind earlier acquirers and let the drain loop admit us.
      const entry: LeakyBucketState["queue"][number] = { cost, resolve, reject };
      // A max wait the timer API cannot represent (e.g. Infinity) means "no
      // timeout"; arming it anyway would fire after ~1 ms and reject at once.
      if (!(maxWaitMs > MAX_TIMER_DELAY_MS)) {
        entry.timer = setTimeout(() => {
          const idx = state.queue.indexOf(entry);
          if (idx >= 0) state.queue.splice(idx, 1);
          reject(
            new RateLimitError(
              `rate limit exceeded for ${k}: leaky bucket full, max wait ${maxWaitMs}ms reached`,
            ),
          );
        }, maxWaitMs);
      }
      state.queue.push(entry);
      ensureLeakyDrainLoop(k, state, config, nowFn);
    });
  }

  return {
    /**
     * Charge `cost` against every key's bucket, one bucket at a time in the
     * order given. Rejects with `RateLimitError` — before charging anything —
     * when a key has no bucket, when `cost` is negative or not finite, or
     * when `cost` exceeds a bucket's capacity and so could never be granted.
     * If a later bucket times out, earlier charges are refunded.
     */
    async acquire(keys, cost = 1, callerOpts = {}): Promise<void> {
      const maxWaitMs = callerOpts.maxWaitMs ?? 30_000;
      const nowFn = (): number => getNow(callerOpts.now);

      // Fail-closed: every key must resolve to a known bucket.
      const plan: Array<{ key: AcquireKey; config: BucketConfig }> = [];
      for (const key of keys) {
        const config = resolveConfig(key);
        if (!config) {
          const k = bucketKeyOf(key);
          const fallback = `${key.dimension}:*`;
          throw new RateLimitError(`no bucket configured for ${k} (and no ${fallback} default)`);
        }
        plan.push({ key, config });
      }

      // Reject costs that are meaningless or can never be satisfied, instead
      // of waiting out `maxWaitMs` (or blocking a leaky-bucket queue).
      if (!Number.isFinite(cost) || cost < 0) {
        throw new RateLimitError(`invalid cost ${cost}: must be a finite, non-negative number`);
      }
      for (const { key, config } of plan) {
        if (cost > config.capacity) {
          throw new RateLimitError(
            `rate limit exceeded for ${bucketKeyOf(key)}: cost ${cost} exceeds bucket capacity ${config.capacity}`,
          );
        }
      }

      // Charge sequentially and remember what was charged, so a failure on a
      // later bucket can be undone without a two-phase commit.
      const charged: Array<{ key: AcquireKey; config: BucketConfig }> = [];
      try {
        for (const step of plan) {
          await acquireOne(step.key, cost, step.config, maxWaitMs, nowFn);
          charged.push(step);
        }
      } catch (err) {
        // Refund successful acquisitions so partial failures don't drain
        // buckets. Each bucket is brought up to date first; merely stamping
        // the timestamp would discard the refill/drain accrued since then.
        const now = nowFn();
        for (const { key, config } of charged) {
          const k = bucketKeyOf(key);
          if (config.kind === "token-bucket") {
            const state = tokenStates.get(k);
            if (state) {
              refillTokenBucket(state, now, config);
              state.tokens = Math.min(config.capacity, state.tokens + cost);
            }
          } else {
            const state = leakyStates.get(k);
            if (state) {
              drainLeakyBucket(state, now, config);
              state.level = Math.max(0, state.level - cost);
            }
          }
        }
        throw err;
      }
    },

    /**
     * Snapshot one bucket using the limiter-level clock. Buckets that have
     * never been acquired report full availability. The bucket is refilled
     * or drained up to now as part of taking the snapshot. Token-bucket
     * waiters are not tracked, so `waitingCount` is 0 for that kind.
     */
    inspect(key): { config: BucketConfig; available: number; waitingCount: number } | undefined {
      const k = bucketKeyOf(key);
      const config = resolveConfig(key);
      if (!config) return undefined;
      const now = (opts.now ?? Date.now)();
      if (config.kind === "token-bucket") {
        const state = tokenStates.get(k);
        if (!state) {
          return { config, available: config.capacity, waitingCount: 0 };
        }
        refillTokenBucket(state, now, config);
        return { config, available: state.tokens, waitingCount: 0 };
      }
      const state = leakyStates.get(k);
      if (!state) {
        return { config, available: config.capacity, waitingCount: 0 };
      }
      drainLeakyBucket(state, now, config);
      return {
        config,
        available: Math.max(0, config.capacity - state.level),
        waitingCount: state.queue.length,
      };
    },
  };
}