@crewhaus/gateway-server 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -14
- package/src/index.test.ts +599 -5
- package/src/index.ts +134 -49
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crewhaus/gateway-server",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Bun.serve daemon speaking gateway-protocol — JWT auth + per-tenant routing + budget enforcement",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -12,16 +12,17 @@
|
|
|
12
12
|
"test": "bun test src"
|
|
13
13
|
},
|
|
14
14
|
"dependencies": {
|
|
15
|
-
"@crewhaus/
|
|
16
|
-
"@crewhaus/
|
|
17
|
-
"@crewhaus/
|
|
18
|
-
"@crewhaus/
|
|
15
|
+
"@crewhaus/durable-state": "0.1.2",
|
|
16
|
+
"@crewhaus/audit-log": "0.1.2",
|
|
17
|
+
"@crewhaus/errors": "0.1.2",
|
|
18
|
+
"@crewhaus/gateway-protocol": "0.1.2",
|
|
19
|
+
"@crewhaus/tenancy": "0.1.2"
|
|
19
20
|
},
|
|
20
21
|
"license": "Apache-2.0",
|
|
21
22
|
"author": {
|
|
22
23
|
"name": "Max Meier",
|
|
23
|
-
"email": "max@
|
|
24
|
-
"url": "https://
|
|
24
|
+
"email": "max@crewhaus.ai",
|
|
25
|
+
"url": "https://crewhaus.ai"
|
|
25
26
|
},
|
|
26
27
|
"repository": {
|
|
27
28
|
"type": "git",
|
|
@@ -33,12 +34,7 @@
|
|
|
33
34
|
"url": "https://github.com/crewhaus/factory/issues"
|
|
34
35
|
},
|
|
35
36
|
"publishConfig": {
|
|
36
|
-
"access": "
|
|
37
|
+
"access": "public"
|
|
37
38
|
},
|
|
38
|
-
"files": [
|
|
39
|
-
"src",
|
|
40
|
-
"README.md",
|
|
41
|
-
"LICENSE",
|
|
42
|
-
"NOTICE"
|
|
43
|
-
]
|
|
39
|
+
"files": ["src", "README.md", "LICENSE", "NOTICE"]
|
|
44
40
|
}
|
package/src/index.test.ts
CHANGED
|
@@ -1,9 +1,45 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { createHmac } from "node:crypto";
|
|
2
3
|
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
4
|
import { tmpdir } from "node:os";
|
|
4
5
|
import { join } from "node:path";
|
|
6
|
+
import { SqliteBudgetStore } from "@crewhaus/durable-state";
|
|
7
|
+
import { ErrorCode } from "@crewhaus/gateway-protocol";
|
|
5
8
|
import { type Tenant, buildTenant } from "@crewhaus/tenancy";
|
|
6
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
GatewayServerError,
|
|
11
|
+
PROTOCOL_VERSION,
|
|
12
|
+
createGatewayServer,
|
|
13
|
+
signJwt,
|
|
14
|
+
statusFor,
|
|
15
|
+
verifyJwt,
|
|
16
|
+
} from "./index";
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Forge a token with an arbitrary header + claims (signed with `secret`) so
|
|
20
|
+
* we can exercise rejection paths `signJwt` would never produce — e.g. an
|
|
21
|
+
* `alg: none` header or a body with no `exp`.
|
|
22
|
+
*/
|
|
23
|
+
function forgeToken(
|
|
24
|
+
header: Record<string, unknown>,
|
|
25
|
+
claims: Record<string, unknown>,
|
|
26
|
+
secret: string,
|
|
27
|
+
): string {
|
|
28
|
+
const b64url = (s: string): string =>
|
|
29
|
+
Buffer.from(s, "utf8")
|
|
30
|
+
.toString("base64")
|
|
31
|
+
.replace(/\+/g, "-")
|
|
32
|
+
.replace(/\//g, "_")
|
|
33
|
+
.replace(/=+$/, "");
|
|
34
|
+
const data = `${b64url(JSON.stringify(header))}.${b64url(JSON.stringify(claims))}`;
|
|
35
|
+
const sig = createHmac("sha256", secret)
|
|
36
|
+
.update(data)
|
|
37
|
+
.digest("base64")
|
|
38
|
+
.replace(/\+/g, "-")
|
|
39
|
+
.replace(/\//g, "_")
|
|
40
|
+
.replace(/=+$/, "");
|
|
41
|
+
return `${data}.${sig}`;
|
|
42
|
+
}
|
|
7
43
|
|
|
8
44
|
let tmp: string;
|
|
9
45
|
|
|
@@ -61,6 +97,46 @@ describe("JWT round-trip", () => {
|
|
|
61
97
|
const token = signJwt({ tenant_id: "../etc" }, SECRET);
|
|
62
98
|
expect(() => verifyJwt(token, SECRET)).toThrow(/invalid tenantId/);
|
|
63
99
|
});
|
|
100
|
+
|
|
101
|
+
test("valid short-lived HS256 token verifies", () => {
|
|
102
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
103
|
+
const token = signJwt({ tenant_id: "tenant-a", iat, exp: iat + 300 }, SECRET);
|
|
104
|
+
const claims = verifyJwt(token, SECRET);
|
|
105
|
+
expect(claims.tenant_id).toBe("tenant-a");
|
|
106
|
+
expect(claims.exp).toBe(iat + 300);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
test("rejects token with no exp claim (CWE-613)", () => {
|
|
110
|
+
// Forge directly — `signJwt` always injects an exp.
|
|
111
|
+
const token = forgeToken({ alg: "HS256", typ: "JWT" }, { tenant_id: "tenant-a" }, SECRET);
|
|
112
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/missing exp/);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
test("rejects token whose header alg is not HS256", () => {
|
|
116
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
117
|
+
const token = forgeToken(
|
|
118
|
+
{ alg: "none", typ: "JWT" },
|
|
119
|
+
{ tenant_id: "tenant-a", iat, exp: iat + 300 },
|
|
120
|
+
SECRET,
|
|
121
|
+
);
|
|
122
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/unsupported alg/);
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
test("rejects token whose header typ is not JWT", () => {
|
|
126
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
127
|
+
const token = forgeToken(
|
|
128
|
+
{ alg: "HS256", typ: "JWE" },
|
|
129
|
+
{ tenant_id: "tenant-a", iat, exp: iat + 300 },
|
|
130
|
+
SECRET,
|
|
131
|
+
);
|
|
132
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/unsupported typ/);
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
test("rejects a token whose lifetime exceeds the 24h ceiling", () => {
|
|
136
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
137
|
+
const token = signJwt({ tenant_id: "tenant-a", iat, exp: iat + 25 * 60 * 60 }, SECRET);
|
|
138
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/lifetime exceeds maximum/);
|
|
139
|
+
});
|
|
64
140
|
});
|
|
65
141
|
|
|
66
142
|
describe("server.handle (T2/T3 contract)", () => {
|
|
@@ -137,9 +213,9 @@ describe("server.handle (T2/T3 contract)", () => {
|
|
|
137
213
|
describe("budget enforcement", () => {
|
|
138
214
|
test("recordUsage increments cumulative usage", async () => {
|
|
139
215
|
const { server } = makeServer();
|
|
140
|
-
server.recordUsage("tenant-a", { input: 1000, output: 200 });
|
|
141
|
-
server.recordUsage("tenant-a", { input: 500, output: 100 });
|
|
142
|
-
expect(server.usage("tenant-a")).toEqual({ input: 1500, output: 300 });
|
|
216
|
+
await server.recordUsage("tenant-a", { input: 1000, output: 200 });
|
|
217
|
+
await server.recordUsage("tenant-a", { input: 500, output: 100 });
|
|
218
|
+
expect(await server.usage("tenant-a")).toEqual({ input: 1500, output: 300 });
|
|
143
219
|
});
|
|
144
220
|
|
|
145
221
|
test("exhausted input budget → 429 budget_exceeded", async () => {
|
|
@@ -151,7 +227,7 @@ describe("budget enforcement", () => {
|
|
|
151
227
|
handler: async () => ({ ok: true }),
|
|
152
228
|
tenantOverrides: { "tenant-a": tinyA },
|
|
153
229
|
});
|
|
154
|
-
server.recordUsage("tenant-a", { input: 999, output: 0 });
|
|
230
|
+
await server.recordUsage("tenant-a", { input: 999, output: 0 });
|
|
155
231
|
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
156
232
|
const res = await server.handle({
|
|
157
233
|
bearer: token,
|
|
@@ -166,6 +242,110 @@ describe("budget enforcement", () => {
|
|
|
166
242
|
error: { code: "budget_exceeded", message: expect.stringMatching(/input tokens/) },
|
|
167
243
|
});
|
|
168
244
|
});
|
|
245
|
+
|
|
246
|
+
// SECURITY: without an in-flight reservation, concurrent requests all pass
|
|
247
|
+
// checkBudget (which only sees recorded usage = 0) before any records, so a
|
|
248
|
+
// burst blows past the cap. The reservation counts each in-flight request.
|
|
249
|
+
test("in-flight reservation bounds a concurrent burst (TOCTOU)", async () => {
|
|
250
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
251
|
+
const tinyA: Tenant = { ...tenantA, budget: { maxInputTokens: 100, maxOutputTokens: 100 } };
|
|
252
|
+
const server = createGatewayServer({
|
|
253
|
+
jwtSecret: SECRET,
|
|
254
|
+
tenantsRoot: tmp,
|
|
255
|
+
handler: async () => ({ ok: true }),
|
|
256
|
+
tenantOverrides: { "tenant-a": tinyA },
|
|
257
|
+
estimateUsage: () => ({ input: 60, output: 0 }),
|
|
258
|
+
});
|
|
259
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
260
|
+
const req = (id: string) =>
|
|
261
|
+
server.handle({
|
|
262
|
+
bearer: token,
|
|
263
|
+
body: {
|
|
264
|
+
protocol: "crewhaus.v1",
|
|
265
|
+
id,
|
|
266
|
+
method: "runs.create",
|
|
267
|
+
params: { spec: "s", input: "" },
|
|
268
|
+
},
|
|
269
|
+
});
|
|
270
|
+
// Three concurrent requests @ 60 est. tokens vs a 100-token budget: with
|
|
271
|
+
// recorded usage 0, all three would pass the old check; the cumulative
|
|
272
|
+
// reservation (60+60+60) blocks the 2nd and 3rd.
|
|
273
|
+
const results = await Promise.all([req("1"), req("2"), req("3")]);
|
|
274
|
+
const rejected = results.filter(
|
|
275
|
+
(r) =>
|
|
276
|
+
typeof r === "object" &&
|
|
277
|
+
r !== null &&
|
|
278
|
+
"error" in r &&
|
|
279
|
+
(r as { error: { code: string } }).error.code === "budget_exceeded",
|
|
280
|
+
);
|
|
281
|
+
expect(rejected.length).toBeGreaterThanOrEqual(2);
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
// SECURITY (audit R3): two gateway "replicas" sharing a durable budget
|
|
285
|
+
// store enforce ONE budget. Before the seam each replica had its own
|
|
286
|
+
// in-memory maps, multiplying every tenant budget by the replica count.
|
|
287
|
+
test("replicas sharing a SqliteBudgetStore enforce a single budget", async () => {
|
|
288
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
289
|
+
const tinyA: Tenant = { ...tenantA, budget: { maxInputTokens: 100, maxOutputTokens: 100 } };
|
|
290
|
+
const storeFile = join(tmp, "budget.db");
|
|
291
|
+
const mk = () =>
|
|
292
|
+
createGatewayServer({
|
|
293
|
+
jwtSecret: SECRET,
|
|
294
|
+
tenantsRoot: tmp,
|
|
295
|
+
handler: async () => ({ ok: true }),
|
|
296
|
+
tenantOverrides: { "tenant-a": tinyA },
|
|
297
|
+
estimateUsage: () => ({ input: 60, output: 0 }),
|
|
298
|
+
budgetStore: new SqliteBudgetStore({ path: storeFile }),
|
|
299
|
+
});
|
|
300
|
+
const replicaA = mk();
|
|
301
|
+
const replicaB = mk();
|
|
302
|
+
// Usage recorded through replica A is visible to replica B...
|
|
303
|
+
await replicaA.recordUsage("tenant-a", { input: 70, output: 0 });
|
|
304
|
+
expect(await replicaB.usage("tenant-a")).toEqual({ input: 70, output: 0 });
|
|
305
|
+
// ...and bounds replica B's requests (70 recorded + 60 estimate >= 100).
|
|
306
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
307
|
+
const res = await replicaB.handle({
|
|
308
|
+
bearer: token,
|
|
309
|
+
body: {
|
|
310
|
+
protocol: "crewhaus.v1",
|
|
311
|
+
id: "1",
|
|
312
|
+
method: "runs.create",
|
|
313
|
+
params: { spec: "s", input: "" },
|
|
314
|
+
},
|
|
315
|
+
});
|
|
316
|
+
expect(res).toMatchObject({
|
|
317
|
+
error: { code: "budget_exceeded", message: expect.stringMatching(/input tokens 130\/100/) },
|
|
318
|
+
});
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
test("reservation is released after each request (sequential requests aren't starved)", async () => {
|
|
322
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
323
|
+
const tinyA: Tenant = { ...tenantA, budget: { maxInputTokens: 100, maxOutputTokens: 100 } };
|
|
324
|
+
const server = createGatewayServer({
|
|
325
|
+
jwtSecret: SECRET,
|
|
326
|
+
tenantsRoot: tmp,
|
|
327
|
+
handler: async () => ({ ok: true }),
|
|
328
|
+
tenantOverrides: { "tenant-a": tinyA },
|
|
329
|
+
estimateUsage: () => ({ input: 60, output: 0 }),
|
|
330
|
+
});
|
|
331
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
332
|
+
const req = () =>
|
|
333
|
+
server.handle({
|
|
334
|
+
bearer: token,
|
|
335
|
+
body: {
|
|
336
|
+
protocol: "crewhaus.v1",
|
|
337
|
+
id: "x",
|
|
338
|
+
method: "runs.create",
|
|
339
|
+
params: { spec: "s", input: "" },
|
|
340
|
+
},
|
|
341
|
+
});
|
|
342
|
+
// Run-to-completion releases the 60-token reservation, so the next request
|
|
343
|
+
// (recorded usage still 0 here) reserves freshly and succeeds.
|
|
344
|
+
const a = await req();
|
|
345
|
+
const b = await req();
|
|
346
|
+
expect(a).not.toMatchObject({ error: { code: "budget_exceeded" } });
|
|
347
|
+
expect(b).not.toMatchObject({ error: { code: "budget_exceeded" } });
|
|
348
|
+
});
|
|
169
349
|
});
|
|
170
350
|
|
|
171
351
|
describe("tenancy isolation", () => {
|
|
@@ -218,4 +398,418 @@ describe("audit log", () => {
|
|
|
218
398
|
for await (const r of log.read()) rows.push(r);
|
|
219
399
|
expect(rows.length).toBe(1);
|
|
220
400
|
});
|
|
401
|
+
|
|
402
|
+
test("the audit row carries method, tenantId and the token's sub claim", async () => {
|
|
403
|
+
const { server, tenantA } = makeServer();
|
|
404
|
+
const token = signJwt({ tenant_id: "tenant-a", sub: "user-42" }, SECRET);
|
|
405
|
+
await server.handle({
|
|
406
|
+
bearer: token,
|
|
407
|
+
body: {
|
|
408
|
+
protocol: "crewhaus.v1",
|
|
409
|
+
id: "1",
|
|
410
|
+
method: "runs.create",
|
|
411
|
+
params: { spec: "s", input: "" },
|
|
412
|
+
},
|
|
413
|
+
});
|
|
414
|
+
const log = await server.getAuditLog(tenantA);
|
|
415
|
+
const rows: Array<{ payload: { method: string; tenantId: string; sub?: string } }> = [];
|
|
416
|
+
for await (const r of log.read())
|
|
417
|
+
rows.push(r as { payload: { method: string; tenantId: string; sub?: string } });
|
|
418
|
+
expect(rows[0]?.payload).toEqual({
|
|
419
|
+
method: "runs.create",
|
|
420
|
+
tenantId: "tenant-a",
|
|
421
|
+
sub: "user-42",
|
|
422
|
+
});
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
test("getAuditLog memoises — the same log instance is returned per tenant", async () => {
|
|
426
|
+
const { server, tenantA } = makeServer();
|
|
427
|
+
const first = await server.getAuditLog(tenantA);
|
|
428
|
+
const second = await server.getAuditLog(tenantA);
|
|
429
|
+
expect(second).toBe(first);
|
|
430
|
+
});
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
describe("verifyJwt — iat edge cases (forged tokens)", () => {
|
|
434
|
+
test("rejects a token whose iat is in the future", () => {
|
|
435
|
+
const future = Math.floor((Date.now() + 10 * 60_000) / 1000);
|
|
436
|
+
const token = forgeToken(
|
|
437
|
+
{ alg: "HS256", typ: "JWT" },
|
|
438
|
+
{ tenant_id: "tenant-a", iat: future, exp: future + 60 },
|
|
439
|
+
SECRET,
|
|
440
|
+
);
|
|
441
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/iat in the future/);
|
|
442
|
+
});
|
|
443
|
+
|
|
444
|
+
test("rejects a token whose iat is non-numeric", () => {
|
|
445
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
446
|
+
const token = forgeToken(
|
|
447
|
+
{ alg: "HS256", typ: "JWT" },
|
|
448
|
+
{ tenant_id: "tenant-a", iat: "soon", exp: iat + 300 },
|
|
449
|
+
SECRET,
|
|
450
|
+
);
|
|
451
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/malformed iat/);
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
test("rejects a body with a missing tenant_id claim", () => {
|
|
455
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
456
|
+
const token = forgeToken({ alg: "HS256", typ: "JWT" }, { iat, exp: iat + 300 }, SECRET);
|
|
457
|
+
expect(() => verifyJwt(token, SECRET)).toThrow(/missing tenant_id/);
|
|
458
|
+
});
|
|
459
|
+
|
|
460
|
+
test("rejects a token whose body is not valid JSON", () => {
|
|
461
|
+
// Header is valid; body decodes to non-JSON bytes; signature matches that body.
|
|
462
|
+
const b64url = (s: string): string =>
|
|
463
|
+
Buffer.from(s, "utf8")
|
|
464
|
+
.toString("base64")
|
|
465
|
+
.replace(/\+/g, "-")
|
|
466
|
+
.replace(/\//g, "_")
|
|
467
|
+
.replace(/=+$/, "");
|
|
468
|
+
const headerB64 = b64url(JSON.stringify({ alg: "HS256", typ: "JWT" }));
|
|
469
|
+
const bodyB64 = b64url("this-is-not-json{");
|
|
470
|
+
const data = `${headerB64}.${bodyB64}`;
|
|
471
|
+
const sig = createHmac("sha256", SECRET)
|
|
472
|
+
.update(data)
|
|
473
|
+
.digest("base64")
|
|
474
|
+
.replace(/\+/g, "-")
|
|
475
|
+
.replace(/\//g, "_")
|
|
476
|
+
.replace(/=+$/, "");
|
|
477
|
+
expect(() => verifyJwt(`${data}.${sig}`, SECRET)).toThrow(/malformed JWT body/);
|
|
478
|
+
});
|
|
479
|
+
|
|
480
|
+
test("rejects a token whose header is not valid JSON", () => {
|
|
481
|
+
const b64url = (s: string): string =>
|
|
482
|
+
Buffer.from(s, "utf8")
|
|
483
|
+
.toString("base64")
|
|
484
|
+
.replace(/\+/g, "-")
|
|
485
|
+
.replace(/\//g, "_")
|
|
486
|
+
.replace(/=+$/, "");
|
|
487
|
+
const headerB64 = b64url("not-json{");
|
|
488
|
+
const iat = Math.floor(Date.now() / 1000);
|
|
489
|
+
const bodyB64 = b64url(JSON.stringify({ tenant_id: "tenant-a", iat, exp: iat + 300 }));
|
|
490
|
+
const data = `${headerB64}.${bodyB64}`;
|
|
491
|
+
const sig = createHmac("sha256", SECRET)
|
|
492
|
+
.update(data)
|
|
493
|
+
.digest("base64")
|
|
494
|
+
.replace(/\+/g, "-")
|
|
495
|
+
.replace(/\//g, "_")
|
|
496
|
+
.replace(/=+$/, "");
|
|
497
|
+
expect(() => verifyJwt(`${data}.${sig}`, SECRET)).toThrow(/malformed JWT header/);
|
|
498
|
+
});
|
|
499
|
+
});
|
|
500
|
+
|
|
501
|
+
describe("createGatewayServer — injected clock + default tenant building", () => {
|
|
502
|
+
test("honours an injected now() for expiry checks", async () => {
|
|
503
|
+
// Token expires at T+300s. Pin the clock past expiry; the request must 401.
|
|
504
|
+
const iat = 1_000_000;
|
|
505
|
+
const token = signJwt({ tenant_id: "tenant-a", iat, exp: iat + 300 }, SECRET);
|
|
506
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
507
|
+
const server = createGatewayServer({
|
|
508
|
+
jwtSecret: SECRET,
|
|
509
|
+
tenantsRoot: tmp,
|
|
510
|
+
handler: async () => ({ ok: true }),
|
|
511
|
+
tenantOverrides: { "tenant-a": tenantA },
|
|
512
|
+
now: () => (iat + 10_000) * 1000,
|
|
513
|
+
});
|
|
514
|
+
const res = await server.handle({
|
|
515
|
+
bearer: token,
|
|
516
|
+
body: {
|
|
517
|
+
protocol: "crewhaus.v1",
|
|
518
|
+
id: "1",
|
|
519
|
+
method: "runs.create",
|
|
520
|
+
params: { spec: "s", input: "" },
|
|
521
|
+
},
|
|
522
|
+
});
|
|
523
|
+
expect(res).toMatchObject({
|
|
524
|
+
error: { code: "unauthorized", message: expect.stringMatching(/expired/) },
|
|
525
|
+
});
|
|
526
|
+
});
|
|
527
|
+
|
|
528
|
+
test("builds a tenant from tenantsRoot when no override is supplied", async () => {
|
|
529
|
+
// No tenantOverrides → tenantFor() falls through to buildTenant(tenantsRoot).
|
|
530
|
+
let seenRoot: string | undefined;
|
|
531
|
+
const server = createGatewayServer({
|
|
532
|
+
jwtSecret: SECRET,
|
|
533
|
+
tenantsRoot: tmp,
|
|
534
|
+
handler: async ({ tenant }) => {
|
|
535
|
+
seenRoot = tenant.auditRoot;
|
|
536
|
+
return { ok: true };
|
|
537
|
+
},
|
|
538
|
+
});
|
|
539
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
540
|
+
const res = await server.handle({
|
|
541
|
+
bearer: token,
|
|
542
|
+
body: {
|
|
543
|
+
protocol: "crewhaus.v1",
|
|
544
|
+
id: "1",
|
|
545
|
+
method: "runs.create",
|
|
546
|
+
params: { spec: "s", input: "" },
|
|
547
|
+
},
|
|
548
|
+
});
|
|
549
|
+
expect(res).toMatchObject({ protocol: "crewhaus.v1", id: "1" });
|
|
550
|
+
expect(seenRoot?.startsWith(tmp)).toBe(true);
|
|
551
|
+
});
|
|
552
|
+
|
|
553
|
+
test("builds a tenant with the package default root when tenantsRoot is omitted", async () => {
|
|
554
|
+
// Neither override nor tenantsRoot → buildTenant() uses its own default root.
|
|
555
|
+
// We never write to disk here: budget is exhausted first so the handler/audit
|
|
556
|
+
// never runs, keeping the test free of real filesystem side effects.
|
|
557
|
+
const server = createGatewayServer({
|
|
558
|
+
jwtSecret: SECRET,
|
|
559
|
+
handler: async () => ({ ok: true }),
|
|
560
|
+
});
|
|
561
|
+
await server.recordUsage("tenant-a", { input: 10_000_000, output: 0 });
|
|
562
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
563
|
+
const res = await server.handle({
|
|
564
|
+
bearer: token,
|
|
565
|
+
body: {
|
|
566
|
+
protocol: "crewhaus.v1",
|
|
567
|
+
id: "1",
|
|
568
|
+
method: "runs.create",
|
|
569
|
+
params: { spec: "s", input: "" },
|
|
570
|
+
},
|
|
571
|
+
});
|
|
572
|
+
expect(res).toMatchObject({ error: { code: "budget_exceeded" } });
|
|
573
|
+
});
|
|
574
|
+
});
|
|
575
|
+
|
|
576
|
+
describe("budget enforcement — output dimension + internal errors", () => {
|
|
577
|
+
test("exhausted output budget → 429 budget_exceeded", async () => {
|
|
578
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
579
|
+
const tinyA: Tenant = { ...tenantA, budget: { maxInputTokens: 1000, maxOutputTokens: 100 } };
|
|
580
|
+
const server = createGatewayServer({
|
|
581
|
+
jwtSecret: SECRET,
|
|
582
|
+
tenantsRoot: tmp,
|
|
583
|
+
handler: async () => ({ ok: true }),
|
|
584
|
+
tenantOverrides: { "tenant-a": tinyA },
|
|
585
|
+
});
|
|
586
|
+
await server.recordUsage("tenant-a", { input: 0, output: 100 });
|
|
587
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
588
|
+
const res = await server.handle({
|
|
589
|
+
bearer: token,
|
|
590
|
+
body: {
|
|
591
|
+
protocol: "crewhaus.v1",
|
|
592
|
+
id: "1",
|
|
593
|
+
method: "runs.create",
|
|
594
|
+
params: { spec: "s", input: "" },
|
|
595
|
+
},
|
|
596
|
+
});
|
|
597
|
+
expect(res).toMatchObject({
|
|
598
|
+
error: { code: "budget_exceeded", message: expect.stringMatching(/output tokens/) },
|
|
599
|
+
});
|
|
600
|
+
});
|
|
601
|
+
|
|
602
|
+
test("a handler that rejects surfaces as 500 internal_error", async () => {
|
|
603
|
+
const { server } = makeServer(async () => {
|
|
604
|
+
throw new Error("handler boom");
|
|
605
|
+
});
|
|
606
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
607
|
+
const res = await server.handle({
|
|
608
|
+
bearer: token,
|
|
609
|
+
body: {
|
|
610
|
+
protocol: "crewhaus.v1",
|
|
611
|
+
id: "1",
|
|
612
|
+
method: "runs.create",
|
|
613
|
+
params: { spec: "s", input: "" },
|
|
614
|
+
},
|
|
615
|
+
});
|
|
616
|
+
expect(res).toMatchObject({
|
|
617
|
+
error: { code: "internal_error", message: "handler boom" },
|
|
618
|
+
});
|
|
619
|
+
});
|
|
620
|
+
|
|
621
|
+
test("a handler that throws a non-Error value is stringified into internal_error", async () => {
|
|
622
|
+
// Reject with a raw (non-Error) string to exercise the server's
|
|
623
|
+
// `String(err)` branch. A plain rejected promise (rather than an `async`
|
|
624
|
+
// body that `throw`s a string literal) keeps the rejection reason exactly
|
|
625
|
+
// "raw string failure" without tripping useAwait / noThrowLiteral.
|
|
626
|
+
const { server } = makeServer(() => Promise.reject("raw string failure"));
|
|
627
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
628
|
+
const res = await server.handle({
|
|
629
|
+
bearer: token,
|
|
630
|
+
body: {
|
|
631
|
+
protocol: "crewhaus.v1",
|
|
632
|
+
id: "1",
|
|
633
|
+
method: "runs.create",
|
|
634
|
+
params: { spec: "s", input: "" },
|
|
635
|
+
},
|
|
636
|
+
});
|
|
637
|
+
expect(res).toMatchObject({
|
|
638
|
+
error: { code: "internal_error", message: "raw string failure" },
|
|
639
|
+
});
|
|
640
|
+
});
|
|
641
|
+
|
|
642
|
+
test("a GatewayServerError that is neither budget nor auth maps to 400 bad_request", async () => {
|
|
643
|
+
const { server } = makeServer(async () => {
|
|
644
|
+
throw new GatewayServerError("some other config problem");
|
|
645
|
+
});
|
|
646
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
647
|
+
const res = await server.handle({
|
|
648
|
+
bearer: token,
|
|
649
|
+
body: {
|
|
650
|
+
protocol: "crewhaus.v1",
|
|
651
|
+
id: "1",
|
|
652
|
+
method: "runs.create",
|
|
653
|
+
params: { spec: "s", input: "" },
|
|
654
|
+
},
|
|
655
|
+
});
|
|
656
|
+
expect(res).toMatchObject({
|
|
657
|
+
error: { code: "bad_request", message: "some other config problem" },
|
|
658
|
+
});
|
|
659
|
+
});
|
|
660
|
+
});
|
|
661
|
+
|
|
662
|
+
describe("statusFor — exhaustive wire code → HTTP status map", () => {
|
|
663
|
+
test("maps every standard ErrorCode and falls back to 200", () => {
|
|
664
|
+
expect(statusFor(ErrorCode.Unauthorized)).toBe(401);
|
|
665
|
+
expect(statusFor(ErrorCode.Forbidden)).toBe(403);
|
|
666
|
+
expect(statusFor(ErrorCode.NotFound)).toBe(404);
|
|
667
|
+
expect(statusFor(ErrorCode.BadRequest)).toBe(400);
|
|
668
|
+
expect(statusFor(ErrorCode.BudgetExceeded)).toBe(429);
|
|
669
|
+
expect(statusFor(ErrorCode.InternalError)).toBe(500);
|
|
670
|
+
// Unknown / empty codes fall through to the 200 default.
|
|
671
|
+
expect(statusFor("totally_unknown_code")).toBe(200);
|
|
672
|
+
expect(statusFor("")).toBe(200);
|
|
673
|
+
});
|
|
674
|
+
});
|
|
675
|
+
|
|
676
|
+
describe("listen — real Bun.serve HTTP surface (loopback)", () => {
|
|
677
|
+
/** Start the daemon on an ephemeral loopback port and return a teardown. */
|
|
678
|
+
async function withHttp(
|
|
679
|
+
server: ReturnType<typeof createGatewayServer>,
|
|
680
|
+
fn: (base: string) => Promise<void>,
|
|
681
|
+
): Promise<void> {
|
|
682
|
+
const { port, close } = await server.listen(0);
|
|
683
|
+
expect(typeof port).toBe("number");
|
|
684
|
+
expect(port).toBeGreaterThan(0);
|
|
685
|
+
try {
|
|
686
|
+
await fn(`http://127.0.0.1:${port}`);
|
|
687
|
+
} finally {
|
|
688
|
+
await close();
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
test("authenticated POST returns 200 with the success envelope", async () => {
|
|
693
|
+
const { server } = makeServer(async ({ tenant }) => ({
|
|
694
|
+
runId: "run_h",
|
|
695
|
+
sessionId: "sess_h",
|
|
696
|
+
tenantId: tenant.id,
|
|
697
|
+
}));
|
|
698
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
699
|
+
await withHttp(server, async (base) => {
|
|
700
|
+
const res = await fetch(base, {
|
|
701
|
+
method: "POST",
|
|
702
|
+
headers: { "content-type": "application/json", authorization: `Bearer ${token}` },
|
|
703
|
+
body: JSON.stringify({
|
|
704
|
+
protocol: PROTOCOL_VERSION,
|
|
705
|
+
id: "1",
|
|
706
|
+
method: "runs.create",
|
|
707
|
+
params: { spec: "s", input: "hi" },
|
|
708
|
+
}),
|
|
709
|
+
});
|
|
710
|
+
expect(res.status).toBe(200);
|
|
711
|
+
expect(await res.json()).toEqual({
|
|
712
|
+
protocol: PROTOCOL_VERSION,
|
|
713
|
+
id: "1",
|
|
714
|
+
result: { runId: "run_h", sessionId: "sess_h", tenantId: "tenant-a" },
|
|
715
|
+
});
|
|
716
|
+
});
|
|
717
|
+
});
|
|
718
|
+
|
|
719
|
+
test("missing Authorization header returns 401", async () => {
|
|
720
|
+
const { server } = makeServer();
|
|
721
|
+
await withHttp(server, async (base) => {
|
|
722
|
+
const res = await fetch(base, {
|
|
723
|
+
method: "POST",
|
|
724
|
+
headers: { "content-type": "application/json" },
|
|
725
|
+
body: JSON.stringify({
|
|
726
|
+
protocol: PROTOCOL_VERSION,
|
|
727
|
+
id: "1",
|
|
728
|
+
method: "runs.create",
|
|
729
|
+
params: { spec: "s", input: "" },
|
|
730
|
+
}),
|
|
731
|
+
});
|
|
732
|
+
expect(res.status).toBe(401);
|
|
733
|
+
expect(await res.json()).toMatchObject({ error: { code: "unauthorized" } });
|
|
734
|
+
});
|
|
735
|
+
});
|
|
736
|
+
|
|
737
|
+
test("a non-Bearer Authorization scheme is treated as no token (401)", async () => {
|
|
738
|
+
const { server } = makeServer();
|
|
739
|
+
await withHttp(server, async (base) => {
|
|
740
|
+
const res = await fetch(base, {
|
|
741
|
+
method: "POST",
|
|
742
|
+
headers: { "content-type": "application/json", authorization: "Basic abc123" },
|
|
743
|
+
body: JSON.stringify({
|
|
744
|
+
protocol: PROTOCOL_VERSION,
|
|
745
|
+
id: "1",
|
|
746
|
+
method: "runs.create",
|
|
747
|
+
params: { spec: "s", input: "" },
|
|
748
|
+
}),
|
|
749
|
+
});
|
|
750
|
+
expect(res.status).toBe(401);
|
|
751
|
+
});
|
|
752
|
+
});
|
|
753
|
+
|
|
754
|
+
test("a non-JSON body returns 400 before auth is even consulted", async () => {
|
|
755
|
+
const { server } = makeServer();
|
|
756
|
+
await withHttp(server, async (base) => {
|
|
757
|
+
const res = await fetch(base, {
|
|
758
|
+
method: "POST",
|
|
759
|
+
headers: { "content-type": "application/json", authorization: "Bearer whatever" },
|
|
760
|
+
body: "}{ not json",
|
|
761
|
+
});
|
|
762
|
+
expect(res.status).toBe(400);
|
|
763
|
+
expect(await res.json()).toMatchObject({
|
|
764
|
+
error: { code: "bad_request", message: expect.stringMatching(/must be JSON/) },
|
|
765
|
+
});
|
|
766
|
+
});
|
|
767
|
+
});
|
|
768
|
+
|
|
769
|
+
test("an over-budget request returns HTTP 429", async () => {
|
|
770
|
+
const tenantA = buildTenant("tenant-a", { tenantsRoot: tmp });
|
|
771
|
+
const tinyA: Tenant = { ...tenantA, budget: { maxInputTokens: 50, maxOutputTokens: 50 } };
|
|
772
|
+
const server = createGatewayServer({
|
|
773
|
+
jwtSecret: SECRET,
|
|
774
|
+
tenantsRoot: tmp,
|
|
775
|
+
handler: async () => ({ ok: true }),
|
|
776
|
+
tenantOverrides: { "tenant-a": tinyA },
|
|
777
|
+
});
|
|
778
|
+
await server.recordUsage("tenant-a", { input: 50, output: 0 });
|
|
779
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
780
|
+
await withHttp(server, async (base) => {
|
|
781
|
+
const res = await fetch(base, {
|
|
782
|
+
method: "POST",
|
|
783
|
+
headers: { "content-type": "application/json", authorization: `Bearer ${token}` },
|
|
784
|
+
body: JSON.stringify({
|
|
785
|
+
protocol: PROTOCOL_VERSION,
|
|
786
|
+
id: "1",
|
|
787
|
+
method: "runs.create",
|
|
788
|
+
params: { spec: "s", input: "" },
|
|
789
|
+
}),
|
|
790
|
+
});
|
|
791
|
+
expect(res.status).toBe(429);
|
|
792
|
+
});
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
test("binds on an explicit host argument", async () => {
|
|
796
|
+
const { server } = makeServer();
|
|
797
|
+
const { port, close } = await server.listen(0, "127.0.0.1");
|
|
798
|
+
try {
|
|
799
|
+
const token = signJwt({ tenant_id: "tenant-a" }, SECRET);
|
|
800
|
+
const res = await fetch(`http://127.0.0.1:${port}`, {
|
|
801
|
+
method: "POST",
|
|
802
|
+
headers: { "content-type": "application/json", authorization: `Bearer ${token}` },
|
|
803
|
+
body: JSON.stringify({
|
|
804
|
+
protocol: PROTOCOL_VERSION,
|
|
805
|
+
id: "1",
|
|
806
|
+
method: "runs.create",
|
|
807
|
+
params: { spec: "s", input: "" },
|
|
808
|
+
}),
|
|
809
|
+
});
|
|
810
|
+
expect(res.status).toBe(200);
|
|
811
|
+
} finally {
|
|
812
|
+
await close();
|
|
813
|
+
}
|
|
814
|
+
});
|
|
221
815
|
});
|
package/src/index.ts
CHANGED
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
|
|
27
27
|
import { createHmac, timingSafeEqual } from "node:crypto";
|
|
28
28
|
import { type AppendInput, type AuditLog, openAuditLog } from "@crewhaus/audit-log";
|
|
29
|
+
import { type BudgetStore, InMemoryBudgetStore } from "@crewhaus/durable-state";
|
|
29
30
|
import { CrewhausError } from "@crewhaus/errors";
|
|
30
31
|
import {
|
|
31
32
|
ErrorCode,
|
|
@@ -56,6 +57,20 @@ export type JwtClaims = {
|
|
|
56
57
|
// HS256 JWT — minimal verifier and signer (no external deps).
|
|
57
58
|
// ---------------------------------------------------------------------------
|
|
58
59
|
|
|
60
|
+
/** Only HS256 is accepted — guards against `alg` confusion (e.g. `none`). */
|
|
61
|
+
const JWT_ALG = "HS256";
|
|
62
|
+
/** Only compact JWS bearer tokens are accepted. */
|
|
63
|
+
const JWT_TYP = "JWT";
|
|
64
|
+
/** Reject tokens whose lifetime (`exp - iat`) exceeds this when `iat` is present. */
|
|
65
|
+
const MAX_JWT_LIFETIME_SECONDS = 24 * 60 * 60;
|
|
66
|
+
/** Allowed clock skew when checking `iat` is not in the future. */
|
|
67
|
+
const IAT_SKEW_MS = 60_000;
|
|
68
|
+
|
|
69
|
+
type JwtHeader = {
|
|
70
|
+
readonly alg?: string;
|
|
71
|
+
readonly typ?: string;
|
|
72
|
+
};
|
|
73
|
+
|
|
59
74
|
function b64urlEncode(input: Uint8Array | string): string {
|
|
60
75
|
const buf = typeof input === "string" ? Buffer.from(input, "utf8") : Buffer.from(input);
|
|
61
76
|
return buf.toString("base64").replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
|
|
@@ -67,8 +82,13 @@ function b64urlDecode(input: string): Buffer {
|
|
|
67
82
|
}
|
|
68
83
|
|
|
69
84
|
export function signJwt(claims: JwtClaims, secret: string): string {
|
|
70
|
-
|
|
71
|
-
|
|
85
|
+
// Convenience minter (tests + smoke only). Default `iat`/`exp` so emitted
|
|
86
|
+
// tokens satisfy the verifier's mandatory-`exp` + bounded-lifetime contract;
|
|
87
|
+
// production tokens come from an external IDP.
|
|
88
|
+
const iat = claims.iat ?? Math.floor(Date.now() / 1000);
|
|
89
|
+
const exp = claims.exp ?? iat + 60 * 60;
|
|
90
|
+
const header = b64urlEncode(JSON.stringify({ alg: JWT_ALG, typ: JWT_TYP }));
|
|
91
|
+
const body = b64urlEncode(JSON.stringify({ ...claims, iat, exp }));
|
|
72
92
|
const data = `${header}.${body}`;
|
|
73
93
|
const sig = createHmac("sha256", secret).update(data).digest();
|
|
74
94
|
return `${data}.${b64urlEncode(sig)}`;
|
|
@@ -80,6 +100,20 @@ export function verifyJwt(token: string, secret: string, now: () => number = Dat
|
|
|
80
100
|
throw new GatewayServerError("malformed JWT — expected 3 segments");
|
|
81
101
|
}
|
|
82
102
|
const [headerB64, bodyB64, sigB64] = parts as [string, string, string];
|
|
103
|
+
// Validate the header (alg/typ) BEFORE spending an HMAC — rejects
|
|
104
|
+
// `alg: none` / algorithm-confusion tokens up front.
|
|
105
|
+
let header: JwtHeader;
|
|
106
|
+
try {
|
|
107
|
+
header = JSON.parse(b64urlDecode(headerB64).toString("utf8")) as JwtHeader;
|
|
108
|
+
} catch (err) {
|
|
109
|
+
throw new GatewayServerError("malformed JWT header", err);
|
|
110
|
+
}
|
|
111
|
+
if (header.alg !== JWT_ALG) {
|
|
112
|
+
throw new GatewayServerError(`JWT unsupported alg — expected ${JWT_ALG}`);
|
|
113
|
+
}
|
|
114
|
+
if (header.typ !== JWT_TYP) {
|
|
115
|
+
throw new GatewayServerError(`JWT unsupported typ — expected ${JWT_TYP}`);
|
|
116
|
+
}
|
|
83
117
|
const data = `${headerB64}.${bodyB64}`;
|
|
84
118
|
const expected = createHmac("sha256", secret).update(data).digest();
|
|
85
119
|
let actual: Buffer;
|
|
@@ -105,11 +139,26 @@ export function verifyJwt(token: string, secret: string, now: () => number = Dat
|
|
|
105
139
|
}
|
|
106
140
|
validateTenantId(claims.tenant_id);
|
|
107
141
|
const nowMs = now();
|
|
108
|
-
|
|
142
|
+
// `exp` is mandatory — an absent (or non-numeric) `exp` must not mean
|
|
143
|
+
// "never expires" (CWE-613).
|
|
144
|
+
if (typeof claims.exp !== "number" || !Number.isFinite(claims.exp)) {
|
|
145
|
+
throw new GatewayServerError("JWT missing exp claim");
|
|
146
|
+
}
|
|
147
|
+
if (claims.exp * 1000 <= nowMs) {
|
|
109
148
|
throw new GatewayServerError("JWT expired");
|
|
110
149
|
}
|
|
111
|
-
if (claims.iat !== undefined
|
|
112
|
-
|
|
150
|
+
if (claims.iat !== undefined) {
|
|
151
|
+
if (typeof claims.iat !== "number" || !Number.isFinite(claims.iat)) {
|
|
152
|
+
throw new GatewayServerError("JWT malformed iat claim");
|
|
153
|
+
}
|
|
154
|
+
if (claims.iat * 1000 > nowMs + IAT_SKEW_MS) {
|
|
155
|
+
throw new GatewayServerError("JWT iat in the future");
|
|
156
|
+
}
|
|
157
|
+
// Bound the maximum lifetime — a token cannot outlive its `iat` by more
|
|
158
|
+
// than the configured ceiling.
|
|
159
|
+
if (claims.exp - claims.iat > MAX_JWT_LIFETIME_SECONDS) {
|
|
160
|
+
throw new GatewayServerError("JWT lifetime exceeds maximum");
|
|
161
|
+
}
|
|
113
162
|
}
|
|
114
163
|
return claims;
|
|
115
164
|
}
|
|
@@ -135,6 +184,31 @@ export type CreateGatewayServerOptions = {
|
|
|
135
184
|
*/
|
|
136
185
|
readonly tenantOverrides?: Readonly<Record<string, Tenant>>;
|
|
137
186
|
readonly now?: () => number;
|
|
187
|
+
/**
|
|
188
|
+
* Optional per-request cost estimate. It is RESERVED against the tenant's
|
|
189
|
+
* budget before the handler runs and released after — closing the TOCTOU
|
|
190
|
+
* where concurrent requests all pass `checkBudget` (which only sees
|
|
191
|
+
* already-recorded usage) before any of them records its usage, each then
|
|
192
|
+
* running to full cost. A generic gateway can't know token costs, so supply
|
|
193
|
+
* a realistic estimate here to bound in-flight spend; the default reserves
|
|
194
|
+
* nothing (behavior-preserving). Actual usage is still recorded out-of-band
|
|
195
|
+
* via `recordUsage`.
|
|
196
|
+
*/
|
|
197
|
+
readonly estimateUsage?: (args: {
|
|
198
|
+
readonly method: MethodT;
|
|
199
|
+
readonly params: unknown;
|
|
200
|
+
readonly tenant: Tenant;
|
|
201
|
+
}) => UsageDelta;
|
|
202
|
+
/**
|
|
203
|
+
* Pluggable budget accounting (audit follow-up R3). Default: in-memory —
|
|
204
|
+
* per-process semantics identical to before the seam existed. Multi-process
|
|
205
|
+
* single-host deployments pass a `SqliteBudgetStore` (or a spec-built store
|
|
206
|
+
* via `createBudgetStore("sqlite:<path>")`) so every replica reserves and
|
|
207
|
+
* records against the SAME counters; multi-host deployments implement
|
|
208
|
+
* `BudgetStore` against a network store. Without this, N replicas multiply
|
|
209
|
+
* every tenant budget by N.
|
|
210
|
+
*/
|
|
211
|
+
readonly budgetStore?: BudgetStore;
|
|
138
212
|
};
|
|
139
213
|
|
|
140
214
|
export type UsageDelta = {
|
|
@@ -154,16 +228,25 @@ export interface GatewayServer {
|
|
|
154
228
|
* HTTP layer does.
|
|
155
229
|
*/
|
|
156
230
|
handle(request: { readonly bearer?: string; readonly body: unknown }): Promise<unknown>;
|
|
157
|
-
/**
|
|
158
|
-
|
|
231
|
+
/**
|
|
232
|
+
* Record token usage against a tenant's running total. Async since the
|
|
233
|
+
* budget store may be durable (audit R3); await it so usage is committed
|
|
234
|
+
* before the response is considered complete.
|
|
235
|
+
*/
|
|
236
|
+
recordUsage(tenantId: string, delta: UsageDelta): Promise<void>;
|
|
159
237
|
/** Read current usage (mostly for tests). */
|
|
160
|
-
usage(tenantId: string): { input: number; output: number }
|
|
238
|
+
usage(tenantId: string): Promise<{ input: number; output: number }>;
|
|
161
239
|
/** Get or build the audit log for a tenant. Memoised. */
|
|
162
240
|
getAuditLog(tenant: Tenant): Promise<AuditLog>;
|
|
163
241
|
}
|
|
164
242
|
|
|
243
|
+
const ZERO_USAGE: UsageDelta = { input: 0, output: 0 };
|
|
244
|
+
|
|
165
245
|
export function createGatewayServer(opts: CreateGatewayServerOptions): GatewayServer {
|
|
166
|
-
|
|
246
|
+
// Budget accounting (recorded usage + in-flight reservations) lives behind
|
|
247
|
+
// the BudgetStore seam; the in-memory default preserves the pre-seam
|
|
248
|
+
// per-process semantics verbatim.
|
|
249
|
+
const budget = opts.budgetStore ?? new InMemoryBudgetStore();
|
|
167
250
|
const auditLogByTenant = new Map<string, AuditLog>();
|
|
168
251
|
const now = opts.now ?? Date.now;
|
|
169
252
|
|
|
@@ -183,28 +266,6 @@ export function createGatewayServer(opts: CreateGatewayServerOptions): GatewaySe
|
|
|
183
266
|
return log;
|
|
184
267
|
}
|
|
185
268
|
|
|
186
|
-
function bumpUsage(tenantId: string, delta: UsageDelta): void {
|
|
187
|
-
const cur = usageByTenant.get(tenantId) ?? { input: 0, output: 0 };
|
|
188
|
-
usageByTenant.set(tenantId, {
|
|
189
|
-
input: cur.input + delta.input,
|
|
190
|
-
output: cur.output + delta.output,
|
|
191
|
-
});
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
function checkBudget(tenant: Tenant): void {
|
|
195
|
-
const used = usageByTenant.get(tenant.id) ?? { input: 0, output: 0 };
|
|
196
|
-
if (used.input >= tenant.budget.maxInputTokens) {
|
|
197
|
-
throw new GatewayServerError(
|
|
198
|
-
`budget exceeded: input tokens ${used.input}/${tenant.budget.maxInputTokens}`,
|
|
199
|
-
);
|
|
200
|
-
}
|
|
201
|
-
if (used.output >= tenant.budget.maxOutputTokens) {
|
|
202
|
-
throw new GatewayServerError(
|
|
203
|
-
`budget exceeded: output tokens ${used.output}/${tenant.budget.maxOutputTokens}`,
|
|
204
|
-
);
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
269
|
async function handleEnvelope(envelope: unknown, bearer: string | undefined): Promise<unknown> {
|
|
209
270
|
let id = "?";
|
|
210
271
|
try {
|
|
@@ -215,19 +276,38 @@ export function createGatewayServer(opts: CreateGatewayServerOptions): GatewaySe
|
|
|
215
276
|
const tenant = tenantFor(claims);
|
|
216
277
|
const decoded = decodeRequest(envelope);
|
|
217
278
|
id = decoded.id;
|
|
218
|
-
|
|
219
|
-
//
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
)
|
|
230
|
-
|
|
279
|
+
// Atomically reserve the estimated cost against recorded + in-flight
|
|
280
|
+
// usage (the store refuses when the total would exceed the budget on
|
|
281
|
+
// either dimension) — then release once the request finishes (actual
|
|
282
|
+
// usage is recorded out-of-band via recordUsage in the meantime). The
|
|
283
|
+
// check-and-reserve is a single atomic store operation so concurrent
|
|
284
|
+
// requests — including ones in OTHER processes sharing a durable
|
|
285
|
+
// store — can't all slip past the cap.
|
|
286
|
+
const estimate =
|
|
287
|
+
opts.estimateUsage?.({ method: decoded.method, params: decoded.params, tenant }) ??
|
|
288
|
+
ZERO_USAGE;
|
|
289
|
+
const reservation = await budget.tryReserve(tenant.id, estimate, tenant.budget);
|
|
290
|
+
if (!reservation.ok) {
|
|
291
|
+
throw new GatewayServerError(
|
|
292
|
+
`budget exceeded: ${reservation.reason} tokens ${reservation.total}/${reservation.limit}`,
|
|
293
|
+
);
|
|
294
|
+
}
|
|
295
|
+
try {
|
|
296
|
+
// Audit every authenticated gateway request.
|
|
297
|
+
const log = await getAuditLog(tenant);
|
|
298
|
+
const requestPayload: AppendInput["payload"] = {
|
|
299
|
+
method: decoded.method,
|
|
300
|
+
tenantId: tenant.id,
|
|
301
|
+
sub: claims.sub,
|
|
302
|
+
};
|
|
303
|
+
await log.append({ kind: "gateway_request", payload: requestPayload });
|
|
304
|
+
const result = await withTenant(tenant, () =>
|
|
305
|
+
opts.handler({ method: decoded.method, params: decoded.params, tenant }),
|
|
306
|
+
);
|
|
307
|
+
return encodeSuccess(id, result);
|
|
308
|
+
} finally {
|
|
309
|
+
await budget.release(tenant.id, estimate);
|
|
310
|
+
}
|
|
231
311
|
} catch (err) {
|
|
232
312
|
if (err instanceof GatewayProtocolError) {
|
|
233
313
|
return encodeError(id, ErrorCode.BadRequest, err.message);
|
|
@@ -291,17 +371,22 @@ export function createGatewayServer(opts: CreateGatewayServerOptions): GatewaySe
|
|
|
291
371
|
handle(req): Promise<unknown> {
|
|
292
372
|
return handleEnvelope(req.body, req.bearer);
|
|
293
373
|
},
|
|
294
|
-
recordUsage(tenantId, delta): void {
|
|
295
|
-
|
|
374
|
+
recordUsage(tenantId, delta): Promise<void> {
|
|
375
|
+
return budget.recordUsage(tenantId, delta);
|
|
296
376
|
},
|
|
297
|
-
usage(tenantId): { input: number; output: number } {
|
|
298
|
-
return
|
|
377
|
+
usage(tenantId): Promise<{ input: number; output: number }> {
|
|
378
|
+
return budget.usage(tenantId);
|
|
299
379
|
},
|
|
300
380
|
getAuditLog,
|
|
301
381
|
};
|
|
302
382
|
}
|
|
303
383
|
|
|
304
|
-
|
|
384
|
+
/**
|
|
385
|
+
* Map a wire `ErrorCode` to its HTTP status. Exported so reference clients
|
|
386
|
+
* and embedders can render the same status the daemon's HTTP layer does.
|
|
387
|
+
* Exhaustive over {@link ErrorCode}; unknown codes fall back to `200`.
|
|
388
|
+
*/
|
|
389
|
+
export function statusFor(code: string): number {
|
|
305
390
|
switch (code) {
|
|
306
391
|
case ErrorCode.Unauthorized:
|
|
307
392
|
return 401;
|