@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +97 -0
  2. package/drizzle/0000_productive_jackpot.sql +26 -0
  3. package/drizzle/0001_puzzling_purple_man.sql +26 -0
  4. package/drizzle/0002_sparkling_paper_doll.sql +15 -0
  5. package/drizzle/0003_married_senator_kelly.sql +1 -0
  6. package/drizzle/0004_crazy_miek.sql +2 -0
  7. package/drizzle/0005_tearful_randall_flagg.sql +1 -0
  8. package/drizzle/meta/0000_snapshot.json +232 -0
  9. package/drizzle/meta/0001_snapshot.json +434 -0
  10. package/drizzle/meta/0002_snapshot.json +551 -0
  11. package/drizzle/meta/0003_snapshot.json +557 -0
  12. package/drizzle/meta/0004_snapshot.json +573 -0
  13. package/drizzle/meta/0005_snapshot.json +574 -0
  14. package/drizzle/meta/_journal.json +48 -0
  15. package/drizzle.config.ts +7 -0
  16. package/package.json +42 -0
  17. package/src/agent-runner.test.ts +262 -0
  18. package/src/agent-runner.ts +262 -0
  19. package/src/chat/agent-loop.test.ts +119 -0
  20. package/src/chat/agent-loop.ts +73 -0
  21. package/src/chat/auto-apply.test.ts +237 -0
  22. package/src/chat/chat-handler.ts +111 -0
  23. package/src/chat/chat-service.streamturn.test.ts +417 -0
  24. package/src/chat/chat-service.test.ts +250 -0
  25. package/src/chat/chat-service.ts +923 -0
  26. package/src/chat/classifier-service.ts +64 -0
  27. package/src/chat/classifier.logic.test.ts +92 -0
  28. package/src/chat/classifier.logic.ts +71 -0
  29. package/src/chat/conversation-store.it.test.ts +203 -0
  30. package/src/chat/conversation-store.test.ts +248 -0
  31. package/src/chat/conversation-store.ts +237 -0
  32. package/src/chat/decision.logic.test.ts +45 -0
  33. package/src/chat/decision.logic.ts +54 -0
  34. package/src/chat/llm-provider.test.ts +63 -0
  35. package/src/chat/llm-provider.ts +67 -0
  36. package/src/chat/model-error.logic.test.ts +60 -0
  37. package/src/chat/model-error.logic.ts +65 -0
  38. package/src/chat/normalize-messages.logic.test.ts +101 -0
  39. package/src/chat/normalize-messages.logic.ts +65 -0
  40. package/src/chat/permission-mode.logic.test.ts +70 -0
  41. package/src/chat/permission-mode.logic.ts +45 -0
  42. package/src/chat/read-invoker.ts +72 -0
  43. package/src/chat/replay.test.ts +174 -0
  44. package/src/chat/scrub-content.test.ts +183 -0
  45. package/src/chat/scrub-content.ts +154 -0
  46. package/src/chat/sdk-tools.test.ts +168 -0
  47. package/src/chat/sdk-tools.ts +181 -0
  48. package/src/chat/title-service.test.ts +146 -0
  49. package/src/chat/title-service.ts +111 -0
  50. package/src/chat/title.logic.test.ts +98 -0
  51. package/src/chat/title.logic.ts +102 -0
  52. package/src/extension-points.ts +41 -0
  53. package/src/generated/docs-index.ts +3020 -0
  54. package/src/hardening/handler-authz.test.ts +282 -0
  55. package/src/hardening/no-secret-leak.test.ts +303 -0
  56. package/src/hooks.ts +33 -0
  57. package/src/index.ts +542 -0
  58. package/src/mcp/connection-registry.test.ts +25 -0
  59. package/src/mcp/connection-registry.ts +54 -0
  60. package/src/mcp/mcp-conformance.it.test.ts +128 -0
  61. package/src/mcp/server.test.ts +285 -0
  62. package/src/mcp/server.ts +300 -0
  63. package/src/mcp/tool-invoker.ts +65 -0
  64. package/src/openai-provider.test.ts +64 -0
  65. package/src/openai-provider.ts +146 -0
  66. package/src/projection.test.ts +97 -0
  67. package/src/projection.ts +132 -0
  68. package/src/propose-apply/args-hash.test.ts +26 -0
  69. package/src/propose-apply/args-hash.ts +30 -0
  70. package/src/propose-apply/service.test.ts +423 -0
  71. package/src/propose-apply/service.ts +419 -0
  72. package/src/propose-apply/store.test.ts +136 -0
  73. package/src/propose-apply/store.ts +224 -0
  74. package/src/propose-apply/token.test.ts +52 -0
  75. package/src/propose-apply/token.ts +71 -0
  76. package/src/rate-limit/spend-ledger.it.test.ts +224 -0
  77. package/src/rate-limit/spend-ledger.test.ts +176 -0
  78. package/src/rate-limit/spend-ledger.ts +162 -0
  79. package/src/rate-limit/tool-budget.it.test.ts +173 -0
  80. package/src/rate-limit/tool-budget.test.ts +58 -0
  81. package/src/rate-limit/tool-budget.ts +107 -0
  82. package/src/registry-wiring.test.ts +131 -0
  83. package/src/registry-wiring.ts +68 -0
  84. package/src/resolver.test.ts +156 -0
  85. package/src/resolver.ts +78 -0
  86. package/src/router.test.ts +78 -0
  87. package/src/router.ts +345 -0
  88. package/src/schema.ts +284 -0
  89. package/src/serializer.test.ts +88 -0
  90. package/src/serializer.ts +42 -0
  91. package/src/tool-registry.ts +58 -0
  92. package/src/tools/composite-tools.ts +24 -0
  93. package/src/tools/docs-tools.test.ts +150 -0
  94. package/src/tools/docs-tools.ts +115 -0
  95. package/src/tools/probe-url.test.ts +51 -0
  96. package/src/tools/probe-url.ts +146 -0
  97. package/src/tools/rank-docs.test.ts +153 -0
  98. package/src/tools/rank-docs.ts +209 -0
  99. package/src/tools/script-context-extract.test.ts +93 -0
  100. package/src/tools/script-context-extract.ts +283 -0
  101. package/src/tools/ssrf-guard.test.ts +69 -0
  102. package/src/tools/ssrf-guard.ts +108 -0
  103. package/src/tools/tool-set.e2e.test.ts +64 -0
  104. package/src/user-rpc-client.test.ts +45 -0
  105. package/src/user-rpc-client.ts +60 -0
  106. package/tsconfig.json +26 -0
@@ -0,0 +1,224 @@
1
+ import { and, eq, lt, gt } from "drizzle-orm";
2
+ import type { SafeDatabase } from "@checkstack/backend-api";
3
+ import type { AiToolEffect } from "@checkstack/ai-common";
4
+ import * as schema from "../schema";
5
+ import type { AiToolCallRow } from "../schema";
6
+ import { generateProposalNonce } from "./token";
7
+
8
/** The `SafeDatabase` handle specialised to this package's drizzle schema module. */
type AiDatabase = SafeDatabase<typeof schema>;
9
+
10
/**
 * Default proposal-token TTL in milliseconds — 10 minutes (LOCKED, §13.4).
 * Added to the proposal's creation time to produce its expiry timestamp.
 */
export const PROPOSAL_TTL_MS = 10 * 60_000;
12
+
13
/**
 * The actor an audit row is attributed to: either an interactive user or an
 * application, identified by `id`.
 */
export interface AuditPrincipal {
  /** Which class of actor this is. */
  kind: "user" | "application";
  /** Identifier of the actor within its `kind`. */
  id: string;
}
17
+
18
/**
 * The durable audit log + propose/apply token store. A `proposed` row IS the
 * token (decision §4, §13.4); there is no separate ephemeral table. All state
 * is in shared Postgres, so a token proposed on one pod is consumable on any
 * other (state-and-scale).
 */
export interface AiToolCallStore {
  /**
   * Record a directly-executed tool (read, or an automation-run mutate) as an
   * `executed` row.
   *
   * `effect` defaults to `"read"` when omitted. Pass the tool's real effect
   * when recording an automation-run mutate so the audit row is not labelled
   * as a read.
   */
  recordExecuted(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp" | "automation";
    conversationId?: string;
    toolName: string;
    effect?: AiToolEffect;
    argsHash: string;
    resultSnapshot?: Record<string, unknown>;
  }): Promise<AiToolCallRow>;

  /** Record a failed execute as a `failed` row carrying `error` (audit only). */
  recordFailed(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp" | "automation";
    conversationId?: string;
    toolName: string;
    effect: AiToolEffect;
    argsHash: string;
    error: string;
  }): Promise<AiToolCallRow>;

  /**
   * Persist a `proposed` row (the token store). Returns the row plus the fresh
   * nonce; the caller formats `propose:<id>.<nonce>`.
   *
   * `now` (default: current time) is stored as `proposedAt`, and the expiry is
   * `now + PROPOSAL_TTL_MS`.
   */
  createProposal(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp";
    conversationId?: string;
    toolName: string;
    effect: AiToolEffect;
    argsHash: string;
    proposedPayload: Record<string, unknown>;
    resultSnapshot?: Record<string, unknown>;
    now?: Date;
  }): Promise<{ row: AiToolCallRow; nonce: string }>;

  /**
   * Atomically consume a `proposed` row: a single
   * `UPDATE ... WHERE id = ? AND status = 'proposed' AND proposal_expires_at > now`
   * flips it to `applied` and RETURNS the row. A second apply finds
   * `status != 'proposed'` and gets `undefined` (single-use even under
   * concurrent calls). Returns `undefined` when the row is missing, already
   * consumed, expired, or the conditions otherwise don't hold.
   *
   * `applier` is the principal that actually called `apply` — stamped into
   * `appliedByKind`/`appliedById` so the audit log records WHO applied, not just
   * who proposed (P3 review item 1). Usually identical to the proposer.
   *
   * The nonce is NOT checked here; callers verify it separately.
   */
  consumeProposal(args: {
    rowId: string;
    applier: AuditPrincipal;
    now?: Date;
  }): Promise<AiToolCallRow | undefined>;

  /**
   * Fetch a row by id without consuming it (for nonce/TTL checks). The lookup
   * filters on id only, so the returned row may be in any status.
   */
  getProposal(rowId: string): Promise<AiToolCallRow | undefined>;

  /**
   * Sweep: flip `proposed` rows whose expiry is strictly before `now` to
   * `expired`, retaining them as audit history. Returns the number of rows
   * expired.
   */
  expireStaleProposals(now?: Date): Promise<number>;
}

/**
 * Build the Postgres-backed {@link AiToolCallStore} on top of the given
 * database handle. Every method issues exactly one statement against
 * `schema.aiToolCalls`.
 */
export function createAiToolCallStore({
  db,
}: {
  db: AiDatabase;
}): AiToolCallStore {
  return {
    async recordExecuted({
      principal,
      transport,
      conversationId,
      toolName,
      // Previously hard-coded to "read"; still the default so existing callers
      // are unaffected, but an automation-run mutate can now be audited
      // with its true effect.
      effect = "read",
      argsHash,
      resultSnapshot,
    }) {
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          effect,
          argsHash,
          status: "executed",
          resultSnapshot,
        })
        .returning();
      return row;
    },

    async recordFailed({
      principal,
      transport,
      conversationId,
      toolName,
      effect,
      argsHash,
      error,
    }) {
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          effect,
          argsHash,
          status: "failed",
          error,
        })
        .returning();
      return row;
    },

    async createProposal({
      principal,
      transport,
      conversationId,
      toolName,
      effect,
      argsHash,
      proposedPayload,
      resultSnapshot,
      now = new Date(),
    }) {
      // The nonce is stored on the row and also handed back to the caller,
      // which embeds it in the opaque token.
      const nonce = generateProposalNonce();
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          effect,
          argsHash,
          status: "proposed",
          proposalNonce: nonce,
          proposalExpiresAt: new Date(now.getTime() + PROPOSAL_TTL_MS),
          proposedPayload,
          resultSnapshot,
          proposedAt: now,
        })
        .returning();
      return { row, nonce };
    },

    async consumeProposal({ rowId, applier, now = new Date() }) {
      // Atomic single-use: only a still-`proposed`, non-expired row transitions
      // to `applied`. A concurrent second apply matches zero rows, so the
      // destructured `row` is `undefined`.
      const [row] = await db
        .update(schema.aiToolCalls)
        .set({
          status: "applied",
          appliedAt: now,
          appliedByKind: applier.kind,
          appliedById: applier.id,
        })
        .where(
          and(
            eq(schema.aiToolCalls.id, rowId),
            eq(schema.aiToolCalls.status, "proposed"),
            gt(schema.aiToolCalls.proposalExpiresAt, now),
          ),
        )
        .returning();
      return row;
    },

    async getProposal(rowId) {
      const rows = await db
        .select()
        .from(schema.aiToolCalls)
        .where(eq(schema.aiToolCalls.id, rowId))
        .limit(1);
      return rows[0];
    },

    async expireStaleProposals(now = new Date()) {
      // Strict `<`: a row expiring exactly at `now` is already unconsumable
      // (consume requires expiry > now) and is picked up by the next sweep.
      const expired = await db
        .update(schema.aiToolCalls)
        .set({ status: "expired" })
        .where(
          and(
            eq(schema.aiToolCalls.status, "proposed"),
            lt(schema.aiToolCalls.proposalExpiresAt, now),
          ),
        )
        .returning({ id: schema.aiToolCalls.id });
      return expired.length;
    },
  };
}
@@ -0,0 +1,52 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ formatProposalToken,
4
+ generateProposalNonce,
5
+ nonceMatches,
6
+ parseProposalToken,
7
+ } from "./token";
8
+
9
// Unit tests for the pure proposal-token codec: token grammar (format/parse)
// and the constant-time nonce comparison.
describe("proposal token codec", () => {
  test("format -> parse round-trips id and nonce", () => {
    const rowId = "11111111-2222-3333-4444-555555555555";
    const nonce = generateProposalNonce();
    const token = formatProposalToken({ rowId, nonce });
    expect(token.startsWith("propose:")).toBe(true);
    const parsed = parseProposalToken(token);
    expect(parsed).toEqual({ rowId, nonce });
  });

  test("nonce is 64 hex chars (32 random bytes)", () => {
    const nonce = generateProposalNonce();
    expect(nonce).toMatch(/^[0-9a-f]{64}$/);
  });

  test("rejects a token without the propose: prefix", () => {
    expect(parseProposalToken("apply:abc.def")).toBeUndefined();
  });

  test("rejects a token with no separator", () => {
    expect(parseProposalToken("propose:abcdef")).toBeUndefined();
  });

  test("rejects a token with an empty id", () => {
    expect(parseProposalToken("propose:.abc")).toBeUndefined();
  });

  // The parser documents rejection of an empty id OR an empty nonce; this pins
  // the nonce half, which was previously untested.
  test("rejects a token with an empty nonce", () => {
    expect(parseProposalToken("propose:abc.")).toBeUndefined();
  });

  test("splits on the FIRST dot so a hex nonce is preserved whole", () => {
    // Defensive: nonces are hex (no dots), but parsing must not lose data even
    // if a future nonce format contained a dot.
    const parsed = parseProposalToken("propose:row1.aa.bb");
    expect(parsed).toEqual({ rowId: "row1", nonce: "aa.bb" });
  });

  test("nonceMatches is true for equal nonces, false otherwise", () => {
    const nonce = generateProposalNonce();
    expect(nonceMatches({ candidate: nonce, stored: nonce })).toBe(true);
    expect(nonceMatches({ candidate: "deadbeef", stored: nonce })).toBe(false);
  });

  test("nonceMatches returns false on length mismatch without throwing", () => {
    expect(nonceMatches({ candidate: "ab", stored: "abcd" })).toBe(false);
  });
});
@@ -0,0 +1,71 @@
1
+ import { randomBytes, timingSafeEqual } from "node:crypto";
2
+
3
+ /**
4
+ * Proposal token codec (decision §13.4).
5
+ *
6
+ * A proposal is a row in `ai_tool_calls` with `status = "proposed"`. The opaque
7
+ * token handed to the caller is `propose:<rowId>.<nonce>`. `apply` parses it,
8
+ * fetches the row by id, and accepts only if the row is still `proposed`, the
9
+ * nonce matches in CONSTANT TIME, and the TTL has not elapsed.
10
+ *
11
+ * This module is pure (no DB) so the token grammar + constant-time compare are
12
+ * unit-tested in isolation.
13
+ */
14
+
15
/** Literal prefix every proposal token starts with. */
const TOKEN_PREFIX = "propose:";

/** Number of random bytes in a nonce; hex-encoding doubles this to 64 chars. */
const NONCE_BYTES = 32;
17
+
18
/** The two components carried by a `propose:<rowId>.<nonce>` token. */
export interface ParsedProposalToken {
  /** The part of the token before the first dot. */
  rowId: string;
  /** Everything after the first dot. */
  nonce: string;
}
22
+
23
/**
 * Produce a fresh nonce: `NONCE_BYTES` bytes from the crypto RNG, rendered as
 * lowercase hex.
 */
export function generateProposalNonce(): string {
  const entropy = randomBytes(NONCE_BYTES);
  return entropy.toString("hex");
}
27
+
28
/**
 * Build the opaque token for a proposal row: the fixed prefix, the row id, a
 * dot, then the nonce.
 */
export function formatProposalToken({
  rowId,
  nonce,
}: ParsedProposalToken): string {
  return TOKEN_PREFIX.concat(rowId, ".", nonce);
}
35
+
36
/**
 * Decode an opaque proposal token into its row id and nonce.
 *
 * Malformed input never throws: a wrong prefix, a missing separator, an empty
 * id or an empty nonce all yield `undefined`, so callers can reject with a
 * plain check.
 */
export function parseProposalToken(
  token: string,
): ParsedProposalToken | undefined {
  if (!token.startsWith(TOKEN_PREFIX)) {
    return undefined;
  }
  const payload = token.substring(TOKEN_PREFIX.length);
  // Both the UUID row id and the hex nonce are dot-free, so the FIRST dot is
  // the separator; anything after it (further dots included) is the nonce.
  const dotAt = payload.indexOf(".");
  // -1 => no separator at all; 0 => the row id would be empty.
  if (dotAt < 1) {
    return undefined;
  }
  const rowId = payload.substring(0, dotAt);
  const nonce = payload.substring(dotAt + 1);
  return nonce.length > 0 ? { rowId, nonce } : undefined;
}
55
+
56
/**
 * Compare a caller-supplied nonce with the stored one using
 * `timingSafeEqual` on their UTF-8 bytes.
 *
 * `timingSafeEqual` throws on buffers of different length, so unequal byte
 * lengths are reported as a plain `false` before it is called.
 */
export function nonceMatches({
  candidate,
  stored,
}: {
  candidate: string;
  stored: string;
}): boolean {
  const candidateBytes = Buffer.from(candidate, "utf8");
  const storedBytes = Buffer.from(stored, "utf8");
  return (
    candidateBytes.length === storedBytes.length &&
    timingSafeEqual(candidateBytes, storedBytes)
  );
}
@@ -0,0 +1,224 @@
1
+ /**
2
+ * Per-integration LLM spend-cap cross-pod enforcement (real Postgres) — Phase 6,
3
+ * state-and-scale §14.5.
4
+ *
5
+ * The spend cap is a shared-Postgres ROLLING-WINDOW SUM over `ai_spend`: the
6
+ * spend is the total tokens a principal has incurred against an integration in
7
+ * the trailing window. Because the sum is read from the SAME shared table that
8
+ * every pod writes to, the cap holds across ALL pods. An in-memory per-pod token
9
+ * counter would let N pods EACH allow the cap = N x the intended spend — a leak a
10
+ * single-process unit test can never catch. This test simulates TWO pods (two
11
+ * independent pools to the SAME schema) and asserts the combined token usage is
12
+ * counted against ONE shared cap (mirrors the tool-budget cross-pod IT).
13
+ *
14
+ * Gated behind `CHECKSTACK_IT=1`; connection from `CHECKSTACK_IT_PG_URL`. Runs in
15
+ * a freshly created, self-cleaning schema.
16
+ */
17
+ import { afterAll, beforeAll, describe, expect, it } from "bun:test";
18
+ import { drizzle } from "drizzle-orm/node-postgres";
19
+ import { Pool } from "pg";
20
+ import type { SafeDatabase } from "@checkstack/backend-api";
21
+ import type { AiSpendCap } from "@checkstack/ai-common";
22
+ import * as schema from "../schema";
23
+ import type { AuditPrincipal } from "../propose-apply/store";
24
+ import {
25
+ SpendCapExceededError,
26
+ checkSpendCap,
27
+ enforceSpendCap,
28
+ recordSpend,
29
+ } from "./spend-ledger";
30
+
31
/** Connection string for the integration database; falls back to a local default. */
const PG_URL =
  process.env.CHECKSTACK_IT_PG_URL ??
  "postgres://postgres:postgres@localhost:5432/postgres";

/** Per-run schema name: a fixed prefix plus a random UUID with its dashes removed. */
const SCHEMA = `it_ai_spend_${crypto.randomUUID().split("-").join("")}`;

/** Integration id the tests record spend against. */
const INTEGRATION = "ai.openai-compatible.c1";
37
+
38
/** One simulated pod: its own connection pool plus a drizzle handle over it. */
interface Pod {
  /** The raw `pg` pool, used directly for hand-written SQL in the tests. */
  pool: Pool;
  /** Drizzle database handle built on `pool`. */
  db: SafeDatabase<typeof schema>;
  /** Close the pod's pool. */
  end(): Promise<void>;
}
43
+
44
/**
 * Create a simulated pod: a fresh pool against `PG_URL` whose connections get
 * `search_path` set to the per-run schema, wrapped in a drizzle handle.
 */
function makePod(): Pod {
  const searchPathOption = `-c search_path=${SCHEMA}`;
  const pool = new Pool({
    connectionString: PG_URL,
    options: searchPathOption,
  });
  return {
    pool,
    // Test-only cast: the raw drizzle handle stands in for the SafeDatabase type.
    db: drizzle(pool, { schema }) as unknown as SafeDatabase<typeof schema>,
    end: () => pool.end(),
  };
}
52
+
53
// Integration suite (skipped unless CHECKSTACK_IT is set): two independent
// pools ("pods") share one schema, and every assertion checks that the spend
// cap is computed from the shared `ai_spend` table rather than per pod.
describe.skipIf(!process.env.CHECKSTACK_IT)(
  "per-integration spend cap (shared Postgres, cross-pod)",
  () => {
    let admin: Pool;
    let podA: Pod;
    let podB: Pod;

    beforeAll(async () => {
      admin = new Pool({ connectionString: PG_URL });
      await admin.query(`CREATE SCHEMA "${SCHEMA}"`);
      await admin.query(`
        CREATE TABLE "${SCHEMA}".ai_spend (
          id text PRIMARY KEY,
          integration_id text NOT NULL,
          principal_kind text NOT NULL,
          principal_id text NOT NULL,
          conversation_id text,
          model text,
          input_tokens integer NOT NULL DEFAULT 0,
          output_tokens integer NOT NULL DEFAULT 0,
          total_tokens integer NOT NULL DEFAULT 0,
          created_at timestamp NOT NULL DEFAULT now()
        )
      `);
      podA = makePod();
      podB = makePod();
    });

    afterAll(async () => {
      // Teardown must be failure-safe: a pod that fails to close must not stop
      // the schema from being dropped or the admin pool from being released,
      // and a `beforeAll` that died before `admin` was assigned must not be
      // masked by a TypeError here.
      try {
        await podA?.end();
      } finally {
        try {
          await podB?.end();
        } finally {
          if (admin) {
            try {
              await admin.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
            } finally {
              await admin.end();
            }
          }
        }
      }
    });

    it("sums token usage from BOTH pods against ONE shared cap (no N x leak)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-u1" };
      const cap: AiSpendCap = { tokenBudget: 1000, windowMinutes: 60 };

      // Record turns on ALTERNATING pods. Whichever pod later checks the cap
      // sees the COMBINED total, because the ledger is the shared table.
      await recordSpend({
        db: podA.db,
        integrationId: INTEGRATION,
        principal,
        usage: { inputTokens: 200, outputTokens: 100 }, // 300
      });
      await recordSpend({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        usage: { inputTokens: 250, outputTokens: 150 }, // 400 -> 700
      });

      // 700 < 1000: a new turn is within budget, read from EITHER pod.
      const onA = await checkSpendCap({
        db: podA.db,
        integrationId: INTEGRATION,
        principal,
        cap,
      });
      const onB = await checkSpendCap({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        cap,
      });
      expect(onA.used).toBe(700);
      expect(onB.used).toBe(700);
      expect(onA.allowed).toBe(true);
      expect(onB.allowed).toBe(true);

      // One more turn (on pod B) crosses the cap.
      await recordSpend({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        usage: { inputTokens: 200, outputTokens: 200 }, // 400 -> 1100
      });

      // Now BOTH pods agree the principal is over the SHARED cap — not 1000/pod.
      const overA = await checkSpendCap({
        db: podA.db,
        integrationId: INTEGRATION,
        principal,
        cap,
      });
      const overB = await checkSpendCap({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        cap,
      });
      expect(overA.used).toBe(1100);
      expect(overB.used).toBe(1100);
      expect(overA.allowed).toBe(false);
      expect(overB.allowed).toBe(false);

      await expect(
        enforceSpendCap({
          db: podA.db,
          integrationId: INTEGRATION,
          principal,
          cap,
        }),
      ).rejects.toBeInstanceOf(SpendCapExceededError);
      await expect(
        enforceSpendCap({
          db: podB.db,
          integrationId: INTEGRATION,
          principal,
          cap,
        }),
      ).rejects.toBeInstanceOf(SpendCapExceededError);
    });

    it("the cap is per-integration: spend on one integration does not block another", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-u2" };
      const cap: AiSpendCap = { tokenBudget: 100, windowMinutes: 60 };
      // A second integration id that never receives any spend in this suite.
      const otherIntegration = "ai.openai-compatible.c2";
      await recordSpend({
        db: podA.db,
        integrationId: INTEGRATION,
        principal,
        usage: { inputTokens: 200, outputTokens: 0 }, // over on c1
      });
      // c1 is over; c2 has no spend, so a turn there is still allowed.
      await expect(
        enforceSpendCap({
          db: podB.db,
          integrationId: INTEGRATION,
          principal,
          cap,
        }),
      ).rejects.toBeInstanceOf(SpendCapExceededError);
      await expect(
        enforceSpendCap({
          db: podB.db,
          integrationId: otherIntegration,
          principal,
          cap,
        }),
      ).resolves.toBeUndefined();
    });

    it("the window slides: only recent spend counts (older turns drop out)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-window" };
      // An OLD turn outside the window, inserted with raw SQL so `created_at`
      // can be back-dated.
      await podA.pool.query(
        `INSERT INTO "${SCHEMA}".ai_spend
           (id, integration_id, principal_kind, principal_id, total_tokens, created_at)
         VALUES ($1, $2, 'user', $3, 5000, now() - interval '120 minutes')`,
        [crypto.randomUUID(), INTEGRATION, principal.id],
      );
      // A recent one.
      await recordSpend({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        usage: { inputTokens: 10, outputTokens: 10 },
      });
      // With a 60-minute window the old 5000-token turn is excluded.
      const result = await checkSpendCap({
        db: podB.db,
        integrationId: INTEGRATION,
        principal,
        cap: { tokenBudget: 1000, windowMinutes: 60 },
      });
      expect(result.used).toBe(20);
      expect(result.allowed).toBe(true);
    });
  },
);