@checkstack/ai-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/drizzle/0000_productive_jackpot.sql +26 -0
- package/drizzle/0001_puzzling_purple_man.sql +26 -0
- package/drizzle/0002_sparkling_paper_doll.sql +15 -0
- package/drizzle/0003_married_senator_kelly.sql +1 -0
- package/drizzle/0004_crazy_miek.sql +2 -0
- package/drizzle/0005_tearful_randall_flagg.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +232 -0
- package/drizzle/meta/0001_snapshot.json +434 -0
- package/drizzle/meta/0002_snapshot.json +551 -0
- package/drizzle/meta/0003_snapshot.json +557 -0
- package/drizzle/meta/0004_snapshot.json +573 -0
- package/drizzle/meta/0005_snapshot.json +574 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +42 -0
- package/src/agent-runner.test.ts +262 -0
- package/src/agent-runner.ts +262 -0
- package/src/chat/agent-loop.test.ts +119 -0
- package/src/chat/agent-loop.ts +73 -0
- package/src/chat/auto-apply.test.ts +237 -0
- package/src/chat/chat-handler.ts +111 -0
- package/src/chat/chat-service.streamturn.test.ts +417 -0
- package/src/chat/chat-service.test.ts +250 -0
- package/src/chat/chat-service.ts +923 -0
- package/src/chat/classifier-service.ts +64 -0
- package/src/chat/classifier.logic.test.ts +92 -0
- package/src/chat/classifier.logic.ts +71 -0
- package/src/chat/conversation-store.it.test.ts +203 -0
- package/src/chat/conversation-store.test.ts +248 -0
- package/src/chat/conversation-store.ts +237 -0
- package/src/chat/decision.logic.test.ts +45 -0
- package/src/chat/decision.logic.ts +54 -0
- package/src/chat/llm-provider.test.ts +63 -0
- package/src/chat/llm-provider.ts +67 -0
- package/src/chat/model-error.logic.test.ts +60 -0
- package/src/chat/model-error.logic.ts +65 -0
- package/src/chat/normalize-messages.logic.test.ts +101 -0
- package/src/chat/normalize-messages.logic.ts +65 -0
- package/src/chat/permission-mode.logic.test.ts +70 -0
- package/src/chat/permission-mode.logic.ts +45 -0
- package/src/chat/read-invoker.ts +72 -0
- package/src/chat/replay.test.ts +174 -0
- package/src/chat/scrub-content.test.ts +183 -0
- package/src/chat/scrub-content.ts +154 -0
- package/src/chat/sdk-tools.test.ts +168 -0
- package/src/chat/sdk-tools.ts +181 -0
- package/src/chat/title-service.test.ts +146 -0
- package/src/chat/title-service.ts +111 -0
- package/src/chat/title.logic.test.ts +98 -0
- package/src/chat/title.logic.ts +102 -0
- package/src/extension-points.ts +41 -0
- package/src/generated/docs-index.ts +3020 -0
- package/src/hardening/handler-authz.test.ts +282 -0
- package/src/hardening/no-secret-leak.test.ts +303 -0
- package/src/hooks.ts +33 -0
- package/src/index.ts +542 -0
- package/src/mcp/connection-registry.test.ts +25 -0
- package/src/mcp/connection-registry.ts +54 -0
- package/src/mcp/mcp-conformance.it.test.ts +128 -0
- package/src/mcp/server.test.ts +285 -0
- package/src/mcp/server.ts +300 -0
- package/src/mcp/tool-invoker.ts +65 -0
- package/src/openai-provider.test.ts +64 -0
- package/src/openai-provider.ts +146 -0
- package/src/projection.test.ts +97 -0
- package/src/projection.ts +132 -0
- package/src/propose-apply/args-hash.test.ts +26 -0
- package/src/propose-apply/args-hash.ts +30 -0
- package/src/propose-apply/service.test.ts +423 -0
- package/src/propose-apply/service.ts +419 -0
- package/src/propose-apply/store.test.ts +136 -0
- package/src/propose-apply/store.ts +224 -0
- package/src/propose-apply/token.test.ts +52 -0
- package/src/propose-apply/token.ts +71 -0
- package/src/rate-limit/spend-ledger.it.test.ts +224 -0
- package/src/rate-limit/spend-ledger.test.ts +176 -0
- package/src/rate-limit/spend-ledger.ts +162 -0
- package/src/rate-limit/tool-budget.it.test.ts +173 -0
- package/src/rate-limit/tool-budget.test.ts +58 -0
- package/src/rate-limit/tool-budget.ts +107 -0
- package/src/registry-wiring.test.ts +131 -0
- package/src/registry-wiring.ts +68 -0
- package/src/resolver.test.ts +156 -0
- package/src/resolver.ts +78 -0
- package/src/router.test.ts +78 -0
- package/src/router.ts +345 -0
- package/src/schema.ts +284 -0
- package/src/serializer.test.ts +88 -0
- package/src/serializer.ts +42 -0
- package/src/tool-registry.ts +58 -0
- package/src/tools/composite-tools.ts +24 -0
- package/src/tools/docs-tools.test.ts +150 -0
- package/src/tools/docs-tools.ts +115 -0
- package/src/tools/probe-url.test.ts +51 -0
- package/src/tools/probe-url.ts +146 -0
- package/src/tools/rank-docs.test.ts +153 -0
- package/src/tools/rank-docs.ts +209 -0
- package/src/tools/script-context-extract.test.ts +93 -0
- package/src/tools/script-context-extract.ts +283 -0
- package/src/tools/ssrf-guard.test.ts +69 -0
- package/src/tools/ssrf-guard.ts +108 -0
- package/src/tools/tool-set.e2e.test.ts +64 -0
- package/src/user-rpc-client.test.ts +45 -0
- package/src/user-rpc-client.ts +60 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import { and, eq, lt, gt } from "drizzle-orm";
|
|
2
|
+
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
3
|
+
import type { AiToolEffect } from "@checkstack/ai-common";
|
|
4
|
+
import * as schema from "../schema";
|
|
5
|
+
import type { AiToolCallRow } from "../schema";
|
|
6
|
+
import { generateProposalNonce } from "./token";
|
|
7
|
+
|
|
8
|
+
/** `SafeDatabase` handle parameterised by this package's `schema` module. */
type AiDatabase = SafeDatabase<typeof schema>;
|
|
9
|
+
|
|
10
|
+
/** Default proposal-token TTL — 10 minutes (LOCKED, §13.4). */
|
|
11
|
+
export const PROPOSAL_TTL_MS = 10 * 60_000; // 10 minutes x 60 000 ms per minute
|
|
12
|
+
|
|
13
|
+
/** The actor an audit row is attributed to: a category plus an identifier. */
export interface AuditPrincipal {
  /** Category of actor. */
  kind: "user" | "application";
  /** Identifier of the actor within its category. */
  id: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * The durable audit log + propose/apply token store. A `proposed` row IS the
 * token (decision §4, §13.4); there is no separate ephemeral table. All state
 * is in shared Postgres, so a token proposed on one pod is consumable on any
 * other (state-and-scale).
 */
export interface AiToolCallStore {
  /**
   * Record a directly-executed tool (read, or an automation-run mutate).
   *
   * `effect` is optional and defaults to `"read"` so existing callers keep
   * their behavior; pass it explicitly when the executed tool was not a read
   * so the audit row carries the real effect.
   */
  recordExecuted(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp" | "automation";
    conversationId?: string;
    toolName: string;
    effect?: AiToolEffect;
    argsHash: string;
    resultSnapshot?: Record<string, unknown>;
  }): Promise<AiToolCallRow>;

  /** Record a failed execute (audit only). */
  recordFailed(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp" | "automation";
    conversationId?: string;
    toolName: string;
    effect: AiToolEffect;
    argsHash: string;
    error: string;
  }): Promise<AiToolCallRow>;

  /**
   * Persist a `proposed` row (the token store). Returns the row plus the fresh
   * nonce; the caller formats `propose:<id>.<nonce>`.
   *
   * `now` defaults to the current time and is used both as `proposedAt` and as
   * the base for the expiry (`now + PROPOSAL_TTL_MS`).
   */
  createProposal(args: {
    principal: AuditPrincipal;
    transport: "chat" | "mcp";
    conversationId?: string;
    toolName: string;
    effect: AiToolEffect;
    argsHash: string;
    proposedPayload: Record<string, unknown>;
    resultSnapshot?: Record<string, unknown>;
    now?: Date;
  }): Promise<{ row: AiToolCallRow; nonce: string }>;

  /**
   * Atomically consume a `proposed` row: a single
   * `UPDATE ... WHERE id = ? AND status = 'proposed' AND proposal_expires_at > now`
   * flips it to `applied` and RETURNS the row. A second apply finds
   * `status != 'proposed'` and gets `undefined` (single-use even under
   * concurrent calls). Returns `undefined` when the row is missing, already
   * consumed, expired, or the conditions otherwise don't hold.
   *
   * `applier` is the principal that actually called `apply` — stamped into
   * `appliedByKind`/`appliedById` so the audit log records WHO applied, not just
   * who proposed (P3 review item 1). Usually identical to the proposer.
   */
  consumeProposal(args: {
    rowId: string;
    applier: AuditPrincipal;
    now?: Date;
  }): Promise<AiToolCallRow | undefined>;

  /** Fetch a proposal row by id without consuming it (for nonce/TTL checks). */
  getProposal(rowId: string): Promise<AiToolCallRow | undefined>;

  /**
   * Sweep: flip expired `proposed` rows to `expired`, retaining them as audit
   * history. Returns the number of rows expired.
   */
  expireStaleProposals(now?: Date): Promise<number>;
}

/**
 * Build an {@link AiToolCallStore} on top of the given database handle.
 *
 * Every method is a single statement against `schema.aiToolCalls`; the store
 * keeps no state of its own.
 */
export function createAiToolCallStore({
  db,
}: {
  db: AiDatabase;
}): AiToolCallStore {
  return {
    async recordExecuted({
      principal,
      transport,
      conversationId,
      toolName,
      effect,
      argsHash,
      resultSnapshot,
    }) {
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          // Historical behavior was a hard-coded "read"; keep that as the
          // default so callers that do not pass `effect` are unaffected.
          effect: effect ?? "read",
          argsHash,
          status: "executed",
          resultSnapshot,
        })
        .returning();
      return row;
    },

    async recordFailed({
      principal,
      transport,
      conversationId,
      toolName,
      effect,
      argsHash,
      error,
    }) {
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          effect,
          argsHash,
          status: "failed",
          error,
        })
        .returning();
      return row;
    },

    async createProposal({
      principal,
      transport,
      conversationId,
      toolName,
      effect,
      argsHash,
      proposedPayload,
      resultSnapshot,
      now = new Date(),
    }) {
      // The nonce is stored on the row and also returned to the caller, who
      // embeds it in the opaque token.
      const nonce = generateProposalNonce();
      const [row] = await db
        .insert(schema.aiToolCalls)
        .values({
          principalKind: principal.kind,
          principalId: principal.id,
          transport,
          conversationId,
          toolName,
          effect,
          argsHash,
          status: "proposed",
          proposalNonce: nonce,
          proposalExpiresAt: new Date(now.getTime() + PROPOSAL_TTL_MS),
          proposedPayload,
          resultSnapshot,
          proposedAt: now,
        })
        .returning();
      return { row, nonce };
    },

    async consumeProposal({ rowId, applier, now = new Date() }) {
      // Atomic single-use: only a still-`proposed`, non-expired row transitions
      // to `applied`. A concurrent second apply matches zero rows, so the
      // destructured `row` is `undefined`.
      const [row] = await db
        .update(schema.aiToolCalls)
        .set({
          status: "applied",
          appliedAt: now,
          appliedByKind: applier.kind,
          appliedById: applier.id,
        })
        .where(
          and(
            eq(schema.aiToolCalls.id, rowId),
            eq(schema.aiToolCalls.status, "proposed"),
            gt(schema.aiToolCalls.proposalExpiresAt, now),
          ),
        )
        .returning();
      return row;
    },

    async getProposal(rowId) {
      // Looks up by id only; the row may be in any status.
      const rows = await db
        .select()
        .from(schema.aiToolCalls)
        .where(eq(schema.aiToolCalls.id, rowId))
        .limit(1);
      return rows[0];
    },

    async expireStaleProposals(now = new Date()) {
      // NOTE: consume requires `expiresAt > now` and the sweep requires
      // `expiresAt < now`, both strict. A row whose expiry equals `now` exactly
      // is neither consumable nor swept by that call; a later sweep picks it up.
      const expired = await db
        .update(schema.aiToolCalls)
        .set({ status: "expired" })
        .where(
          and(
            eq(schema.aiToolCalls.status, "proposed"),
            lt(schema.aiToolCalls.proposalExpiresAt, now),
          ),
        )
        .returning({ id: schema.aiToolCalls.id });
      return expired.length;
    },
  };
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
formatProposalToken,
|
|
4
|
+
generateProposalNonce,
|
|
5
|
+
nonceMatches,
|
|
6
|
+
parseProposalToken,
|
|
7
|
+
} from "./token";
|
|
8
|
+
|
|
9
|
+
// Unit tests for the pure token codec: formatting, parsing, nonce generation
// and nonce comparison.
describe("proposal token codec", () => {
  test("format -> parse round-trips id and nonce", () => {
    const original = {
      rowId: "11111111-2222-3333-4444-555555555555",
      nonce: generateProposalNonce(),
    };
    const token = formatProposalToken(original);
    expect(token.startsWith("propose:")).toBe(true);
    expect(parseProposalToken(token)).toEqual(original);
  });

  test("nonce is 64 hex chars (32 random bytes)", () => {
    expect(generateProposalNonce()).toMatch(/^[0-9a-f]{64}$/);
  });

  // Malformed tokens all parse to `undefined`; one test per case, same titles
  // and order as before.
  const malformedCases = [
    ["rejects a token without the propose: prefix", "apply:abc.def"],
    ["rejects a token with no separator", "propose:abcdef"],
    ["rejects a token with an empty id", "propose:.abc"],
  ] as const;
  for (const [title, badToken] of malformedCases) {
    test(title, () => {
      expect(parseProposalToken(badToken)).toBeUndefined();
    });
  }

  test("splits on the FIRST dot so a hex nonce is preserved whole", () => {
    // Defensive: real nonces are hex and contain no dots, but everything after
    // the first dot must still be kept intact as the nonce.
    expect(parseProposalToken("propose:row1.aa.bb")).toEqual({
      rowId: "row1",
      nonce: "aa.bb",
    });
  });

  test("nonceMatches is true for equal nonces, false otherwise", () => {
    const stored = generateProposalNonce();
    expect(nonceMatches({ candidate: stored, stored })).toBe(true);
    expect(nonceMatches({ candidate: "deadbeef", stored })).toBe(false);
  });

  test("nonceMatches returns false on length mismatch without throwing", () => {
    expect(nonceMatches({ candidate: "ab", stored: "abcd" })).toBe(false);
  });
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { randomBytes, timingSafeEqual } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Proposal token codec (decision §13.4).
|
|
5
|
+
*
|
|
6
|
+
* A proposal is a row in `ai_tool_calls` with `status = "proposed"`. The opaque
|
|
7
|
+
* token handed to the caller is `propose:<rowId>.<nonce>`. `apply` parses it,
|
|
8
|
+
* fetches the row by id, and accepts only if the row is still `proposed`, the
|
|
9
|
+
* nonce matches in CONSTANT TIME, and the TTL has not elapsed.
|
|
10
|
+
*
|
|
11
|
+
* This module is pure (no DB) so the token grammar + constant-time compare are
|
|
12
|
+
* unit-tested in isolation.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
// Literal prefix every proposal token starts with; parsing rejects anything else.
const TOKEN_PREFIX = "propose:";
|
|
16
|
+
// Number of random bytes per nonce (hex-encoded, so twice as many characters).
const NONCE_BYTES = 32;
|
|
17
|
+
|
|
18
|
+
/** The two components carried by a `propose:<rowId>.<nonce>` token. */
export interface ParsedProposalToken {
  /** Text between the prefix and the first dot. */
  rowId: string;
  /** Everything after the first dot. */
  nonce: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Mint a new proposal nonce: `NONCE_BYTES` bytes from `crypto.randomBytes`,
 * returned as a lowercase hex string.
 */
export function generateProposalNonce(): string {
  const entropy = randomBytes(NONCE_BYTES);
  return entropy.toString("hex");
}
|
|
27
|
+
|
|
28
|
+
/**
 * Serialise a proposal's row id and nonce into the opaque token string:
 * the token prefix, the row id, a dot, then the nonce.
 */
export function formatProposalToken(parts: ParsedProposalToken): string {
  return TOKEN_PREFIX + parts.rowId + "." + parts.nonce;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Decode an opaque proposal token into its row id and nonce.
 *
 * Never throws: a token with the wrong prefix, no dot separator, an empty row
 * id or an empty nonce yields `undefined`, so callers can reject with a plain
 * check.
 */
export function parseProposalToken(
  token: string,
): ParsedProposalToken | undefined {
  if (!token.startsWith(TOKEN_PREFIX)) {
    return undefined;
  }
  const payload = token.slice(TOKEN_PREFIX.length);

  // Row ids (UUIDs) and nonces (hex) contain no dots, so the FIRST dot is the
  // separator; anything after it belongs to the nonce.
  const dotAt = payload.indexOf(".");
  // -1 means no separator at all; 0 means the row id would be empty.
  if (dotAt < 1) {
    return undefined;
  }

  const nonce = payload.slice(dotAt + 1);
  if (nonce.length === 0) {
    return undefined;
  }
  return { rowId: payload.slice(0, dotAt), nonce };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Compare a caller-supplied nonce with the stored one using
 * `crypto.timingSafeEqual` on their UTF-8 bytes.
 *
 * `timingSafeEqual` throws on buffers of different length, so a length
 * mismatch is answered with `false` before it is called.
 */
export function nonceMatches({
  candidate,
  stored,
}: {
  candidate: string;
  stored: string;
}): boolean {
  const candidateBytes = Buffer.from(candidate, "utf8");
  const storedBytes = Buffer.from(stored, "utf8");
  return (
    candidateBytes.length === storedBytes.length &&
    timingSafeEqual(candidateBytes, storedBytes)
  );
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-integration LLM spend-cap cross-pod enforcement (real Postgres) — Phase 6,
|
|
3
|
+
* state-and-scale §14.5.
|
|
4
|
+
*
|
|
5
|
+
* The spend cap is a shared-Postgres ROLLING-WINDOW SUM over `ai_spend`: the
|
|
6
|
+
* spend is the total tokens a principal has incurred against an integration in
|
|
7
|
+
* the trailing window. Because the sum is read from the SAME shared table that
|
|
8
|
+
* every pod writes to, the cap holds across ALL pods. An in-memory per-pod token
|
|
9
|
+
* counter would let N pods EACH allow the cap = N x the intended spend — a leak a
|
|
10
|
+
* single-process unit test can never catch. This test simulates TWO pods (two
|
|
11
|
+
* independent pools to the SAME schema) and asserts the combined token usage is
|
|
12
|
+
* counted against ONE shared cap (mirrors the tool-budget cross-pod IT).
|
|
13
|
+
*
|
|
14
|
+
* Gated behind `CHECKSTACK_IT=1`; connection from `CHECKSTACK_IT_PG_URL`. Runs in
|
|
15
|
+
* a freshly created, self-cleaning schema.
|
|
16
|
+
*/
|
|
17
|
+
import { afterAll, beforeAll, describe, expect, it } from "bun:test";
|
|
18
|
+
import { drizzle } from "drizzle-orm/node-postgres";
|
|
19
|
+
import { Pool } from "pg";
|
|
20
|
+
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
21
|
+
import type { AiSpendCap } from "@checkstack/ai-common";
|
|
22
|
+
import * as schema from "../schema";
|
|
23
|
+
import type { AuditPrincipal } from "../propose-apply/store";
|
|
24
|
+
import {
|
|
25
|
+
SpendCapExceededError,
|
|
26
|
+
checkSpendCap,
|
|
27
|
+
enforceSpendCap,
|
|
28
|
+
recordSpend,
|
|
29
|
+
} from "./spend-ledger";
|
|
30
|
+
|
|
31
|
+
// Connection string: taken from CHECKSTACK_IT_PG_URL, else a local default.
const DEFAULT_PG_URL = "postgres://postgres:postgres@localhost:5432/postgres";
const PG_URL = process.env.CHECKSTACK_IT_PG_URL ?? DEFAULT_PG_URL;

// Schema name unique to this run: a random UUID with its dashes stripped.
const SCHEMA = `it_ai_spend_${crypto.randomUUID().replace(/-/g, "")}`;

// Integration id the tests record spend against.
const INTEGRATION = "ai.openai-compatible.c1";
|
|
37
|
+
|
|
38
|
+
/** One simulated pod: its own connection pool plus a database handle over it. */
interface Pod {
  /** Raw `pg` pool, also used directly for hand-written SQL in the tests. */
  pool: Pool;
  /** Database handle passed to the spend-ledger functions. */
  db: SafeDatabase<typeof schema>;
  /** Shut this pod's pool down. */
  end(): Promise<void>;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Create a simulated pod: a new pool against `PG_URL` whose connections have
 * `search_path` set to the test schema, wrapped in a drizzle handle.
 */
function makePod(): Pod {
  const searchPathOption = `-c search_path=${SCHEMA}`;
  const pool = new Pool({
    connectionString: PG_URL,
    options: searchPathOption,
  });
  return {
    pool,
    db: drizzle(pool, { schema }) as unknown as SafeDatabase<typeof schema>,
    end: () => pool.end(),
  };
}
|
|
52
|
+
|
|
53
|
+
// Cross-pod integration suite: two pools ("pods") share one freshly created
// schema, and every assertion checks that both pods observe the same ledger.
// Skipped unless CHECKSTACK_IT is set.
describe.skipIf(!process.env.CHECKSTACK_IT)(
  "per-integration spend cap (shared Postgres, cross-pod)",
  () => {
    // `admin` stays undefined if beforeAll never ran, so teardown can guard on it.
    let admin: Pool | undefined;
    let podA: Pod;
    let podB: Pod;

    /** Record one turn's token usage through the given pod. */
    const spend = (
      pod: Pod,
      principal: AuditPrincipal,
      usage: { inputTokens: number; outputTokens: number },
      integrationId: string = INTEGRATION,
    ) => recordSpend({ db: pod.db, integrationId, principal, usage });

    /** Read the cap state through the given pod. */
    const check = (
      pod: Pod,
      principal: AuditPrincipal,
      cap: AiSpendCap,
      integrationId: string = INTEGRATION,
    ) => checkSpendCap({ db: pod.db, integrationId, principal, cap });

    /** Enforce the cap through the given pod (rejects when over). */
    const enforce = (
      pod: Pod,
      principal: AuditPrincipal,
      cap: AiSpendCap,
      integrationId: string = INTEGRATION,
    ) => enforceSpendCap({ db: pod.db, integrationId, principal, cap });

    beforeAll(async () => {
      const adminPool = new Pool({ connectionString: PG_URL });
      admin = adminPool;
      await adminPool.query(`CREATE SCHEMA "${SCHEMA}"`);
      await adminPool.query(`
        CREATE TABLE "${SCHEMA}".ai_spend (
          id text PRIMARY KEY,
          integration_id text NOT NULL,
          principal_kind text NOT NULL,
          principal_id text NOT NULL,
          conversation_id text,
          model text,
          input_tokens integer NOT NULL DEFAULT 0,
          output_tokens integer NOT NULL DEFAULT 0,
          total_tokens integer NOT NULL DEFAULT 0,
          created_at timestamp NOT NULL DEFAULT now()
        )
      `);
      podA = makePod();
      podB = makePod();
    });

    afterAll(async () => {
      // Run EVERY teardown step even if an earlier one fails, so a rejected
      // pool shutdown cannot leave the schema behind or the admin pool open.
      // The first failure is rethrown afterwards so it is still reported.
      const steps: Array<() => Promise<unknown> | undefined> = [
        () => podA?.end(),
        () => podB?.end(),
        () => admin?.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`),
        () => admin?.end(),
      ];
      const failures: unknown[] = [];
      for (const step of steps) {
        try {
          await step();
        } catch (error: unknown) {
          failures.push(error);
        }
      }
      if (failures.length > 0) throw failures[0];
    });

    it("sums token usage from BOTH pods against ONE shared cap (no N x leak)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-u1" };
      const cap: AiSpendCap = { tokenBudget: 1000, windowMinutes: 60 };

      // Record turns on ALTERNATING pods. Whichever pod later checks the cap
      // sees the COMBINED total, because the ledger is the shared table.
      await spend(podA, principal, { inputTokens: 200, outputTokens: 100 }); // 300
      await spend(podB, principal, { inputTokens: 250, outputTokens: 150 }); // 400 -> 700

      // 700 < 1000: a new turn is within budget, read from EITHER pod.
      const onA = await check(podA, principal, cap);
      const onB = await check(podB, principal, cap);
      expect(onA.used).toBe(700);
      expect(onB.used).toBe(700);
      expect(onA.allowed).toBe(true);
      expect(onB.allowed).toBe(true);

      // One more turn (on pod B) crosses the cap.
      await spend(podB, principal, { inputTokens: 200, outputTokens: 200 }); // 400 -> 1100

      // Now BOTH pods agree the principal is over the SHARED cap — not 1000/pod.
      const overA = await check(podA, principal, cap);
      const overB = await check(podB, principal, cap);
      expect(overA.used).toBe(1100);
      expect(overB.used).toBe(1100);
      expect(overA.allowed).toBe(false);
      expect(overB.allowed).toBe(false);

      await expect(enforce(podA, principal, cap)).rejects.toBeInstanceOf(
        SpendCapExceededError,
      );
      await expect(enforce(podB, principal, cap)).rejects.toBeInstanceOf(
        SpendCapExceededError,
      );
    });

    it("the cap is per-integration: spend on one integration does not block another", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-u2" };
      const cap: AiSpendCap = { tokenBudget: 100, windowMinutes: 60 };
      const otherIntegration = "ai.openai-compatible.c2";

      // Recorded on pod A against INTEGRATION (c1): 200 tokens, over the 100 cap.
      await spend(podA, principal, { inputTokens: 200, outputTokens: 0 });

      // c1 is over; c2 has no spend, so a turn there is still allowed.
      await expect(enforce(podB, principal, cap)).rejects.toBeInstanceOf(
        SpendCapExceededError,
      );
      await expect(
        enforce(podB, principal, cap, otherIntegration),
      ).resolves.toBeUndefined();
    });

    it("the window slides: only recent spend counts (older turns drop out)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "spend-window" };

      // An OLD turn outside the window, inserted with raw SQL so `created_at`
      // can be back-dated by 120 minutes.
      await podA.pool.query(
        `INSERT INTO "${SCHEMA}".ai_spend
           (id, integration_id, principal_kind, principal_id, total_tokens, created_at)
         VALUES ($1, $2, 'user', $3, 5000, now() - interval '120 minutes')`,
        [crypto.randomUUID(), INTEGRATION, principal.id],
      );

      // A recent one.
      await spend(podB, principal, { inputTokens: 10, outputTokens: 10 });

      // With a 60-minute window the old 5000-token turn is excluded.
      const result = await check(podB, principal, {
        tokenBudget: 1000,
        windowMinutes: 60,
      });
      expect(result.used).toBe(20);
      expect(result.allowed).toBe(true);
    });
  },
);
|