@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +97 -0
  2. package/drizzle/0000_productive_jackpot.sql +26 -0
  3. package/drizzle/0001_puzzling_purple_man.sql +26 -0
  4. package/drizzle/0002_sparkling_paper_doll.sql +15 -0
  5. package/drizzle/0003_married_senator_kelly.sql +1 -0
  6. package/drizzle/0004_crazy_miek.sql +2 -0
  7. package/drizzle/0005_tearful_randall_flagg.sql +1 -0
  8. package/drizzle/meta/0000_snapshot.json +232 -0
  9. package/drizzle/meta/0001_snapshot.json +434 -0
  10. package/drizzle/meta/0002_snapshot.json +551 -0
  11. package/drizzle/meta/0003_snapshot.json +557 -0
  12. package/drizzle/meta/0004_snapshot.json +573 -0
  13. package/drizzle/meta/0005_snapshot.json +574 -0
  14. package/drizzle/meta/_journal.json +48 -0
  15. package/drizzle.config.ts +7 -0
  16. package/package.json +42 -0
  17. package/src/agent-runner.test.ts +262 -0
  18. package/src/agent-runner.ts +262 -0
  19. package/src/chat/agent-loop.test.ts +119 -0
  20. package/src/chat/agent-loop.ts +73 -0
  21. package/src/chat/auto-apply.test.ts +237 -0
  22. package/src/chat/chat-handler.ts +111 -0
  23. package/src/chat/chat-service.streamturn.test.ts +417 -0
  24. package/src/chat/chat-service.test.ts +250 -0
  25. package/src/chat/chat-service.ts +923 -0
  26. package/src/chat/classifier-service.ts +64 -0
  27. package/src/chat/classifier.logic.test.ts +92 -0
  28. package/src/chat/classifier.logic.ts +71 -0
  29. package/src/chat/conversation-store.it.test.ts +203 -0
  30. package/src/chat/conversation-store.test.ts +248 -0
  31. package/src/chat/conversation-store.ts +237 -0
  32. package/src/chat/decision.logic.test.ts +45 -0
  33. package/src/chat/decision.logic.ts +54 -0
  34. package/src/chat/llm-provider.test.ts +63 -0
  35. package/src/chat/llm-provider.ts +67 -0
  36. package/src/chat/model-error.logic.test.ts +60 -0
  37. package/src/chat/model-error.logic.ts +65 -0
  38. package/src/chat/normalize-messages.logic.test.ts +101 -0
  39. package/src/chat/normalize-messages.logic.ts +65 -0
  40. package/src/chat/permission-mode.logic.test.ts +70 -0
  41. package/src/chat/permission-mode.logic.ts +45 -0
  42. package/src/chat/read-invoker.ts +72 -0
  43. package/src/chat/replay.test.ts +174 -0
  44. package/src/chat/scrub-content.test.ts +183 -0
  45. package/src/chat/scrub-content.ts +154 -0
  46. package/src/chat/sdk-tools.test.ts +168 -0
  47. package/src/chat/sdk-tools.ts +181 -0
  48. package/src/chat/title-service.test.ts +146 -0
  49. package/src/chat/title-service.ts +111 -0
  50. package/src/chat/title.logic.test.ts +98 -0
  51. package/src/chat/title.logic.ts +102 -0
  52. package/src/extension-points.ts +41 -0
  53. package/src/generated/docs-index.ts +3020 -0
  54. package/src/hardening/handler-authz.test.ts +282 -0
  55. package/src/hardening/no-secret-leak.test.ts +303 -0
  56. package/src/hooks.ts +33 -0
  57. package/src/index.ts +542 -0
  58. package/src/mcp/connection-registry.test.ts +25 -0
  59. package/src/mcp/connection-registry.ts +54 -0
  60. package/src/mcp/mcp-conformance.it.test.ts +128 -0
  61. package/src/mcp/server.test.ts +285 -0
  62. package/src/mcp/server.ts +300 -0
  63. package/src/mcp/tool-invoker.ts +65 -0
  64. package/src/openai-provider.test.ts +64 -0
  65. package/src/openai-provider.ts +146 -0
  66. package/src/projection.test.ts +97 -0
  67. package/src/projection.ts +132 -0
  68. package/src/propose-apply/args-hash.test.ts +26 -0
  69. package/src/propose-apply/args-hash.ts +30 -0
  70. package/src/propose-apply/service.test.ts +423 -0
  71. package/src/propose-apply/service.ts +419 -0
  72. package/src/propose-apply/store.test.ts +136 -0
  73. package/src/propose-apply/store.ts +224 -0
  74. package/src/propose-apply/token.test.ts +52 -0
  75. package/src/propose-apply/token.ts +71 -0
  76. package/src/rate-limit/spend-ledger.it.test.ts +224 -0
  77. package/src/rate-limit/spend-ledger.test.ts +176 -0
  78. package/src/rate-limit/spend-ledger.ts +162 -0
  79. package/src/rate-limit/tool-budget.it.test.ts +173 -0
  80. package/src/rate-limit/tool-budget.test.ts +58 -0
  81. package/src/rate-limit/tool-budget.ts +107 -0
  82. package/src/registry-wiring.test.ts +131 -0
  83. package/src/registry-wiring.ts +68 -0
  84. package/src/resolver.test.ts +156 -0
  85. package/src/resolver.ts +78 -0
  86. package/src/router.test.ts +78 -0
  87. package/src/router.ts +345 -0
  88. package/src/schema.ts +284 -0
  89. package/src/serializer.test.ts +88 -0
  90. package/src/serializer.ts +42 -0
  91. package/src/tool-registry.ts +58 -0
  92. package/src/tools/composite-tools.ts +24 -0
  93. package/src/tools/docs-tools.test.ts +150 -0
  94. package/src/tools/docs-tools.ts +115 -0
  95. package/src/tools/probe-url.test.ts +51 -0
  96. package/src/tools/probe-url.ts +146 -0
  97. package/src/tools/rank-docs.test.ts +153 -0
  98. package/src/tools/rank-docs.ts +209 -0
  99. package/src/tools/script-context-extract.test.ts +93 -0
  100. package/src/tools/script-context-extract.ts +283 -0
  101. package/src/tools/ssrf-guard.test.ts +69 -0
  102. package/src/tools/ssrf-guard.ts +108 -0
  103. package/src/tools/tool-set.e2e.test.ts +64 -0
  104. package/src/user-rpc-client.test.ts +45 -0
  105. package/src/user-rpc-client.ts +60 -0
  106. package/tsconfig.json +26 -0
@@ -0,0 +1,176 @@
1
+ import { describe, expect, test, mock } from "bun:test";
2
+ import type { AiSpendCap } from "@checkstack/ai-common";
3
+ import type { AuditPrincipal } from "../propose-apply/store";
4
+ import {
5
+ checkSpendCap,
6
+ enforceSpendCap,
7
+ recordSpend,
8
+ SpendCapExceededError,
9
+ } from "./spend-ledger";
10
+
11
+ /**
12
+ * Per-integration LLM spend cap (Phase 6, the locked-off knob, now wired).
13
+ *
14
+ * Unit-level proof that the cap is a shared-Postgres ROLLING-WINDOW SUM over
15
+ * `ai_spend`: the check reads the SUM through the injected `db` (no pod-local
16
+ * counter), enforcement throws once `used >= tokenBudget`, the cap is OFF when
17
+ * unconfigured, and a turn's usage is recorded as input + output tokens. The
18
+ * cross-pod property (the sum counts EVERY pod's writes) is exercised against a
19
+ * live Postgres in `spend-ledger.it.test.ts`.
20
+ */
21
+
22
/** Principal that every spend-ledger unit test in this file acts as. */
const principal: AuditPrincipal = {
  kind: "user",
  id: "u1",
};

/** Cap shared by the tests: a 1000-token budget over a 60-minute window. */
const cap: AiSpendCap = {
  tokenBudget: 1000,
  windowMinutes: 60,
};
24
+
25
/**
 * Builds a stand-in database exposing only the `select().from().where()`
 * chain. Awaiting the chain yields a single row `{ total }`, which is how the
 * tests simulate the ledger's SUM read without a real connection.
 *
 * The result is cast to `never` so it can be passed wherever the code under
 * test expects its database type.
 */
function sumDb(total: number) {
  const whereStub = mock(async () => [{ total }]);
  const fromStub = mock(() => ({ where: whereStub }));
  const selectStub = mock(() => ({ from: fromStub }));
  return { select: selectStub } as never;
}
32
+
33
/**
 * Builds a stand-in database exposing only `insert().values()`. Every row
 * handed to `values` is appended to `captured` so a test can inspect exactly
 * what would have been written.
 *
 * @returns the fake `db` (cast to `never`) and the live `captured` array.
 */
function insertDb() {
  const captured: Array<Record<string, unknown>> = [];
  const valuesStub = mock(async (row: Record<string, unknown>) => {
    captured.push(row);
    return [];
  });
  const insertStub = mock(() => ({ values: valuesStub }));
  const db = { insert: insertStub } as never;
  return { db, captured };
}
43
+
44
describe("checkSpendCap (rolling-window SUM over ai_spend)", () => {
  test("under budget: a new turn is allowed", async () => {
    const { used, allowed, tokenBudget } = await checkSpendCap({
      db: sumDb(400),
      integrationId: "ai.openai-compatible.c1",
      principal,
      cap,
    });

    expect(used).toBe(400);
    expect(allowed).toBe(true);
    expect(tokenBudget).toBe(1000);
  });

  test("at/over budget: a new turn is refused (used >= tokenBudget)", async () => {
    // Exactly at the cap first, then past it: both must be refused.
    for (const total of [1000, 1500]) {
      const outcome = await checkSpendCap({
        db: sumDb(total),
        integrationId: "c1",
        principal,
        cap,
      });
      expect(outcome.allowed).toBe(false);
    }
  });

  test("a numeric-string SUM (as pg returns it) coerces to a number", async () => {
    // The stubbed row carries the total as a string, the shape the ledger
    // code says it must coerce.
    const whereStub = mock(() => Promise.resolve([{ total: "750" }]));
    const fromStub = mock(() => ({ where: whereStub }));
    const stringSumDb = { select: mock(() => ({ from: fromStub })) } as never;

    const outcome = await checkSpendCap({
      db: stringSumDb,
      integrationId: "c1",
      principal,
      cap,
    });

    expect(outcome.used).toBe(750);
    expect(outcome.allowed).toBe(true);
  });

  test("the window start is now - windowMinutes (the rolling window)", async () => {
    const fixedNow = new Date("2026-06-02T12:00:00Z");
    const thirtyMinuteCap = { tokenBudget: 100, windowMinutes: 30 };

    const { windowStart } = await checkSpendCap({
      db: sumDb(0),
      integrationId: "c1",
      principal,
      cap: thirtyMinuteCap,
      now: fixedNow,
    });

    expect(windowStart).toEqual(new Date("2026-06-02T11:30:00Z"));
  });
});
101
+
102
describe("enforceSpendCap", () => {
  test("OFF by default: no cap configured = a no-op (never throws, never reads)", async () => {
    const selectSpy = mock(() => ({ from: () => ({ where: () => [] }) }));
    const untouchedDb = { select: selectSpy } as never;

    const pending = enforceSpendCap({
      db: untouchedDb,
      integrationId: "c1",
      principal,
      cap: undefined,
    });

    await expect(pending).resolves.toBeUndefined();
    // With no cap the ledger must not even be queried.
    expect(selectSpy).not.toHaveBeenCalled();
  });

  test("throws SpendCapExceededError when over the configured budget", async () => {
    const pending = enforceSpendCap({
      db: sumDb(1200),
      integrationId: "c1",
      principal,
      cap,
    });

    await expect(pending).rejects.toBeInstanceOf(SpendCapExceededError);
  });

  test("passes when under the configured budget", async () => {
    const pending = enforceSpendCap({
      db: sumDb(10),
      integrationId: "c1",
      principal,
      cap,
    });

    await expect(pending).resolves.toBeUndefined();
  });
});
130
+
131
describe("recordSpend (append to the shared ledger)", () => {
  test("persists input + output + total tokens for the turn", async () => {
    const { db, captured } = insertDb();

    await recordSpend({
      db,
      integrationId: "ai.openai-compatible.c1",
      principal,
      conversationId: "conv-1",
      model: "gpt-4o-mini",
      usage: { inputTokens: 120, outputTokens: 80 },
    });

    expect(captured).toHaveLength(1);
    const [row] = captured;
    // Checked in the same order as the fields are listed here.
    const expectedFields: Array<[string, unknown]> = [
      ["integrationId", "ai.openai-compatible.c1"],
      ["principalKind", "user"],
      ["inputTokens", 120],
      ["outputTokens", 80],
      ["totalTokens", 200],
      ["conversationId", "conv-1"],
      ["model", "gpt-4o-mini"],
    ];
    for (const [field, expected] of expectedFields) {
      expect(row?.[field]).toBe(expected);
    }
  });

  test("missing / negative usage coerces to 0 (a provider omitting usage never corrupts the ledger)", async () => {
    const { db, captured } = insertDb();

    await recordSpend({
      db,
      integrationId: "c1",
      principal,
      // NaN stands in for a missing count; -5 for a nonsensical negative one.
      usage: { inputTokens: Number.NaN, outputTokens: -5 },
    });

    const [row] = captured;
    for (const field of ["inputTokens", "outputTokens", "totalTokens"]) {
      expect(row?.[field]).toBe(0);
    }
  });
});
167
+
168
describe("SpendCapExceededError", () => {
  test("carries used + cap and a clear budget-exceeded message", () => {
    const { used, tokenBudget, message, name } = new SpendCapExceededError(
      1200,
      1000,
    );

    expect(used).toBe(1200);
    expect(tokenBudget).toBe(1000);
    expect(message).toContain("spend/budget exceeded");
    expect(name).toBe("SpendCapExceededError");
  });
});
@@ -0,0 +1,162 @@
1
+ import { and, eq, gte, sql } from "drizzle-orm";
2
+ import type { SafeDatabase } from "@checkstack/backend-api";
3
+ import type { AiSpendCap } from "@checkstack/ai-common";
4
+ import * as schema from "../schema";
5
+ import type { AuditPrincipal } from "../propose-apply/store";
6
+
7
+ type AiDatabase = SafeDatabase<typeof schema>; // Database handle typed against this package's `../schema` module.
8
+
9
+ /**
10
+ * Per-integration LLM SPEND CAP (Phase 6, the locked-off knob, now wired).
11
+ *
12
+ * A shared-Postgres ROLLING-WINDOW SUM over `ai_spend`: the spend is the total
13
+ * number of tokens this principal has incurred against THIS integration in the
14
+ * trailing window. Because the sum is read from the SAME shared table that every
15
+ * pod writes to, the cap holds across ALL pods (state-and-scale §14.5) — an
16
+ * in-memory per-pod token counter would let N pods each allow the cap = N x the
17
+ * intended spend, which a single-process test can never catch. Mirrors the
18
+ * per-principal tool rate-limit budget exactly, reusing the
19
+ * `ai_spend_integration_principal_created_idx` index.
20
+ *
21
+ * Token-count (not USD) is deliberate: deterministic + provider-agnostic. Every
22
+ * OpenAI-compatible provider reports tokens via the AI SDK `usage`; only some
23
+ * publish a price table (self-hosted Ollama/vLLM have none), so a USD cap would
24
+ * require a per-model pricing table that drifts and is meaningless for local
25
+ * models. Tokens are the one unit every provider reports.
26
+ *
27
+ * The cap is OFF by default: enforcement only runs when a connection configures
28
+ * `spendCap`. With no cap configured, `enforceSpendCap` is a no-op.
29
+ */
30
+
31
/** Token usage of a single turn, as handed to {@link recordSpend}. */
export interface SpendUsageInput {
  /** Tokens counted for the turn's input. */
  inputTokens: number;
  /** Tokens counted for the turn's output. */
  outputTokens: number;
}
35
+
36
/** Outcome of one spend-cap check for a principal + integration pair. */
export interface SpendCheckResult {
  /** Sum of tokens already recorded for the pair inside the window. */
  used: number;
  /** `true` while `used < tokenBudget`, i.e. a new turn may start. */
  allowed: boolean;
  /** The cap's token budget that `used` was compared against. */
  tokenBudget: number;
  /** Start of the trailing window the sum was taken over. */
  windowStart: Date;
}
46
+
47
/**
 * Reads how many tokens a principal has spent against one integration inside
 * the cap's trailing window and reports whether another turn fits the budget.
 *
 * @param db            Database handle the ledger sum is read through.
 * @param integrationId Integration whose spend is being capped.
 * @param principal     Principal (kind + id) whose rows are summed.
 * @param cap           Token budget and window length in minutes.
 * @param now           Reference instant; defaults to the current time.
 * @returns the summed usage, the budget, the window start, and
 *          `allowed: false` once `used >= tokenBudget`.
 */
export async function checkSpendCap({
  db,
  integrationId,
  principal,
  cap,
  now = new Date(),
}: {
  db: AiDatabase;
  integrationId: string;
  principal: AuditPrincipal;
  cap: AiSpendCap;
  now?: Date;
}): Promise<SpendCheckResult> {
  const { tokenBudget, windowMinutes } = cap;
  // 60_000 ms per minute: the window reaches back `windowMinutes` from `now`.
  const windowStart = new Date(now.getTime() - windowMinutes * 60_000);

  const [firstRow] = await db
    .select({
      total: sql<number>`coalesce(sum(${schema.aiSpend.totalTokens}), 0)`,
    })
    .from(schema.aiSpend)
    .where(
      and(
        eq(schema.aiSpend.integrationId, integrationId),
        eq(schema.aiSpend.principalKind, principal.kind),
        eq(schema.aiSpend.principalId, principal.id),
        gte(schema.aiSpend.createdAt, windowStart),
      ),
    );

  // The total may arrive as a numeric string rather than a number, so it is
  // always run through Number(); a missing row counts as zero spend.
  const used = Number(firstRow?.total ?? 0);

  return {
    used,
    allowed: used < tokenBudget,
    tokenBudget,
    windowStart,
  };
}
90
+
91
/**
 * Raised when a principal has used up an integration's LLM token budget for
 * the current window. Carries the numbers so callers can report them.
 */
export class SpendCapExceededError extends Error {
  /** Tokens already used in the window when the check failed. */
  public readonly used: number;
  /** The configured budget that was reached or exceeded. */
  public readonly tokenBudget: number;

  constructor(used: number, tokenBudget: number) {
    super(
      `LLM spend/budget exceeded: ${used} tokens used against this integration in the current window (cap ${tokenBudget}). Try again later or raise the integration's spend cap.`,
    );
    this.used = used;
    this.tokenBudget = tokenBudget;
    this.name = "SpendCapExceededError";
  }
}
103
+
104
/**
 * Guard to run before a chat turn. Does nothing when no `cap` is supplied
 * (the cap is opt-in); otherwise checks the ledger and rejects the turn when
 * the principal has reached the integration's budget.
 *
 * @param db            Database handle passed through to the ledger check.
 * @param integrationId Integration whose cap is enforced.
 * @param principal     Principal about to start a turn.
 * @param cap           Optional cap; when absent the function returns at once
 *                      without touching `db`.
 * @param now           Optional reference instant forwarded to the check.
 * @throws {SpendCapExceededError} when the check reports the turn is not allowed.
 */
export async function enforceSpendCap({
  db,
  integrationId,
  principal,
  cap,
  now,
}: {
  db: AiDatabase;
  integrationId: string;
  principal: AuditPrincipal;
  cap?: AiSpendCap;
  now?: Date;
}): Promise<void> {
  // No cap configured: enforcement is off.
  if (!cap) {
    return;
  }

  const { allowed, used, tokenBudget } = await checkSpendCap({
    db,
    integrationId,
    principal,
    cap,
    now,
  });
  if (allowed) {
    return;
  }
  throw new SpendCapExceededError(used, tokenBudget);
}
128
+
129
/**
 * Normalises one raw token count into a value that is safe to store: a
 * non-negative whole number. Anything that is not a finite number (missing,
 * `NaN`, `Infinity`, non-numeric) becomes 0, negatives clamp to 0, and
 * fractions are truncated.
 */
function toTokenCount(raw: unknown): number {
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    return 0;
  }
  return Math.max(0, Math.trunc(value));
}

/**
 * Appends one turn's token usage to the `ai_spend` ledger.
 *
 * Both counts are sanitised before the insert, so a provider that omits usage
 * or reports a nonsensical value (negative, `NaN`, `Infinity`) records 0
 * instead of writing a bad number into the ledger. `totalTokens` is the sum
 * of the two sanitised counts.
 *
 * @param db             Database handle the row is inserted through.
 * @param integrationId  Integration the usage is charged against.
 * @param principal      Principal (kind + id) that incurred the usage.
 * @param conversationId Optional conversation the turn belongs to.
 * @param model          Optional model identifier used for the turn.
 * @param usage          Raw input/output token counts for the turn.
 */
export async function recordSpend({
  db,
  integrationId,
  principal,
  conversationId,
  model,
  usage,
}: {
  db: AiDatabase;
  integrationId: string;
  principal: AuditPrincipal;
  conversationId?: string;
  model?: string;
  usage: SpendUsageInput;
}): Promise<void> {
  const inputTokens = toTokenCount(usage.inputTokens);
  const outputTokens = toTokenCount(usage.outputTokens);

  await db.insert(schema.aiSpend).values({
    integrationId,
    principalKind: principal.kind,
    principalId: principal.id,
    conversationId,
    model,
    inputTokens,
    outputTokens,
    totalTokens: inputTokens + outputTokens,
  });
}
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Per-principal tool-budget cross-pod enforcement (real Postgres) — matrix #17,
3
+ * plan §14.5, state-and-scale §9.
4
+ *
5
+ * The per-principal tool budget is a shared-Postgres ROLLING-WINDOW counter over
6
+ * `ai_tool_calls`: the budget is the number of rows the principal has written in
7
+ * the trailing window. Because the count is read from the SAME shared table that
8
+ * every pod writes to, the cap holds across ALL pods. An in-memory per-pod
9
+ * limiter would let N pods EACH allow the cap = N x the intended limit — a leak a
10
+ * single-process unit test can never catch. This test simulates TWO pods (two
11
+ * independent pools to the SAME schema) and asserts the combined activity is
12
+ * counted against ONE shared budget (mirrors the P2 DCR cross-pod IT).
13
+ *
14
+ * Gated behind `CHECKSTACK_IT=1`; connection from `CHECKSTACK_IT_PG_URL`. Runs in
15
+ * a freshly created, self-cleaning schema.
16
+ */
17
+ import { afterAll, beforeAll, describe, expect, it } from "bun:test";
18
+ import { drizzle } from "drizzle-orm/node-postgres";
19
+ import { Pool } from "pg";
20
+ import type { SafeDatabase } from "@checkstack/backend-api";
21
+ import * as schema from "../schema";
22
+ import type { AuditPrincipal } from "../propose-apply/store";
23
+ import {
24
+ ToolBudgetExceededError,
25
+ checkToolBudget,
26
+ enforceToolBudget,
27
+ } from "./tool-budget";
28
+
29
/** Local fallback used when `CHECKSTACK_IT_PG_URL` is not set. */
const DEFAULT_PG_URL = "postgres://postgres:postgres@localhost:5432/postgres";

/** Connection string for the integration-test Postgres. */
const PG_URL = process.env.CHECKSTACK_IT_PG_URL ?? DEFAULT_PG_URL;

/** Per-run schema name: a fixed prefix plus a UUID with its dashes removed. */
const SCHEMA = `it_ai_budget_${crypto.randomUUID().split("-").join("")}`;
34
+
35
/** One simulated pod: its own connection pool plus a db handle on top of it. */
interface Pod {
  /** Raw pool, used by the tests for hand-written SQL. */
  pool: Pool;
  /** Drizzle handle over `pool`, passed to the code under test. */
  db: SafeDatabase<typeof schema>;
  /** Shuts the pod's pool down. */
  end(): Promise<void>;
}
40
+
41
/**
 * Creates one simulated pod: a fresh pool whose connections set their
 * `search_path` to the test schema, wrapped in a drizzle handle.
 */
function makePod(): Pod {
  const pool = new Pool({
    connectionString: PG_URL,
    options: `-c search_path=${SCHEMA}`,
  });

  return {
    pool,
    db: drizzle(pool, { schema }) as unknown as SafeDatabase<typeof schema>,
    end: () => pool.end(),
  };
}
49
+
50
/**
 * Writes one `ai_tool_calls` row for `principal` through the given pod's pool,
 * standing in for a recorded tool invocation. Everything except the id and the
 * principal columns is a fixed placeholder value.
 */
async function recordCall(pod: Pod, principal: AuditPrincipal): Promise<void> {
  const insertSql = `INSERT INTO "${SCHEMA}".ai_tool_calls
       (id, principal_kind, principal_id, transport, tool_name, effect, args_hash, status)
     VALUES ($1, $2, $3, 'mcp', 'incident.list', 'read', 'h', 'executed')`;
  const params = [crypto.randomUUID(), principal.kind, principal.id];

  await pod.pool.query(insertSql, params);
}
59
+
60
// Integration suite: runs only when CHECKSTACK_IT is set. Two independent
// pools ("pods") point at one freshly created schema, and every test asserts
// that the budget is computed from the rows in that shared table.
describe.skipIf(!process.env.CHECKSTACK_IT)(
  "per-principal tool budget (shared Postgres, cross-pod)",
  () => {
    // Stays undefined if setup fails before the pool exists; teardown checks it.
    let admin: Pool | undefined;
    let podA: Pod;
    let podB: Pod;

    beforeAll(async () => {
      const adminPool = new Pool({ connectionString: PG_URL });
      // Publish the pool before the first query so teardown can still close
      // it if schema/table creation fails.
      admin = adminPool;
      await adminPool.query(`CREATE SCHEMA "${SCHEMA}"`);
      // Stand-in `ai_tool_calls` table. checkToolBudget only filters on the
      // principal columns and created_at.
      await adminPool.query(`
        CREATE TABLE "${SCHEMA}".ai_tool_calls (
          id text PRIMARY KEY,
          principal_kind text NOT NULL,
          principal_id text NOT NULL,
          transport text NOT NULL,
          conversation_id text,
          tool_name text NOT NULL,
          effect text NOT NULL,
          args_hash text NOT NULL,
          status text NOT NULL,
          proposal_nonce text,
          proposal_expires_at timestamp,
          applied_by_kind text,
          applied_by_id text,
          result_snapshot jsonb,
          proposed_payload jsonb,
          error text,
          proposed_at timestamp,
          applied_at timestamp,
          created_at timestamp NOT NULL DEFAULT now()
        )
      `);
      podA = makePod();
      podB = makePod();
    });

    afterAll(async () => {
      // Each step runs in a `finally` so one failing close cannot skip the
      // schema drop or leave the admin pool open. Errors still propagate.
      try {
        await podA?.end();
      } finally {
        try {
          await podB?.end();
        } finally {
          if (admin) {
            try {
              await admin.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
            } finally {
              await admin.end();
            }
          }
        }
      }
    });

    it("counts activity from BOTH pods against ONE shared budget (no N x leak)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "budget-u1" };
      const max = 4;

      // Alternate the pod that records each call. Whichever pod later checks
      // the budget sees the COMBINED count, because the counter is the shared
      // table.
      await recordCall(podA, principal);
      await recordCall(podB, principal);
      await recordCall(podA, principal);

      // 3 recorded < 4: a new call is within budget, read from EITHER pod.
      const onA = await checkToolBudget({ db: podA.db, principal, max });
      const onB = await checkToolBudget({ db: podB.db, principal, max });
      expect(onA.used).toBe(3);
      expect(onB.used).toBe(3);
      expect(onA.allowed).toBe(true);
      expect(onB.allowed).toBe(true);

      // One more recorded call (on pod B) reaches the cap.
      await recordCall(podB, principal);

      // Now BOTH pods agree the principal is at/over budget, not 4-per-pod.
      const overA = await checkToolBudget({ db: podA.db, principal, max });
      const overB = await checkToolBudget({ db: podB.db, principal, max });
      expect(overA.used).toBe(4);
      expect(overB.used).toBe(4);
      expect(overA.allowed).toBe(false);
      expect(overB.allowed).toBe(false);
    });

    it("enforceToolBudget throws cross-pod once the shared cap is met", async () => {
      const principal: AuditPrincipal = { kind: "application", id: "budget-app" };
      const max = 2;
      // One call per pod: each pod has written only 1 row itself, yet the
      // shared table holds 2, which meets the cap.
      await recordCall(podA, principal);
      await recordCall(podB, principal);

      // Both pods must refuse, which shows the throw is driven by the SHARED
      // count rather than by what each pod wrote locally.
      await expect(
        enforceToolBudget({ db: podB.db, principal, max }),
      ).rejects.toBeInstanceOf(ToolBudgetExceededError);
      await expect(
        enforceToolBudget({ db: podA.db, principal, max }),
      ).rejects.toBeInstanceOf(ToolBudgetExceededError);
    });

    it("the window slides: only recent calls count (older ones drop out)", async () => {
      const principal: AuditPrincipal = { kind: "user", id: "budget-window" };
      // Insert a row with an OLD created_at (outside the window).
      await podA.pool.query(
        `INSERT INTO "${SCHEMA}".ai_tool_calls
           (id, principal_kind, principal_id, transport, tool_name, effect, args_hash, status, created_at)
         VALUES ($1, 'user', $2, 'mcp', 'incident.list', 'read', 'h', 'executed', now() - interval '10 minutes')`,
        [crypto.randomUUID(), principal.id],
      );
      // A recent one.
      await recordCall(podB, principal);

      // With a 1-minute window the old row is excluded; only the recent counts.
      const result = await checkToolBudget({
        db: podB.db,
        principal,
        max: 60,
        windowMs: 60_000,
      });
      expect(result.used).toBe(1);
    });
  },
);
@@ -0,0 +1,58 @@
1
+ import { describe, expect, test, mock } from "bun:test";
2
+ import {
3
+ checkToolBudget,
4
+ enforceToolBudget,
5
+ ToolBudgetExceededError,
6
+ TOOL_BUDGET_MAX_CALLS,
7
+ } from "./tool-budget";
8
+
9
/**
 * Builds a stand-in database exposing only `select().from().where()`, whose
 * awaited result is one row `{ value }` (the count the budget code reads).
 *
 * @returns the fake `db` (cast to `never`) and the `where` mock, so a test can
 *          assert how often the query was issued.
 */
function dbReturning(value: number) {
  const where = mock(async () => [{ value }]);
  const fromStub = mock(() => ({ where }));
  const selectStub = mock(() => ({ from: fromStub }));
  const db = { select: selectStub } as never;
  return { db, where };
}
16
+
17
describe("per-principal tool budget (§14.5, P3 review item 3)", () => {
  test("under budget is allowed; the count is read over a trailing window", async () => {
    const { db, where } = dbReturning(5);
    const fixedNow = new Date("2026-06-01T00:01:00Z");

    const { used, allowed, windowStart } = await checkToolBudget({
      db,
      principal: { kind: "user", id: "u1" },
      max: 10,
      windowMs: 60_000,
      now: fixedNow,
    });

    expect(used).toBe(5);
    expect(allowed).toBe(true);
    // Rolling window: it starts `windowMs` before `now`.
    expect(windowStart.getTime()).toBe(fixedNow.getTime() - 60_000);
    expect(where).toHaveBeenCalledTimes(1);
  });

  test("at-or-over budget is refused (used >= max)", async () => {
    const { db } = dbReturning(10);

    const outcome = await checkToolBudget({
      db,
      principal: { kind: "user", id: "u1" },
      max: 10,
    });

    expect(outcome.allowed).toBe(false);
  });

  test("enforceToolBudget throws ToolBudgetExceededError over budget", async () => {
    // The count equals the default maximum, so the default budget is used up.
    const { db } = dbReturning(TOOL_BUDGET_MAX_CALLS);

    const pending = enforceToolBudget({
      db,
      principal: { kind: "application", id: "a1" },
    });

    await expect(pending).rejects.toBeInstanceOf(ToolBudgetExceededError);
  });

  test("enforceToolBudget passes under budget", async () => {
    const { db } = dbReturning(0);

    const pending = enforceToolBudget({
      db,
      principal: { kind: "user", id: "u1" },
    });

    await expect(pending).resolves.toBeUndefined();
  });
});
@@ -0,0 +1,107 @@
1
+ import { and, count, eq, gte } from "drizzle-orm";
2
+ import type { SafeDatabase } from "@checkstack/backend-api";
3
+ import * as schema from "../schema";
4
+ import type { AuditPrincipal } from "../propose-apply/store";
5
+
6
+ type AiDatabase = SafeDatabase<typeof schema>; // Database handle typed against this package's `../schema` module.
7
+
8
+ /**
9
+ * Per-principal tool RATE-LIMIT BUDGET (plan §14.5, P3 review item 3).
10
+ *
11
+ * A shared-Postgres ROLLING-WINDOW counter over `ai_tool_calls`: the budget is
12
+ * the number of rows this principal has written in the trailing window. Because
13
+ * the count is read from the SAME shared Postgres table that every pod writes
14
+ * to (state-and-scale §14.5), the cap holds across ALL pods — an in-memory
15
+ * per-pod limiter would let N pods each allow the cap, i.e. N x the intended
16
+ * limit, which a single-process test would never catch. This mirrors the P2 DCR
17
+ * rate-limiter pattern, reusing the existing `ai_tool_calls_principal_created_idx`
18
+ * index (`principalKind, principalId, createdAt`).
19
+ *
20
+ * Every tool invocation across BOTH transports (MCP `tools/call` and the chat
21
+ * agent loop) writes an `ai_tool_calls` row, so the count is an accurate budget
22
+ * of the principal's tool activity regardless of effect. The check runs BEFORE
23
+ * the call is executed/recorded; over-budget callers are refused.
24
+ */
25
+
26
/** Window length used when a caller passes no `windowMs`: one minute, in ms. */
export const TOOL_BUDGET_WINDOW_MS = 60 * 1_000;

/** Budget used when a caller passes no `max`: 60 tool calls per window. */
export const TOOL_BUDGET_MAX_CALLS = 60;
30
+
31
/** Outcome of one tool-budget check for a principal. */
export interface ToolBudgetResult {
  /** Number of tool-call rows counted for the principal inside the window. */
  used: number;
  /** `true` while `used < max`, i.e. one more call may proceed. */
  allowed: boolean;
  /** Start of the trailing window the count was taken over. */
  windowStart: Date;
}
39
+
40
/**
 * Counts the `ai_tool_calls` rows a principal has inside the trailing window
 * and reports whether one more call fits the budget. It never throws for an
 * over-budget principal; turning `allowed: false` into a response is left to
 * the caller.
 *
 * @param db        Database handle the count is read through.
 * @param principal Principal (kind + id) whose rows are counted.
 * @param max       Calls permitted per window; defaults to `TOOL_BUDGET_MAX_CALLS`.
 * @param windowMs  Window length in ms; defaults to `TOOL_BUDGET_WINDOW_MS`.
 * @param now       Reference instant; defaults to the current time.
 * @returns the count, the window start, and `allowed: false` once `used >= max`.
 */
export async function checkToolBudget({
  db,
  principal,
  max = TOOL_BUDGET_MAX_CALLS,
  windowMs = TOOL_BUDGET_WINDOW_MS,
  now = new Date(),
}: {
  db: AiDatabase;
  principal: AuditPrincipal;
  max?: number;
  windowMs?: number;
  now?: Date;
}): Promise<ToolBudgetResult> {
  const windowStart = new Date(now.getTime() - windowMs);

  const [firstRow] = await db
    .select({ value: count() })
    .from(schema.aiToolCalls)
    .where(
      and(
        eq(schema.aiToolCalls.principalKind, principal.kind),
        eq(schema.aiToolCalls.principalId, principal.id),
        gte(schema.aiToolCalls.createdAt, windowStart),
      ),
    );

  // A missing row is treated as "no calls recorded".
  const used = firstRow?.value ?? 0;

  return {
    used,
    allowed: used < max,
    windowStart,
  };
}
75
+
76
/**
 * Raised when a principal has used up its tool-call budget for the current
 * window. Carries the count and the limit so callers can report them.
 */
export class ToolBudgetExceededError extends Error {
  /** Calls already counted in the window when the check failed. */
  public readonly used: number;
  /** The limit that was reached or exceeded. */
  public readonly max: number;

  constructor(used: number, max: number) {
    super(
      `Tool rate-limit budget exceeded: ${used} calls in the current window (max ${max}). Try again shortly.`,
    );
    this.used = used;
    this.max = max;
    this.name = "ToolBudgetExceededError";
  }
}
88
+
89
/**
 * Throwing variant of {@link checkToolBudget}: runs the check with the given
 * arguments and rejects when the principal is not allowed another call.
 *
 * @param args Same options as `checkToolBudget` (`db`, `principal`, and the
 *             optional `max`, `windowMs`, `now`).
 * @throws {ToolBudgetExceededError} carrying the counted calls and the limit
 *         (`args.max`, or `TOOL_BUDGET_MAX_CALLS` when it was not given).
 */
export async function enforceToolBudget(args: {
  db: AiDatabase;
  principal: AuditPrincipal;
  max?: number;
  windowMs?: number;
  now?: Date;
}): Promise<void> {
  const { allowed, used } = await checkToolBudget(args);
  if (allowed) {
    return;
  }

  // Report the limit that was in force for the check.
  const limit = args.max ?? TOOL_BUDGET_MAX_CALLS;
  throw new ToolBudgetExceededError(used, limit);
}