@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/CHANGELOG.md +97 -0
  2. package/drizzle/0000_productive_jackpot.sql +26 -0
  3. package/drizzle/0001_puzzling_purple_man.sql +26 -0
  4. package/drizzle/0002_sparkling_paper_doll.sql +15 -0
  5. package/drizzle/0003_married_senator_kelly.sql +1 -0
  6. package/drizzle/0004_crazy_miek.sql +2 -0
  7. package/drizzle/0005_tearful_randall_flagg.sql +1 -0
  8. package/drizzle/meta/0000_snapshot.json +232 -0
  9. package/drizzle/meta/0001_snapshot.json +434 -0
  10. package/drizzle/meta/0002_snapshot.json +551 -0
  11. package/drizzle/meta/0003_snapshot.json +557 -0
  12. package/drizzle/meta/0004_snapshot.json +573 -0
  13. package/drizzle/meta/0005_snapshot.json +574 -0
  14. package/drizzle/meta/_journal.json +48 -0
  15. package/drizzle.config.ts +7 -0
  16. package/package.json +42 -0
  17. package/src/agent-runner.test.ts +262 -0
  18. package/src/agent-runner.ts +262 -0
  19. package/src/chat/agent-loop.test.ts +119 -0
  20. package/src/chat/agent-loop.ts +73 -0
  21. package/src/chat/auto-apply.test.ts +237 -0
  22. package/src/chat/chat-handler.ts +111 -0
  23. package/src/chat/chat-service.streamturn.test.ts +417 -0
  24. package/src/chat/chat-service.test.ts +250 -0
  25. package/src/chat/chat-service.ts +923 -0
  26. package/src/chat/classifier-service.ts +64 -0
  27. package/src/chat/classifier.logic.test.ts +92 -0
  28. package/src/chat/classifier.logic.ts +71 -0
  29. package/src/chat/conversation-store.it.test.ts +203 -0
  30. package/src/chat/conversation-store.test.ts +248 -0
  31. package/src/chat/conversation-store.ts +237 -0
  32. package/src/chat/decision.logic.test.ts +45 -0
  33. package/src/chat/decision.logic.ts +54 -0
  34. package/src/chat/llm-provider.test.ts +63 -0
  35. package/src/chat/llm-provider.ts +67 -0
  36. package/src/chat/model-error.logic.test.ts +60 -0
  37. package/src/chat/model-error.logic.ts +65 -0
  38. package/src/chat/normalize-messages.logic.test.ts +101 -0
  39. package/src/chat/normalize-messages.logic.ts +65 -0
  40. package/src/chat/permission-mode.logic.test.ts +70 -0
  41. package/src/chat/permission-mode.logic.ts +45 -0
  42. package/src/chat/read-invoker.ts +72 -0
  43. package/src/chat/replay.test.ts +174 -0
  44. package/src/chat/scrub-content.test.ts +183 -0
  45. package/src/chat/scrub-content.ts +154 -0
  46. package/src/chat/sdk-tools.test.ts +168 -0
  47. package/src/chat/sdk-tools.ts +181 -0
  48. package/src/chat/title-service.test.ts +146 -0
  49. package/src/chat/title-service.ts +111 -0
  50. package/src/chat/title.logic.test.ts +98 -0
  51. package/src/chat/title.logic.ts +102 -0
  52. package/src/extension-points.ts +41 -0
  53. package/src/generated/docs-index.ts +3020 -0
  54. package/src/hardening/handler-authz.test.ts +282 -0
  55. package/src/hardening/no-secret-leak.test.ts +303 -0
  56. package/src/hooks.ts +33 -0
  57. package/src/index.ts +542 -0
  58. package/src/mcp/connection-registry.test.ts +25 -0
  59. package/src/mcp/connection-registry.ts +54 -0
  60. package/src/mcp/mcp-conformance.it.test.ts +128 -0
  61. package/src/mcp/server.test.ts +285 -0
  62. package/src/mcp/server.ts +300 -0
  63. package/src/mcp/tool-invoker.ts +65 -0
  64. package/src/openai-provider.test.ts +64 -0
  65. package/src/openai-provider.ts +146 -0
  66. package/src/projection.test.ts +97 -0
  67. package/src/projection.ts +132 -0
  68. package/src/propose-apply/args-hash.test.ts +26 -0
  69. package/src/propose-apply/args-hash.ts +30 -0
  70. package/src/propose-apply/service.test.ts +423 -0
  71. package/src/propose-apply/service.ts +419 -0
  72. package/src/propose-apply/store.test.ts +136 -0
  73. package/src/propose-apply/store.ts +224 -0
  74. package/src/propose-apply/token.test.ts +52 -0
  75. package/src/propose-apply/token.ts +71 -0
  76. package/src/rate-limit/spend-ledger.it.test.ts +224 -0
  77. package/src/rate-limit/spend-ledger.test.ts +176 -0
  78. package/src/rate-limit/spend-ledger.ts +162 -0
  79. package/src/rate-limit/tool-budget.it.test.ts +173 -0
  80. package/src/rate-limit/tool-budget.test.ts +58 -0
  81. package/src/rate-limit/tool-budget.ts +107 -0
  82. package/src/registry-wiring.test.ts +131 -0
  83. package/src/registry-wiring.ts +68 -0
  84. package/src/resolver.test.ts +156 -0
  85. package/src/resolver.ts +78 -0
  86. package/src/router.test.ts +78 -0
  87. package/src/router.ts +345 -0
  88. package/src/schema.ts +284 -0
  89. package/src/serializer.test.ts +88 -0
  90. package/src/serializer.ts +42 -0
  91. package/src/tool-registry.ts +58 -0
  92. package/src/tools/composite-tools.ts +24 -0
  93. package/src/tools/docs-tools.test.ts +150 -0
  94. package/src/tools/docs-tools.ts +115 -0
  95. package/src/tools/probe-url.test.ts +51 -0
  96. package/src/tools/probe-url.ts +146 -0
  97. package/src/tools/rank-docs.test.ts +153 -0
  98. package/src/tools/rank-docs.ts +209 -0
  99. package/src/tools/script-context-extract.test.ts +93 -0
  100. package/src/tools/script-context-extract.ts +283 -0
  101. package/src/tools/ssrf-guard.test.ts +69 -0
  102. package/src/tools/ssrf-guard.ts +108 -0
  103. package/src/tools/tool-set.e2e.test.ts +64 -0
  104. package/src/user-rpc-client.test.ts +45 -0
  105. package/src/user-rpc-client.ts +60 -0
  106. package/tsconfig.json +26 -0
@@ -0,0 +1,183 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ REDACTED,
4
+ scrubContent,
5
+ scrubModelMessages,
6
+ } from "./scrub-content";
7
+
8
+ /**
9
+ * NO-SECRET-LEAK content scrubbing (Phase 6) — the canary regression suite.
10
+ *
11
+ * The architectural guarantee "a credential can NEVER be persisted into
12
+ * `ai_messages.content`" is now an ENFORCED invariant: `scrubContent` runs on the
13
+ * message write path. These tests inject credential-shaped keys/values and assert
14
+ * they are stripped, while proving the scrub does NOT blanket-redact innocent
15
+ * user prose (the guarantee is "a credential cannot ride along", not "no string
16
+ * is ever touched").
17
+ */
18
+
19
+ const SECRET = "sk-canary-DO-NOT-LEAK-0123456789abcdef";
20
+
21
+ function assertNoSecret(value: unknown, where: string): void {
22
+ expect(
23
+ JSON.stringify(value) ?? "",
24
+ `${where} must not contain the secret`,
25
+ ).not.toContain(SECRET);
26
+ }
27
+
28
+ describe("scrubContent: credential-shaped KEYS are redacted", () => {
29
+ test("apiKey at top level is stripped", () => {
30
+ const out = scrubContent({ text: "hi", apiKey: SECRET });
31
+ expect(out.apiKey).toBe(REDACTED);
32
+ expect(out.text).toBe("hi");
33
+ assertNoSecret(out, "scrubbed content");
34
+ });
35
+
36
+ test("nested secret keys (api_key, authorization, password, x-secret) are stripped", () => {
37
+ const out = scrubContent({
38
+ result: {
39
+ headers: { authorization: `Bearer ${SECRET}` },
40
+ config: { api_key: SECRET, password: SECRET, "x-secret": SECRET },
41
+ },
42
+ });
43
+ const result = out.result as Record<string, unknown>;
44
+ const headers = result.headers as Record<string, unknown>;
45
+ const config = result.config as Record<string, unknown>;
46
+ expect(headers.authorization).toBe(REDACTED);
47
+ expect(config.api_key).toBe(REDACTED);
48
+ expect(config.password).toBe(REDACTED);
49
+ expect(config["x-secret"]).toBe(REDACTED);
50
+ assertNoSecret(out, "deeply nested scrubbed content");
51
+ });
52
+
53
+ test("secret keys inside arrays are stripped", () => {
54
+ const out = scrubContent({
55
+ items: [{ name: "a", clientSecret: SECRET }, { name: "b" }],
56
+ });
57
+ const items = out.items as Array<Record<string, unknown>>;
58
+ expect(items[0]?.clientSecret).toBe(REDACTED);
59
+ expect(items[0]?.name).toBe("a");
60
+ expect(items[1]?.name).toBe("b");
61
+ assertNoSecret(out, "array scrubbed content");
62
+ });
63
+ });
64
+
65
+ describe("scrubContent: credential-shaped VALUES are redacted regardless of key", () => {
66
+ test("an sk-... value under an innocent key is still stripped", () => {
67
+ // Worst case: a buggy tool result puts the key under a harmless field name.
68
+ const out = scrubContent({ note: SECRET, freeform: { blob: SECRET } });
69
+ expect(out.note).toBe(REDACTED);
70
+ expect((out.freeform as Record<string, unknown>).blob).toBe(REDACTED);
71
+ assertNoSecret(out, "value-pattern scrubbed content");
72
+ });
73
+
74
+ test("a Bearer token value is stripped", () => {
75
+ const out = scrubContent({
76
+ msg: "use Bearer abcdef1234567890ABCDEF for auth",
77
+ });
78
+ expect(out.msg).toBe(REDACTED);
79
+ assertNoSecret(out, "bearer-value scrubbed content");
80
+ });
81
+ });
82
+
83
+ describe("scrubContent: innocent prose is preserved (no blanket scrubbing)", () => {
84
+ test("ordinary chat text mentioning 'token' or 'password' is NOT redacted", () => {
85
+ const out = scrubContent({
86
+ text: "I forgot my password, can you reset the auth token flow?",
87
+ });
88
+ expect(out.text).toBe(
89
+ "I forgot my password, can you reset the auth token flow?",
90
+ );
91
+ });
92
+
93
+ test("a normal incident title and numbers survive untouched", () => {
94
+ const out = scrubContent({
95
+ text: "deploy at 14:02 caused 500s on /api/checkout",
96
+ count: 42,
97
+ ok: true,
98
+ });
99
+ expect(out.text).toBe("deploy at 14:02 caused 500s on /api/checkout");
100
+ expect(out.count).toBe(42);
101
+ expect(out.ok).toBe(true);
102
+ });
103
+
104
+ test("a token-shaped slug embedded in a URL path is PRESERVED (no over-redaction)", () => {
105
+ // The `sk-...` shape appears inside a longer URL path segment, NOT as a
106
+ // standalone credential. It must survive — the value pattern only fires on a
107
+ // standalone/boundary-delimited token.
108
+ const url = "https://host/api/sk-checkout-flow-12345678/status";
109
+ const out = scrubContent({ link: url, text: `see ${url} for details` });
110
+ expect(out.link).toBe(url);
111
+ expect(out.text).toBe(`see ${url} for details`);
112
+ });
113
+
114
+ test("a standalone sk-... value is STILL redacted (even after tightening the boundary)", () => {
115
+ // Whole-value, whitespace-delimited, and quote-delimited standalone tokens.
116
+ expect(scrubContent({ v: SECRET }).v).toBe(REDACTED);
117
+ expect(scrubContent({ v: `key is ${SECRET} ok` }).v).toBe(REDACTED);
118
+ expect(scrubContent({ v: `"${SECRET}"` }).v).toBe(REDACTED);
119
+ });
120
+ });
121
+
122
+ describe("scrubModelMessages: replay history is scrubbed too", () => {
123
+ test("a tool-result part carrying a credential is redacted before persist", () => {
124
+ // The canonical AI-SDK ResponseMessage[] shape (a tool message with a
125
+ // tool-result part). A credential smuggled into the output must not persist.
126
+ const messages = [
127
+ {
128
+ role: "assistant",
129
+ content: [
130
+ { type: "text", text: "calling tool" },
131
+ {
132
+ type: "tool-call",
133
+ toolCallId: "tc1",
134
+ toolName: "incident.list",
135
+ input: { status: "open" },
136
+ },
137
+ ],
138
+ },
139
+ {
140
+ role: "tool",
141
+ content: [
142
+ {
143
+ type: "tool-result",
144
+ toolCallId: "tc1",
145
+ toolName: "incident.list",
146
+ output: {
147
+ type: "json",
148
+ value: { rows: [{ id: 1 }], apiKey: SECRET, leaked: SECRET },
149
+ },
150
+ },
151
+ ],
152
+ },
153
+ ];
154
+ const scrubbed = scrubModelMessages(messages);
155
+ assertNoSecret(scrubbed, "scrubbed replay messages");
156
+ // The non-secret structure survives so replay still works.
157
+ const toolMsg = scrubbed[1];
158
+ const part = (toolMsg.content as Array<Record<string, unknown>>)[0];
159
+ const value = (part.output as Record<string, unknown>).value as Record<
160
+ string,
161
+ unknown
162
+ >;
163
+ expect(value.apiKey).toBe(REDACTED);
164
+ expect(value.leaked).toBe(REDACTED);
165
+ expect(value.rows).toEqual([{ id: 1 }]);
166
+ });
167
+ });
168
+
169
+ describe("scrubContent: the guard has teeth (idempotent + cycle-safe)", () => {
170
+ test("re-scrubbing already-redacted content is a no-op", () => {
171
+ const once = scrubContent({ apiKey: SECRET, text: "hi" });
172
+ const twice = scrubContent(once);
173
+ expect(twice).toEqual(once);
174
+ });
175
+
176
+ test("a cyclic object does not hang and is redacted at the cycle", () => {
177
+ const cyclic: Record<string, unknown> = { text: "hi" };
178
+ cyclic.self = cyclic;
179
+ const out = scrubContent(cyclic);
180
+ expect(out.text).toBe("hi");
181
+ expect(out.self).toBe(REDACTED);
182
+ });
183
+ });
@@ -0,0 +1,154 @@
/**
 * NO-SECRET-LEAK content scrubber (Phase 6) — makes the previously-architectural
 * guarantee an ENFORCED, tested invariant on the message write path.
 *
 * `ai_messages.content` (and the replay `modelMessages`) is a free-form JSON bag.
 * Architecturally, a provider credential should never reach it — the model only
 * ever sees tool RESULTS (which source procedures redact) and never the
 * integration apiKey. But "should never" is not "cannot": nothing structurally
 * stopped a buggy tool result, a future feature, or a malicious tool from
 * persisting a secret into the bag. This scrubber closes that gap by running on
 * EVERY message write (`appendMessage`), so a credential can never be persisted
 * into message content even if upstream code is wrong.
 *
 * The scrub is deliberately conservative — it redacts:
 *   - any value under a SECRET-SHAPED KEY (apiKey, api_key, authorization,
 *     auth-token, access_token, password, secret, x-secret, bearer, ...),
 *     recursively; and
 *   - any string VALUE that matches a high-confidence credential pattern
 *     (OpenAI-style `sk-...`, `Bearer <token>` headers), regardless of its key.
 *
 * A key named just `token` is NOT in the key list: only the qualified forms
 * (`auth-token`, `access_token`, `refresh_token`, ...) are matched.
 *
 * It does NOT blanket-scrub arbitrary strings: a user's chat text or an
 * incident title that merely contains the word "token" is preserved. The
 * guarantee is "a credential cannot be persisted", not "no string is ever
 * touched".
 */

/** The sentinel a redacted value is replaced with. */
export const REDACTED = "[REDACTED]";

/**
 * Key names whose VALUE is treated as a credential and redacted wholesale
 * (case-insensitive, matched as a substring of the normalized key). These mirror
 * the `x-secret` field names the integration platform stores in the Vault.
 *
 * Entries are kept in their human-readable spellings; matching is done against
 * the separator-free forms in `NORMALIZED_SECRET_KEY_PATTERNS`.
 */
const SECRET_KEY_PATTERNS: readonly string[] = [
  "apikey",
  "api-key",
  "api_key",
  "secret",
  "password",
  "passwd",
  "authorization",
  "auth-token",
  "authtoken",
  "access-token",
  "accesstoken",
  "access_token",
  "refresh-token",
  "refreshtoken",
  "refresh_token",
  "bearer",
  "x-api-key",
  "private-key",
  "privatekey",
  "private_key",
  "client-secret",
  "clientsecret",
  "client_secret",
  "credential",
];

/** Characters ignored when comparing key names: whitespace, `_` and `-`. */
const KEY_SEPARATORS = /[\s_-]+/g;

/**
 * Lower-case a key and drop its separators so `auth_token`, `Auth-Token`,
 * `auth token` and `authToken` all normalize to `authtoken`.
 */
function normalizeKey(key: string): string {
  return key.toLowerCase().replace(KEY_SEPARATORS, "");
}

/**
 * Separator-free, de-duplicated forms of `SECRET_KEY_PATTERNS`. Comparing in
 * this space means a separator variant that is not spelled out in the list
 * (e.g. `auth_token`, `x_api_key`) is still recognized.
 */
const NORMALIZED_SECRET_KEY_PATTERNS: readonly string[] = Array.from(
  new Set(SECRET_KEY_PATTERNS.map((pattern) => normalizeKey(pattern))),
);

/**
 * A standalone-token LEADING delimiter: string start, whitespace, quotes,
 * comma/semicolon, an assignment/label character (`=` / `:`, so
 * `OPENAI_API_KEY=sk-...` is caught) or an opening bracket. Deliberately
 * EXCLUDES `/` and other path characters so a token-shaped slug embedded in a
 * URL path (e.g. `https://host/api/sk-checkout-flow-12345678`) is NOT treated
 * as a credential — the key-based redaction (apiKey / authorization /
 * x-secret / ...) remains the primary defense for structured fields, so the
 * value pattern can safely err toward fewer false positives in free text.
 */
const TOKEN_BOUNDARY = String.raw`(?:^|[\s"'\`,;=:(\[{<])`;

/**
 * A standalone-token TRAILING delimiter: string end, whitespace, quotes,
 * comma/semicolon, a closing bracket, or sentence punctuation (`.`, `!`, `?`)
 * that is itself followed by whitespace or string end. A `.` followed by more
 * text (a hostname or file extension) is NOT a delimiter, and neither is `/`.
 */
const TOKEN_BOUNDARY_END = String.raw`(?:$|[\s"'\`,;)\]}>]|[.!?](?:\s|$))`;

/**
 * Value patterns redacted regardless of their key (high-confidence credential
 * shapes only, to avoid scrubbing innocent prose):
 *   - OpenAI-style keys: `sk-...`, `sk-proj-...`, `rk-...` (>= 16 trailing chars)
 *     ONLY when they are a standalone, boundary-delimited token (or the whole
 *     value) — never when embedded inside a longer URL/path.
 *   - An `Authorization: Bearer <token>` header value (the `Bearer ` prefix is
 *     itself a strong signal, so no extra boundary constraint is needed).
 *
 * None of these carry the `g`/`y` flag, so `RegExp.prototype.test` is stateless.
 */
const SECRET_VALUE_PATTERNS: readonly RegExp[] = [
  new RegExp(
    `${TOKEN_BOUNDARY}(?:sk|rk)-(?:proj-)?[A-Za-z0-9_-]{16,}${TOKEN_BOUNDARY_END}`,
  ),
  /\bBearer\s+[A-Za-z0-9._~+/-]{12,}=*\b/i,
];

/**
 * True if a key name looks like it holds a credential.
 *
 * The key is normalized (lower-cased, whitespace / `_` / `-` removed) and then
 * checked for any normalized secret pattern as a substring.
 */
function isSecretKey(key: string): boolean {
  const normalized = normalizeKey(key);
  return NORMALIZED_SECRET_KEY_PATTERNS.some((pattern) =>
    normalized.includes(pattern),
  );
}

/** True if a string value matches a high-confidence credential pattern. */
function valueLooksSecret(value: string): boolean {
  return SECRET_VALUE_PATTERNS.some((re) => re.test(value));
}

/**
 * Recursively scrub a JSON-serializable value. The input is never mutated; a
 * fresh structure is returned.
 *
 * `ancestors` holds only the objects on the CURRENT recursion path. An object
 * that is its own ancestor (a true cycle) is replaced by `REDACTED` so the walk
 * terminates; an object that is merely referenced from several places (a
 * shared, acyclic reference) is scrubbed normally at every occurrence instead
 * of being dropped on the second visit.
 *
 * @param value     The value to scrub (string, primitive, array or object).
 * @param ancestors Objects currently being walked above `value`.
 * @returns The scrubbed copy (primitives and clean strings are returned as-is).
 */
function scrubValue(value: unknown, ancestors: WeakSet<object>): unknown {
  if (typeof value === "string") {
    return valueLooksSecret(value) ? REDACTED : value;
  }
  if (value === null || typeof value !== "object") {
    return value;
  }
  if (ancestors.has(value)) return REDACTED;

  ancestors.add(value);
  try {
    if (Array.isArray(value)) {
      return value.map((item) => scrubValue(item, ancestors));
    }

    const out: Record<string, unknown> = {};
    for (const [key, child] of Object.entries(
      value as Record<string, unknown>,
    )) {
      // A secret-shaped key: redact the whole subtree, never persist its value.
      out[key] = isSecretKey(key) ? REDACTED : scrubValue(child, ancestors);
    }
    return out;
  } finally {
    // Leaving this node: it is no longer an ancestor, so a sibling that
    // references the same object must not be mistaken for a cycle.
    ancestors.delete(value);
  }
}

/**
 * Scrub a message-content bag before it is persisted. Returns a new record with
 * every credential-shaped key/value redacted. Safe to call on already-clean
 * content (idempotent — re-running over `[REDACTED]` is a no-op).
 *
 * @param content The message content bag; it is not mutated.
 * @returns A scrubbed copy of `content`.
 */
export function scrubContent(
  content: Record<string, unknown>,
): Record<string, unknown> {
  return scrubValue(content, new WeakSet()) as Record<string, unknown>;
}

/**
 * Scrub the replay `modelMessages` array (AI-SDK `ResponseMessage[]`). Each
 * element is a JSON object; the same recursive scrub applies so a tool-result
 * part can never carry a credential into the replay history. Every message is
 * walked with its own fresh ancestor set.
 *
 * @param messages The replay messages; neither the array nor its elements are mutated.
 * @returns A new array of scrubbed message copies, in the same order.
 */
export function scrubModelMessages(
  messages: Array<Record<string, unknown>>,
): Array<Record<string, unknown>> {
  return messages.map(
    (message) => scrubValue(message, new WeakSet()) as Record<string, unknown>,
  );
}
@@ -0,0 +1,168 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { z } from "zod";
3
+ import type { AuthUser } from "@checkstack/backend-api";
4
+ import type { AiPermissionMode } from "@checkstack/ai-common";
5
+ import type { RegisteredAiTool } from "../tool-registry";
6
+ import {
7
+ buildAgentSdkTools,
8
+ type AutoAppliedResult,
9
+ type ConfirmCardResult,
10
+ } from "./sdk-tools";
11
+
/**
 * Test fixture: builds a minimal registered tool with the given name and
 * effect. Non-read tools additionally get a stub `dryRun`; every tool's
 * `execute` resolves to `{ ok: true }`.
 */
function tool(
  name: string,
  effect: RegisteredAiTool["effect"],
): RegisteredAiTool {
  // Only mutate/destructive fixtures carry a dry-run stub.
  const dryRunPart =
    effect === "read"
      ? {}
      : { dryRun: async () => ({ summary: "s", payload: {} }) };

  return {
    name,
    description: name,
    effect,
    input: z.object({ value: z.string() }),
    requiredAccessRules: [],
    ...dryRunPart,
    execute: () => Promise.resolve({ ok: true }),
  };
}
28
+
29
+ const principal: AuthUser = { type: "user", id: "u1", accessRules: ["*"] };
30
+
/**
 * Test fixture: a set of recording callbacks. Each callback appends its own
 * name to the shared `calls` log before returning a canned result, so a test
 * can assert exactly which callbacks ran and in what order.
 */
function callbacks() {
  const calls: string[] = [];

  const enforceBudget = async () => {
    calls.push("budget");
  };

  const runRead = async () => {
    calls.push("runRead");
    return { rows: [] };
  };

  // Canned confirm card echoing the proposed tool's name and effect.
  const propose = async ({ tool: t }: { tool: RegisteredAiTool }) => {
    calls.push("propose");
    return {
      __confirm: true,
      toolName: t.name,
      effect: t.effect as "mutate" | "destructive",
      summary: "would do it",
      token: "propose:abc.def",
      payload: { value: "x" },
      expiresAt: new Date().toISOString(),
      note: "awaiting approval",
    } satisfies ConfirmCardResult;
  };

  // Canned "applied" result echoing the tool's name.
  const autoApply = async ({ tool: t }: { tool: RegisteredAiTool }) => {
    calls.push("autoApply");
    return {
      __applied: true,
      toolName: t.name,
      effect: "mutate",
      summary: "did it",
      toolCallId: "tc-1",
      result: { created: true },
      note: "applied",
    } satisfies AutoAppliedResult;
  };

  return { calls, enforceBudget, runRead, propose, autoApply };
}
69
+
/**
 * Test fixture: registers a single tool named `t.<effect>` through
 * `buildAgentSdkTools` and returns that tool's SDK `execute` function
 * (or `undefined` if it is missing).
 */
function build({
  effect,
  mode,
  cb,
}: {
  effect: RegisteredAiTool["effect"];
  mode: AiPermissionMode;
  cb: ReturnType<typeof callbacks>;
}) {
  const name = `t.${effect}`;
  const registered = tool(name, effect);
  const sdk = buildAgentSdkTools({
    tools: [registered],
    principal,
    mode,
    callbacks: cb,
  });
  return sdk[name]?.execute;
}
88
+
89
+ describe("buildAgentSdkTools — 3-tier gating", () => {
90
+ test("read tool ALWAYS auto-runs (approve mode)", async () => {
91
+ const cb = callbacks();
92
+ const execute = build({ effect: "read", mode: "approve", cb });
93
+ const result = await execute?.(
94
+ { value: "x" },
95
+ { toolCallId: "t1", messages: [] },
96
+ );
97
+ expect(result).toEqual({ rows: [] });
98
+ expect(cb.calls).toEqual(["budget", "runRead"]);
99
+ });
100
+
101
+ test("read tool ALWAYS auto-runs (auto mode) — mode never gates reads", async () => {
102
+ const cb = callbacks();
103
+ const execute = build({ effect: "read", mode: "auto", cb });
104
+ await execute?.({ value: "x" }, { toolCallId: "t1", messages: [] });
105
+ expect(cb.calls).toEqual(["budget", "runRead"]);
106
+ });
107
+
108
+ test("mutate tool in APPROVE mode -> propose (confirm card, never commits)", async () => {
109
+ const cb = callbacks();
110
+ const execute = build({ effect: "mutate", mode: "approve", cb });
111
+ const result = (await execute?.(
112
+ { value: "x" },
113
+ { toolCallId: "t1", messages: [] },
114
+ )) as ConfirmCardResult;
115
+ expect(result.__confirm).toBe(true);
116
+ expect(result.token).toBe("propose:abc.def");
117
+ expect(cb.calls).toEqual(["budget", "propose"]);
118
+ });
119
+
120
+ test("mutate tool in AUTO mode -> auto-applies server-side (no confirm card)", async () => {
121
+ const cb = callbacks();
122
+ const execute = build({ effect: "mutate", mode: "auto", cb });
123
+ const result = (await execute?.(
124
+ { value: "x" },
125
+ { toolCallId: "t1", messages: [] },
126
+ )) as AutoAppliedResult;
127
+ expect(result.__applied).toBe(true);
128
+ expect(result.toolCallId).toBe("tc-1");
129
+ // It applied; it did NOT return a confirm card.
130
+ expect(cb.calls).toEqual(["budget", "autoApply"]);
131
+ });
132
+
133
+ test("destructive tool in APPROVE mode -> propose (confirm card)", async () => {
134
+ const cb = callbacks();
135
+ const execute = build({ effect: "destructive", mode: "approve", cb });
136
+ const result = (await execute?.(
137
+ { value: "x" },
138
+ { toolCallId: "t1", messages: [] },
139
+ )) as ConfirmCardResult;
140
+ expect(result.effect).toBe("destructive");
141
+ expect(result.__confirm).toBe(true);
142
+ expect(cb.calls).toEqual(["budget", "propose"]);
143
+ });
144
+
145
+ test("SECURITY INVARIANT: destructive tool in AUTO mode STILL proposes (never auto-applies)", async () => {
146
+ const cb = callbacks();
147
+ const execute = build({ effect: "destructive", mode: "auto", cb });
148
+ const result = (await execute?.(
149
+ { value: "x" },
150
+ { toolCallId: "t1", messages: [] },
151
+ )) as ConfirmCardResult;
152
+ // AUTO mode does NOT change a destructive tool's disposition: still a card.
153
+ expect(result.__confirm).toBe(true);
154
+ expect(result.effect).toBe("destructive");
155
+ // autoApply was NEVER called for the destructive tool.
156
+ expect(cb.calls).toEqual(["budget", "propose"]);
157
+ });
158
+
159
+ test("the model is offered exactly the tools passed in (resolver-allowed only)", () => {
160
+ const sdk = buildAgentSdkTools({
161
+ tools: [tool("incident.list", "read")],
162
+ principal,
163
+ mode: "approve",
164
+ callbacks: callbacks(),
165
+ });
166
+ expect(Object.keys(sdk)).toEqual(["incident.list"]);
167
+ });
168
+ });
@@ -0,0 +1,181 @@
1
+ import { tool as aiTool, type Tool } from "ai";
2
+ import type { AuthUser } from "@checkstack/backend-api";
3
+ import type { AiPermissionMode, AiFieldDiff } from "@checkstack/ai-common";
4
+ import type { RegisteredAiTool } from "../tool-registry";
5
+ import { decideToolDisposition } from "./permission-mode.logic";
6
+
7
+ /**
8
+ * Result a mutate/destructive tool's `execute` returns to the model in APPROVE
9
+ * mode (and for ALL destructive tools): it does NOT commit. It runs the propose
10
+ * dry-run and returns a CONFIRM CARD the human must approve via `applyTool`. The
11
+ * model can never silently mutate.
12
+ */
13
+ export interface ConfirmCardResult {
14
+ __confirm: true;
15
+ toolName: string;
16
+ effect: "mutate" | "destructive";
17
+ summary: string;
18
+ /** Opaque single-use proposal token consumed by `applyTool`. */
19
+ token: string;
20
+ /** Validated, ready-to-apply payload rendered on the card. */
21
+ payload: unknown;
22
+ /** Optional before -> after diff for an update, rendered on the card. */
23
+ diff?: AiFieldDiff[];
24
+ expiresAt: string;
25
+ /**
26
+ * MODEL-FACING guidance (ignored by the UI): tells the agent the proposal was
27
+ * created and shown, so it STOPS instead of re-proposing the same change. The
28
+ * dispatcher saw the model fire the same propose three times in a row.
29
+ */
30
+ note: string;
31
+ }
32
+
33
+ /**
34
+ * Returned to the model when it proposes/auto-applies the SAME tool with the
35
+ * SAME arguments again within ONE turn. Carries no `__confirm`/`__applied`, so
36
+ * the UI renders NO extra card; the model just gets a clear "already handled,
37
+ * stop" signal. Guards against the model spamming duplicate proposals/tokens
38
+ * because it thought the first call did not go through.
39
+ */
40
+ export interface DuplicateToolCallResult {
41
+ __duplicate: true;
42
+ toolName: string;
43
+ note: string;
44
+ }
45
+
46
+ /**
47
+ * Result a `mutate` tool's `execute` returns to the model in AUTO mode: the
48
+ * proposal was applied SERVER-SIDE immediately (no human click), under the SAME
49
+ * `isAllowed` re-check + audit row the human `applyTool` path uses. Surfaced to
50
+ * the model so it knows the change took effect. ONLY `mutate` tools reach this
51
+ * (destructive tools always return a `ConfirmCardResult`).
52
+ */
53
+ export interface AutoAppliedResult {
54
+ __applied: true;
55
+ toolName: string;
56
+ effect: "mutate";
57
+ summary: string;
58
+ /** The audit row id the apply produced. */
59
+ toolCallId: string;
60
+ /** The tool's `execute` result (e.g. the created automation). */
61
+ result: unknown;
62
+ /** Optional before -> after diff for an update, shown on the applied card. */
63
+ diff?: AiFieldDiff[];
64
+ /** MODEL-FACING guidance (ignored by the UI); see {@link ConfirmCardResult.note}. */
65
+ note: string;
66
+ }
67
+
68
+ /** Callbacks the SDK tool executors delegate to (kept injectable for testing). */
69
+ export interface AgentToolCallbacks {
70
+ /** Enforce the per-principal tool budget; throws when over budget. */
71
+ enforceBudget(principal: AuthUser): Promise<void>;
72
+ /** Run a read tool (re-checks authz, records audit). Returns the result. */
73
+ runRead(args: {
74
+ principal: AuthUser;
75
+ tool: RegisteredAiTool;
76
+ input: unknown;
77
+ }): Promise<unknown>;
78
+ /**
79
+ * Propose a mutate/destructive tool; returns a confirm card (no commit), or a
80
+ * {@link DuplicateToolCallResult} if the SAME tool+args was already proposed
81
+ * this turn (so the model cannot spam duplicate cards/tokens).
82
+ */
83
+ propose(args: {
84
+ principal: AuthUser;
85
+ tool: RegisteredAiTool;
86
+ input: unknown;
87
+ }): Promise<ConfirmCardResult | DuplicateToolCallResult>;
88
+ /**
89
+ * AUTO-mode-only: propose AND apply a `mutate` tool SERVER-SIDE in one shot.
90
+ * Runs through the SAME propose/apply service (same `isAllowed` re-check, same
91
+ * `ai_tool_calls` audit rows) the human `applyTool` path uses - never a weaker
92
+ * path. Reached ONLY for `mutate` tools; destructive tools never call this.
93
+ */
94
+ autoApply(args: {
95
+ principal: AuthUser;
96
+ tool: RegisteredAiTool;
97
+ input: unknown;
98
+ }): Promise<AutoAppliedResult | DuplicateToolCallResult>;
99
+ }
100
+
/**
 * Turn the resolver-allowed Checkstack tools into Vercel-AI-SDK `tool()`
 * definitions for the agent loop, keyed by tool name.
 *
 * Each tool's behaviour is fixed at build time from the disposition returned by
 * `decideToolDisposition({ effect, mode })`:
 *
 *   - `"auto-run"`   — `execute` delegates to `callbacks.runRead`.
 *   - `"auto-apply"` — `execute` delegates to `callbacks.autoApply`; the
 *     description is suffixed to say the change is applied immediately.
 *   - anything else (`"propose"`) — `execute` delegates to `callbacks.propose`
 *     and returns its confirm card; the description is suffixed to say human
 *     confirmation is required.
 *
 * Every `execute` first awaits `callbacks.enforceBudget(principal)`, so a
 * budget failure rejects before the delegated callback is invoked.
 *
 * @param tools     The tools to expose; one SDK tool is produced per entry.
 * @param principal The caller, forwarded to every callback.
 * @param mode      The conversation's permission mode, passed to
 *                  `decideToolDisposition`.
 * @param callbacks The budget / read / propose / auto-apply delegates.
 * @returns A record mapping each tool name to its SDK tool definition.
 */
export function buildAgentSdkTools({
  tools,
  principal,
  mode,
  callbacks,
}: {
  tools: RegisteredAiTool[];
  principal: AuthUser;
  /** The conversation's permission mode. Governs the `mutate` branch only. */
  mode: AiPermissionMode;
  callbacks: AgentToolCallbacks;
}): Record<string, Tool> {
  const sdkTools: Record<string, Tool> = {};

  for (const registered of tools) {
    const disposition = decideToolDisposition({
      effect: registered.effect,
      mode,
    });

    switch (disposition) {
      case "auto-run": {
        // Reads run straight away (after the budget check).
        sdkTools[registered.name] = aiTool({
          description: registered.description,
          inputSchema: registered.input,
          execute: async (input: unknown) => {
            await callbacks.enforceBudget(principal);
            return callbacks.runRead({ principal, tool: registered, input });
          },
        });
        break;
      }

      case "auto-apply": {
        // Applied server-side through the injected `autoApply` callback.
        sdkTools[registered.name] = aiTool({
          description: `${registered.description} (auto-applied immediately in this conversation's auto mode)`,
          inputSchema: registered.input,
          execute: async (
            input: unknown,
          ): Promise<AutoAppliedResult | DuplicateToolCallResult> => {
            await callbacks.enforceBudget(principal);
            return callbacks.autoApply({ principal, tool: registered, input });
          },
        });
        break;
      }

      default: {
        // "propose": hand back the confirm card from the `propose` callback.
        sdkTools[registered.name] = aiTool({
          description: `${registered.description} (requires human confirmation before it takes effect)`,
          inputSchema: registered.input,
          execute: async (
            input: unknown,
          ): Promise<ConfirmCardResult | DuplicateToolCallResult> => {
            await callbacks.enforceBudget(principal);
            return callbacks.propose({ principal, tool: registered, input });
          },
        });
      }
    }
  }

  return sdkTools;
}