@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/CHANGELOG.md +97 -0
  2. package/drizzle/0000_productive_jackpot.sql +26 -0
  3. package/drizzle/0001_puzzling_purple_man.sql +26 -0
  4. package/drizzle/0002_sparkling_paper_doll.sql +15 -0
  5. package/drizzle/0003_married_senator_kelly.sql +1 -0
  6. package/drizzle/0004_crazy_miek.sql +2 -0
  7. package/drizzle/0005_tearful_randall_flagg.sql +1 -0
  8. package/drizzle/meta/0000_snapshot.json +232 -0
  9. package/drizzle/meta/0001_snapshot.json +434 -0
  10. package/drizzle/meta/0002_snapshot.json +551 -0
  11. package/drizzle/meta/0003_snapshot.json +557 -0
  12. package/drizzle/meta/0004_snapshot.json +573 -0
  13. package/drizzle/meta/0005_snapshot.json +574 -0
  14. package/drizzle/meta/_journal.json +48 -0
  15. package/drizzle.config.ts +7 -0
  16. package/package.json +42 -0
  17. package/src/agent-runner.test.ts +262 -0
  18. package/src/agent-runner.ts +262 -0
  19. package/src/chat/agent-loop.test.ts +119 -0
  20. package/src/chat/agent-loop.ts +73 -0
  21. package/src/chat/auto-apply.test.ts +237 -0
  22. package/src/chat/chat-handler.ts +111 -0
  23. package/src/chat/chat-service.streamturn.test.ts +417 -0
  24. package/src/chat/chat-service.test.ts +250 -0
  25. package/src/chat/chat-service.ts +923 -0
  26. package/src/chat/classifier-service.ts +64 -0
  27. package/src/chat/classifier.logic.test.ts +92 -0
  28. package/src/chat/classifier.logic.ts +71 -0
  29. package/src/chat/conversation-store.it.test.ts +203 -0
  30. package/src/chat/conversation-store.test.ts +248 -0
  31. package/src/chat/conversation-store.ts +237 -0
  32. package/src/chat/decision.logic.test.ts +45 -0
  33. package/src/chat/decision.logic.ts +54 -0
  34. package/src/chat/llm-provider.test.ts +63 -0
  35. package/src/chat/llm-provider.ts +67 -0
  36. package/src/chat/model-error.logic.test.ts +60 -0
  37. package/src/chat/model-error.logic.ts +65 -0
  38. package/src/chat/normalize-messages.logic.test.ts +101 -0
  39. package/src/chat/normalize-messages.logic.ts +65 -0
  40. package/src/chat/permission-mode.logic.test.ts +70 -0
  41. package/src/chat/permission-mode.logic.ts +45 -0
  42. package/src/chat/read-invoker.ts +72 -0
  43. package/src/chat/replay.test.ts +174 -0
  44. package/src/chat/scrub-content.test.ts +183 -0
  45. package/src/chat/scrub-content.ts +154 -0
  46. package/src/chat/sdk-tools.test.ts +168 -0
  47. package/src/chat/sdk-tools.ts +181 -0
  48. package/src/chat/title-service.test.ts +146 -0
  49. package/src/chat/title-service.ts +111 -0
  50. package/src/chat/title.logic.test.ts +98 -0
  51. package/src/chat/title.logic.ts +102 -0
  52. package/src/extension-points.ts +41 -0
  53. package/src/generated/docs-index.ts +3020 -0
  54. package/src/hardening/handler-authz.test.ts +282 -0
  55. package/src/hardening/no-secret-leak.test.ts +303 -0
  56. package/src/hooks.ts +33 -0
  57. package/src/index.ts +542 -0
  58. package/src/mcp/connection-registry.test.ts +25 -0
  59. package/src/mcp/connection-registry.ts +54 -0
  60. package/src/mcp/mcp-conformance.it.test.ts +128 -0
  61. package/src/mcp/server.test.ts +285 -0
  62. package/src/mcp/server.ts +300 -0
  63. package/src/mcp/tool-invoker.ts +65 -0
  64. package/src/openai-provider.test.ts +64 -0
  65. package/src/openai-provider.ts +146 -0
  66. package/src/projection.test.ts +97 -0
  67. package/src/projection.ts +132 -0
  68. package/src/propose-apply/args-hash.test.ts +26 -0
  69. package/src/propose-apply/args-hash.ts +30 -0
  70. package/src/propose-apply/service.test.ts +423 -0
  71. package/src/propose-apply/service.ts +419 -0
  72. package/src/propose-apply/store.test.ts +136 -0
  73. package/src/propose-apply/store.ts +224 -0
  74. package/src/propose-apply/token.test.ts +52 -0
  75. package/src/propose-apply/token.ts +71 -0
  76. package/src/rate-limit/spend-ledger.it.test.ts +224 -0
  77. package/src/rate-limit/spend-ledger.test.ts +176 -0
  78. package/src/rate-limit/spend-ledger.ts +162 -0
  79. package/src/rate-limit/tool-budget.it.test.ts +173 -0
  80. package/src/rate-limit/tool-budget.test.ts +58 -0
  81. package/src/rate-limit/tool-budget.ts +107 -0
  82. package/src/registry-wiring.test.ts +131 -0
  83. package/src/registry-wiring.ts +68 -0
  84. package/src/resolver.test.ts +156 -0
  85. package/src/resolver.ts +78 -0
  86. package/src/router.test.ts +78 -0
  87. package/src/router.ts +345 -0
  88. package/src/schema.ts +284 -0
  89. package/src/serializer.test.ts +88 -0
  90. package/src/serializer.ts +42 -0
  91. package/src/tool-registry.ts +58 -0
  92. package/src/tools/composite-tools.ts +24 -0
  93. package/src/tools/docs-tools.test.ts +150 -0
  94. package/src/tools/docs-tools.ts +115 -0
  95. package/src/tools/probe-url.test.ts +51 -0
  96. package/src/tools/probe-url.ts +146 -0
  97. package/src/tools/rank-docs.test.ts +153 -0
  98. package/src/tools/rank-docs.ts +209 -0
  99. package/src/tools/script-context-extract.test.ts +93 -0
  100. package/src/tools/script-context-extract.ts +283 -0
  101. package/src/tools/ssrf-guard.test.ts +69 -0
  102. package/src/tools/ssrf-guard.ts +108 -0
  103. package/src/tools/tool-set.e2e.test.ts +64 -0
  104. package/src/user-rpc-client.test.ts +45 -0
  105. package/src/user-rpc-client.ts +60 -0
  106. package/tsconfig.json +26 -0
@@ -0,0 +1,73 @@
1
+ import type { AuthUser } from "@checkstack/backend-api";
2
+ import type { AiToolResolver } from "../resolver";
3
+ import type { RegisteredAiTool } from "../tool-registry";
4
+
5
+ /**
6
+ * Server-side agent-loop CORE (plan §4 / Phase 4) — provider-agnostic and
7
+ * DOM-free, so the security-critical tool-gating logic is unit-testable without
8
+ * a model, a browser, or the Vercel AI SDK.
9
+ *
10
+ * The loop treats the model as an UNTRUSTED caller (decision §1.5): it may only
11
+ * invoke tools the resolver allows for the logged-in principal, and it may never
12
+ * silently mutate. The decision of WHAT a model-requested tool call does is made
13
+ * here, not by the SDK:
14
+ *
15
+ * - `read` tools AUTO-RUN (handler-side authz still re-checks on execute).
16
+ * - `mutate` / `destructive` tools NEVER execute inline; they go through the
17
+ * propose/apply gate and surface a CONFIRM CARD the human must approve.
18
+ * - a tool the principal cannot see is REFUSED server-side, even if the model
19
+ * asks for it (the resolver never offered it, but the model is untrusted).
20
+ */
21
+
/**
 * Outcome of gating one model-requested tool call, discriminated on `kind`:
 *
 * - `"run"`     — read tool: auto-run; carries the resolved `tool`.
 * - `"confirm"` — mutate/destructive tool: propose + confirm card; carries the
 *                 resolved `tool`.
 * - `"refused"` — unknown tool or not allowed; `reason` explains why.
 */
export type AgentToolDisposition =
  | { kind: "run"; tool: RegisteredAiTool }
  | { kind: "confirm"; tool: RegisteredAiTool }
  | { kind: "refused"; reason: string };
27
+
/**
 * Server-side gate applied to every tool call the model requests. The file's
 * design notes state it mirrors the MCP `tools/call` gate so both transports
 * treat the model the same way.
 *
 * Checks run in this order, and the first one that fails wins:
 * 1. `getTool(toolName)` must yield a tool, otherwise `refused` ("Unknown tool").
 * 2. `resolver.isAllowed({ principal, tool })` must be truthy, otherwise
 *    `refused` ("Forbidden").
 * 3. A tool whose `effect` is `"read"` is dispositioned `run`; every other
 *    effect is dispositioned `confirm`.
 *
 * @param toolName  Name the model asked for (untrusted).
 * @param principal The authenticated caller the model is acting for.
 * @param resolver  Authorization source consulted via `isAllowed`.
 * @param getTool   Lookup from tool name to its registration, if any.
 * @returns The disposition the agent loop should act on.
 */
export function disposeAgentTool({
  toolName,
  principal,
  resolver,
  getTool,
}: {
  toolName: string;
  principal: AuthUser;
  resolver: AiToolResolver;
  getTool: (name: string) => RegisteredAiTool | undefined;
}): AgentToolDisposition {
  const refuse = (reason: string): AgentToolDisposition => ({
    kind: "refused",
    reason,
  });

  const requested = getTool(toolName);
  if (!requested) {
    return refuse(`Unknown tool: ${toolName}`);
  }

  // Never trust that the model only asks for what it was offered: the
  // authorization check is repeated here for the concrete principal.
  const permitted = resolver.isAllowed({ principal, tool: requested });
  if (!permitted) {
    return refuse(`Forbidden: ${toolName}`);
  }

  // Only read tools run inline; anything else needs human confirmation.
  return requested.effect === "read"
    ? { kind: "run", tool: requested }
    : { kind: "confirm", tool: requested };
}
58
+
/**
 * Lists the tools the loop presents to the model for `principal`.
 *
 * This is exactly what `resolver.resolveTools(principal)` returns; nothing is
 * added or removed here. Per the file's design notes, mutating/destructive
 * tools are offered as well so the model can request them, but requesting one
 * leads to a `confirm` disposition rather than an automatic run.
 *
 * @param principal The authenticated caller the tools are resolved for.
 * @param resolver  Source of the per-principal tool set.
 * @returns The resolver's tool list, unmodified.
 */
export function offeredTools({
  principal,
  resolver,
}: {
  principal: AuthUser;
  resolver: AiToolResolver;
}): RegisteredAiTool[] {
  const visibleToPrincipal = resolver.resolveTools(principal);
  return visibleToPrincipal;
}
@@ -0,0 +1,237 @@
1
+ import { describe, expect, test, mock } from "bun:test";
2
+ import { z } from "zod";
3
+ import type { AuthUser } from "@checkstack/backend-api";
4
+ import { createAiToolRegistry } from "../tool-registry";
5
+ import type { RegisteredAiTool } from "../tool-registry";
6
+ import { createAiToolResolver } from "../resolver";
7
+ import {
8
+ createProposeApplyService,
9
+ ProposeApplyError,
10
+ } from "../propose-apply/service";
11
+ import { generateProposalNonce } from "../propose-apply/token";
12
+ import type { AiToolCallStore } from "../propose-apply/store";
13
+ import type { AiToolCallRow } from "../schema";
14
+ import { buildChatToolCallbacks, type ChatRecordExecuted } from "./chat-service";
15
+ import type { ChatReadInvoker } from "./read-invoker";
16
+
/**
 * Map-backed `AiToolCallStore` test double.
 *
 * Rows are kept in the exposed `rows` map so a test can inspect the audit
 * trail directly. `consumeProposal` is single-use: it only succeeds for a row
 * that is still `"proposed"` and not past its expiry, and flips it to
 * `"applied"` with the applier stamped. The suite uses this to show the
 * AUTO-mode auto-apply path goes through the same propose/apply service as
 * the human `applyTool` flow.
 *
 * @param now Clock used for `createdAt`, proposal timestamps, and as the
 *            default consume time.
 */
function createFakeStore(now: () => Date): AiToolCallStore & {
  rows: Map<string, AiToolCallRow>;
} {
  // Proposals created by this fake expire ten minutes after they are made.
  const PROPOSAL_TTL_MS = 600_000;

  const rows = new Map<string, AiToolCallRow>();
  let issued = 0;

  // Builds a complete row: defaults first, then the caller's overrides win.
  function makeRow(overrides: Partial<AiToolCallRow>): AiToolCallRow {
    issued += 1;
    const defaults: AiToolCallRow = {
      id: `row-${issued}`,
      principalKind: "user",
      principalId: "u1",
      transport: "chat",
      conversationId: null,
      toolName: "x",
      effect: "mutate",
      argsHash: "h",
      status: "proposed",
      proposalNonce: null,
      proposalExpiresAt: null,
      resultSnapshot: null,
      proposedPayload: null,
      error: null,
      proposedAt: null,
      appliedAt: null,
      appliedByKind: null,
      appliedById: null,
      createdAt: now(),
    };
    return { ...defaults, ...overrides };
  }

  // Stores a freshly built row under its own id and hands it back.
  const persist = (row: AiToolCallRow): AiToolCallRow => {
    rows.set(row.id, row);
    return row;
  };

  return {
    rows,

    async recordExecuted(args) {
      return persist(
        makeRow({
          ...args,
          conversationId: args.conversationId ?? null,
          effect: "read",
          status: "executed",
          resultSnapshot: args.resultSnapshot ?? null,
        }),
      );
    },

    async recordFailed(args) {
      return persist(
        makeRow({
          ...args,
          conversationId: args.conversationId ?? null,
          status: "failed",
        }),
      );
    },

    async createProposal(args) {
      const nonce = generateProposalNonce();
      // Caller-supplied time wins; otherwise read the fake's clock.
      const proposalTime = (): Date => args.now ?? now();
      const row = persist(
        makeRow({
          principalKind: args.principal.kind,
          principalId: args.principal.id,
          transport: args.transport,
          conversationId: args.conversationId ?? null,
          toolName: args.toolName,
          effect: args.effect,
          argsHash: args.argsHash,
          status: "proposed",
          proposalNonce: nonce,
          proposalExpiresAt: new Date(proposalTime().getTime() + PROPOSAL_TTL_MS),
          proposedPayload: args.proposedPayload,
          resultSnapshot: args.resultSnapshot ?? null,
          proposedAt: proposalTime(),
        }),
      );
      return { row, nonce };
    },

    async consumeProposal({ rowId, applier, now: consumedAt = now() }) {
      const pending = rows.get(rowId);
      if (!pending || pending.status !== "proposed") {
        return undefined;
      }
      // A proposal whose expiry is at or before the consume time is dead.
      const deadline = pending.proposalExpiresAt;
      const expired = deadline
        ? deadline.getTime() <= consumedAt.getTime()
        : false;
      if (expired) {
        return undefined;
      }
      const appliedRow: AiToolCallRow = {
        ...pending,
        status: "applied",
        appliedAt: consumedAt,
        appliedByKind: applier.kind,
        appliedById: applier.id,
      };
      rows.set(rowId, appliedRow);
      return appliedRow;
    },

    async getProposal(rowId) {
      return rows.get(rowId);
    },

    async expireStaleProposals() {
      return 0;
    },
  };
}
117
+
/** Input schema of the demo mutating tool: a single string `value`. */
const ManageInput = z.object({ value: z.string() });

/**
 * Builds the `demo.mutate` tool used by this suite together with the spy that
 * backs its `execute`.
 *
 * The spy is returned separately so a test can assert that the tool body ran
 * (or never ran), e.g. when the authz gate refuses before any commit. Keeping
 * that assertion makes the no-execute guarantee hold even if the order of the
 * gates changes later.
 *
 * @returns `tool` — the registered-tool definition; `execute` — the mock
 *          wired in as `tool.execute`.
 */
function mutatingTool(): {
  tool: RegisteredAiTool<{ value: string }, { created: string }>;
  execute: ReturnType<typeof mock>;
} {
  const executeSpy = mock(
    async ({ input }: { input: { value: string } }) => {
      return { created: input.value };
    },
  );

  const tool: RegisteredAiTool<{ value: string }, { created: string }> = {
    name: "demo.mutate",
    description: "demo mutating tool",
    effect: "mutate",
    input: ManageInput,
    requiredAccessRules: ["demo.demo.manage"],
    dryRun: async ({ input }) => {
      return {
        summary: `Would create ${input.value}`,
        payload: { value: input.value },
      };
    },
    execute: executeSpy,
  };

  return { tool, execute: executeSpy };
}
145
+
/** Builds a `type: "user"` principal fixture with the given id and rules. */
const userPrincipal = (id: string, accessRules: string[]): AuthUser => ({
  type: "user",
  id,
  accessRules,
});

/** Holds `demo.demo.manage`, the rule the demo mutating tool requires. */
const allowed: AuthUser = userPrincipal("u1", ["demo.demo.manage"]);

/** Holds only an unrelated rule, so the demo mutating tool is off limits. */
const notAllowed: AuthUser = userPrincipal("u2", ["other.read"]);
156
+
/**
 * Minimal database stand-in supporting only the `select().from().where()`
 * chain, whose final step resolves to the single row `{ value: used }`.
 *
 * The result is cast `as never` so it can be passed wherever a db handle is
 * expected without modelling the real type.
 * NOTE(review): presumably `value` is the already-consumed tool budget read
 * by the code under test — confirm against the budget query.
 *
 * @param used The number reported in the single result row.
 */
function budgetDb(used: number) {
  const whereStep = mock(() => Promise.resolve([{ value: used }]));
  const fromStep = mock(() => {
    return { where: whereStep };
  });
  const selectStep = mock(() => {
    return { from: fromStep };
  });
  const fakeDb = { select: selectStep };
  return fakeDb as never;
}
163
+
/**
 * Wires a fresh fixture for one test: a registry holding the demo mutating
 * tool, a resolver over it, an in-memory store, the real propose/apply
 * service, and the chat tool callbacks built on top of them.
 *
 * The read invoker rejects if it is ever called and `recordExecuted` is a
 * no-op, since only the mutate auto-apply path is exercised here.
 *
 * @returns The tool and its `execute` spy, the store (for audit assertions),
 *          and the callbacks under test.
 */
function setup() {
  const registry = createAiToolRegistry();
  const { tool, execute } = mutatingTool();
  registry.register(tool);

  const resolver = createAiToolResolver({ registry });
  const store = createFakeStore(() => new Date());
  const proposeApply = createProposeApplyService({ registry, resolver, store });

  const failingReadInvoker: ChatReadInvoker = {
    invoke: () => Promise.reject(new Error("read invoker should not run")),
  };
  const ignoreExecuted: ChatRecordExecuted = async () => {};

  const callbacks = buildChatToolCallbacks({
    proposeApply,
    readInvoker: failingReadInvoker,
    recordExecuted: ignoreExecuted,
    readRouting: new Map(),
    db: budgetDb(0),
    conversationId: "conv-1",
    forwardHeaders: {},
    internalUrl: "http://localhost:3000",
  });

  return { tool, execute, store, callbacks };
}
187
+
describe("AUTO-mode mutate auto-apply path", () => {
  // Every audit row the fake store currently holds in the "applied" state.
  const appliedRowsOf = (store: ReturnType<typeof setup>["store"]) =>
    Array.from(store.rows.values()).filter((row) => row.status === "applied");

  test("auto-applies server-side through the SAME propose/apply service (audited as applied)", async () => {
    const fixture = setup();

    const outcome = await fixture.callbacks.autoApply({
      principal: allowed,
      tool: fixture.tool,
      input: { value: "alpha" },
    });

    // The very first call has to be applied; a "duplicate" result here would
    // mean it was wrongly deduplicated.
    if (!("__applied" in outcome)) {
      throw new Error("expected an applied result, got a duplicate");
    }

    // Server-side apply: the tool body ran without any human click.
    expect(outcome.__applied).toBe(true);
    expect(outcome.result).toEqual({ created: "alpha" });
    expect(fixture.execute).toHaveBeenCalledTimes(1);

    // Same audit shape as a human apply: exactly one row that went
    // proposed -> applied, stamped with the applier. No weaker side path.
    const appliedRows = appliedRowsOf(fixture.store);
    expect(appliedRows).toHaveLength(1);
    const [auditRow] = appliedRows;
    expect(auditRow?.toolName).toBe("demo.mutate");
    expect(auditRow?.effect).toBe("mutate");
    expect(auditRow?.appliedById).toBe("u1");
    expect(auditRow?.id).toBe(outcome.toolCallId);
  });

  test("re-checks authz: an unauthorized principal is refused (no apply, no execute)", async () => {
    const fixture = setup();

    const attempt = fixture.callbacks.autoApply({
      principal: notAllowed,
      tool: fixture.tool,
      input: { value: "beta" },
    });
    await expect(attempt).rejects.toBeInstanceOf(ProposeApplyError);

    // The authz gate (the same `isAllowed` re-check the human path uses)
    // refused before anything was committed, so no row is "applied".
    expect(appliedRowsOf(fixture.store)).toHaveLength(0);

    // And the tool body never ran for this principal. Today the refusal comes
    // from `propose`, ahead of any execute; asserting it here keeps the
    // guarantee pinned even if the gate order changes.
    expect(fixture.execute).not.toHaveBeenCalled();
  });
});
@@ -0,0 +1,111 @@
1
+ import { z } from "zod";
2
+ import type { AuthService } from "@checkstack/backend-api";
3
+ import { extractErrorMessage } from "@checkstack/common";
4
+ import type { ChatService } from "./chat-service";
5
+ import { forwardableAuthHeaders } from "./read-invoker";
6
+
/**
 * Fields shared by every /chat POST body: the conversation and connection
 * identifiers plus an optional model name.
 */
const chatTargetShape = {
  conversationId: z.string(),
  connectionId: z.string(),
  model: z.string().optional(),
};

/** Body of a streaming chat turn POST: a new, non-empty user message. */
const ChatTurnBodySchema = z.object({
  ...chatTargetShape,
  message: z.string().min(1),
});

/**
 * Body of the POST sent after the operator acted on a confirm card. It carries
 * only the proposal token and whether the proposal was applied or declined, so
 * the model's acknowledgment can be streamed; the apply itself runs separately
 * via `applyTool`.
 */
const ChatDecisionBodySchema = z.object({
  ...chatTargetShape,
  decision: z.object({
    token: z.string().min(1),
    kind: z.enum(["apply", "decline"]),
  }),
});

/**
 * Any accepted /chat POST body: a new user turn or a confirm-card decision.
 * The user-turn schema is tried first.
 */
const ChatRequestBodySchema = z.union([
  ChatTurnBodySchema,
  ChatDecisionBodySchema,
]);
35
+
/**
 * Raw HTTP handler for the streaming chat turn, mounted at /api/ai/chat. SSE
 * streaming requires a raw handler (oRPC does not stream), so authentication is
 * done here via the platform auth strategy — the SAME principal resolution as
 * every other request. The resolved principal must be a logged-in RealUser
 * (chat is RealUser-only); the credential never crosses to the browser.
 *
 * Responses produced by the handler itself, in the order the checks run:
 * - 405 with `Allow: POST` for any method other than POST;
 * - 401 when `auth.authenticate` yields no principal;
 * - 403 when the principal's `type` is not `"user"`;
 * - 400 when the body is not JSON or does not match `ChatRequestBodySchema`;
 * - 500 carrying the extracted error message when the chat service throws or
 *   rejects.
 * Otherwise the response is whatever `chatService.streamDecision` (body has a
 * `decision`) or `chatService.streamTurn` (body has a `message`) returns.
 *
 * @param chatService Service that streams user turns and decision turns.
 * @param auth        Platform auth service used to resolve the principal.
 * @returns An async request handler.
 */
export function createChatRequestHandler({
  chatService,
  auth,
}: {
  chatService: ChatService;
  auth: AuthService;
}): (req: Request) => Promise<Response> {
  return async function handleChatRequest(req: Request): Promise<Response> {
    if (req.method !== "POST") {
      // A 405 must tell the client which methods are supported
      // (RFC 9110 §15.5.6); this endpoint only accepts POST.
      return new Response(null, { status: 405, headers: { Allow: "POST" } });
    }

    const principal = await auth.authenticate(req);
    if (!principal) {
      return Response.json({ error: "Unauthorized" }, { status: 401 });
    }
    if (principal.type !== "user") {
      // Applications/services use MCP, not the in-app chat.
      return Response.json(
        { error: "AI chat is available to logged-in users only." },
        { status: 403 },
      );
    }

    let body: z.infer<typeof ChatRequestBodySchema>;
    try {
      const parsed = ChatRequestBodySchema.safeParse(await req.json());
      if (!parsed.success) {
        return Response.json(
          { error: `Invalid request: ${parsed.error.message}` },
          { status: 400 },
        );
      }
      body = parsed.data;
    } catch {
      // `req.json()` rejected: the payload was not parseable JSON.
      return Response.json({ error: "Invalid JSON body." }, { status: 400 });
    }

    const forwardHeaders = forwardableAuthHeaders(req);
    try {
      // A decision turn (operator applied/declined a confirm card) vs a normal
      // user message turn. The union narrows on the presence of `decision`.
      if ("decision" in body) {
        return await chatService.streamDecision({
          principal,
          conversationId: body.conversationId,
          connectionId: body.connectionId,
          model: body.model,
          forwardHeaders,
          token: body.decision.token,
          decision: body.decision.kind,
        });
      }
      return await chatService.streamTurn({
        principal,
        conversationId: body.conversationId,
        connectionId: body.connectionId,
        model: body.model,
        forwardHeaders,
        userText: body.message,
      });
    } catch (error) {
      return Response.json(
        { error: extractErrorMessage(error, "Chat turn failed.") },
        { status: 500 },
      );
    }
  };
}