@checkstack/ai-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/drizzle/0000_productive_jackpot.sql +26 -0
- package/drizzle/0001_puzzling_purple_man.sql +26 -0
- package/drizzle/0002_sparkling_paper_doll.sql +15 -0
- package/drizzle/0003_married_senator_kelly.sql +1 -0
- package/drizzle/0004_crazy_miek.sql +2 -0
- package/drizzle/0005_tearful_randall_flagg.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +232 -0
- package/drizzle/meta/0001_snapshot.json +434 -0
- package/drizzle/meta/0002_snapshot.json +551 -0
- package/drizzle/meta/0003_snapshot.json +557 -0
- package/drizzle/meta/0004_snapshot.json +573 -0
- package/drizzle/meta/0005_snapshot.json +574 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +42 -0
- package/src/agent-runner.test.ts +262 -0
- package/src/agent-runner.ts +262 -0
- package/src/chat/agent-loop.test.ts +119 -0
- package/src/chat/agent-loop.ts +73 -0
- package/src/chat/auto-apply.test.ts +237 -0
- package/src/chat/chat-handler.ts +111 -0
- package/src/chat/chat-service.streamturn.test.ts +417 -0
- package/src/chat/chat-service.test.ts +250 -0
- package/src/chat/chat-service.ts +923 -0
- package/src/chat/classifier-service.ts +64 -0
- package/src/chat/classifier.logic.test.ts +92 -0
- package/src/chat/classifier.logic.ts +71 -0
- package/src/chat/conversation-store.it.test.ts +203 -0
- package/src/chat/conversation-store.test.ts +248 -0
- package/src/chat/conversation-store.ts +237 -0
- package/src/chat/decision.logic.test.ts +45 -0
- package/src/chat/decision.logic.ts +54 -0
- package/src/chat/llm-provider.test.ts +63 -0
- package/src/chat/llm-provider.ts +67 -0
- package/src/chat/model-error.logic.test.ts +60 -0
- package/src/chat/model-error.logic.ts +65 -0
- package/src/chat/normalize-messages.logic.test.ts +101 -0
- package/src/chat/normalize-messages.logic.ts +65 -0
- package/src/chat/permission-mode.logic.test.ts +70 -0
- package/src/chat/permission-mode.logic.ts +45 -0
- package/src/chat/read-invoker.ts +72 -0
- package/src/chat/replay.test.ts +174 -0
- package/src/chat/scrub-content.test.ts +183 -0
- package/src/chat/scrub-content.ts +154 -0
- package/src/chat/sdk-tools.test.ts +168 -0
- package/src/chat/sdk-tools.ts +181 -0
- package/src/chat/title-service.test.ts +146 -0
- package/src/chat/title-service.ts +111 -0
- package/src/chat/title.logic.test.ts +98 -0
- package/src/chat/title.logic.ts +102 -0
- package/src/extension-points.ts +41 -0
- package/src/generated/docs-index.ts +3020 -0
- package/src/hardening/handler-authz.test.ts +282 -0
- package/src/hardening/no-secret-leak.test.ts +303 -0
- package/src/hooks.ts +33 -0
- package/src/index.ts +542 -0
- package/src/mcp/connection-registry.test.ts +25 -0
- package/src/mcp/connection-registry.ts +54 -0
- package/src/mcp/mcp-conformance.it.test.ts +128 -0
- package/src/mcp/server.test.ts +285 -0
- package/src/mcp/server.ts +300 -0
- package/src/mcp/tool-invoker.ts +65 -0
- package/src/openai-provider.test.ts +64 -0
- package/src/openai-provider.ts +146 -0
- package/src/projection.test.ts +97 -0
- package/src/projection.ts +132 -0
- package/src/propose-apply/args-hash.test.ts +26 -0
- package/src/propose-apply/args-hash.ts +30 -0
- package/src/propose-apply/service.test.ts +423 -0
- package/src/propose-apply/service.ts +419 -0
- package/src/propose-apply/store.test.ts +136 -0
- package/src/propose-apply/store.ts +224 -0
- package/src/propose-apply/token.test.ts +52 -0
- package/src/propose-apply/token.ts +71 -0
- package/src/rate-limit/spend-ledger.it.test.ts +224 -0
- package/src/rate-limit/spend-ledger.test.ts +176 -0
- package/src/rate-limit/spend-ledger.ts +162 -0
- package/src/rate-limit/tool-budget.it.test.ts +173 -0
- package/src/rate-limit/tool-budget.test.ts +58 -0
- package/src/rate-limit/tool-budget.ts +107 -0
- package/src/registry-wiring.test.ts +131 -0
- package/src/registry-wiring.ts +68 -0
- package/src/resolver.test.ts +156 -0
- package/src/resolver.ts +78 -0
- package/src/router.test.ts +78 -0
- package/src/router.ts +345 -0
- package/src/schema.ts +284 -0
- package/src/serializer.test.ts +88 -0
- package/src/serializer.ts +42 -0
- package/src/tool-registry.ts +58 -0
- package/src/tools/composite-tools.ts +24 -0
- package/src/tools/docs-tools.test.ts +150 -0
- package/src/tools/docs-tools.ts +115 -0
- package/src/tools/probe-url.test.ts +51 -0
- package/src/tools/probe-url.ts +146 -0
- package/src/tools/rank-docs.test.ts +153 -0
- package/src/tools/rank-docs.ts +209 -0
- package/src/tools/script-context-extract.test.ts +93 -0
- package/src/tools/script-context-extract.ts +283 -0
- package/src/tools/ssrf-guard.test.ts +69 -0
- package/src/tools/ssrf-guard.ts +108 -0
- package/src/tools/tool-set.e2e.test.ts +64 -0
- package/src/user-rpc-client.test.ts +45 -0
- package/src/user-rpc-client.ts +60 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
import { describe, expect, test, mock, beforeEach, afterEach } from "bun:test";
|
|
2
|
+
import { APICallError, type LanguageModelUsage } from "ai";
|
|
3
|
+
import type { AuthUser } from "@checkstack/backend-api";
|
|
4
|
+
import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
|
|
5
|
+
import type { ClassifierTextGenerator } from "./classifier-service";
|
|
6
|
+
import { OFF_TOPIC_REFUSAL } from "./classifier.logic";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* `streamTurn` integration-ish unit tests for the topical pre-classifier (Fix
|
|
10
|
+
* 3). We mock the `ai` module's `streamText` (so no live model is built) and the
|
|
11
|
+
* provider builder, then inject a fake classifier generator to drive each path:
|
|
12
|
+
*
|
|
13
|
+
* - OFF_TOPIC short-circuits: streamText (the expensive tool loop) NEVER runs,
|
|
14
|
+
* the canned refusal is persisted + streamed.
|
|
15
|
+
* - classifier ERROR fails open: streamText runs (the normal turn proceeds).
|
|
16
|
+
* - ON_TOPIC proceeds: streamText runs normally.
|
|
17
|
+
*
|
|
18
|
+
* These are DOM-free and run under `bun test` from the repo root.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
// Records each streamText call so we can assert whether the tool loop ran and
|
|
22
|
+
// what message history it was handed (post-normalization).
|
|
23
|
+
const streamTextCalls: Array<{ system: string; messages: unknown }> = [];
|
|
24
|
+
// Captures the onError handler the service hands `toUIMessageStreamResponse`, so
|
|
25
|
+
// a test can drive the (otherwise masked) provider-error surfacing path.
|
|
26
|
+
let lastOnError: ((error: unknown) => string) | undefined;
|
|
27
|
+
// Captures the onFinish callback the service hands streamText, so the test can
|
|
28
|
+
// drive the normal-turn persistence/spend path deterministically.
|
|
29
|
+
let lastOnFinish:
|
|
30
|
+
| ((args: {
|
|
31
|
+
text: string;
|
|
32
|
+
steps: Array<{ response: { messages: unknown[] } }>;
|
|
33
|
+
totalUsage: LanguageModelUsage;
|
|
34
|
+
}) => Promise<void> | void)
|
|
35
|
+
| undefined;
|
|
36
|
+
|
|
37
|
+
const realAi = await import("ai");
|
|
38
|
+
|
|
39
|
+
mock.module("ai", () => ({
|
|
40
|
+
...realAi,
|
|
41
|
+
stepCountIs: () => ({}),
|
|
42
|
+
streamText: (args: {
|
|
43
|
+
system: string;
|
|
44
|
+
messages: unknown;
|
|
45
|
+
onFinish?: typeof lastOnFinish;
|
|
46
|
+
}) => {
|
|
47
|
+
streamTextCalls.push({ system: args.system, messages: args.messages });
|
|
48
|
+
lastOnFinish = args.onFinish;
|
|
49
|
+
return {
|
|
50
|
+
toUIMessageStreamResponse: (opts?: {
|
|
51
|
+
onError?: (error: unknown) => string;
|
|
52
|
+
}) => {
|
|
53
|
+
lastOnError = opts?.onError;
|
|
54
|
+
return new Response("normal-turn", { status: 200 });
|
|
55
|
+
},
|
|
56
|
+
};
|
|
57
|
+
},
|
|
58
|
+
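// Minimal stand-ins for the stream helpers used by the short-circuit (refusal) path: written chunks are collected into an array and returned as a JSON body, not real SSE.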
|
|
59
|
+
createUIMessageStream: ({
|
|
60
|
+
execute,
|
|
61
|
+
}: {
|
|
62
|
+
execute: (o: { writer: { write: (c: unknown) => void } }) => void;
|
|
63
|
+
}) => {
|
|
64
|
+
const chunks: unknown[] = [];
|
|
65
|
+
execute({ writer: { write: (c) => chunks.push(c) } });
|
|
66
|
+
return chunks;
|
|
67
|
+
},
|
|
68
|
+
createUIMessageStreamResponse: ({ stream }: { stream: unknown[] }) =>
|
|
69
|
+
new Response(JSON.stringify(stream), { status: 200 }),
|
|
70
|
+
}));
|
|
71
|
+
|
|
72
|
+
// NOTE: we deliberately do NOT mock `./llm-provider`. `buildLanguageModel`
|
|
73
|
+
// returns a real provider model object but makes no network call until
|
|
74
|
+
// `streamText`/`generateText` runs, and `streamText` is mocked above — so the
|
|
75
|
+
// real builder is safe here AND we avoid leaking a module mock into
|
|
76
|
+
// `llm-provider.test.ts` (bun `mock.module` is process-global).
|
|
77
|
+
|
|
78
|
+
// Imported AFTER the mocks so the service binds the mocked `ai`.
|
|
79
|
+
const { createChatService } = await import("./chat-service");
|
|
80
|
+
|
|
81
|
+
const principal: AuthUser = {
|
|
82
|
+
type: "user",
|
|
83
|
+
id: "u1",
|
|
84
|
+
accessRules: [],
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
const connection: OpenAiCompatibleConnection = {
|
|
88
|
+
baseUrl: "https://api.openai.com/v1",
|
|
89
|
+
apiKey: "sk-test",
|
|
90
|
+
defaultModel: "test-model",
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
/** Build a full AI-SDK usage object from input/output token counts. */
|
|
94
|
+
function usage(inputTokens: number, outputTokens: number): LanguageModelUsage {
|
|
95
|
+
return {
|
|
96
|
+
inputTokens,
|
|
97
|
+
outputTokens,
|
|
98
|
+
totalTokens: inputTokens + outputTokens,
|
|
99
|
+
inputTokenDetails: {
|
|
100
|
+
noCacheTokens: undefined,
|
|
101
|
+
cacheReadTokens: undefined,
|
|
102
|
+
cacheWriteTokens: undefined,
|
|
103
|
+
},
|
|
104
|
+
outputTokenDetails: {
|
|
105
|
+
textTokens: undefined,
|
|
106
|
+
reasoningTokens: undefined,
|
|
107
|
+
},
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** A classifier generator that always returns the given verdict text. */
|
|
112
|
+
function verdict(text: string): ClassifierTextGenerator {
|
|
113
|
+
return async () => ({ text, usage: usage(3, 1) });
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/** A classifier generator that throws (simulates a classifier outage). */
|
|
117
|
+
const classifierThrows: ClassifierTextGenerator = async () => {
|
|
118
|
+
throw new Error("classifier unavailable");
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
/** A minimal proposeApply double exposing only what streamDecision touches. */
|
|
122
|
+
interface ProposalDouble {
|
|
123
|
+
rowId: string;
|
|
124
|
+
toolName: string;
|
|
125
|
+
status: string;
|
|
126
|
+
conversationId?: string;
|
|
127
|
+
summary?: string;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
function makeService(
|
|
131
|
+
classifierGenerate: ClassifierTextGenerator,
|
|
132
|
+
describeProposalResult?: ProposalDouble,
|
|
133
|
+
) {
|
|
134
|
+
const appended: Array<{ role: string; text: unknown }> = [];
|
|
135
|
+
const spendInserts: Array<Record<string, unknown>> = [];
|
|
136
|
+
const conversations = {
|
|
137
|
+
getConversation: async () => ({
|
|
138
|
+
id: "conv-1",
|
|
139
|
+
userId: "u1",
|
|
140
|
+
title: "t",
|
|
141
|
+
integrationId: "ai.openai-compatible.c1",
|
|
142
|
+
model: null,
|
|
143
|
+
permissionMode: "approve" as const,
|
|
144
|
+
createdAt: new Date(),
|
|
145
|
+
updatedAt: new Date(),
|
|
146
|
+
archivedAt: null,
|
|
147
|
+
}),
|
|
148
|
+
appendMessage: async (a: { role: string; content: { text: unknown } }) => {
|
|
149
|
+
appended.push({ role: a.role, text: a.content.text });
|
|
150
|
+
return {} as never;
|
|
151
|
+
},
|
|
152
|
+
listMessages: async () => [
|
|
153
|
+
{
|
|
154
|
+
id: "m1",
|
|
155
|
+
conversationId: "conv-1",
|
|
156
|
+
role: "user" as const,
|
|
157
|
+
content: { text: "hi" },
|
|
158
|
+
toolCalls: null,
|
|
159
|
+
modelMessages: null,
|
|
160
|
+
createdAt: new Date(),
|
|
161
|
+
},
|
|
162
|
+
],
|
|
163
|
+
updateConversation: async () => undefined,
|
|
164
|
+
createConversation: async () => ({}) as never,
|
|
165
|
+
listConversations: async () => [],
|
|
166
|
+
archiveConversation: async () => false,
|
|
167
|
+
deleteConversation: async () => false,
|
|
168
|
+
};
|
|
169
|
+
// db.insert(...).values(...) is awaited by recordSpend; capture the values.
|
|
170
|
+
const db = {
|
|
171
|
+
insert: () => ({
|
|
172
|
+
values: async (v: Record<string, unknown>) => {
|
|
173
|
+
spendInserts.push(v);
|
|
174
|
+
},
|
|
175
|
+
}),
|
|
176
|
+
} as never;
|
|
177
|
+
const loggerErrors: Array<{ message: string; meta: unknown }> = [];
|
|
178
|
+
const logger = {
|
|
179
|
+
info: () => {},
|
|
180
|
+
warn: () => {},
|
|
181
|
+
debug: () => {},
|
|
182
|
+
error: (message: string, meta?: unknown) => {
|
|
183
|
+
loggerErrors.push({ message, meta });
|
|
184
|
+
},
|
|
185
|
+
};
|
|
186
|
+
const service = createChatService({
|
|
187
|
+
resolver: { resolveTools: () => [] } as never,
|
|
188
|
+
proposeApply: {
|
|
189
|
+
describeProposal: async () => describeProposalResult,
|
|
190
|
+
} as never,
|
|
191
|
+
conversations: conversations as never,
|
|
192
|
+
connections: { resolve: async () => connection },
|
|
193
|
+
readInvoker: { invoke: async () => ({}) },
|
|
194
|
+
recordExecuted: async () => {},
|
|
195
|
+
db,
|
|
196
|
+
logger,
|
|
197
|
+
internalUrl: "http://localhost:3000",
|
|
198
|
+
classifierGenerate,
|
|
199
|
+
});
|
|
200
|
+
return { service, appended, spendInserts, loggerErrors };
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
beforeEach(() => {
|
|
204
|
+
streamTextCalls.length = 0;
|
|
205
|
+
lastOnFinish = undefined;
|
|
206
|
+
lastOnError = undefined;
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
afterEach(() => {
|
|
210
|
+
mock.restore();
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
describe("streamTurn topical pre-classifier", () => {
|
|
214
|
+
const turn = {
|
|
215
|
+
principal,
|
|
216
|
+
conversationId: "conv-1",
|
|
217
|
+
connectionId: "ai.openai-compatible.c1",
|
|
218
|
+
forwardHeaders: {},
|
|
219
|
+
userText: "write me a hello world react component",
|
|
220
|
+
};
|
|
221
|
+
|
|
222
|
+
test("OFF_TOPIC short-circuits: no tool loop, refusal persisted + streamed, classifier spend recorded", async () => {
|
|
223
|
+
const { service, appended, spendInserts } = makeService(
|
|
224
|
+
verdict("OFF_TOPIC"),
|
|
225
|
+
);
|
|
226
|
+
const res = await service.streamTurn(turn);
|
|
227
|
+
expect(res.status).toBe(200);
|
|
228
|
+
|
|
229
|
+
// The expensive tool loop (streamText) must NOT have run.
|
|
230
|
+
expect(streamTextCalls).toHaveLength(0);
|
|
231
|
+
|
|
232
|
+
// The user message is persisted up front; the refusal is persisted as the
|
|
233
|
+
// assistant message (no other assistant turn was generated).
|
|
234
|
+
const assistant = appended.filter((a) => a.role === "assistant");
|
|
235
|
+
expect(assistant).toHaveLength(1);
|
|
236
|
+
expect(assistant[0]?.text).toBe(OFF_TOPIC_REFUSAL);
|
|
237
|
+
|
|
238
|
+
// The refusal text was streamed over the SSE path. Parse the stream chunks
|
|
239
|
+
// (the mock JSON-serializes them, which escapes the quotes in the refusal)
|
|
240
|
+
// and reassemble the deltas rather than substring-matching escaped JSON.
|
|
241
|
+
const body = await res.text();
|
|
242
|
+
const chunks = JSON.parse(body) as Array<{ delta?: string }>;
|
|
243
|
+
const streamed = chunks.map((c) => c.delta ?? "").join("");
|
|
244
|
+
expect(streamed).toBe(OFF_TOPIC_REFUSAL);
|
|
245
|
+
|
|
246
|
+
// The classifier's own (small) token usage was recorded against the ledger.
|
|
247
|
+
expect(spendInserts).toHaveLength(1);
|
|
248
|
+
expect(spendInserts[0]?.inputTokens).toBe(3);
|
|
249
|
+
expect(spendInserts[0]?.outputTokens).toBe(1);
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
test("classifier ERROR fails open: the normal turn proceeds (tool loop runs)", async () => {
|
|
253
|
+
const { service } = makeService(classifierThrows);
|
|
254
|
+
const res = await service.streamTurn(turn);
|
|
255
|
+
expect(res.status).toBe(200);
|
|
256
|
+
// Fail-open: streamText (the normal turn) ran despite the classifier throw.
|
|
257
|
+
expect(streamTextCalls).toHaveLength(1);
|
|
258
|
+
expect(await res.text()).toBe("normal-turn");
|
|
259
|
+
});
|
|
260
|
+
|
|
261
|
+
test("ON_TOPIC proceeds: the normal turn runs and the classifier spend is recorded", async () => {
|
|
262
|
+
const { service, spendInserts } = makeService(verdict("ON_TOPIC"));
|
|
263
|
+
const res = await service.streamTurn({
|
|
264
|
+
...turn,
|
|
265
|
+
userText: "summarize the open incidents",
|
|
266
|
+
});
|
|
267
|
+
expect(res.status).toBe(200);
|
|
268
|
+
expect(streamTextCalls).toHaveLength(1);
|
|
269
|
+
// The classifier usage was recorded even on the ON_TOPIC path.
|
|
270
|
+
expect(spendInserts).toHaveLength(1);
|
|
271
|
+
|
|
272
|
+
// Drive the normal turn's onFinish to verify the turn usage is also recorded.
|
|
273
|
+
expect(lastOnFinish).toBeDefined();
|
|
274
|
+
await lastOnFinish?.({
|
|
275
|
+
text: "Here are the open incidents.",
|
|
276
|
+
steps: [],
|
|
277
|
+
totalUsage: usage(50, 20),
|
|
278
|
+
});
|
|
279
|
+
// One more spend row for the turn itself (assistant-message persistence is not asserted here).
|
|
280
|
+
expect(spendInserts).toHaveLength(2);
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
test("hands streamText a normalized message history (starts with a user row)", async () => {
|
|
284
|
+
const { service } = makeService(verdict("ON_TOPIC"));
|
|
285
|
+
await service.streamTurn({ ...turn, userText: "list incidents" });
|
|
286
|
+
expect(streamTextCalls).toHaveLength(1);
|
|
287
|
+
const messages = streamTextCalls[0]?.messages;
|
|
288
|
+
expect(Array.isArray(messages)).toBe(true);
|
|
289
|
+
// normalizeModelMessages guarantees a leading user row.
|
|
290
|
+
const first = (messages as Array<{ role: string }>)[0];
|
|
291
|
+
expect(first?.role).toBe("user");
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
test("surfaces a masked provider error (HTTP body) to the UI and the log", async () => {
|
|
295
|
+
const { service, loggerErrors } = makeService(verdict("ON_TOPIC"));
|
|
296
|
+
await service.streamTurn({ ...turn, userText: "summarize incidents" });
|
|
297
|
+
// The service must have installed an onError handler (else errors are masked).
|
|
298
|
+
expect(lastOnError).toBeDefined();
|
|
299
|
+
|
|
300
|
+
const apiError = new APICallError({
|
|
301
|
+
message: "Bad Request",
|
|
302
|
+
url: "https://openrouter.ai/api/v1/chat/completions",
|
|
303
|
+
requestBodyValues: {},
|
|
304
|
+
statusCode: 400,
|
|
305
|
+
responseBody: '{"error":{"code":"invalid_prompt","message":"bad"}}',
|
|
306
|
+
});
|
|
307
|
+
const surfaced = lastOnError?.(apiError);
|
|
308
|
+
expect(surfaced).toContain("HTTP 400");
|
|
309
|
+
expect(surfaced).toContain("invalid_prompt");
|
|
310
|
+
|
|
311
|
+
// It is also logged server-side with the structured provider detail.
|
|
312
|
+
expect(loggerErrors).toHaveLength(1);
|
|
313
|
+
expect(loggerErrors[0]?.message).toBe("AI chat model call failed");
|
|
314
|
+
const meta = loggerErrors[0]?.meta as { statusCode?: number };
|
|
315
|
+
expect(meta.statusCode).toBe(400);
|
|
316
|
+
});
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
describe("streamDecision (post-confirm-card acknowledgment)", () => {
|
|
320
|
+
const base = {
|
|
321
|
+
principal,
|
|
322
|
+
conversationId: "conv-1",
|
|
323
|
+
connectionId: "ai.openai-compatible.c1",
|
|
324
|
+
forwardHeaders: {},
|
|
325
|
+
};
|
|
326
|
+
|
|
327
|
+
test("apply: runs the model with a server-derived APPLIED note, no user bubble", async () => {
|
|
328
|
+
const { service, appended } = makeService(verdict("ON_TOPIC"), {
|
|
329
|
+
rowId: "row-1",
|
|
330
|
+
toolName: "healthcheck.propose",
|
|
331
|
+
status: "applied",
|
|
332
|
+
conversationId: "conv-1",
|
|
333
|
+
summary: 'Create health check "google-com-http"',
|
|
334
|
+
});
|
|
335
|
+
const res = await service.streamDecision({
|
|
336
|
+
...base,
|
|
337
|
+
token: "propose:row-1.nonce",
|
|
338
|
+
decision: "apply",
|
|
339
|
+
});
|
|
340
|
+
expect(res.status).toBe(200);
|
|
341
|
+
expect(streamTextCalls).toHaveLength(1);
|
|
342
|
+
|
|
343
|
+
// The decision note is delivered to the model (server-derived summary).
|
|
344
|
+
const serialized = JSON.stringify(streamTextCalls[0]?.messages);
|
|
345
|
+
expect(serialized).toContain("APPLIED");
|
|
346
|
+
expect(serialized).toContain("Create health check");
|
|
347
|
+
expect(serialized).toContain("google-com-http");
|
|
348
|
+
|
|
349
|
+
// No user message is persisted for a decision turn (no user bubble); only
|
|
350
|
+
// the streamed assistant reply is persisted via onFinish (not driven in this test).
|
|
351
|
+
expect(appended.filter((a) => a.role === "user")).toHaveLength(0);
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
test("apply: refuses (409) when the proposal is not actually applied", async () => {
|
|
355
|
+
const { service } = makeService(verdict("ON_TOPIC"), {
|
|
356
|
+
rowId: "row-1",
|
|
357
|
+
toolName: "healthcheck.propose",
|
|
358
|
+
status: "proposed",
|
|
359
|
+
conversationId: "conv-1",
|
|
360
|
+
summary: "Create health check X",
|
|
361
|
+
});
|
|
362
|
+
const res = await service.streamDecision({
|
|
363
|
+
...base,
|
|
364
|
+
token: "propose:row-1.nonce",
|
|
365
|
+
decision: "apply",
|
|
366
|
+
});
|
|
367
|
+
expect(res.status).toBe(409);
|
|
368
|
+
// The model must NOT run if we cannot truthfully say it was applied.
|
|
369
|
+
expect(streamTextCalls).toHaveLength(0);
|
|
370
|
+
});
|
|
371
|
+
|
|
372
|
+
test("decline: runs the model with a DECLINED note (no status requirement)", async () => {
|
|
373
|
+
const { service } = makeService(verdict("ON_TOPIC"), {
|
|
374
|
+
rowId: "row-1",
|
|
375
|
+
toolName: "healthcheck.propose",
|
|
376
|
+
status: "proposed",
|
|
377
|
+
conversationId: "conv-1",
|
|
378
|
+
summary: "Create health check X",
|
|
379
|
+
});
|
|
380
|
+
const res = await service.streamDecision({
|
|
381
|
+
...base,
|
|
382
|
+
token: "propose:row-1.nonce",
|
|
383
|
+
decision: "decline",
|
|
384
|
+
});
|
|
385
|
+
expect(res.status).toBe(200);
|
|
386
|
+
expect(streamTextCalls).toHaveLength(1);
|
|
387
|
+
expect(JSON.stringify(streamTextCalls[0]?.messages)).toContain("DECLINED");
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
test("refuses (404) a proposal that belongs to a different conversation", async () => {
|
|
391
|
+
const { service } = makeService(verdict("ON_TOPIC"), {
|
|
392
|
+
rowId: "row-1",
|
|
393
|
+
toolName: "healthcheck.propose",
|
|
394
|
+
status: "applied",
|
|
395
|
+
conversationId: "other-conv",
|
|
396
|
+
summary: "Create health check X",
|
|
397
|
+
});
|
|
398
|
+
const res = await service.streamDecision({
|
|
399
|
+
...base,
|
|
400
|
+
token: "propose:row-1.nonce",
|
|
401
|
+
decision: "apply",
|
|
402
|
+
});
|
|
403
|
+
expect(res.status).toBe(404);
|
|
404
|
+
expect(streamTextCalls).toHaveLength(0);
|
|
405
|
+
});
|
|
406
|
+
|
|
407
|
+
test("refuses (404) an unknown/forged token", async () => {
|
|
408
|
+
const { service } = makeService(verdict("ON_TOPIC"), undefined);
|
|
409
|
+
const res = await service.streamDecision({
|
|
410
|
+
...base,
|
|
411
|
+
token: "propose:nope.nope",
|
|
412
|
+
decision: "apply",
|
|
413
|
+
});
|
|
414
|
+
expect(res.status).toBe(404);
|
|
415
|
+
expect(streamTextCalls).toHaveLength(0);
|
|
416
|
+
});
|
|
417
|
+
});
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import { describe, expect, test, mock } from "bun:test";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import type { AuthUser } from "@checkstack/backend-api";
|
|
4
|
+
import type { RegisteredAiTool } from "../tool-registry";
|
|
5
|
+
import type { ProposeApplyService } from "../propose-apply/service";
|
|
6
|
+
import type { ChatReadInvoker } from "./read-invoker";
|
|
7
|
+
import {
|
|
8
|
+
buildChatToolCallbacks,
|
|
9
|
+
type ChatRecordExecuted,
|
|
10
|
+
} from "./chat-service";
|
|
11
|
+
import { ToolBudgetExceededError } from "../rate-limit/tool-budget";
|
|
12
|
+
|
|
13
|
+
const principal: AuthUser = {
|
|
14
|
+
type: "user",
|
|
15
|
+
id: "u1",
|
|
16
|
+
accessRules: ["incident.incident.read"],
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
const readTool: RegisteredAiTool = {
|
|
20
|
+
name: "incident.list",
|
|
21
|
+
description: "list",
|
|
22
|
+
effect: "read",
|
|
23
|
+
input: z.object({ status: z.string().optional() }),
|
|
24
|
+
requiredAccessRules: ["incident.incident.read"],
|
|
25
|
+
execute: () => Promise.resolve({ rows: [] }),
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* A COMPOSITE read tool (no projection routing) — like `ai.searchDocs` /
|
|
30
|
+
* `ai.getDoc`. It has no entry in `readRouting`, so `runRead` must invoke its
|
|
31
|
+
* own `execute` rather than re-entering the live router.
|
|
32
|
+
*/
|
|
33
|
+
const compositeReadTool: RegisteredAiTool = {
|
|
34
|
+
name: "ai.searchDocs",
|
|
35
|
+
description: "search docs",
|
|
36
|
+
effect: "read",
|
|
37
|
+
input: z.object({ query: z.string() }),
|
|
38
|
+
requiredAccessRules: ["ai.chat.read"],
|
|
39
|
+
execute: ({ input }) =>
|
|
40
|
+
Promise.resolve({ echoed: (input as { query: string }).query }),
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
/** A fake db whose count query resolves to `used`, simulating the budget read. */
|
|
44
|
+
function budgetDb(used: number) {
|
|
45
|
+
const where = mock(() => Promise.resolve([{ value: used }]));
|
|
46
|
+
const from = mock(() => ({ where }));
|
|
47
|
+
const select = mock(() => ({ from }));
|
|
48
|
+
return { select } as never;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function deps(over?: {
|
|
52
|
+
used?: number;
|
|
53
|
+
invoke?: ChatReadInvoker["invoke"];
|
|
54
|
+
recordExecuted?: ChatRecordExecuted;
|
|
55
|
+
}) {
|
|
56
|
+
const recorded: Array<{
|
|
57
|
+
transport: string;
|
|
58
|
+
toolName: string;
|
|
59
|
+
argsHash: string;
|
|
60
|
+
conversationId: string;
|
|
61
|
+
}> = [];
|
|
62
|
+
const readInvoker: ChatReadInvoker = {
|
|
63
|
+
invoke: over?.invoke ?? (() => Promise.resolve({ rows: [1, 2] })),
|
|
64
|
+
};
|
|
65
|
+
const recordExecuted: ChatRecordExecuted =
|
|
66
|
+
over?.recordExecuted ??
|
|
67
|
+
(async ({ toolName, argsHash, conversationId }) => {
|
|
68
|
+
recorded.push({ transport: "chat", toolName, argsHash, conversationId });
|
|
69
|
+
});
|
|
70
|
+
const proposeApply = {} as ProposeApplyService;
|
|
71
|
+
const readRouting = new Map([
|
|
72
|
+
["incident.list", { pluginId: "incident", procedureKey: "listIncidents" }],
|
|
73
|
+
]);
|
|
74
|
+
const callbacks = buildChatToolCallbacks({
|
|
75
|
+
proposeApply,
|
|
76
|
+
readInvoker,
|
|
77
|
+
recordExecuted,
|
|
78
|
+
readRouting,
|
|
79
|
+
db: budgetDb(over?.used ?? 0),
|
|
80
|
+
conversationId: "conv-1",
|
|
81
|
+
forwardHeaders: { cookie: "session=x" },
|
|
82
|
+
internalUrl: "http://localhost:3000",
|
|
83
|
+
});
|
|
84
|
+
return { callbacks, recorded };
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
describe("chat read auditing + budget (P4 review item 2)", () => {
|
|
88
|
+
test("a chat read writes an ai_tool_calls row with transport 'chat' + args hash", async () => {
|
|
89
|
+
const { callbacks, recorded } = deps();
|
|
90
|
+
const result = await callbacks.runRead({
|
|
91
|
+
principal,
|
|
92
|
+
tool: readTool,
|
|
93
|
+
input: { status: "open" },
|
|
94
|
+
});
|
|
95
|
+
expect(result).toEqual({ rows: [1, 2] });
|
|
96
|
+
// The read was audit-recorded so it lands in the log AND counts to budget.
|
|
97
|
+
expect(recorded).toHaveLength(1);
|
|
98
|
+
expect(recorded[0]?.transport).toBe("chat");
|
|
99
|
+
expect(recorded[0]?.toolName).toBe("incident.list");
|
|
100
|
+
expect(recorded[0]?.conversationId).toBe("conv-1");
|
|
101
|
+
// The args hash is a SHA-256 hex digest, never the raw args.
|
|
102
|
+
expect(recorded[0]?.argsHash).toMatch(/^[0-9a-f]{64}$/);
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
test("enforceBudget passes when under budget", async () => {
|
|
106
|
+
const { callbacks } = deps({ used: 0 });
|
|
107
|
+
await expect(callbacks.enforceBudget(principal)).resolves.toBeUndefined();
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test("an over-budget chat read is rejected before it runs (budget counts chat reads)", async () => {
|
|
111
|
+
let invoked = false;
|
|
112
|
+
const { callbacks } = deps({
|
|
113
|
+
used: 999,
|
|
114
|
+
invoke: () => {
|
|
115
|
+
invoked = true;
|
|
116
|
+
return Promise.resolve({});
|
|
117
|
+
},
|
|
118
|
+
});
|
|
119
|
+
// The SDK tool calls enforceBudget BEFORE runRead; over budget -> throw.
|
|
120
|
+
await expect(callbacks.enforceBudget(principal)).rejects.toBeInstanceOf(
|
|
121
|
+
ToolBudgetExceededError,
|
|
122
|
+
);
|
|
123
|
+
expect(invoked).toBe(false);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("a read whose recordExecuted fails still surfaces (recording is awaited, audit-critical)", async () => {
|
|
127
|
+
// The audit row is awaited because the budget depends on it; a recording
|
|
128
|
+
// failure must propagate rather than silently undercount.
|
|
129
|
+
const { callbacks } = deps({
|
|
130
|
+
recordExecuted: () => Promise.reject(new Error("db down")),
|
|
131
|
+
});
|
|
132
|
+
await expect(
|
|
133
|
+
callbacks.runRead({ principal, tool: readTool, input: {} }),
|
|
134
|
+
).rejects.toThrow("db down");
|
|
135
|
+
});
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
describe("runRead composite-read fallback (searchDocs/getDoc path)", () => {
|
|
139
|
+
test("a routing-less read tool runs its own execute (NOT the router)", async () => {
|
|
140
|
+
// The readInvoker must NOT be touched for a composite tool; if it were, the
|
|
141
|
+
// routing-less tool would error "has no routing" in the pre-fallback code.
|
|
142
|
+
const invoke = mock(() => Promise.reject(new Error("router should not run")));
|
|
143
|
+
const { callbacks, recorded } = deps({ invoke });
|
|
144
|
+
|
|
145
|
+
const docPrincipal: AuthUser = {
|
|
146
|
+
type: "user",
|
|
147
|
+
id: "u1",
|
|
148
|
+
accessRules: ["ai.chat.read"],
|
|
149
|
+
};
|
|
150
|
+
const result = await callbacks.runRead({
|
|
151
|
+
principal: docPrincipal,
|
|
152
|
+
tool: compositeReadTool,
|
|
153
|
+
input: { query: "health checks" },
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
// The tool's own execute produced the result.
|
|
157
|
+
expect(result).toEqual({ echoed: "health checks" });
|
|
158
|
+
// The live-router invoker was never called (no routing entry).
|
|
159
|
+
expect(invoke).not.toHaveBeenCalled();
|
|
160
|
+
// The composite read is still audit-recorded (audit + budget count).
|
|
161
|
+
expect(recorded).toHaveLength(1);
|
|
162
|
+
expect(recorded[0]?.toolName).toBe("ai.searchDocs");
|
|
163
|
+
expect(recorded[0]?.transport).toBe("chat");
|
|
164
|
+
});
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
/** A mutate tool used to exercise the per-turn propose/auto-apply dedupe. */
|
|
168
|
+
const mutateTool: RegisteredAiTool = {
|
|
169
|
+
name: "healthcheck.update",
|
|
170
|
+
description: "update a health check",
|
|
171
|
+
effect: "mutate",
|
|
172
|
+
input: z.object({
|
|
173
|
+
id: z.string(),
|
|
174
|
+
body: z.record(z.string(), z.unknown()),
|
|
175
|
+
}),
|
|
176
|
+
requiredAccessRules: ["healthcheck.healthcheck.manage"],
|
|
177
|
+
execute: () => Promise.resolve({ ok: true }),
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
/** Build callbacks wired to a propose-spy ProposeApplyService (no real store). */
|
|
181
|
+
function dedupeCallbacks(proposeMock: ReturnType<typeof mock>) {
|
|
182
|
+
// Partial service stub: only `propose`/`apply` are exercised here (the same
|
|
183
|
+
// pattern the existing harness uses with `{} as ProposeApplyService`).
|
|
184
|
+
const proposeApply = {
|
|
185
|
+
propose: proposeMock,
|
|
186
|
+
apply: mock(() =>
|
|
187
|
+
Promise.resolve({ toolCallId: "row-1", result: { ok: true } }),
|
|
188
|
+
),
|
|
189
|
+
} as unknown as ProposeApplyService;
|
|
190
|
+
return buildChatToolCallbacks({
|
|
191
|
+
proposeApply,
|
|
192
|
+
readInvoker: { invoke: () => Promise.resolve({}) },
|
|
193
|
+
recordExecuted: async () => {},
|
|
194
|
+
readRouting: new Map(),
|
|
195
|
+
db: budgetDb(0),
|
|
196
|
+
conversationId: "conv-1",
|
|
197
|
+
forwardHeaders: {},
|
|
198
|
+
internalUrl: "http://localhost:3000",
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
describe("per-turn mutating-tool dedupe (no triple proposals)", () => {
|
|
203
|
+
const proposal = {
|
|
204
|
+
token: "propose:row-1.nonce",
|
|
205
|
+
summary: "Update health check",
|
|
206
|
+
payload: { id: "hc1" },
|
|
207
|
+
diff: undefined,
|
|
208
|
+
toolCallId: "row-1",
|
|
209
|
+
expiresAt: new Date("2026-06-03T00:00:00Z"),
|
|
210
|
+
};
|
|
211
|
+
|
|
212
|
+
test("a repeated identical propose returns a duplicate, proposed only once", async () => {
|
|
213
|
+
const proposeMock = mock(() => Promise.resolve(proposal));
|
|
214
|
+
const callbacks = dedupeCallbacks(proposeMock);
|
|
215
|
+
const input = { id: "hc1", body: { intervalSeconds: 120 } };
|
|
216
|
+
|
|
217
|
+
const first = await callbacks.propose({ principal, tool: mutateTool, input });
|
|
218
|
+
const second = await callbacks.propose({ principal, tool: mutateTool, input });
|
|
219
|
+
|
|
220
|
+
// First call creates the real confirm card (with a token + a stop note).
|
|
221
|
+
expect("__confirm" in first && first.__confirm).toBe(true);
|
|
222
|
+
expect("token" in first).toBe(true);
|
|
223
|
+
if ("note" in first) expect(first.note.length).toBeGreaterThan(0);
|
|
224
|
+
|
|
225
|
+
// Second identical call is deduped: no card, no new token, a clear note.
|
|
226
|
+
expect("__duplicate" in second && second.__duplicate).toBe(true);
|
|
227
|
+
expect("token" in second).toBe(false);
|
|
228
|
+
|
|
229
|
+
// The single-use proposal token is minted exactly ONCE despite the repeated call.
|
|
230
|
+
expect(proposeMock).toHaveBeenCalledTimes(1);
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
test("a DIFFERENT propose in the same turn is NOT deduped", async () => {
|
|
234
|
+
const proposeMock = mock(() => Promise.resolve(proposal));
|
|
235
|
+
const callbacks = dedupeCallbacks(proposeMock);
|
|
236
|
+
|
|
237
|
+
await callbacks.propose({
|
|
238
|
+
principal,
|
|
239
|
+
tool: mutateTool,
|
|
240
|
+
input: { id: "hc1", body: { intervalSeconds: 120 } },
|
|
241
|
+
});
|
|
242
|
+
await callbacks.propose({
|
|
243
|
+
principal,
|
|
244
|
+
tool: mutateTool,
|
|
245
|
+
input: { id: "hc2", body: { intervalSeconds: 120 } },
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
expect(proposeMock).toHaveBeenCalledTimes(2);
|
|
249
|
+
});
|
|
250
|
+
});
|