@checkstack/ai-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/drizzle/0000_productive_jackpot.sql +26 -0
- package/drizzle/0001_puzzling_purple_man.sql +26 -0
- package/drizzle/0002_sparkling_paper_doll.sql +15 -0
- package/drizzle/0003_married_senator_kelly.sql +1 -0
- package/drizzle/0004_crazy_miek.sql +2 -0
- package/drizzle/0005_tearful_randall_flagg.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +232 -0
- package/drizzle/meta/0001_snapshot.json +434 -0
- package/drizzle/meta/0002_snapshot.json +551 -0
- package/drizzle/meta/0003_snapshot.json +557 -0
- package/drizzle/meta/0004_snapshot.json +573 -0
- package/drizzle/meta/0005_snapshot.json +574 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +42 -0
- package/src/agent-runner.test.ts +262 -0
- package/src/agent-runner.ts +262 -0
- package/src/chat/agent-loop.test.ts +119 -0
- package/src/chat/agent-loop.ts +73 -0
- package/src/chat/auto-apply.test.ts +237 -0
- package/src/chat/chat-handler.ts +111 -0
- package/src/chat/chat-service.streamturn.test.ts +417 -0
- package/src/chat/chat-service.test.ts +250 -0
- package/src/chat/chat-service.ts +923 -0
- package/src/chat/classifier-service.ts +64 -0
- package/src/chat/classifier.logic.test.ts +92 -0
- package/src/chat/classifier.logic.ts +71 -0
- package/src/chat/conversation-store.it.test.ts +203 -0
- package/src/chat/conversation-store.test.ts +248 -0
- package/src/chat/conversation-store.ts +237 -0
- package/src/chat/decision.logic.test.ts +45 -0
- package/src/chat/decision.logic.ts +54 -0
- package/src/chat/llm-provider.test.ts +63 -0
- package/src/chat/llm-provider.ts +67 -0
- package/src/chat/model-error.logic.test.ts +60 -0
- package/src/chat/model-error.logic.ts +65 -0
- package/src/chat/normalize-messages.logic.test.ts +101 -0
- package/src/chat/normalize-messages.logic.ts +65 -0
- package/src/chat/permission-mode.logic.test.ts +70 -0
- package/src/chat/permission-mode.logic.ts +45 -0
- package/src/chat/read-invoker.ts +72 -0
- package/src/chat/replay.test.ts +174 -0
- package/src/chat/scrub-content.test.ts +183 -0
- package/src/chat/scrub-content.ts +154 -0
- package/src/chat/sdk-tools.test.ts +168 -0
- package/src/chat/sdk-tools.ts +181 -0
- package/src/chat/title-service.test.ts +146 -0
- package/src/chat/title-service.ts +111 -0
- package/src/chat/title.logic.test.ts +98 -0
- package/src/chat/title.logic.ts +102 -0
- package/src/extension-points.ts +41 -0
- package/src/generated/docs-index.ts +3020 -0
- package/src/hardening/handler-authz.test.ts +282 -0
- package/src/hardening/no-secret-leak.test.ts +303 -0
- package/src/hooks.ts +33 -0
- package/src/index.ts +542 -0
- package/src/mcp/connection-registry.test.ts +25 -0
- package/src/mcp/connection-registry.ts +54 -0
- package/src/mcp/mcp-conformance.it.test.ts +128 -0
- package/src/mcp/server.test.ts +285 -0
- package/src/mcp/server.ts +300 -0
- package/src/mcp/tool-invoker.ts +65 -0
- package/src/openai-provider.test.ts +64 -0
- package/src/openai-provider.ts +146 -0
- package/src/projection.test.ts +97 -0
- package/src/projection.ts +132 -0
- package/src/propose-apply/args-hash.test.ts +26 -0
- package/src/propose-apply/args-hash.ts +30 -0
- package/src/propose-apply/service.test.ts +423 -0
- package/src/propose-apply/service.ts +419 -0
- package/src/propose-apply/store.test.ts +136 -0
- package/src/propose-apply/store.ts +224 -0
- package/src/propose-apply/token.test.ts +52 -0
- package/src/propose-apply/token.ts +71 -0
- package/src/rate-limit/spend-ledger.it.test.ts +224 -0
- package/src/rate-limit/spend-ledger.test.ts +176 -0
- package/src/rate-limit/spend-ledger.ts +162 -0
- package/src/rate-limit/tool-budget.it.test.ts +173 -0
- package/src/rate-limit/tool-budget.test.ts +58 -0
- package/src/rate-limit/tool-budget.ts +107 -0
- package/src/registry-wiring.test.ts +131 -0
- package/src/registry-wiring.ts +68 -0
- package/src/resolver.test.ts +156 -0
- package/src/resolver.ts +78 -0
- package/src/router.test.ts +78 -0
- package/src/router.ts +345 -0
- package/src/schema.ts +284 -0
- package/src/serializer.test.ts +88 -0
- package/src/serializer.ts +42 -0
- package/src/tool-registry.ts +58 -0
- package/src/tools/composite-tools.ts +24 -0
- package/src/tools/docs-tools.test.ts +150 -0
- package/src/tools/docs-tools.ts +115 -0
- package/src/tools/probe-url.test.ts +51 -0
- package/src/tools/probe-url.ts +146 -0
- package/src/tools/rank-docs.test.ts +153 -0
- package/src/tools/rank-docs.ts +209 -0
- package/src/tools/script-context-extract.test.ts +93 -0
- package/src/tools/script-context-extract.ts +283 -0
- package/src/tools/ssrf-guard.test.ts +69 -0
- package/src/tools/ssrf-guard.ts +108 -0
- package/src/tools/tool-set.e2e.test.ts +64 -0
- package/src/user-rpc-client.test.ts +45 -0
- package/src/user-rpc-client.ts +60 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { generateText, type LanguageModel, type LanguageModelUsage } from "ai";
|
|
2
|
+
import {
|
|
3
|
+
buildClassifierPrompt,
|
|
4
|
+
parseClassifierVerdict,
|
|
5
|
+
type ClassifierVerdict,
|
|
6
|
+
} from "./classifier.logic";
|
|
7
|
+
|
|
8
|
+
/**
 * Outcome of one classifier invocation.
 *
 * Pairs the parsed verdict with the token usage reported for the call, so a
 * caller can account for the classifier call like any other model call.
 */
export interface ClassifierResult {
  /** Parsed topical verdict for the user's message. */
  verdict: ClassifierVerdict;
  /**
   * Token usage as handed back by the generator. It is passed through
   * untouched by this module.
   * NOTE(review): an earlier comment said omitted counts "yield zeros" -
   * confirm where that defaulting happens; it is not done in this file.
   */
  usage: LanguageModelUsage;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Signature of the model call the classifier depends on.
 *
 * `classifyTopic` falls back to an AI SDK `generateText` call when no
 * generator is supplied. Taking the call as a parameter lets tests pass a fake
 * generator, so the classifier can be exercised without a live provider.
 */
export type ClassifierTextGenerator = (args: {
  /** Language model the call is issued against. */
  model: LanguageModel;
  /** System prompt carrying the classification instructions. */
  system: string;
  /** Prompt text to classify. */
  prompt: string;
}) => Promise<{
  /** Raw reply text from the model. */
  text: string;
  /** Token usage reported for the call. */
  usage: LanguageModelUsage;
}>;
|
|
29
|
+
|
|
30
|
+
/**
 * Fallback generator: issues a single AI SDK `generateText` call with the
 * supplied model, system prompt and prompt, and hands back only the reply text
 * and the reported usage.
 */
const defaultGenerateClassifierText: ClassifierTextGenerator = async (args) => {
  const result = await generateText({
    model: args.model,
    system: args.system,
    prompt: args.prompt,
  });
  return { text: result.text, usage: result.usage };
};
|
|
39
|
+
|
|
40
|
+
/**
 * Decide whether a user message is on-topic for operating Checkstack.
 *
 * Builds the classifier prompt, runs one model call through `generate`, and
 * parses the reply into a verdict.
 *
 * Errors are deliberately NOT caught here: a rejected `generate` call
 * propagates to the caller, which can then choose to fail open (run the
 * normal turn) and can tell a classifier outage apart from a genuine
 * OFF_TOPIC verdict. A reply that does arrive is parsed by
 * `parseClassifierVerdict`, which resolves anything ambiguous to ON_TOPIC.
 *
 * @param model - The already-built language model used for the turn.
 * @param userText - The user's new message text.
 * @param generate - Optional replacement for the model call (tests inject a
 *   fake); defaults to the `generateText`-backed generator.
 * @returns The parsed verdict plus the usage reported by the generator.
 */
export async function classifyTopic({
  model,
  userText,
  generate = defaultGenerateClassifierText,
}: {
  /** The already-built language model used for the turn (provider-agnostic). */
  model: LanguageModel;
  /** The user's new message text. */
  userText: string;
  /** Override the model call (tests inject a fake; defaults to generateText). */
  generate?: ClassifierTextGenerator;
}): Promise<ClassifierResult> {
  const classifierPrompt = buildClassifierPrompt({ userText });
  const reply = await generate({
    model,
    system: classifierPrompt.system,
    prompt: classifierPrompt.prompt,
  });
  const verdict = parseClassifierVerdict(reply.text);
  return { verdict, usage: reply.usage };
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
buildClassifierPrompt,
|
|
4
|
+
parseClassifierVerdict,
|
|
5
|
+
OFF_TOPIC_REFUSAL,
|
|
6
|
+
} from "./classifier.logic";
|
|
7
|
+
|
|
8
|
+
/**
 * Prompt-shape tests for `buildClassifierPrompt`.
 *
 * These pin the wording of the classifier system prompt so that tightening it
 * cannot silently drop one of the ON_TOPIC allowances (meta questions,
 * greetings, how-to questions) or the "when in doubt" default. All patterns
 * are case-insensitive, so each alternative is listed once, in lower case.
 */
describe("buildClassifierPrompt", () => {
  test("carries the user text verbatim as the prompt and a non-empty system prompt", () => {
    const { system, prompt } = buildClassifierPrompt({
      userText: "Summarize the open incidents",
    });
    expect(prompt).toBe("Summarize the open incidents");
    expect(system.length).toBeGreaterThan(0);
    // The system prompt names the platform and both verdict tokens so the
    // model knows what is on-topic and which tokens it may answer with.
    expect(system).toContain("Checkstack");
    expect(system).toContain("ON_TOPIC");
    expect(system).toContain("OFF_TOPIC");
  });

  test("system prompt explicitly lists meta/capability questions as ON_TOPIC", () => {
    const { system } = buildClassifierPrompt({ userText: "what can you do?" });
    // Must contain language that classifies assistant capability/meta questions
    // as ON_TOPIC. If this regresses (e.g. someone tightens the prompt and
    // removes the meta allowance), this assertion catches it.
    expect(system).toMatch(/what can you do|meta.*capability|capability.*question/i);
  });

  test("system prompt explicitly lists greetings as ON_TOPIC", () => {
    const { system } = buildClassifierPrompt({ userText: "hi" });
    // Greetings / conversational openers must be named in the ON_TOPIC section.
    // Double quotes need no escaping inside a regex literal.
    expect(system).toMatch(/greeting|conversational opener|"hi"/i);
  });

  test("system prompt explicitly lists how-to/conceptual questions as ON_TOPIC", () => {
    const { system } = buildClassifierPrompt({
      userText: "how do health checks work?",
    });
    // How-to and conceptual questions about using Checkstack must be on-topic.
    // Accept "how-to" or "how to" explicitly rather than any character.
    expect(system).toMatch(/how[- ]to|conceptual/i);
  });

  test("system prompt restricts OFF_TOPIC to CLEARLY unrelated requests only", () => {
    const { system } = buildClassifierPrompt({ userText: "write me a poem" });
    // The word "clearly" must gate the OFF_TOPIC definition so borderline
    // messages default to ON_TOPIC. The `i` flag already covers "CLEARLY".
    expect(system).toMatch(/clearly unrelated/i);
  });

  test("system prompt retains the 'when in doubt' ON_TOPIC default", () => {
    const { system } = buildClassifierPrompt({ userText: "???" });
    expect(system).toMatch(/when in doubt.*on_topic/i);
  });
});
|
|
56
|
+
|
|
57
|
+
/**
 * Behavior tests for `parseClassifierVerdict` and the canned refusal text.
 *
 * The parser must only answer OFF_TOPIC for a reply that clearly says so;
 * every ambiguous, empty, or mixed reply has to resolve to ON_TOPIC.
 */
describe("parseClassifierVerdict", () => {
  test("recognizes a bare ON_TOPIC reply", () => {
    const verdict = parseClassifierVerdict("ON_TOPIC");
    expect(verdict).toBe("ON_TOPIC");
  });

  test("recognizes a bare OFF_TOPIC reply", () => {
    const verdict = parseClassifierVerdict("OFF_TOPIC");
    expect(verdict).toBe("OFF_TOPIC");
  });

  test("tolerates surrounding whitespace/punctuation/casing on OFF_TOPIC", () => {
    const decorated = [" off_topic.\n", "Verdict: OFF-TOPIC"];
    for (const reply of decorated) {
      expect(parseClassifierVerdict(reply)).toBe("OFF_TOPIC");
    }
  });

  test("defaults ambiguous replies to ON_TOPIC (false refusal is worse)", () => {
    const ambiguous = ["maybe?", "", "I think this is fine"];
    for (const reply of ambiguous) {
      expect(parseClassifierVerdict(reply)).toBe("ON_TOPIC");
    }
  });

  test("a reply mentioning BOTH tokens defaults to ON_TOPIC (does not refuse)", () => {
    const mixed = "not OFF_TOPIC, it is ON_TOPIC";
    expect(parseClassifierVerdict(mixed)).toBe("ON_TOPIC");
  });

  test("the canned refusal is concise, uses no em-dashes, and nudges toward supported topics", () => {
    expect(OFF_TOPIC_REFUSAL).toContain("Checkstack");
    expect(OFF_TOPIC_REFUSAL).not.toContain("—");
    // A bare "I can't" is not enough: the refusal has to name at least one
    // supported domain so the user knows what they can ask instead.
    const supportedDomain = /incident|health check|anomal|automation/i;
    expect(OFF_TOPIC_REFUSAL).toMatch(supportedDomain);
  });
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure, DOM-free helpers for the cheap topical PRE-CLASSIFIER that runs before
|
|
3
|
+
* the expensive agent/tool loop. The classifier asks a small model whether the
|
|
4
|
+
* user's message is about operating Checkstack; an OFF_TOPIC verdict lets the
|
|
5
|
+
* chat service short-circuit with a canned refusal INSTEAD of running the full
|
|
6
|
+
* tool loop, saving generation + tool tokens.
|
|
7
|
+
*
|
|
8
|
+
* Both functions are total (never throw). The parser leans toward ON_TOPIC on
|
|
9
|
+
* anything it does not clearly recognize as OFF_TOPIC: a false refusal of a real
|
|
10
|
+
* ops question is worse than letting one off-topic request slide.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Verdict produced by the topical classifier: an ON_TOPIC message proceeds to
 * the normal turn, an OFF_TOPIC message is short-circuited.
 */
export type ClassifierVerdict =
  | "ON_TOPIC"
  | "OFF_TOPIC";
|
|
15
|
+
|
|
16
|
+
/**
 * System prompt sent with every classifier call.
 *
 * It is a fixed string assembled from the fragments below with no separator,
 * so each fragment carries its own trailing space. The prompt asks the model
 * to answer with the bare token only, which keeps the call short.
 */
const CLASSIFIER_SYSTEM_PROMPT = [
  // Role and task.
  "You are a topical classifier for Checkstack, an incident, health-check, ",
  "anomaly, automation, and monitoring/operations platform. Decide whether the ",
  "user's message is ON_TOPIC or OFF_TOPIC. ",
  // What counts as ON_TOPIC.
  "ON_TOPIC includes: operating or reasoning about Checkstack (incidents, ",
  "health checks, anomalies, automations, monitoring, on-call, the platform's ",
  "data and configuration); meta/capability questions about the assistant itself ",
  "(\"what can you do\", \"who are you\", \"help\", \"what features do you have\"); ",
  "greetings and conversational openers (\"hi\", \"hello\", \"hey\"); ",
  "how-to or conceptual questions about using Checkstack features or workflows ",
  "(\"how do health checks work\", \"how do I create an automation\"). ",
  // What counts as OFF_TOPIC.
  "OFF_TOPIC means CLEARLY unrelated requests: general coding help unrelated to ",
  "Checkstack, creative writing, and general trivia or knowledge questions. ",
  // Tie-breaker and reply format.
  "When in doubt, reply ON_TOPIC. Reply with the token only.",
].join("");
|
|
35
|
+
|
|
36
|
+
/**
 * Fixed, user-facing reply used when the classifier verdict is OFF_TOPIC.
 *
 * Kept short: one sentence stating the scope, one redirecting the user to
 * supported topics. Plain hyphens only, no em-dashes.
 */
export const OFF_TOPIC_REFUSAL = [
  "That looks outside my scope - I focus on Checkstack monitoring and ",
  "operations (incidents, health checks, anomalies, automations). ",
  "Try asking me about those, or ask \"what can you do?\" to see what I support.",
].join("");
|
|
44
|
+
|
|
45
|
+
/**
 * Assemble the system/prompt pair for one classifier call.
 *
 * The system prompt is the fixed classifier instruction text; the user's
 * message is forwarded unchanged as the prompt. The function has no side
 * effects, so the prompt shape can be asserted in tests without a model.
 *
 * @param userText - The user's message, used verbatim as the prompt.
 * @returns The `system` and `prompt` strings to send to the model.
 */
export function buildClassifierPrompt({
  userText,
}: {
  userText: string;
}): { system: string; prompt: string } {
  const system = CLASSIFIER_SYSTEM_PROMPT;
  const prompt = userText;
  return { system, prompt };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Parse the classifier's raw reply into a verdict.
 *
 * Returns OFF_TOPIC only when the reply contains an OFF_TOPIC token and does
 * NOT also contain an ON_TOPIC token. Everything else (ON_TOPIC, ambiguous,
 * empty, or unrecognized text) resolves to ON_TOPIC, so a real ops question
 * is never refused because of an unclear reply.
 *
 * Token matching is case-insensitive and accepts any run of whitespace,
 * underscores, hyphens, or backslashes between the two words. This covers
 * `OFF_TOPIC`, `off-topic`, `Off topic`, and markdown-escaped `OFF\_TOPIC`.
 * The first word must not be glued to a preceding letter, so ordinary words
 * such as "handoff topic" or "non-topic" are not mistaken for a token.
 *
 * @param raw - The model's reply text.
 * @returns `"OFF_TOPIC"` for an unambiguous off-topic reply, otherwise
 *   `"ON_TOPIC"`.
 */
export function parseClassifierVerdict(raw: string): ClassifierVerdict {
  const upper = raw.toUpperCase();
  // `(?<![A-Z])` rejects matches inside a longer word (e.g. HANDOFF, NON).
  const saysOff = /(?<![A-Z])OFF[\s_\\-]+TOPIC/.test(upper);
  const saysOn = /(?<![A-Z])ON[\s_\\-]+TOPIC/.test(upper);
  // Only treat as OFF_TOPIC when the reply unambiguously says off and not on.
  return saysOff && !saysOn ? "OFF_TOPIC" : "ON_TOPIC";
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-pod conversation readback (real Postgres) — matrix #15, plan §16,
|
|
3
|
+
* state-and-scale §9.
|
|
4
|
+
*
|
|
5
|
+
* The DETERMINISTIC backstop the single-process unit suite cannot provide: "does
|
|
6
|
+
* a read return the same answer on every pod?". A conversation created against
|
|
7
|
+
* one connection pool MUST be readable, owner-scoped, via a SECOND pool over the
|
|
8
|
+
* SAME Postgres — because the chat agent loop is resumable on whichever pod
|
|
9
|
+
* handles the next turn, and nothing about a conversation is pod-local.
|
|
10
|
+
*
|
|
11
|
+
* Two-pod model (faithful proxy, mirrors `automation-backend`'s
|
|
12
|
+
* `cross-pod-read-consistency.it.test.ts`): two independent `AiConversationStore`
|
|
13
|
+
* instances, each over its OWN `pg.Pool`, BOTH pointed at the SAME schema.
|
|
14
|
+
* Separate pools = no shared JS heap; one DB = the shared durable substrate N
|
|
15
|
+
* pods share in production.
|
|
16
|
+
*
|
|
17
|
+
* NEGATIVE: a conversation owned by user A is NOT readable by user B from pod B
|
|
18
|
+
* (the store is always owner-scoped, so it can never leak another user's chat).
|
|
19
|
+
*
|
|
20
|
+
* Gated behind `CHECKSTACK_IT=1`; connection from `CHECKSTACK_IT_PG_URL`. Runs in
|
|
21
|
+
* a freshly created, self-cleaning schema.
|
|
22
|
+
*/
|
|
23
|
+
import { afterAll, beforeAll, describe, expect, it } from "bun:test";
|
|
24
|
+
import { drizzle } from "drizzle-orm/node-postgres";
|
|
25
|
+
import { Pool } from "pg";
|
|
26
|
+
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
27
|
+
import * as schema from "../schema";
|
|
28
|
+
import {
|
|
29
|
+
createAiConversationStore,
|
|
30
|
+
type AiConversationStore,
|
|
31
|
+
} from "./conversation-store";
|
|
32
|
+
|
|
33
|
+
/**
 * Postgres connection string for the integration run. Taken from
 * `CHECKSTACK_IT_PG_URL` when set, otherwise a local default instance.
 */
const PG_URL =
  process.env.CHECKSTACK_IT_PG_URL ??
  "postgres://postgres:postgres@localhost:5432/postgres";

/**
 * Schema name unique to this test run: a fixed prefix followed by a random
 * UUID with its dashes removed.
 */
const SCHEMA = "it_ai_conv_" + crypto.randomUUID().replace(/-/g, "");
|
|
38
|
+
|
|
39
|
+
/**
 * One simulated pod: a conversation store bound to its own connection pool.
 */
interface Pod {
  /** The `pg` pool this pod's store runs its queries through. */
  pool: Pool;
  /** Conversation store created on top of `pool`. */
  store: AiConversationStore;
  /** Shuts the pod down by ending its pool. */
  end(): Promise<void>;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Create a simulated pod: a fresh `pg` pool whose connections default to the
 * per-run schema (via `search_path`), wrapped in drizzle and handed to a new
 * conversation store.
 *
 * @returns The pool, the store built on it, and an `end` that closes the pool.
 */
function makePod(): Pod {
  const searchPath = `-c search_path=${SCHEMA}`;
  const pool = new Pool({ connectionString: PG_URL, options: searchPath });
  // The drizzle handle is cast to the project's database type the store expects.
  const db = drizzle(pool, { schema }) as unknown as SafeDatabase<typeof schema>;
  const store = createAiConversationStore({ db });
  return {
    pool,
    store,
    end: () => pool.end(),
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Cross-pod readback suite (skipped unless `CHECKSTACK_IT` is set).
 *
 * Setup creates a throwaway schema with the two conversation tables and two
 * pods (independent pools) pointed at it. Teardown always attempts every
 * cleanup step: both pod pools are closed, the schema is dropped, and the
 * admin pool is ended even if an earlier step fails. Any pool-close failure
 * is re-thrown afterwards so it is still reported.
 */
describe.skipIf(!process.env.CHECKSTACK_IT)(
  "cross-pod conversation readback (shared Postgres)",
  () => {
    let admin: Pool;
    let podA: Pod;
    let podB: Pod;

    beforeAll(async () => {
      admin = new Pool({ connectionString: PG_URL });
      await admin.query(`CREATE SCHEMA "${SCHEMA}"`);
      await admin.query(`
        CREATE TABLE "${SCHEMA}".ai_conversations (
          id text PRIMARY KEY,
          user_id text NOT NULL,
          title text,
          integration_id text,
          model text,
          permission_mode text NOT NULL DEFAULT 'approve',
          created_at timestamp NOT NULL DEFAULT now(),
          updated_at timestamp NOT NULL DEFAULT now(),
          archived_at timestamp
        )
      `);
      await admin.query(`
        CREATE TABLE "${SCHEMA}".ai_messages (
          id text PRIMARY KEY,
          conversation_id text NOT NULL
            REFERENCES "${SCHEMA}".ai_conversations(id) ON DELETE CASCADE,
          role text NOT NULL,
          content jsonb NOT NULL,
          tool_calls jsonb,
          model_messages jsonb,
          created_at timestamp NOT NULL DEFAULT now()
        )
      `);
      podA = makePod();
      podB = makePod();
    });

    afterAll(async () => {
      // Close both pod pools independently: one failing close must not stop
      // the other, nor skip the schema drop below. `?.` covers a setup that
      // failed before the pods were created.
      const closed = await Promise.allSettled([podA?.end(), podB?.end()]);
      try {
        await admin?.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
      } finally {
        // Always release the admin pool, even when the drop itself fails.
        await admin?.end();
      }
      // Surface a pool-close failure only after cleanup has been attempted.
      const failure = closed.find(
        (result): result is PromiseRejectedResult =>
          result.status === "rejected",
      );
      if (failure) throw failure.reason;
    });

    it("a conversation created on pod A is fully readable on pod B (same owner)", async () => {
      const created = await podA.store.createConversation({
        userId: "user-A",
        title: "Investigate prod outage",
        integrationId: "ai.openai-compatible.c1",
        model: "gpt-4o-mini",
      });
      await podA.store.appendMessage({
        conversationId: created.id,
        role: "user",
        content: { text: "what changed in the last hour?" },
      });

      // Pod B - a DIFFERENT pool, no shared heap - reads the SAME conversation.
      const fetched = await podB.store.getConversation({
        id: created.id,
        userId: "user-A",
      });
      expect(fetched?.id).toBe(created.id);
      expect(fetched?.title).toBe("Investigate prod outage");
      expect(fetched?.model).toBe("gpt-4o-mini");

      // And the transcript continues correctly from pod B.
      const messages = await podB.store.listMessages({
        conversationId: created.id,
      });
      expect(messages).toHaveLength(1);
      expect(messages[0]?.content).toEqual({
        text: "what changed in the last hour?",
      });

      // A follow-up appended on pod B is visible back on pod A (resumable). The
      // assistant turn carries the canonical AI-SDK ResponseMessage[] for
      // tool-call REPLAY - it must round-trip cross-pod so the next turn on any
      // pod replays the prior tool interaction (state-and-scale §9).
      await podB.store.appendMessage({
        conversationId: created.id,
        role: "assistant",
        content: { text: "a deploy at 14:02" },
        modelMessages: [
          {
            role: "assistant",
            content: [
              {
                type: "tool-call",
                toolCallId: "tc1",
                toolName: "incident.list",
                input: { status: "open" },
              },
            ],
          },
          {
            role: "tool",
            content: [
              {
                type: "tool-result",
                toolCallId: "tc1",
                toolName: "incident.list",
                output: { type: "json", value: { rows: [] } },
              },
            ],
          },
        ],
      });
      const fromA = await podA.store.listMessages({ conversationId: created.id });
      expect(fromA.map((m) => m.role)).toEqual(["user", "assistant"]);
      // The replay history written on pod B is fully readable on pod A.
      const assistant = fromA[1];
      expect(assistant?.modelMessages).toHaveLength(2);
      expect(assistant?.modelMessages?.[0]?.role).toBe("assistant");
      expect(assistant?.modelMessages?.[1]?.role).toBe("tool");
    });

    it("NEGATIVE: pod B cannot read user A's conversation as a DIFFERENT user", async () => {
      const created = await podA.store.createConversation({
        userId: "user-A",
        title: "private",
      });

      // User B asks pod B for the row by id - owner-scoped read returns nothing.
      const leaked = await podB.store.getConversation({
        id: created.id,
        userId: "user-B",
      });
      expect(leaked).toBeUndefined();

      // User B's list never includes it either.
      const listB = await podB.store.listConversations({ userId: "user-B" });
      expect(listB.find((c) => c.id === created.id)).toBeUndefined();

      // And user B cannot delete it (owner-scoped) - the row survives.
      const deleted = await podB.store.deleteConversation({
        id: created.id,
        userId: "user-B",
      });
      expect(deleted).toBe(false);
      const stillThere = await podA.store.getConversation({
        id: created.id,
        userId: "user-A",
      });
      expect(stillThere?.id).toBe(created.id);
    });
  },
);
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { describe, expect, test, mock } from "bun:test";
|
|
2
|
+
import { createAiConversationStore } from "./conversation-store";
|
|
3
|
+
import type { AiConversationRow, AiMessageRow } from "../schema";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* State-and-scale (#15): the conversation store is pure shared-Postgres CRUD —
|
|
7
|
+
* it holds NO pod-local state, so a conversation written by one pod is readable
|
|
8
|
+
* by any other simply because both read/write the same database. These unit
|
|
9
|
+
* tests assert that property structurally:
|
|
10
|
+
*
|
|
11
|
+
* - every read/write goes through the injected `db` (no in-memory cache that
|
|
12
|
+
* would diverge between pods), and
|
|
13
|
+
* - reads are always owner-scoped (a `userId` filter), so the store can never
|
|
14
|
+
* leak another user's chat.
|
|
15
|
+
*
|
|
16
|
+
* True cross-pod readback against a live Postgres is exercised in
|
|
17
|
+
* `conversation-store.it.test.ts` (env-gated). Here we use a spy db.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
 * Build a conversation row fixture.
 *
 * @param over - Fields that replace the fixture defaults.
 * @returns A complete row: the defaults below with `over` applied on top.
 */
function convRow(over: Partial<AiConversationRow> = {}): AiConversationRow {
  const defaults: AiConversationRow = {
    id: "c1",
    userId: "u1",
    title: "t",
    integrationId: null,
    model: null,
    permissionMode: "approve",
    // Separate Date instances, so the two timestamps never alias each other.
    createdAt: new Date("2026-06-01T00:00:00Z"),
    updatedAt: new Date("2026-06-01T00:00:00Z"),
    archivedAt: null,
  };
  return { ...defaults, ...over };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build a message row fixture.
 *
 * @param over - Fields that replace the fixture defaults.
 * @returns A complete row: the defaults below with `over` applied on top.
 */
function msgRow(over: Partial<AiMessageRow> = {}): AiMessageRow {
  const defaults: AiMessageRow = {
    id: "m1",
    conversationId: "c1",
    role: "user",
    content: { text: "hi" },
    toolCalls: null,
    modelMessages: null,
    createdAt: new Date("2026-06-01T00:00:00Z"),
  };
  return { ...defaults, ...over };
}
|
|
47
|
+
|
|
48
|
+
describe("AiConversationStore", () => {
|
|
49
|
+
test("createConversation inserts and returns the row", async () => {
|
|
50
|
+
const returning = mock(() => Promise.resolve([convRow()]));
|
|
51
|
+
const values = mock((_v: Record<string, unknown>) => ({ returning }));
|
|
52
|
+
const db = { insert: mock(() => ({ values })) };
|
|
53
|
+
const store = createAiConversationStore({ db: db as never });
|
|
54
|
+
|
|
55
|
+
const created = await store.createConversation({
|
|
56
|
+
userId: "u1",
|
|
57
|
+
title: "t",
|
|
58
|
+
integrationId: "ai.openai-compatible.c1",
|
|
59
|
+
model: "gpt-4o-mini",
|
|
60
|
+
});
|
|
61
|
+
expect(created.id).toBe("c1");
|
|
62
|
+
const inserted = values.mock.calls[0]?.[0] as {
|
|
63
|
+
userId: string;
|
|
64
|
+
model: string;
|
|
65
|
+
};
|
|
66
|
+
expect(inserted.userId).toBe("u1");
|
|
67
|
+
expect(inserted.model).toBe("gpt-4o-mini");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test("createConversation passes permissionMode through to the insert", async () => {
|
|
71
|
+
const returning = mock(() =>
|
|
72
|
+
Promise.resolve([convRow({ permissionMode: "auto" })]),
|
|
73
|
+
);
|
|
74
|
+
const values = mock((_v: Record<string, unknown>) => ({ returning }));
|
|
75
|
+
const db = { insert: mock(() => ({ values })) };
|
|
76
|
+
const store = createAiConversationStore({ db: db as never });
|
|
77
|
+
|
|
78
|
+
const created = await store.createConversation({
|
|
79
|
+
userId: "u1",
|
|
80
|
+
permissionMode: "auto",
|
|
81
|
+
});
|
|
82
|
+
expect(created.permissionMode).toBe("auto");
|
|
83
|
+
const inserted = values.mock.calls[0]?.[0] as { permissionMode?: string };
|
|
84
|
+
expect(inserted.permissionMode).toBe("auto");
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test("createConversation omits permissionMode when not given (column default applies)", async () => {
|
|
88
|
+
const returning = mock(() => Promise.resolve([convRow()]));
|
|
89
|
+
const values = mock((_v: Record<string, unknown>) => ({ returning }));
|
|
90
|
+
const db = { insert: mock(() => ({ values })) };
|
|
91
|
+
const store = createAiConversationStore({ db: db as never });
|
|
92
|
+
|
|
93
|
+
await store.createConversation({ userId: "u1" });
|
|
94
|
+
const inserted = values.mock.calls[0]?.[0] as { permissionMode?: string };
|
|
95
|
+
// Omitted -> undefined, so the NOT NULL DEFAULT 'approve' column applies.
|
|
96
|
+
expect(inserted.permissionMode).toBeUndefined();
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test("updateConversation writes permissionMode (owner-scoped) and bumps updatedAt", async () => {
|
|
100
|
+
const returning = mock(() =>
|
|
101
|
+
Promise.resolve([convRow({ permissionMode: "auto" })]),
|
|
102
|
+
);
|
|
103
|
+
const where = mock(() => ({ returning }));
|
|
104
|
+
const set = mock(
|
|
105
|
+
(_patch: { permissionMode?: string; updatedAt: Date }) => ({ where }),
|
|
106
|
+
);
|
|
107
|
+
const db = { update: mock(() => ({ set })) };
|
|
108
|
+
const store = createAiConversationStore({ db: db as never });
|
|
109
|
+
|
|
110
|
+
const updated = await store.updateConversation({
|
|
111
|
+
id: "c1",
|
|
112
|
+
userId: "u1",
|
|
113
|
+
permissionMode: "auto",
|
|
114
|
+
});
|
|
115
|
+
expect(updated?.permissionMode).toBe("auto");
|
|
116
|
+
const patch = set.mock.calls[0]?.[0] as {
|
|
117
|
+
permissionMode?: string;
|
|
118
|
+
updatedAt: Date;
|
|
119
|
+
};
|
|
120
|
+
expect(patch.permissionMode).toBe("auto");
|
|
121
|
+
expect(patch.updatedAt).toBeInstanceOf(Date);
|
|
122
|
+
// Owner-scoped: a WHERE clause filtering id + userId is always present.
|
|
123
|
+
expect(where).toHaveBeenCalledTimes(1);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("updateConversation leaves permissionMode untouched when not provided", async () => {
|
|
127
|
+
const returning = mock(() => Promise.resolve([convRow()]));
|
|
128
|
+
const where = mock(() => ({ returning }));
|
|
129
|
+
const set = mock((_patch: Record<string, unknown>) => ({ where }));
|
|
130
|
+
const db = { update: mock(() => ({ set })) };
|
|
131
|
+
const store = createAiConversationStore({ db: db as never });
|
|
132
|
+
|
|
133
|
+
await store.updateConversation({ id: "c1", userId: "u1", title: "new" });
|
|
134
|
+
const patch = set.mock.calls[0]?.[0] as { permissionMode?: string };
|
|
135
|
+
expect(patch.permissionMode).toBeUndefined();
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test("getConversation reads through the db (no pod-local cache) and is owner-scoped", async () => {
  // Fake read chain: select() -> from() -> where() -> limit() resolving one row.
  const whereSpy = mock(() => ({ limit: async () => [convRow()] }));
  const fromSpy = mock(() => ({ where: whereSpy }));
  const fakeDb = { select: mock(() => ({ from: fromSpy })) };
  const store = createAiConversationStore({ db: fakeDb as never });

  const conversation = await store.getConversation({ id: "c1", userId: "u1" });
  expect(conversation?.id).toBe("c1");

  // The read must reach the shared db: an in-memory shortcut could hand
  // back different answers on different pods.
  expect(fakeDb.select).toHaveBeenCalledTimes(1);

  // A where() filter (id + userId) must always be applied so that a caller
  // can never read a conversation belonging to another user.
  expect(whereSpy).toHaveBeenCalledTimes(1);
});
|
|
153
|
+
|
|
154
|
+
test("appendMessage inserts the message and bumps the conversation updatedAt", async () => {
  // Insert chain: insert() -> values() -> returning() resolving the new message row.
  const returningSpy = mock(async () => [msgRow()]);
  const valuesSpy = mock(() => ({ returning: returningSpy }));

  // Update chain: update() -> set(patch) -> where() resolving an empty result.
  const updateWhereSpy = mock(async () => []);
  const setSpy = mock((_patch: { updatedAt: Date }) => ({
    where: updateWhereSpy,
  }));

  const fakeDb = {
    insert: mock(() => ({ values: valuesSpy })),
    update: mock(() => ({ set: setSpy })),
  };
  const store = createAiConversationStore({ db: fakeDb as never });

  const message = await store.appendMessage({
    conversationId: "c1",
    role: "assistant",
    content: { text: "hello" },
  });
  expect(message.id).toBe("m1");

  // The updatedAt bump shows up as exactly one UPDATE against the conversation row.
  expect(fakeDb.update).toHaveBeenCalledTimes(1);
  const [firstSetCall] = setSpy.mock.calls;
  const written = firstSetCall?.[0] as { updatedAt: Date };
  expect(written.updatedAt).toBeInstanceOf(Date);
});
|
|
176
|
+
|
|
177
|
+
test("listMessages reads ordered by createdAt for the conversation", async () => {
  // Fake read chain: select() -> from() -> where() -> orderBy() resolving two rows.
  const orderBySpy = mock(async () => [
    msgRow({ id: "m1" }),
    msgRow({ id: "m2" }),
  ]);
  const whereSpy = mock(() => ({ orderBy: orderBySpy }));
  const fromSpy = mock(() => ({ where: whereSpy }));
  const fakeDb = { select: mock(() => ({ from: fromSpy })) };
  const store = createAiConversationStore({ db: fakeDb as never });

  const messages = await store.listMessages({ conversationId: "c1" });

  // Rows come back in the order the db produced them, and an ORDER BY was issued.
  const ids = messages.map(({ id }) => id);
  expect(ids).toEqual(["m1", "m2"]);
  expect(orderBySpy).toHaveBeenCalledTimes(1);
});
|
|
190
|
+
|
|
191
|
+
test("listConversations is owner-scoped and ordered newest-first", async () => {
  // Fake read chain: select() -> from() -> where() -> orderBy() resolving one row.
  const orderBySpy = mock(async () => [convRow()]);
  const whereSpy = mock(() => ({ orderBy: orderBySpy }));
  const fromSpy = mock(() => ({ where: whereSpy }));
  const fakeDb = { select: mock(() => ({ from: fromSpy })) };
  const store = createAiConversationStore({ db: fakeDb as never });

  await store.listConversations({ userId: "u1" });

  // Structural check only: the listing must be filtered and must be ordered.
  // The concrete filter and sort arguments are not inspected here.
  expect(whereSpy).toHaveBeenCalledTimes(1);
  expect(orderBySpy).toHaveBeenCalledTimes(1);
});
|
|
202
|
+
|
|
203
|
+
test("archiveConversation soft-deletes via UPDATE (never DELETE), owner-scoped", async () => {
  // Fake update chain: update() -> set(patch) -> where() -> returning()
  // resolving the id of the archived row.
  const returningSpy = mock(async () => [{ id: "c1" }]);
  const whereSpy = mock(() => ({ returning: returningSpy }));
  const setSpy = mock((_patch: { archivedAt: Date }) => ({ where: whereSpy }));
  const fakeDb = {
    update: mock(() => ({ set: setSpy })),
    // Tripwire: archiving must never issue a DELETE, so any call blows up.
    delete: mock(() => {
      throw new Error("archive must not hard-delete");
    }),
  };
  const store = createAiConversationStore({ db: fakeDb as never });

  const archived = await store.archiveConversation({ id: "c1", userId: "u1" });
  expect(archived).toBe(true);

  // A soft delete is an UPDATE that stamps archivedAt; no row is removed.
  expect(fakeDb.update).toHaveBeenCalledTimes(1);
  expect(fakeDb.delete).not.toHaveBeenCalled();

  const [firstSetCall] = setSpy.mock.calls;
  const written = firstSetCall?.[0] as { archivedAt: Date };
  expect(written.archivedAt).toBeInstanceOf(Date);
  expect(whereSpy).toHaveBeenCalledTimes(1);
});
|
|
226
|
+
|
|
227
|
+
test("archiveConversation returns false when no owned active row matched", async () => {
  // returning() resolves an empty list, i.e. the UPDATE matched no row.
  const returningSpy = mock(async () => []);
  const whereSpy = mock(() => ({ returning: returningSpy }));
  const setSpy = mock(() => ({ where: whereSpy }));
  const fakeDb = { update: mock(() => ({ set: setSpy })) };
  const store = createAiConversationStore({ db: fakeDb as never });

  const archived = await store.archiveConversation({ id: "c1", userId: "u1" });
  expect(archived).toBe(false);
});
|
|
238
|
+
|
|
239
|
+
test("deleteConversation is owner-scoped and reports whether a row was removed", async () => {
  // Fake delete chain: delete() -> where() -> returning() resolving the removed row's id.
  const returningSpy = mock(async () => [{ id: "c1" }]);
  const whereSpy = mock(() => ({ returning: returningSpy }));
  const fakeDb = { delete: mock(() => ({ where: whereSpy })) };
  const store = createAiConversationStore({ db: fakeDb as never });

  const removed = await store.deleteConversation({ id: "c1", userId: "u1" });
  expect(removed).toBe(true);

  // The DELETE must go through exactly one where() filter.
  expect(whereSpy).toHaveBeenCalledTimes(1);
});
|
|
248
|
+
});
|