@checkstack/ai-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/drizzle/0000_productive_jackpot.sql +26 -0
- package/drizzle/0001_puzzling_purple_man.sql +26 -0
- package/drizzle/0002_sparkling_paper_doll.sql +15 -0
- package/drizzle/0003_married_senator_kelly.sql +1 -0
- package/drizzle/0004_crazy_miek.sql +2 -0
- package/drizzle/0005_tearful_randall_flagg.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +232 -0
- package/drizzle/meta/0001_snapshot.json +434 -0
- package/drizzle/meta/0002_snapshot.json +551 -0
- package/drizzle/meta/0003_snapshot.json +557 -0
- package/drizzle/meta/0004_snapshot.json +573 -0
- package/drizzle/meta/0005_snapshot.json +574 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +42 -0
- package/src/agent-runner.test.ts +262 -0
- package/src/agent-runner.ts +262 -0
- package/src/chat/agent-loop.test.ts +119 -0
- package/src/chat/agent-loop.ts +73 -0
- package/src/chat/auto-apply.test.ts +237 -0
- package/src/chat/chat-handler.ts +111 -0
- package/src/chat/chat-service.streamturn.test.ts +417 -0
- package/src/chat/chat-service.test.ts +250 -0
- package/src/chat/chat-service.ts +923 -0
- package/src/chat/classifier-service.ts +64 -0
- package/src/chat/classifier.logic.test.ts +92 -0
- package/src/chat/classifier.logic.ts +71 -0
- package/src/chat/conversation-store.it.test.ts +203 -0
- package/src/chat/conversation-store.test.ts +248 -0
- package/src/chat/conversation-store.ts +237 -0
- package/src/chat/decision.logic.test.ts +45 -0
- package/src/chat/decision.logic.ts +54 -0
- package/src/chat/llm-provider.test.ts +63 -0
- package/src/chat/llm-provider.ts +67 -0
- package/src/chat/model-error.logic.test.ts +60 -0
- package/src/chat/model-error.logic.ts +65 -0
- package/src/chat/normalize-messages.logic.test.ts +101 -0
- package/src/chat/normalize-messages.logic.ts +65 -0
- package/src/chat/permission-mode.logic.test.ts +70 -0
- package/src/chat/permission-mode.logic.ts +45 -0
- package/src/chat/read-invoker.ts +72 -0
- package/src/chat/replay.test.ts +174 -0
- package/src/chat/scrub-content.test.ts +183 -0
- package/src/chat/scrub-content.ts +154 -0
- package/src/chat/sdk-tools.test.ts +168 -0
- package/src/chat/sdk-tools.ts +181 -0
- package/src/chat/title-service.test.ts +146 -0
- package/src/chat/title-service.ts +111 -0
- package/src/chat/title.logic.test.ts +98 -0
- package/src/chat/title.logic.ts +102 -0
- package/src/extension-points.ts +41 -0
- package/src/generated/docs-index.ts +3020 -0
- package/src/hardening/handler-authz.test.ts +282 -0
- package/src/hardening/no-secret-leak.test.ts +303 -0
- package/src/hooks.ts +33 -0
- package/src/index.ts +542 -0
- package/src/mcp/connection-registry.test.ts +25 -0
- package/src/mcp/connection-registry.ts +54 -0
- package/src/mcp/mcp-conformance.it.test.ts +128 -0
- package/src/mcp/server.test.ts +285 -0
- package/src/mcp/server.ts +300 -0
- package/src/mcp/tool-invoker.ts +65 -0
- package/src/openai-provider.test.ts +64 -0
- package/src/openai-provider.ts +146 -0
- package/src/projection.test.ts +97 -0
- package/src/projection.ts +132 -0
- package/src/propose-apply/args-hash.test.ts +26 -0
- package/src/propose-apply/args-hash.ts +30 -0
- package/src/propose-apply/service.test.ts +423 -0
- package/src/propose-apply/service.ts +419 -0
- package/src/propose-apply/store.test.ts +136 -0
- package/src/propose-apply/store.ts +224 -0
- package/src/propose-apply/token.test.ts +52 -0
- package/src/propose-apply/token.ts +71 -0
- package/src/rate-limit/spend-ledger.it.test.ts +224 -0
- package/src/rate-limit/spend-ledger.test.ts +176 -0
- package/src/rate-limit/spend-ledger.ts +162 -0
- package/src/rate-limit/tool-budget.it.test.ts +173 -0
- package/src/rate-limit/tool-budget.test.ts +58 -0
- package/src/rate-limit/tool-budget.ts +107 -0
- package/src/registry-wiring.test.ts +131 -0
- package/src/registry-wiring.ts +68 -0
- package/src/resolver.test.ts +156 -0
- package/src/resolver.ts +78 -0
- package/src/router.test.ts +78 -0
- package/src/router.ts +345 -0
- package/src/schema.ts +284 -0
- package/src/serializer.test.ts +88 -0
- package/src/serializer.ts +42 -0
- package/src/tool-registry.ts +58 -0
- package/src/tools/composite-tools.ts +24 -0
- package/src/tools/docs-tools.test.ts +150 -0
- package/src/tools/docs-tools.ts +115 -0
- package/src/tools/probe-url.test.ts +51 -0
- package/src/tools/probe-url.ts +146 -0
- package/src/tools/rank-docs.test.ts +153 -0
- package/src/tools/rank-docs.ts +209 -0
- package/src/tools/script-context-extract.test.ts +93 -0
- package/src/tools/script-context-extract.ts +283 -0
- package/src/tools/ssrf-guard.test.ts +69 -0
- package/src/tools/ssrf-guard.ts +108 -0
- package/src/tools/tool-set.e2e.test.ts +64 -0
- package/src/user-rpc-client.test.ts +45 -0
- package/src/user-rpc-client.ts +60 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import { and, asc, desc, eq, isNull } from "drizzle-orm";
|
|
2
|
+
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
3
|
+
import type { AiPermissionMode } from "@checkstack/ai-common";
|
|
4
|
+
import * as schema from "../schema";
|
|
5
|
+
import type {
|
|
6
|
+
AiConversationRow,
|
|
7
|
+
AiMessageRow,
|
|
8
|
+
} from "../schema";
|
|
9
|
+
import { scrubContent, scrubModelMessages } from "./scrub-content";
|
|
10
|
+
|
|
11
|
+
type AiDatabase = SafeDatabase<typeof schema>;
|
|
12
|
+
|
|
13
|
+
/** A persisted chat message role (AI-SDK roles). */
|
|
14
|
+
export type AiMessageRole = "system" | "user" | "assistant" | "tool";
|
|
15
|
+
|
|
16
|
+
/**
 * Persistence contract for AI chat conversations and their messages
 * (plan §4, state-and-scale §9).
 *
 * All chat state lives in shared Postgres so any pod can list, continue, or
 * append to a conversation — the agent loop is resumable on whichever pod
 * handles the next turn. Nothing about a conversation is pod-local.
 *
 * Ownership: the conversation-level methods (`listConversations`,
 * `getConversation`, `updateConversation`, `archiveConversation`,
 * `deleteConversation`) take a `userId` and only match rows owned by that
 * user. `appendMessage` and `listMessages` take a `conversationId` ONLY, so
 * the caller (the handler, via the session principal) must have confirmed
 * ownership — e.g. through `getConversation` — before calling them.
 */
export interface AiConversationStore {
  /**
   * Insert a new conversation row and return it.
   * When `permissionMode` is omitted the column default applies (`approve`).
   */
  createConversation(args: {
    userId: string;
    title?: string;
    integrationId?: string;
    model?: string;
    permissionMode?: AiPermissionMode;
  }): Promise<AiConversationRow>;

  /**
   * Return the user's non-archived conversations, ordered by `updatedAt`
   * descending (most recent activity first).
   */
  listConversations(args: { userId: string }): Promise<AiConversationRow[]>;

  /**
   * Look up a single conversation by id, scoped to its owner. Resolves to
   * `undefined` when no row matches both `id` and `userId`. Archived rows are
   * NOT filtered out by this lookup.
   */
  getConversation(args: {
    id: string;
    userId: string;
  }): Promise<AiConversationRow | undefined>;

  /**
   * Insert one message and then bump the conversation's `updatedAt`.
   *
   * `content` is passed through `scrubContent`, and `toolCalls` /
   * `modelMessages` through `scrubModelMessages`, before the insert; those
   * helpers (see `./scrub-content`) are what remove credential-shaped
   * keys/values, so the no-secret guarantee is enforced on the write path
   * rather than assumed.
   *
   * This method is NOT owner-scoped: it accepts any `conversationId`.
   */
  appendMessage(args: {
    conversationId: string;
    role: AiMessageRole;
    content: Record<string, unknown>;
    toolCalls?: Array<Record<string, unknown>>;
    /**
     * AI-SDK `ResponseMessage[]` kept for tool-call REPLAY (assistant
     * tool-call + tool-result parts). Stored after scrubbing so a resumed
     * conversation can replay the prior tool interaction to the model.
     */
    modelMessages?: Array<Record<string, unknown>>;
  }): Promise<AiMessageRow>;

  /**
   * Return every message of a conversation ordered by `createdAt` ascending
   * (the agent loop's history). Not owner-scoped: it accepts any
   * `conversationId`.
   */
  listMessages(args: {
    conversationId: string;
  }): Promise<AiMessageRow[]>;

  /**
   * Patch the mutable metadata (title / model / permission mode) of a
   * conversation owned by `userId`. Only the fields that are not `undefined`
   * are written; `updatedAt` is always refreshed. Resolves to the updated row,
   * or `undefined` when no owned row matched.
   */
  updateConversation(args: {
    id: string;
    userId: string;
    title?: string;
    model?: string;
    permissionMode?: AiPermissionMode;
  }): Promise<AiConversationRow | undefined>;

  /**
   * SOFT-delete (archive): set `archivedAt` to the current time on a
   * conversation that is owned by `userId` and not yet archived. The row and
   * its messages are RETAINED (later abuse introspection) but the chat drops
   * out of `listConversations`. This is the user-facing "Delete" action.
   *
   * Resolves to `true` when a row was stamped, `false` otherwise (wrong
   * owner / already archived / missing).
   */
  archiveConversation(args: { id: string; userId: string }): Promise<boolean>;

  /**
   * HARD-delete the conversation row if it is owned by `userId`; resolves to
   * `true` when a row was removed. Message rows are expected to go with it via
   * the schema's cascade (confirm in `../schema`).
   *
   * NOT exposed to the user-facing delete action (that archives); kept for
   * non-user callers such as a retention sweep.
   */
  deleteConversation(args: { id: string; userId: string }): Promise<boolean>;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Create the database-backed {@link AiConversationStore}.
 *
 * Every method runs its queries through the injected `db` handle against the
 * `aiConversations` / `aiMessages` tables exported by `../schema`; the factory
 * keeps no state of its own.
 *
 * @param db - Database handle typed over this package's schema.
 * @returns The store implementation.
 */
export function createAiConversationStore({
  db,
}: {
  db: AiDatabase;
}): AiConversationStore {
  const conversations = schema.aiConversations;
  const messages = schema.aiMessages;

  // Shared predicate: the conversation with this id, but only if `userId` owns it.
  const ownedBy = ({ id, userId }: { id: string; userId: string }) =>
    and(eq(conversations.id, id), eq(conversations.userId, userId));

  return {
    async createConversation({
      userId,
      title,
      integrationId,
      model,
      permissionMode,
    }) {
      // `permissionMode` omitted -> the column default ("approve") applies.
      const inserted = await db
        .insert(conversations)
        .values({ userId, title, integrationId, model, permissionMode })
        .returning();
      return inserted[0];
    },

    async listConversations({ userId }) {
      // Archived (soft-deleted) rows stay in the table but are hidden from
      // the sidebar list; newest activity comes first.
      const visibleToUser = and(
        eq(conversations.userId, userId),
        isNull(conversations.archivedAt),
      );
      return db
        .select()
        .from(conversations)
        .where(visibleToUser)
        .orderBy(desc(conversations.updatedAt));
    },

    async getConversation({ id, userId }) {
      const [match] = await db
        .select()
        .from(conversations)
        .where(ownedBy({ id, userId }))
        .limit(1);
      return match;
    },

    async appendMessage({
      conversationId,
      role,
      content,
      toolCalls,
      modelMessages,
    }) {
      // Scrub credential-shaped keys/values from EVERY bag on the write path:
      // the no-secret-leak guarantee is enforced here, not merely assumed.
      const safeContent = scrubContent(content);
      const safeToolCalls = toolCalls
        ? scrubModelMessages(toolCalls)
        : toolCalls;
      const safeModelMessages = modelMessages
        ? scrubModelMessages(modelMessages)
        : modelMessages;

      const stored = await db
        .insert(messages)
        .values({
          conversationId,
          role,
          content: safeContent,
          toolCalls: safeToolCalls,
          modelMessages: safeModelMessages,
        })
        .returning();

      // Touch the parent conversation so list order reflects activity.
      await db
        .update(conversations)
        .set({ updatedAt: new Date() })
        .where(eq(conversations.id, conversationId));

      return stored[0];
    },

    async listMessages({ conversationId }) {
      // Oldest first: this is the history handed to the agent loop.
      return db
        .select()
        .from(messages)
        .where(eq(messages.conversationId, conversationId))
        .orderBy(asc(messages.createdAt));
    },

    async updateConversation({ id, userId, title, model, permissionMode }) {
      // `updatedAt` is always refreshed; the other columns are written only
      // when the caller actually supplied a value.
      const patch: Partial<{
        title: string;
        model: string;
        permissionMode: AiPermissionMode;
        updatedAt: Date;
      }> = { updatedAt: new Date() };
      if (title !== undefined) {
        patch.title = title;
      }
      if (model !== undefined) {
        patch.model = model;
      }
      if (permissionMode !== undefined) {
        patch.permissionMode = permissionMode;
      }

      const updated = await db
        .update(conversations)
        .set(patch)
        .where(ownedBy({ id, userId }))
        .returning();
      return updated[0];
    },

    async archiveConversation({ id, userId }) {
      // Owner-scoped soft delete. The `archivedAt IS NULL` guard makes a
      // repeat archive match nothing, so it reports false.
      const stamped = await db
        .update(conversations)
        .set({ archivedAt: new Date() })
        .where(
          and(
            eq(conversations.id, id),
            eq(conversations.userId, userId),
            isNull(conversations.archivedAt),
          ),
        )
        .returning({ id: conversations.id });
      return stamped.length > 0;
    },

    async deleteConversation({ id, userId }) {
      // Owner-scoped hard delete; true when a row was actually removed.
      const removed = await db
        .delete(conversations)
        .where(ownedBy({ id, userId }))
        .returning({ id: conversations.id });
      return removed.length > 0;
    },
  };
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { buildDecisionNote } from "./decision.logic";
|
|
3
|
+
|
|
4
|
+
describe("buildDecisionNote", () => {
  /** Assert that every listed fragment occurs somewhere in the note. */
  const expectFragments = (note: string, fragments: string[]): void => {
    for (const fragment of fragments) {
      expect(note).toContain(fragment);
    }
  };

  test("apply note states it is live, uses the summary, and forbids re-proposing", () => {
    const applied = buildDecisionNote({
      decision: "apply",
      toolName: "healthcheck.propose",
      summary: 'Create health check "google-com-http" running every 60s',
    });

    expectFragments(applied, [
      "APPLIED",
      "now live",
      'Create health check "google-com-http"',
      "Do NOT propose it again",
    ]);
  });

  test("decline note states nothing changed and asks what to adjust", () => {
    const declined = buildDecisionNote({
      decision: "decline",
      toolName: "healthcheck.propose",
      summary: "Create health check X",
    });

    expectFragments(declined, [
      "DECLINED",
      "Nothing was changed",
      "adjust",
      "Do NOT re-propose",
    ]);
  });

  test("falls back to the tool name when no summary is stored", () => {
    // No `summary` key at all.
    const withoutSummary = buildDecisionNote({
      decision: "apply",
      toolName: "automation.propose",
    });

    expectFragments(withoutSummary, [
      'the proposed "automation.propose" action',
    ]);
  });

  test("ignores a blank summary and falls back to the tool name", () => {
    // A whitespace-only summary must be treated like a missing one.
    const blankSummary = buildDecisionNote({
      decision: "decline",
      toolName: "automation.propose",
      summary: " ",
    });

    expectFragments(blankSummary, [
      'the proposed "automation.propose" action',
    ]);
  });
});
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure helpers for the post-confirm-card "decision acknowledgment" turn.
|
|
3
|
+
*
|
|
4
|
+
* When the operator applies or declines a confirm card, the actual apply runs
|
|
5
|
+
* through the unchanged `applyTool` propose/apply path. This turn is purely the
|
|
6
|
+
* MODEL's reaction: it is told the operator's decision and streams a short
|
|
7
|
+
* acknowledgment so the conversation does not dead-end on "waiting for your
|
|
8
|
+
* confirmation". The note is built SERVER-SIDE from the stored proposal (tool
|
|
9
|
+
* name + the summary captured at propose time) - no client-supplied text ever
|
|
10
|
+
* reaches the model - and is EPHEMERAL: it is appended to the model-call history
|
|
11
|
+
* for this turn only, never persisted. The assistant's reply carries the
|
|
12
|
+
* outcome forward into the persisted transcript.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** A human decision on a proposed mutate/destructive tool's confirm card. */
|
|
16
|
+
export type DecisionKind = "apply" | "decline";
|
|
17
|
+
|
|
18
|
+
/**
 * Compose the ephemeral note that tells the model what the operator decided
 * about its previous proposal.
 *
 * The proposal is described by its stored one-line `summary` (e.g. "Create
 * health check ..."), trimmed. When the summary is missing, empty, or only
 * whitespace, the note refers to the tool by name instead so it still reads
 * meaningfully.
 *
 * @param decision - `"apply"` yields the "approved and applied" note; any
 *   other value yields the "declined" note.
 * @param toolName - Name of the proposed tool, used in the fallback wording.
 * @param summary - Optional summary captured at propose time.
 * @returns A single-line note whose sentences are separated by one space.
 */
export function buildDecisionNote({
  decision,
  toolName,
  summary,
}: {
  decision: DecisionKind;
  toolName: string;
  summary?: string;
}): string {
  const trimmedSummary = summary?.trim() ?? "";
  const subject =
    trimmedSummary.length > 0
      ? trimmedSummary
      : `the proposed "${toolName}" action`;

  // Declined: nothing changed, so steer the model towards asking what to adjust.
  if (decision !== "apply") {
    const declinedSentences = [
      `[system note] The operator DECLINED your previous proposal (${subject}).`,
      "Nothing was changed.",
      "Acknowledge briefly and ask what they would like to adjust.",
      "Do NOT re-propose it unless the operator asks.",
    ];
    return declinedSentences.join(" ");
  }

  // Applied: the change is live, so the model must confirm rather than re-propose.
  const appliedSentences = [
    "[system note] The operator APPROVED your previous proposal and it has been APPLIED successfully and is now live:",
    `${subject}.`,
    "Confirm to the operator in one or two short sentences what is now in effect, then suggest a sensible next step.",
    "Do NOT propose it again.",
  ];
  return appliedSentences.join(" ");
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
|
|
3
|
+
import { resolveModelId, buildLanguageModel } from "./llm-provider";
|
|
4
|
+
|
|
5
|
+
/**
 * Test fixture: a minimal OpenAI-compatible connection. Any field given in
 * `over` replaces the corresponding default.
 */
function conn(
  over: Partial<OpenAiCompatibleConnection> = {},
): OpenAiCompatibleConnection {
  const defaults: OpenAiCompatibleConnection = {
    baseUrl: "https://api.openai.com/v1",
    apiKey: "sk-secret",
    defaultModel: "gpt-4o-mini",
  };
  return { ...defaults, ...over };
}
|
|
11
|
+
|
|
12
|
+
describe("resolveModelId (§14.6 per-integration model UX)", () => {
  test("falls back to defaultModel when nothing requested", () => {
    const resolved = resolveModelId({
      connection: conn(),
      requested: undefined,
    });
    expect(resolved).toBe("gpt-4o-mini");
  });

  test("uses the requested model when no allowlist constrains it", () => {
    const resolved = resolveModelId({
      connection: conn(),
      requested: "gpt-4o",
    });
    expect(resolved).toBe("gpt-4o");
  });

  test("honours a requested model that is in the allowlist", () => {
    const allowlisted = conn({ availableModels: ["gpt-4o-mini", "gpt-4o"] });
    const resolved = resolveModelId({
      connection: allowlisted,
      requested: "gpt-4o",
    });
    expect(resolved).toBe("gpt-4o");
  });

  test("rejects a requested model outside a non-empty allowlist (untrusted input)", () => {
    const allowlisted = conn({ availableModels: ["gpt-4o-mini"] });
    // The model id comes from the wire and is untrusted: a value that is not
    // on the list must resolve to the default instead of passing through.
    const resolved = resolveModelId({
      connection: allowlisted,
      requested: "evil-model",
    });
    expect(resolved).toBe("gpt-4o-mini");
  });
});
|
|
39
|
+
|
|
40
|
+
describe("buildLanguageModel coerces the model in the live path (P4 review item 1)", () => {
  /**
   * Extract the model id from whatever `buildLanguageModel` returned: the
   * value itself when it is a plain string, otherwise its `modelId` property.
   */
  function modelIdOf(model: ReturnType<typeof buildLanguageModel>): string {
    if (typeof model === "string") {
      return model;
    }
    return model.modelId;
  }

  test("an out-of-allowlist model id is coerced to defaultModel, not passed through", () => {
    // This is the LIVE path the chat service uses; it must not honour an
    // arbitrary wire-supplied model id.
    const built = buildLanguageModel({
      connection: conn({ availableModels: ["gpt-4o-mini"] }),
      model: "evil-model",
    });
    expect(modelIdOf(built)).toBe("gpt-4o-mini");
  });

  test("an allowlisted model id is honoured", () => {
    const built = buildLanguageModel({
      connection: conn({ availableModels: ["gpt-4o-mini", "gpt-4o"] }),
      model: "gpt-4o",
    });
    expect(modelIdOf(built)).toBe("gpt-4o");
  });

  test("no requested model uses defaultModel", () => {
    const built = buildLanguageModel({
      connection: conn(),
      model: undefined,
    });
    expect(modelIdOf(built)).toBe("gpt-4o-mini");
  });
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
2
|
+
import type { LanguageModel } from "ai";
|
|
3
|
+
import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Build a provider-agnostic language model from an OpenAI-compatible connection
|
|
7
|
+
* (decision §14.6). The `baseURL` override is what makes this work against
|
|
8
|
+
* OpenAI, Azure, OpenRouter, Ollama, vLLM, LM Studio, etc. - any endpoint that
|
|
9
|
+
* speaks the OpenAI CHAT-COMPLETIONS wire format.
|
|
10
|
+
*
|
|
11
|
+
* Why `@ai-sdk/openai-compatible` and NOT `@ai-sdk/openai`: the strict OpenAI
|
|
12
|
+
* provider (`@ai-sdk/openai` v3) defaults to OpenAI's `/responses` API, which
|
|
13
|
+
* sends `reasoning` / `reasoning_text` message parts that third-party gateways
|
|
14
|
+
* (OpenRouter, DeepSeek, Ollama, ...) do not implement - they reject it with
|
|
15
|
+
* HTTP 400 `invalid_prompt` "Invalid Responses API request". The compatible
|
|
16
|
+
* provider talks plain `/chat/completions` and omits OpenAI-only request fields,
|
|
17
|
+
* which is exactly what an "OpenAI-compatible connection" needs.
|
|
18
|
+
*
|
|
19
|
+
* CRITICAL (security): the credential (`apiKey`) is read here, on the BACKEND,
|
|
20
|
+
* and handed to the SDK provider (as a `Bearer` Authorization header). It never
|
|
21
|
+
* crosses an RPC/DTO boundary to the browser - the chat UI only ever receives
|
|
22
|
+
* streamed tokens + tool events.
|
|
23
|
+
*
|
|
24
|
+
* The requested model id is ALWAYS coerced through {@link resolveModelId}
|
|
25
|
+
* against the connection's `availableModels` allowlist, so a model id from the
|
|
26
|
+
* wire (untrusted) can never bypass the per-integration model/cost control. The
|
|
27
|
+
* model id used by the provider is the resolved one, never the raw request.
|
|
28
|
+
*/
|
|
29
|
+
export function buildLanguageModel({
  connection,
  model,
}: {
  connection: OpenAiCompatibleConnection;
  /** Conversation-selected model id; revalidated against the connection. */
  model?: string;
}): LanguageModel {
  // Never hand the raw (wire-supplied) id to the provider: resolve it against
  // the connection's allowlist first.
  const effectiveModelId = resolveModelId({ connection, requested: model });

  // The credential is consumed here, on the backend, by the SDK provider.
  const { baseUrl, apiKey } = connection;
  const openAiCompatible = createOpenAICompatible({
    name: "checkstack-openai-compatible",
    baseURL: baseUrl,
    apiKey,
  });

  // `.chatModel` pins the `/chat/completions` API, the lingua franca every
  // OpenAI-compatible gateway supports.
  return openAiCompatible.chatModel(effectiveModelId);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Decide which model id a conversation actually uses, given the connection's
 * allowlist (§14.6).
 *
 * - Nothing requested (missing or empty) -> `defaultModel`.
 * - `availableModels` absent or empty -> the requested id is accepted as-is.
 * - `availableModels` non-empty -> the requested id is accepted only when it
 *   is on the list; otherwise `defaultModel` is used.
 *
 * The model picker is constrained on the UI side, but the id still arrives
 * over the wire as untrusted input, hence the re-check here.
 *
 * @param connection - Connection supplying `defaultModel` and the optional
 *   `availableModels` allowlist.
 * @param requested - Model id asked for by the conversation, if any.
 * @returns The model id to use.
 */
export function resolveModelId({
  connection,
  requested,
}: {
  connection: OpenAiCompatibleConnection;
  requested?: string;
}): string {
  const { defaultModel, availableModels } = connection;
  if (!requested) {
    return defaultModel;
  }

  // A missing allowlist behaves exactly like an empty one: no restriction.
  const allowlist = availableModels ?? [];
  const accepted = allowlist.length === 0 || allowlist.includes(requested);
  return accepted ? requested : defaultModel;
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { APICallError } from "ai";
|
|
3
|
+
import { formatModelError } from "./model-error.logic";
|
|
4
|
+
|
|
5
|
+
describe("formatModelError", () => {
  test("surfaces an APICallError's status + response body (the masked detail)", () => {
    const providerBody = '{"error":{"code":"invalid_prompt","message":"bad"}}';
    const rejected = new APICallError({
      message: "Bad Request",
      url: "https://openrouter.ai/api/v1/chat/completions",
      requestBodyValues: {},
      statusCode: 400,
      responseBody: providerBody,
    });

    const formatted = formatModelError({ error: rejected });

    // The user sees the status and the provider's own error code...
    expect(formatted.userMessage).toContain("HTTP 400");
    expect(formatted.userMessage).toContain("invalid_prompt");
    // ...and the log detail carries the structured fields.
    expect(formatted.logDetail.kind).toBe("APICallError");
    expect(formatted.logDetail.statusCode).toBe(400);
    expect(formatted.logDetail.responseBody).toContain("invalid_prompt");
  });

  test("falls back to the error message when the API error has no body", () => {
    const timedOut = new APICallError({
      message: "upstream timeout",
      url: "https://example.com",
      requestBodyValues: {},
      statusCode: 504,
    });

    const formatted = formatModelError({ error: timedOut });

    expect(formatted.userMessage).toContain("HTTP 504");
    expect(formatted.userMessage).toContain("upstream timeout");
  });

  test("truncates an oversized provider body", () => {
    const oversizedBody = "x".repeat(5000);
    const flooded = new APICallError({
      message: "big",
      url: "https://example.com",
      requestBodyValues: {},
      statusCode: 400,
      responseBody: oversizedBody,
    });

    const formatted = formatModelError({ error: flooded });

    expect(formatted.userMessage).toContain("(truncated)");
    expect(formatted.userMessage.length).toBeLessThan(oversizedBody.length);
  });

  test("handles a generic Error without leaking internals", () => {
    const plainError = new Error("something broke");

    const formatted = formatModelError({ error: plainError });

    expect(formatted.userMessage).toContain("something broke");
    expect(formatted.logDetail.kind).toBe("error");
  });

  test("handles a non-Error throw (string / unknown)", () => {
    const formatted = formatModelError({ error: "weird" });

    expect(formatted.userMessage.length).toBeGreaterThan(0);
  });
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { APICallError } from "ai";
|
|
2
|
+
import { extractErrorMessage } from "@checkstack/common";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Turn an error thrown by the model provider (or the agent loop) into a
|
|
6
|
+
* USER-FACING message plus a STRUCTURED log detail. Pure + total: never throws.
|
|
7
|
+
*
|
|
8
|
+
* The AI SDK masks streaming errors by default (the UI gets a generic "An error
|
|
9
|
+
* occurred"), which hid the provider's real HTTP response. We surface it
|
|
10
|
+
* instead: an `APICallError` carries the provider's `statusCode` and
|
|
11
|
+
* `responseBody`, which is exactly what an operator needs to forward to their
|
|
12
|
+
* model provider when a turn fails (for example a 400 `invalid_prompt`).
|
|
13
|
+
*
|
|
14
|
+
* SECURITY: `responseBody` is the provider's ERROR payload, not our request, so
|
|
15
|
+
* it never contains the integration credential. The request (which carries
|
|
16
|
+
* the api key in headers, never the body) is deliberately NOT echoed.
|
|
17
|
+
*/
|
|
18
|
+
export interface FormattedModelError {
  /**
   * Text shown in the chat UI instead of the SDK's masked, generic error
   * message.
   */
  userMessage: string;

  /**
   * Structured fields intended for the server log. They are for operators
   * and are never rendered verbatim to a user.
   */
  logDetail: Record<string, unknown>;
}
|
|
24
|
+
|
|
25
|
+
/** Maximum number of characters of provider/error text kept by `truncate`. */
const MAX_LENGTH = 2000;

/**
 * Cap `value` at `MAX_LENGTH` characters. Text within the limit is returned
 * unchanged; longer text is cut to the limit and suffixed with
 * `... (truncated)`.
 */
function truncate({ value }: { value: string }): string {
  if (value.length <= MAX_LENGTH) {
    return value;
  }
  const head = value.slice(0, MAX_LENGTH);
  return `${head}... (truncated)`;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Translate anything thrown by the model provider or the agent loop into a
 * user-facing message plus structured fields for the server log.
 *
 * - `APICallError`: the user message carries the HTTP status (or `unknown`)
 *   followed by the provider's response body; when the body is missing or
 *   empty, the error's own message is used instead. The log detail records
 *   the status, URL, retryability, response body and message.
 * - Anything else: the message produced by `extractErrorMessage` is shown to
 *   the user (capped) and logged.
 *
 * Provider-supplied text is passed through `truncate` before it is echoed,
 * both in the user message AND in the log detail's `responseBody`, so one
 * oversized payload cannot flood either destination. Bodies within the limit
 * are reported exactly as received.
 *
 * @param error - The thrown value; may be of any shape.
 * @returns The message to show in chat and the fields to log.
 */
export function formatModelError({
  error,
}: {
  error: unknown;
}): FormattedModelError {
  if (APICallError.isInstance(error)) {
    const status = error.statusCode ?? "unknown";

    // Cap the provider body once and reuse it for the UI and the log. A
    // missing or empty body leaves this undefined so the fallbacks apply.
    const cappedBody =
      typeof error.responseBody === "string" && error.responseBody.length > 0
        ? truncate({ value: error.responseBody })
        : undefined;

    // No usable body: fall back to the error message, capped like the
    // generic branch caps its message.
    const detail = cappedBody ?? truncate({ value: error.message });

    return {
      userMessage: `The AI provider rejected the request (HTTP ${status}). ${detail}`,
      logDetail: {
        kind: "APICallError",
        statusCode: error.statusCode,
        url: error.url,
        isRetryable: error.isRetryable,
        // Capped when present; otherwise the original (empty/absent) value.
        responseBody: cappedBody ?? error.responseBody,
        message: error.message,
      },
    };
  }

  // Any other error (agent-loop bug, network, unknown): surface its message via
  // the shared extractor (handles Error, string, and unknown shapes uniformly).
  const message = extractErrorMessage(error);
  return {
    userMessage: `The assistant hit an error: ${truncate({ value: message })}`,
    logDetail: { kind: "error", message },
  };
}
|