@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +97 -0
  2. package/drizzle/0000_productive_jackpot.sql +26 -0
  3. package/drizzle/0001_puzzling_purple_man.sql +26 -0
  4. package/drizzle/0002_sparkling_paper_doll.sql +15 -0
  5. package/drizzle/0003_married_senator_kelly.sql +1 -0
  6. package/drizzle/0004_crazy_miek.sql +2 -0
  7. package/drizzle/0005_tearful_randall_flagg.sql +1 -0
  8. package/drizzle/meta/0000_snapshot.json +232 -0
  9. package/drizzle/meta/0001_snapshot.json +434 -0
  10. package/drizzle/meta/0002_snapshot.json +551 -0
  11. package/drizzle/meta/0003_snapshot.json +557 -0
  12. package/drizzle/meta/0004_snapshot.json +573 -0
  13. package/drizzle/meta/0005_snapshot.json +574 -0
  14. package/drizzle/meta/_journal.json +48 -0
  15. package/drizzle.config.ts +7 -0
  16. package/package.json +42 -0
  17. package/src/agent-runner.test.ts +262 -0
  18. package/src/agent-runner.ts +262 -0
  19. package/src/chat/agent-loop.test.ts +119 -0
  20. package/src/chat/agent-loop.ts +73 -0
  21. package/src/chat/auto-apply.test.ts +237 -0
  22. package/src/chat/chat-handler.ts +111 -0
  23. package/src/chat/chat-service.streamturn.test.ts +417 -0
  24. package/src/chat/chat-service.test.ts +250 -0
  25. package/src/chat/chat-service.ts +923 -0
  26. package/src/chat/classifier-service.ts +64 -0
  27. package/src/chat/classifier.logic.test.ts +92 -0
  28. package/src/chat/classifier.logic.ts +71 -0
  29. package/src/chat/conversation-store.it.test.ts +203 -0
  30. package/src/chat/conversation-store.test.ts +248 -0
  31. package/src/chat/conversation-store.ts +237 -0
  32. package/src/chat/decision.logic.test.ts +45 -0
  33. package/src/chat/decision.logic.ts +54 -0
  34. package/src/chat/llm-provider.test.ts +63 -0
  35. package/src/chat/llm-provider.ts +67 -0
  36. package/src/chat/model-error.logic.test.ts +60 -0
  37. package/src/chat/model-error.logic.ts +65 -0
  38. package/src/chat/normalize-messages.logic.test.ts +101 -0
  39. package/src/chat/normalize-messages.logic.ts +65 -0
  40. package/src/chat/permission-mode.logic.test.ts +70 -0
  41. package/src/chat/permission-mode.logic.ts +45 -0
  42. package/src/chat/read-invoker.ts +72 -0
  43. package/src/chat/replay.test.ts +174 -0
  44. package/src/chat/scrub-content.test.ts +183 -0
  45. package/src/chat/scrub-content.ts +154 -0
  46. package/src/chat/sdk-tools.test.ts +168 -0
  47. package/src/chat/sdk-tools.ts +181 -0
  48. package/src/chat/title-service.test.ts +146 -0
  49. package/src/chat/title-service.ts +111 -0
  50. package/src/chat/title.logic.test.ts +98 -0
  51. package/src/chat/title.logic.ts +102 -0
  52. package/src/extension-points.ts +41 -0
  53. package/src/generated/docs-index.ts +3020 -0
  54. package/src/hardening/handler-authz.test.ts +282 -0
  55. package/src/hardening/no-secret-leak.test.ts +303 -0
  56. package/src/hooks.ts +33 -0
  57. package/src/index.ts +542 -0
  58. package/src/mcp/connection-registry.test.ts +25 -0
  59. package/src/mcp/connection-registry.ts +54 -0
  60. package/src/mcp/mcp-conformance.it.test.ts +128 -0
  61. package/src/mcp/server.test.ts +285 -0
  62. package/src/mcp/server.ts +300 -0
  63. package/src/mcp/tool-invoker.ts +65 -0
  64. package/src/openai-provider.test.ts +64 -0
  65. package/src/openai-provider.ts +146 -0
  66. package/src/projection.test.ts +97 -0
  67. package/src/projection.ts +132 -0
  68. package/src/propose-apply/args-hash.test.ts +26 -0
  69. package/src/propose-apply/args-hash.ts +30 -0
  70. package/src/propose-apply/service.test.ts +423 -0
  71. package/src/propose-apply/service.ts +419 -0
  72. package/src/propose-apply/store.test.ts +136 -0
  73. package/src/propose-apply/store.ts +224 -0
  74. package/src/propose-apply/token.test.ts +52 -0
  75. package/src/propose-apply/token.ts +71 -0
  76. package/src/rate-limit/spend-ledger.it.test.ts +224 -0
  77. package/src/rate-limit/spend-ledger.test.ts +176 -0
  78. package/src/rate-limit/spend-ledger.ts +162 -0
  79. package/src/rate-limit/tool-budget.it.test.ts +173 -0
  80. package/src/rate-limit/tool-budget.test.ts +58 -0
  81. package/src/rate-limit/tool-budget.ts +107 -0
  82. package/src/registry-wiring.test.ts +131 -0
  83. package/src/registry-wiring.ts +68 -0
  84. package/src/resolver.test.ts +156 -0
  85. package/src/resolver.ts +78 -0
  86. package/src/router.test.ts +78 -0
  87. package/src/router.ts +345 -0
  88. package/src/schema.ts +284 -0
  89. package/src/serializer.test.ts +88 -0
  90. package/src/serializer.ts +42 -0
  91. package/src/tool-registry.ts +58 -0
  92. package/src/tools/composite-tools.ts +24 -0
  93. package/src/tools/docs-tools.test.ts +150 -0
  94. package/src/tools/docs-tools.ts +115 -0
  95. package/src/tools/probe-url.test.ts +51 -0
  96. package/src/tools/probe-url.ts +146 -0
  97. package/src/tools/rank-docs.test.ts +153 -0
  98. package/src/tools/rank-docs.ts +209 -0
  99. package/src/tools/script-context-extract.test.ts +93 -0
  100. package/src/tools/script-context-extract.ts +283 -0
  101. package/src/tools/ssrf-guard.test.ts +69 -0
  102. package/src/tools/ssrf-guard.ts +108 -0
  103. package/src/tools/tool-set.e2e.test.ts +64 -0
  104. package/src/user-rpc-client.test.ts +45 -0
  105. package/src/user-rpc-client.ts +60 -0
  106. package/tsconfig.json +26 -0
@@ -0,0 +1,262 @@
1
+ import { describe, expect, it, mock } from "bun:test";
2
+ import { z } from "zod";
3
+ import type { AuthUser, RpcClient } from "@checkstack/backend-api";
4
+ import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
5
+ import { createAgentRunner } from "./agent-runner";
6
+ import { createAiToolRegistry } from "./tool-registry";
7
+ import { createAiToolResolver } from "./resolver";
8
+ import { deferredProjectionExecute } from "./projection";
9
+ import type { RegisteredAiTool } from "./tool-registry";
10
+
11
+ /**
12
+ * Unit coverage for the headless agent runner. The model is injected, so this
13
+ * exercises the real tool-resolution + filtering + execution wiring without a
14
+ * live LLM.
15
+ */
16
+
17
/**
 * OpenAI-compatible connection fixture. The model functions are injected in
 * every test below, so these values are only handed to the runner's
 * connection resolver.
 */
const connection = {
  baseUrl: "https://example.test/v1",
  apiKey: "sk-test",
  defaultModel: "test-model",
} as OpenAiCompatibleConnection;

/** Application principal holding the wildcard access rule and no teams. */
const principal: AuthUser = {
  type: "application",
  id: "svc-1",
  name: "Svc",
  accessRules: ["*"],
  teamIds: [],
};

/** Stub RPC client whose `forPlugin` hands back an empty plugin client. */
const rpcClient = {
  forPlugin: () => ({}),
} as unknown as RpcClient;
32
+
33
/**
 * Builds a read-effect tool fixture with an empty input schema and no
 * required access rules.
 *
 * @param name Tool name; also embedded in the description as `read <name>`.
 * @param exec Implementation installed as the tool's `execute`.
 * @returns The fixture cast to `RegisteredAiTool`.
 */
function readTool(name: string, exec: () => Promise<unknown>): RegisteredAiTool {
  const fixture = {
    name,
    description: `read ${name}`,
    effect: "read",
    input: z.object({}),
    requiredAccessRules: [],
    execute: exec,
  } as RegisteredAiTool;
  return fixture;
}
43
+
44
describe("createAgentRunner", () => {
  /** Argument bag the stubbed `generateText` receives; only `tools` is read. */
  type GenerateArgs = { tools?: Record<string, unknown> };
  /** An offered SDK tool, narrowed to the one member the stubs invoke. */
  type ExecutableTool = { execute: (i: unknown) => Promise<unknown> };
  /** The subset of an audit record the assertions inspect. */
  type RecordedCall = { toolName: string; ok: boolean; effect: string };

  /** Looks up an offered tool by name, the way a model "calling" it would. */
  const pickTool = (args: GenerateArgs, toolName: string): ExecutableTool =>
    (args.tools ?? {})[toolName] as ExecutableTool;

  it("offers non-destructive, non-projected tools and runs the loop", async () => {
    const registry = createAiToolRegistry();
    const calls: string[] = [];
    const readExecute = async () => {
      calls.push("plugin.read");
      return { ok: true };
    };
    registry.register(readTool("plugin.read", readExecute));

    // A destructive tool must NOT be offered.
    const destructiveTool = {
      name: "plugin.delete",
      description: "delete",
      effect: "destructive",
      input: z.object({}),
      requiredAccessRules: [],
      execute: async () => ({ deleted: true }),
    } as RegisteredAiTool;
    registry.register(destructiveTool);

    // A projected read (deferred sentinel) must NOT be offered in v1.
    const projectedTool = {
      name: "plugin.projected",
      description: "projected",
      effect: "read",
      input: z.object({}),
      requiredAccessRules: [],
      execute: deferredProjectionExecute,
    } as RegisteredAiTool;
    registry.register(projectedTool);

    const resolver = createAiToolResolver({ registry });

    let offeredToolNames: string[] = [];
    const generateText = mock(async (args: GenerateArgs) => {
      offeredToolNames = Object.keys(args.tools ?? {});
      // Simulate the model calling the read tool once.
      await pickTool(args, "plugin.read").execute({});
      return { text: "done", usage: {} };
    });
    const generateObject = mock(async () => ({
      object: { severity: "high" },
      usage: {},
    }));

    const runner = createAgentRunner({
      resolver,
      resolveConnection: async () => connection,
      modelFns: {
        generateText: generateText as never,
        generateObject: generateObject as never,
      },
    });

    const result = await runner({
      principal,
      rpcClient,
      connectionId: "conn-1",
      prompt: "go",
      outputSchema: z.object({ severity: z.string() }),
    });

    expect(offeredToolNames.sort()).toEqual(["plugin.read"]);
    expect(calls).toEqual(["plugin.read"]);
    expect(result.text).toBe("done");
    expect(result.object).toEqual({ severity: "high" });
    expect(result.toolCalls).toEqual([{ tool: "plugin.read", ok: true }]);
  });

  it("offers a projected read tool and routes it through the principal's client", async () => {
    const registry = createAiToolRegistry();
    const projectedTool = {
      name: "incident.list",
      description: "list incidents",
      effect: "read",
      input: z.object({}),
      requiredAccessRules: [],
      execute: deferredProjectionExecute,
    } as RegisteredAiTool;
    registry.register(projectedTool);
    const resolver = createAiToolResolver({ registry });

    // Captures every input forwarded to the routed procedure.
    const procCalls: unknown[] = [];
    const routedClient = {
      forPlugin: (def: { pluginId: string }) => {
        expect(def.pluginId).toBe("incident");
        return {
          listIncidents: async (i: unknown) => {
            procCalls.push(i);
            return { incidents: [] };
          },
        };
      },
    } as unknown as RpcClient;

    let offered: string[] = [];
    const generateText = mock(async (args: GenerateArgs) => {
      offered = Object.keys(args.tools ?? {});
      await pickTool(args, "incident.list").execute({ status: "open" });
      return { text: "ok", usage: {} };
    });

    const runner = createAgentRunner({
      resolver,
      resolveConnection: async () => connection,
      getProjectionRoute: (name) => {
        if (name === "incident.list") {
          return { pluginId: "incident", procedureKey: "listIncidents" };
        }
        return undefined;
      },
      modelFns: { generateText: generateText as never },
    });

    const result = await runner({
      principal,
      rpcClient: routedClient,
      connectionId: "conn-1",
      prompt: "go",
    });

    expect(offered).toEqual(["incident.list"]);
    expect(procCalls).toEqual([{ status: "open" }]);
    expect(result.toolCalls).toEqual([{ tool: "incident.list", ok: true }]);
  });

  it("records a tool failure and surfaces it to the model instead of aborting", async () => {
    const registry = createAiToolRegistry();
    const failingExecute = async () => {
      throw new Error("missing access: plugin.read");
    };
    registry.register(readTool("plugin.boom", failingExecute));
    const resolver = createAiToolResolver({ registry });

    // What the "model" got back from the failing tool.
    let toolResult: unknown;
    const generateText = mock(async (args: GenerateArgs) => {
      toolResult = await pickTool(args, "plugin.boom").execute({});
      return { text: "handled", usage: {} };
    });

    const runner = createAgentRunner({
      resolver,
      resolveConnection: async () => connection,
      modelFns: { generateText: generateText as never },
    });

    const result = await runner({
      principal,
      rpcClient,
      connectionId: "conn-1",
      prompt: "go",
    });

    expect(toolResult).toEqual({ error: "missing access: plugin.read" });
    expect(result.toolCalls).toEqual([{ tool: "plugin.boom", ok: false }]);
    expect(result.object).toBeUndefined();
  });

  it("calls recordToolCall for each invocation (ok and failure)", async () => {
    const registry = createAiToolRegistry();
    registry.register(readTool("plugin.ok", async () => ({ ok: true })));
    registry.register(
      readTool("plugin.boom", async () => {
        throw new Error("nope");
      }),
    );
    const resolver = createAiToolResolver({ registry });

    const recorded: RecordedCall[] = [];
    const recordToolCall = async ({ toolName, ok, effect }: RecordedCall) => {
      recorded.push({ toolName, ok, effect });
    };

    const generateText = mock(async (args: GenerateArgs) => {
      // Invoke the succeeding tool first, then the failing one.
      for (const toolName of ["plugin.ok", "plugin.boom"]) {
        await pickTool(args, toolName).execute({});
      }
      return { text: "x", usage: {} };
    });

    const runner = createAgentRunner({
      resolver,
      resolveConnection: async () => connection,
      recordToolCall: recordToolCall as never,
      modelFns: { generateText: generateText as never },
    });
    await runner({ principal, rpcClient, connectionId: "c", prompt: "go" });

    expect(recorded).toContainEqual({
      toolName: "plugin.ok",
      ok: true,
      effect: "read",
    });
    expect(recorded).toContainEqual({
      toolName: "plugin.boom",
      ok: false,
      effect: "read",
    });
  });

  it("throws a clear error when the connection is invalid", async () => {
    const resolver = createAiToolResolver({ registry: createAiToolRegistry() });
    const generateText = mock(async () => ({ text: "", usage: {} }));
    const runner = createAgentRunner({
      resolver,
      resolveConnection: async () => undefined,
      modelFns: { generateText: generateText as never },
    });

    const pending = runner({
      principal,
      rpcClient,
      connectionId: "missing",
      prompt: "go",
    });
    await expect(pending).rejects.toThrow(/connection "missing"/i);
  });
});
@@ -0,0 +1,262 @@
1
+ /**
2
+ * Headless AI agent runner.
3
+ *
4
+ * The chat agent loop is HTTP/streaming/conversation-coupled. This is the
5
+ * transport-agnostic core for running ONE bounded agent task with no human in
6
+ * the loop - the engine behind the automation "AI Action". It is exposed as a
7
+ * service ({@link aiAgentRunnerRef}) so `automation-backend` (which already
8
+ * depends on `ai-backend`) can drive it without ai-backend depending on
9
+ * automation-backend.
10
+ *
11
+ * Security: the runner resolves the allowed tools for the supplied PRINCIPAL
12
+ * (the automation's `runAs` service account) and executes them through the
13
+ * supplied `rpcClient` (bound to that same principal), so every call is
14
+ * authorized exactly as that bounded identity. Destructive tools are NEVER
15
+ * offered (no human to confirm), matching the chat invariant.
16
+ *
17
+ * Tools offered: hand-authored read + mutate tools (run via their own
18
+ * `execute`) AND projected read tools (routed through the live router as the
19
+ * principal via the supplied `getProjectionRoute`). Destructive tools are never
20
+ * offered. Mutating tools auto-apply (executed directly via the principal's
21
+ * client); the propose/apply token gate is a chat/MCP human gate, intentionally
22
+ * bypassed here (the run is unattended and bounded by the principal).
23
+ */
24
+ import {
25
+ tool as aiTool,
26
+ stepCountIs,
27
+ generateText,
28
+ generateObject,
29
+ type Tool,
30
+ type LanguageModel,
31
+ } from "ai";
32
+ import { z } from "zod";
33
+ import {
34
+ createServiceRef,
35
+ type AuthUser,
36
+ type RpcClient,
37
+ } from "@checkstack/backend-api";
38
+ import type {
39
+ OpenAiCompatibleConnection,
40
+ AiToolEffect,
41
+ } from "@checkstack/ai-common";
42
+ import { extractErrorMessage, type ClientDefinition } from "@checkstack/common";
43
+ import type { AiToolResolver } from "./resolver";
44
+ import { buildLanguageModel } from "./chat/llm-provider";
45
+ import { deferredProjectionExecute } from "./projection";
46
+
47
/** Step bound applied when the caller passes no `maxSteps`. */
const DEFAULT_MAX_STEPS = 8;

/**
 * System prompt used when the caller supplies no `systemPrompt` override.
 * The sentences are separated by single spaces.
 */
const DEFAULT_SYSTEM_PROMPT =
  "You are an automation agent acting UNATTENDED - there is no human to ask. " +
  "You run with a bounded service account; you can only do what its permissions allow. " +
  "Use the available tools to investigate and act decisively on the task. " +
  "If a tool call is refused, do not retry it - work within your permissions. " +
  "Never attempt destructive actions. Be concise.";
56
+
57
/**
 * Outcome of a single tool invocation made during an agent task. The runner
 * appends one entry per call and returns the list in its result.
 */
export interface AgentTaskToolCall {
  /** Name of the tool that was invoked. */
  tool: string;
  /** `true` when the invocation resolved, `false` when it threw. */
  ok: boolean;
}
62
+
63
/** Everything the runner needs to execute one unattended agent task. */
export interface AgentTaskInput {
  /**
   * Identity the task runs as (the automation's `runAs` service account,
   * already enriched). The offered tool set is resolved from this principal,
   * and the tools execute through `rpcClient`, which must be bound to the
   * SAME identity so authorization is enforced as that bounded principal.
   */
  principal: AuthUser;
  /** RPC client bound to that same service account (the run's `context.rpcClient`). */
  rpcClient: RpcClient;
  /** Id of the integration connection for the OpenAI-compatible model. */
  connectionId: string;
  /** Model override, if any (validated against the connection's allowlist). */
  model?: string;
  /** Replacement for the default system prompt, if any. */
  systemPrompt?: string;
  /** Task prompt; the automation injects its trigger/artifact context here. */
  prompt: string;
  /**
   * Schema for the action's typed artifact. When present, a structured-output
   * pass runs after the tool loop to fill it.
   */
  outputSchema?: z.ZodType;
  /** Upper bound on agent steps (tool-call rounds). Defaults to 8. */
  maxSteps?: number;
}
89
+
90
/** What one agent task run produces. */
export interface AgentTaskResult {
  /** Free-text result the model returned from the tool loop. */
  text: string;
  /** Structured object; present iff `outputSchema` was provided. */
  object?: unknown;
  /** One outcome per tool invocation. */
  toolCalls: AgentTaskToolCall[];
}
98
+
99
/** Signature of the headless runner: one task input in, one task result out. */
export type AiAgentRunner = (
  input: AgentTaskInput,
) => Promise<AgentTaskResult>;

/**
 * Service ref under which the runner is published, so `automation-backend`
 * can resolve it at action time.
 */
export const aiAgentRunnerRef = createServiceRef<AiAgentRunner>(
  "ai.agentRunner",
);
104
+
105
/**
 * Model entry points the runner calls. They are injectable so the runner can
 * be unit-tested without a live model; anything left out falls back to the
 * SDK implementation.
 */
export interface AgentRunnerModelFns {
  /** Drives the tool loop. */
  generateText: typeof generateText;
  /** Drives the structured-output pass. */
  generateObject: typeof generateObject;
}
110
+
111
/**
 * Creates the headless agent runner.
 *
 * For each task the returned function:
 *  1. resolves the model connection (throws when it is missing),
 *  2. resolves the principal's tools and drops every destructive one,
 *  3. wraps each remaining tool so a failure is reported back to the model as
 *     `{ error }` instead of aborting the run,
 *  4. runs the tool loop, bounded by a validated step limit,
 *  5. runs a structured-output pass when `outputSchema` is given.
 *
 * @param resolver Resolves the tools a principal is allowed to use.
 * @param resolveConnection Looks up the OpenAI-compatible connection by id;
 *   resolving to `undefined` makes the runner throw.
 * @param getProjectionRoute Maps a projected read tool to its underlying
 *   procedure. Projected tools without a route are not offered.
 * @param recordToolCall Best-effort audit hook, called once per invocation.
 *   Neither a rejection nor a synchronous throw from it affects the tool
 *   outcome or the run.
 * @param modelFns Optional overrides for the SDK model functions.
 * @returns The runner function.
 */
export function createAgentRunner({
  resolver,
  resolveConnection,
  getProjectionRoute,
  recordToolCall,
  modelFns,
}: {
  resolver: AiToolResolver;
  resolveConnection: (
    connectionId: string,
  ) => Promise<OpenAiCompatibleConnection | undefined>;
  /**
   * Resolve a projected read tool's underlying procedure routing
   * (`{ pluginId, procedureKey }`). When provided, projected read tools are
   * offered and invoked through the principal's client; when omitted they are
   * not offered (cannot be run headlessly).
   */
  getProjectionRoute?: (
    toolName: string,
  ) => { pluginId: string; procedureKey: string } | undefined;
  /**
   * Best-effort audit hook, called once per tool invocation. Lets the host
   * record the call into the durable AI audit log (with the `automation`
   * transport). Failures here never break the agent loop.
   */
  recordToolCall?: (args: {
    principal: AuthUser;
    toolName: string;
    effect: AiToolEffect;
    input: unknown;
    ok: boolean;
    error?: string;
  }) => Promise<void>;
  modelFns?: Partial<AgentRunnerModelFns>;
}): AiAgentRunner {
  const gen = modelFns?.generateText ?? generateText;
  const genObj = modelFns?.generateObject ?? generateObject;

  return async ({
    principal,
    rpcClient,
    connectionId,
    model,
    systemPrompt,
    prompt,
    outputSchema,
    maxSteps,
  }) => {
    const connection = await resolveConnection(connectionId);
    if (!connection) {
      throw new Error(
        `AI connection "${connectionId}" not found or not a valid OpenAI-compatible connection.`,
      );
    }
    const languageModel: LanguageModel = buildLanguageModel({
      connection,
      model,
    });

    // Keep the run bounded: only a finite value >= 1 is honoured (floored to
    // a whole step count); anything else falls back to the default.
    // NOTE(review): assumes `stepCountIs` matches the step count by equality,
    // in which case 0, a negative or a fractional value would never match.
    const stepLimit =
      typeof maxSteps === "number" && Number.isFinite(maxSteps) && maxSteps >= 1
        ? Math.floor(maxSteps)
        : DEFAULT_MAX_STEPS;

    // Audit is best-effort and isolated from the tool outcome: a rejected
    // promise or a synchronous throw from the hook is deliberately dropped so
    // it can neither relabel a successful call as failed nor abort the run.
    const audit = async (entry: {
      toolName: string;
      effect: AiToolEffect;
      input: unknown;
      ok: boolean;
      error?: string;
    }): Promise<void> => {
      if (!recordToolCall) return;
      try {
        await recordToolCall({ principal, ...entry });
      } catch {
        // Intentionally ignored (see above).
      }
    };

    // Offer the principal's read + mutate tools (never destructive - no human
    // to confirm). Hand-authored tools run via their own `execute`; projected
    // read tools (the deferred sentinel) are routed through the live router AS
    // the principal via `rpcClient`, so handler-side authz still holds.
    const offered = resolver
      .resolveTools(principal)
      .filter((t) => t.effect !== "destructive");

    const toolCalls: AgentTaskToolCall[] = [];
    const sdkTools: Record<string, Tool> = {};
    for (const t of offered) {
      const isProjected = t.execute === deferredProjectionExecute;
      const route = isProjected ? getProjectionRoute?.(t.name) : undefined;
      // A projected tool with no resolvable route cannot be invoked headlessly;
      // skip it rather than calling the deferred sentinel (which throws).
      if (isProjected && !route) continue;

      const invoke = route
        ? async (input: unknown) => {
            // `forPlugin` only reads `.pluginId`; this re-enters the live router
            // AS the principal for the projected read's underlying procedure,
            // so handler-side authz applies exactly as a direct call.
            const pluginClient = rpcClient.forPlugin({
              pluginId: route.pluginId,
            } as ClientDefinition) as Record<
              string,
              (i: unknown) => Promise<unknown>
            >;
            // Fail with a message that names the bad route instead of an
            // opaque "is not a function" TypeError.
            if (typeof pluginClient[route.procedureKey] !== "function") {
              throw new Error(
                `Projected tool "${t.name}" routes to unknown procedure "${route.pluginId}.${route.procedureKey}".`,
              );
            }
            return pluginClient[route.procedureKey](input);
          }
        : async (input: unknown) => t.execute({ input, principal, rpcClient });

      sdkTools[t.name] = aiTool({
        description: t.description,
        inputSchema: t.input as z.ZodType,
        execute: async (input: unknown) => {
          let result: unknown;
          try {
            result = await invoke(input);
          } catch (error) {
            // Surface the failure to the model (e.g. a missing-permission
            // message) so it can adapt, rather than aborting the whole run.
            const message = extractErrorMessage(error);
            toolCalls.push({ tool: t.name, ok: false });
            await audit({
              toolName: t.name,
              effect: t.effect,
              input,
              ok: false,
              error: message,
            });
            return { error: message };
          }

          // Only the tool invocation itself is guarded above, so bookkeeping
          // on the success path can never be mistaken for a tool failure.
          toolCalls.push({ tool: t.name, ok: true });
          await audit({
            toolName: t.name,
            effect: t.effect,
            input,
            ok: true,
          });
          return result;
        },
      });
    }

    const { text } = await gen({
      model: languageModel,
      system: systemPrompt ?? DEFAULT_SYSTEM_PROMPT,
      prompt,
      tools: sdkTools,
      stopWhen: stepCountIs(stepLimit),
    });

    // Optional second pass: turn the loop's free text into the typed artifact.
    let object: unknown;
    if (outputSchema) {
      const res = await genObj({
        model: languageModel,
        schema: outputSchema,
        system:
          "Produce the structured result from the analysis below. Use only information present in it; do not invent values.",
        prompt: `Task: ${prompt}\n\n--- Analysis ---\n${text}`,
      });
      object = res.object;
    }

    return { text, object, toolCalls };
  };
}
@@ -0,0 +1,119 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { z } from "zod";
3
+ import type { AuthUser } from "@checkstack/backend-api";
4
+ import { createAiToolRegistry } from "../tool-registry";
5
+ import { createAiToolResolver } from "../resolver";
6
+ import type { RegisteredAiTool } from "../tool-registry";
7
+ import { disposeAgentTool, offeredTools } from "./agent-loop";
8
+
9
/**
 * Builds a registry fixture guarded by a single access rule.
 *
 * Every effect other than "read" also gets a `dryRun` that previews the
 * action; read tools have none.
 *
 * @param name Tool name, reused as its description.
 * @param effect Effect class of the tool.
 * @param rule The one access rule required to use the tool.
 */
function tool(
  name: string,
  effect: RegisteredAiTool["effect"],
  rule: string,
): RegisteredAiTool {
  const common = {
    name,
    description: name,
    effect,
    input: z.object({}),
    requiredAccessRules: [rule],
  };
  const execute = () => Promise.resolve({ ok: true });

  if (effect === "read") {
    return { ...common, execute };
  }
  return {
    ...common,
    dryRun: async () => ({ summary: `would ${name}`, payload: {} }),
    execute,
  };
}
28
+
29
/**
 * Creates a fresh registry holding one read, one mutate and one destructive
 * tool, plus a resolver over that registry.
 */
function setup() {
  const registry = createAiToolRegistry();
  const fixtures = [
    tool("incident.list", "read", "incident.incident.read"),
    tool("automation.propose", "mutate", "automation.automation.manage"),
    tool("incident.delete", "destructive", "incident.incident.manage"),
  ];
  for (const fixture of fixtures) {
    registry.register(fixture);
  }
  const resolver = createAiToolResolver({ registry });
  return { registry, resolver };
}
40
+
41
/** User holding only the incident read rule. */
const limited: AuthUser = {
  type: "user",
  id: "u1",
  accessRules: ["incident.incident.read"],
};

/** User holding every rule the three fixture tools require. */
const power: AuthUser = {
  type: "user",
  id: "u2",
  accessRules: [
    "incident.incident.read",
    "automation.automation.manage",
    "incident.incident.manage",
  ],
};
55
+
56
describe("agent loop tool gating (matrix #14)", () => {
  /**
   * Asks the loop how it would dispose of `toolName` for `principal`, against
   * a freshly built registry and resolver.
   */
  const dispositionFor = (toolName: string, principal: AuthUser) => {
    const { resolver, registry } = setup();
    return disposeAgentTool({
      toolName,
      principal,
      resolver,
      getTool: (n) => registry.getTool(n),
    });
  };

  test("the loop only offers resolver-allowed tools", () => {
    const { resolver } = setup();
    const offered = offeredTools({ principal: limited, resolver }).map(
      (t) => t.name,
    );
    expect(offered).toEqual(["incident.list"]);
    expect(offered).not.toContain("automation.propose");
    expect(offered).not.toContain("incident.delete");
  });

  test("a model-requested tool OUTSIDE the principal's set is refused server-side", () => {
    const d = dispositionFor("automation.propose", limited);
    expect(d.kind).toBe("refused");
  });

  test("an unknown tool name is refused", () => {
    const d = dispositionFor("does.not.exist", power);
    expect(d.kind).toBe("refused");
  });

  test("a read tool auto-runs", () => {
    const d = dispositionFor("incident.list", limited);
    expect(d.kind).toBe("run");
  });

  test("a mutate tool requires a confirm card (never silently mutates)", () => {
    const d = dispositionFor("automation.propose", power);
    expect(d.kind).toBe("confirm");
  });

  test("a destructive tool requires a confirm card", () => {
    const d = dispositionFor("incident.delete", power);
    expect(d.kind).toBe("confirm");
  });
});