npm - @checkstack/ai-backend - Versions diffs - 0.1.0 - Mend

@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

package/CHANGELOG.md +97 -0
package/drizzle/0000_productive_jackpot.sql +26 -0
package/drizzle/0001_puzzling_purple_man.sql +26 -0
package/drizzle/0002_sparkling_paper_doll.sql +15 -0
package/drizzle/0003_married_senator_kelly.sql +1 -0
package/drizzle/0004_crazy_miek.sql +2 -0
package/drizzle/0005_tearful_randall_flagg.sql +1 -0
package/drizzle/meta/0000_snapshot.json +232 -0
package/drizzle/meta/0001_snapshot.json +434 -0
package/drizzle/meta/0002_snapshot.json +551 -0
package/drizzle/meta/0003_snapshot.json +557 -0
package/drizzle/meta/0004_snapshot.json +573 -0
package/drizzle/meta/0005_snapshot.json +574 -0
package/drizzle/meta/_journal.json +48 -0
package/drizzle.config.ts +7 -0
package/package.json +42 -0
package/src/agent-runner.test.ts +262 -0
package/src/agent-runner.ts +262 -0
package/src/chat/agent-loop.test.ts +119 -0
package/src/chat/agent-loop.ts +73 -0
package/src/chat/auto-apply.test.ts +237 -0
package/src/chat/chat-handler.ts +111 -0
package/src/chat/chat-service.streamturn.test.ts +417 -0
package/src/chat/chat-service.test.ts +250 -0
package/src/chat/chat-service.ts +923 -0
package/src/chat/classifier-service.ts +64 -0
package/src/chat/classifier.logic.test.ts +92 -0
package/src/chat/classifier.logic.ts +71 -0
package/src/chat/conversation-store.it.test.ts +203 -0
package/src/chat/conversation-store.test.ts +248 -0
package/src/chat/conversation-store.ts +237 -0
package/src/chat/decision.logic.test.ts +45 -0
package/src/chat/decision.logic.ts +54 -0
package/src/chat/llm-provider.test.ts +63 -0
package/src/chat/llm-provider.ts +67 -0
package/src/chat/model-error.logic.test.ts +60 -0
package/src/chat/model-error.logic.ts +65 -0
package/src/chat/normalize-messages.logic.test.ts +101 -0
package/src/chat/normalize-messages.logic.ts +65 -0
package/src/chat/permission-mode.logic.test.ts +70 -0
package/src/chat/permission-mode.logic.ts +45 -0
package/src/chat/read-invoker.ts +72 -0
package/src/chat/replay.test.ts +174 -0
package/src/chat/scrub-content.test.ts +183 -0
package/src/chat/scrub-content.ts +154 -0
package/src/chat/sdk-tools.test.ts +168 -0
package/src/chat/sdk-tools.ts +181 -0
package/src/chat/title-service.test.ts +146 -0
package/src/chat/title-service.ts +111 -0
package/src/chat/title.logic.test.ts +98 -0
package/src/chat/title.logic.ts +102 -0
package/src/extension-points.ts +41 -0
package/src/generated/docs-index.ts +3020 -0
package/src/hardening/handler-authz.test.ts +282 -0
package/src/hardening/no-secret-leak.test.ts +303 -0
package/src/hooks.ts +33 -0
package/src/index.ts +542 -0
package/src/mcp/connection-registry.test.ts +25 -0
package/src/mcp/connection-registry.ts +54 -0
package/src/mcp/mcp-conformance.it.test.ts +128 -0
package/src/mcp/server.test.ts +285 -0
package/src/mcp/server.ts +300 -0
package/src/mcp/tool-invoker.ts +65 -0
package/src/openai-provider.test.ts +64 -0
package/src/openai-provider.ts +146 -0
package/src/projection.test.ts +97 -0
package/src/projection.ts +132 -0
package/src/propose-apply/args-hash.test.ts +26 -0
package/src/propose-apply/args-hash.ts +30 -0
package/src/propose-apply/service.test.ts +423 -0
package/src/propose-apply/service.ts +419 -0
package/src/propose-apply/store.test.ts +136 -0
package/src/propose-apply/store.ts +224 -0
package/src/propose-apply/token.test.ts +52 -0
package/src/propose-apply/token.ts +71 -0
package/src/rate-limit/spend-ledger.it.test.ts +224 -0
package/src/rate-limit/spend-ledger.test.ts +176 -0
package/src/rate-limit/spend-ledger.ts +162 -0
package/src/rate-limit/tool-budget.it.test.ts +173 -0
package/src/rate-limit/tool-budget.test.ts +58 -0
package/src/rate-limit/tool-budget.ts +107 -0
package/src/registry-wiring.test.ts +131 -0
package/src/registry-wiring.ts +68 -0
package/src/resolver.test.ts +156 -0
package/src/resolver.ts +78 -0
package/src/router.test.ts +78 -0
package/src/router.ts +345 -0
package/src/schema.ts +284 -0
package/src/serializer.test.ts +88 -0
package/src/serializer.ts +42 -0
package/src/tool-registry.ts +58 -0
package/src/tools/composite-tools.ts +24 -0
package/src/tools/docs-tools.test.ts +150 -0
package/src/tools/docs-tools.ts +115 -0
package/src/tools/probe-url.test.ts +51 -0
package/src/tools/probe-url.ts +146 -0
package/src/tools/rank-docs.test.ts +153 -0
package/src/tools/rank-docs.ts +209 -0
package/src/tools/script-context-extract.test.ts +93 -0
package/src/tools/script-context-extract.ts +283 -0
package/src/tools/ssrf-guard.test.ts +69 -0
package/src/tools/ssrf-guard.ts +108 -0
package/src/tools/tool-set.e2e.test.ts +64 -0
package/src/user-rpc-client.test.ts +45 -0
package/src/user-rpc-client.ts +60 -0
package/tsconfig.json +26 -0

package/src/chat/scrub-content.test.ts ADDED Viewed

@@ -0,0 +1,183 @@
+import { describe, expect, test } from "bun:test";
+import {
+  REDACTED,
+  scrubContent,
+  scrubModelMessages,
+} from "./scrub-content";
+/**
+ * NO-SECRET-LEAK content scrubbing (Phase 6) — the canary regression suite.
+ *
+ * The architectural guarantee "a credential can NEVER be persisted into
+ * `ai_messages.content`" is now an ENFORCED invariant: `scrubContent` runs on the
+ * message write path. These tests inject credential-shaped keys/values and assert
+ * they are stripped, while proving the scrub does NOT blanket-redact innocent
+ * user prose (the guarantee is "a credential cannot ride along", not "no string
+ * is ever touched").
+ */
+/** Canary credential used by every case below; it is OpenAI-shaped (`sk-...`). */
+const SECRET = "sk-canary-DO-NOT-LEAK-0123456789abcdef";
+/**
+ * Fail the current test if the canary appears anywhere in the JSON form of
+ * `value`. `where` names the inspected payload in the failure message.
+ */
+function assertNoSecret(value: unknown, where: string): void {
+  // JSON.stringify returns undefined for inputs such as `undefined`, so fall
+  // back to an empty string to keep the matcher operating on a string.
+  const serialized = JSON.stringify(value) ?? "";
+  const failureMessage = `${where} must not contain the secret`;
+  expect(serialized, failureMessage).not.toContain(SECRET);
+}
+describe("scrubContent: credential-shaped KEYS are redacted", () => {
+  test("apiKey at top level is stripped", () => {
+    const scrubbed = scrubContent({ text: "hi", apiKey: SECRET });
+    expect(scrubbed.text).toBe("hi");
+    expect(scrubbed.apiKey).toBe(REDACTED);
+    assertNoSecret(scrubbed, "scrubbed content");
+  });
+  test("nested secret keys (api_key, authorization, password, x-secret) are stripped", () => {
+    const input = {
+      result: {
+        headers: { authorization: `Bearer ${SECRET}` },
+        config: { api_key: SECRET, password: SECRET, "x-secret": SECRET },
+      },
+    };
+    const scrubbed = scrubContent(input);
+    const { headers, config } = scrubbed.result as {
+      headers: Record<string, unknown>;
+      config: Record<string, unknown>;
+    };
+    expect(headers.authorization).toBe(REDACTED);
+    // Every secret-shaped key under `config` must come back as the sentinel.
+    for (const key of ["api_key", "password", "x-secret"]) {
+      expect(config[key]).toBe(REDACTED);
+    }
+    assertNoSecret(scrubbed, "deeply nested scrubbed content");
+  });
+  test("secret keys inside arrays are stripped", () => {
+    const scrubbed = scrubContent({
+      items: [{ name: "a", clientSecret: SECRET }, { name: "b" }],
+    });
+    const [first, second] = scrubbed.items as Array<Record<string, unknown>>;
+    expect(first?.clientSecret).toBe(REDACTED);
+    expect(first?.name).toBe("a");
+    expect(second?.name).toBe("b");
+    assertNoSecret(scrubbed, "array scrubbed content");
+  });
+});
+describe("scrubContent: credential-shaped VALUES are redacted regardless of key", () => {
+  test("an sk-... value under an innocent key is still stripped", () => {
+    // Worst case: a buggy tool result files the credential under a harmless
+    // field name, so only the value pattern can catch it.
+    const scrubbed = scrubContent({ note: SECRET, freeform: { blob: SECRET } });
+    const freeform = scrubbed.freeform as Record<string, unknown>;
+    expect(scrubbed.note).toBe(REDACTED);
+    expect(freeform.blob).toBe(REDACTED);
+    assertNoSecret(scrubbed, "value-pattern scrubbed content");
+  });
+  test("a Bearer token value is stripped", () => {
+    const message = "use Bearer abcdef1234567890ABCDEF for auth";
+    const scrubbed = scrubContent({ msg: message });
+    expect(scrubbed.msg).toBe(REDACTED);
+    assertNoSecret(scrubbed, "bearer-value scrubbed content");
+  });
+});
+describe("scrubContent: innocent prose is preserved (no blanket scrubbing)", () => {
+  test("ordinary chat text mentioning 'token' or 'password' is NOT redacted", () => {
+    const prose = "I forgot my password, can you reset the auth token flow?";
+    const scrubbed = scrubContent({ text: prose });
+    expect(scrubbed.text).toBe(prose);
+  });
+  test("a normal incident title and numbers survive untouched", () => {
+    const original = {
+      text: "deploy at 14:02 caused 500s on /api/checkout",
+      count: 42,
+      ok: true,
+    };
+    const scrubbed = scrubContent(original);
+    expect(scrubbed.text).toBe("deploy at 14:02 caused 500s on /api/checkout");
+    expect(scrubbed.count).toBe(42);
+    expect(scrubbed.ok).toBe(true);
+  });
+  test("a token-shaped slug embedded in a URL path is PRESERVED (no over-redaction)", () => {
+    // The `sk-...` shape sits inside a longer URL path segment rather than
+    // standing alone as a credential, so it must survive: the value pattern
+    // only fires on a standalone/boundary-delimited token.
+    const url = "https://host/api/sk-checkout-flow-12345678/status";
+    const sentence = `see ${url} for details`;
+    const scrubbed = scrubContent({ link: url, text: sentence });
+    expect(scrubbed.link).toBe(url);
+    expect(scrubbed.text).toBe(sentence);
+  });
+  test("a standalone sk-... value is STILL redacted (even after tightening the boundary)", () => {
+    // Whole-value, whitespace-delimited, and quote-delimited standalone tokens.
+    const standaloneForms = [SECRET, `key is ${SECRET} ok`, `"${SECRET}"`];
+    for (const v of standaloneForms) {
+      expect(scrubContent({ v }).v).toBe(REDACTED);
+    }
+  });
+});
+describe("scrubModelMessages: replay history is scrubbed too", () => {
+  test("a tool-result part carrying a credential is redacted before persist", () => {
+    // Canonical AI-SDK ResponseMessage[] shape: an assistant tool-call followed
+    // by a tool message whose tool-result output smuggles a credential, both
+    // under a secret-shaped key (`apiKey`) and under an innocent one (`leaked`).
+    const assistantMessage = {
+      role: "assistant",
+      content: [
+        { type: "text", text: "calling tool" },
+        {
+          type: "tool-call",
+          toolCallId: "tc1",
+          toolName: "incident.list",
+          input: { status: "open" },
+        },
+      ],
+    };
+    const toolMessage = {
+      role: "tool",
+      content: [
+        {
+          type: "tool-result",
+          toolCallId: "tc1",
+          toolName: "incident.list",
+          output: {
+            type: "json",
+            value: { rows: [{ id: 1 }], apiKey: SECRET, leaked: SECRET },
+          },
+        },
+      ],
+    };
+    const scrubbed = scrubModelMessages([assistantMessage, toolMessage]);
+    assertNoSecret(scrubbed, "scrubbed replay messages");
+    // The non-secret structure survives so replay still works.
+    const scrubbedToolMessage = scrubbed[1];
+    const parts = scrubbedToolMessage.content as Array<Record<string, unknown>>;
+    const resultPart = parts[0];
+    const output = resultPart.output as Record<string, unknown>;
+    const value = output.value as Record<string, unknown>;
+    expect(value.rows).toEqual([{ id: 1 }]);
+    expect(value.apiKey).toBe(REDACTED);
+    expect(value.leaked).toBe(REDACTED);
+  });
+});
+describe("scrubContent: the guard has teeth (idempotent + cycle-safe)", () => {
+  test("re-scrubbing already-redacted content is a no-op", () => {
+    const firstPass = scrubContent({ apiKey: SECRET, text: "hi" });
+    const secondPass = scrubContent(firstPass);
+    expect(secondPass).toEqual(firstPass);
+  });
+  test("a cyclic object does not hang and is redacted at the cycle", () => {
+    const node: Record<string, unknown> = { text: "hi" };
+    node.self = node; // self-reference: the object is its own child
+    const scrubbed = scrubContent(node);
+    expect(scrubbed.text).toBe("hi");
+    expect(scrubbed.self).toBe(REDACTED);
+  });
+});

package/src/chat/scrub-content.ts ADDED Viewed

@@ -0,0 +1,154 @@
+/**
+ * NO-SECRET-LEAK content scrubber (Phase 6) — makes the previously-architectural
+ * guarantee an ENFORCED, tested invariant on the message write path.
+ *
+ * `ai_messages.content` (and the replay `modelMessages`) is a free-form JSON bag.
+ * Architecturally, a provider credential should never reach it — the model only
+ * ever sees tool RESULTS (which source procedures redact) and never the
+ * integration apiKey. But "should never" is not "cannot": nothing structurally
+ * stopped a buggy tool result, a future feature, or a malicious tool from
+ * persisting a secret into the bag. This scrubber closes that gap by running on
+ * EVERY message write (`appendMessage`), so a credential can never be persisted
+ * into message content even if upstream code is wrong.
+ *
+ * The scrub is deliberately conservative — it redacts:
+ *  - any value under a SECRET-SHAPED KEY (apiKey, api_key, authorization,
+ *    access_token, password, secret, x-secret, bearer, ...), recursively; and
+ *  - any string VALUE that matches a high-confidence credential pattern
+ *    (OpenAI-style `sk-...`, `Bearer <token>` headers), regardless of its key.
+ *
+ * It does NOT blanket-scrub arbitrary strings: a user's chat text or an
+ * incident title that merely contains the word "token" is preserved. The
+ * guarantee is "a credential cannot be persisted", not "no string is ever
+ * touched".
+ */
+/**
+ * The sentinel a redacted value is replaced with. The scrubber substitutes it
+ * for a credential-looking string, for the whole value stored under a
+ * secret-shaped key, and for an object reference that closes a cycle.
+ */
+export const REDACTED = "[REDACTED]";
+/**
+ * Key names whose VALUE is treated as a credential and redacted wholesale
+ * (case-insensitive, matched as a substring of the normalized key). These mirror
+ * the `x-secret` field names the integration platform stores in the Vault.
+ *
+ * Each multi-word family is listed in its hyphenated, compact, and snake_case
+ * spelling because matching is a plain substring test with no separator
+ * normalization. `auth_token` completes the auth-token family, which previously
+ * had only the hyphenated and compact forms, so `{ auth_token: ... }` was not
+ * redacted by key.
+ *
+ * Bare `token` is intentionally NOT listed: it would also match legitimate
+ * non-credential fields that merely carry the word.
+ */
+const SECRET_KEY_PATTERNS = [
+  "apikey",
+  "api-key",
+  "api_key",
+  "secret",
+  "password",
+  "passwd",
+  "authorization",
+  "auth-token",
+  "authtoken",
+  "auth_token",
+  "access-token",
+  "accesstoken",
+  "access_token",
+  "refresh-token",
+  "refreshtoken",
+  "refresh_token",
+  "bearer",
+  "x-api-key",
+  "private-key",
+  "privatekey",
+  "private_key",
+  "client-secret",
+  "clientsecret",
+  "client_secret",
+  "credential",
+];
+/**
+ * Standalone-token delimiters for the `sk-...` / `rk-...` value pattern.
+ *
+ * Leading side: string start, whitespace, a quote (`"`, `'`, or a backtick),
+ * `,`, `;`, or an assignment/label separator (`=`, `:`). The separators matter
+ * because credentials most often reach free text as `OPENAI_API_KEY=sk-...`
+ * (env dumps, log lines) or `key:sk-...`; without them that shape slipped past
+ * the value scrub whenever the enclosing JSON key was not itself secret-shaped.
+ * A side effect is that a token-shaped query value (`?key=sk-...`) is now
+ * redacted as well.
+ *
+ * Trailing side: string end, whitespace, a quote, `,` or `;`.
+ *
+ * Both sides deliberately EXCLUDE `/` and other path characters so a
+ * token-shaped slug embedded in a URL path (e.g.
+ * `https://host/api/sk-checkout-flow-12345678`) is NOT treated as a credential —
+ * the key-based redaction (apiKey / authorization / x-secret / ...) remains the
+ * primary defense for structured fields, so the value pattern can safely err
+ * toward fewer false positives in free text.
+ */
+const TOKEN_BOUNDARY = String.raw`(?:^|[\s"'\`,;=:])`;
+const TOKEN_BOUNDARY_END = String.raw`(?:$|[\s"'\`,;])`;
+/**
+ * Value patterns redacted regardless of their key (high-confidence credential
+ * shapes only, to avoid scrubbing innocent prose):
+ *  - OpenAI-style keys: `sk-...`, `sk-proj-...`, `rk-...` (>= 16 trailing chars)
+ *    ONLY when they are a standalone, boundary-delimited token (or the whole
+ *    value) — never when embedded inside a longer URL/path. The delimiters come
+ *    from `TOKEN_BOUNDARY` / `TOKEN_BOUNDARY_END`.
+ *  - An `Authorization: Bearer <token>` header value (the `Bearer ` prefix is
+ *    itself a strong signal, so no extra boundary constraint is needed). The
+ *    match is case-insensitive and needs at least 12 token characters.
+ *
+ * Neither pattern carries the `g` flag, so `RegExp.prototype.test` keeps no
+ * `lastIndex` state between calls and the shared instances are safe to reuse.
+ *
+ * NOTE(review): because the Bearer rule is case-insensitive and letters alone
+ * satisfy the token class, ordinary prose such as "bearer acknowledged" also
+ * matches and the whole string is redacted — confirm this trade-off is intended.
+ */
+const SECRET_VALUE_PATTERNS: RegExp[] = [
+  new RegExp(
+    `${TOKEN_BOUNDARY}(?:sk|rk)-(?:proj-)?[A-Za-z0-9_-]{16,}${TOKEN_BOUNDARY_END}`,
+  ),
+  /\bBearer\s+[A-Za-z0-9._~+/-]{12,}=*\b/i,
+];
+/**
+ * Report whether an object key looks like it names a credential field.
+ *
+ * The key is lower-cased and stripped of whitespace, then tested for any
+ * `SECRET_KEY_PATTERNS` entry as a substring (so `X-Api-Key` and
+ * `myClientSecret` both qualify).
+ */
+function isSecretKey(key: string): boolean {
+  const compactKey = key.toLowerCase().replace(/\s+/g, "");
+  for (const fragment of SECRET_KEY_PATTERNS) {
+    if (compactKey.includes(fragment)) return true;
+  }
+  return false;
+}
+/**
+ * Report whether a string matches at least one high-confidence credential
+ * pattern from `SECRET_VALUE_PATTERNS`; the first hit short-circuits.
+ */
+function valueLooksSecret(value: string): boolean {
+  for (const pattern of SECRET_VALUE_PATTERNS) {
+    if (pattern.test(value)) return true;
+  }
+  return false;
+}
+/**
+ * Recursively scrub a JSON-serializable value and return a fresh structure; the
+ * input is never mutated.
+ *
+ *  - strings: replaced by `REDACTED` when they match a credential pattern,
+ *    otherwise returned unchanged;
+ *  - other primitives and `null`: returned unchanged;
+ *  - arrays: scrubbed element by element;
+ *  - objects: a value under a secret-shaped key is replaced by `REDACTED`
+ *    without being inspected; every other value is scrubbed recursively.
+ *
+ * `seen` holds only the ANCESTORS of the value currently being scrubbed: an
+ * object is added on entry and removed on exit. Meeting an ancestor again means
+ * a genuine cycle, which is cut with `REDACTED` (defensive — message content is
+ * already JSON, but a hand-built object could contain a cycle). An object that
+ * is merely referenced more than once (`{ a: shared, b: shared }`) is NOT a
+ * cycle and is scrubbed normally at every occurrence. Previously the set was
+ * never pruned, so the second occurrence of a shared object was wrongly
+ * replaced by `REDACTED` and its data was lost.
+ *
+ * @param value The value to scrub.
+ * @param seen  Objects on the current descent path; callers pass a new WeakSet.
+ * @returns The scrubbed copy (or the original primitive).
+ */
+function scrubValue(value: unknown, seen: WeakSet<object>): unknown {
+  if (typeof value === "string") {
+    return valueLooksSecret(value) ? REDACTED : value;
+  }
+  if (value === null || typeof value !== "object") {
+    return value;
+  }
+  // Re-entering an object that is still being scrubbed: a real cycle.
+  if (seen.has(value)) return REDACTED;
+  seen.add(value);
+  try {
+    if (Array.isArray(value)) {
+      return value.map((item) => scrubValue(item, seen));
+    }
+    const out: Record<string, unknown> = {};
+    for (const [key, child] of Object.entries(
+      value as Record<string, unknown>,
+    )) {
+      // A secret-shaped key: redact the whole subtree, never persist its value.
+      out[key] = isSecretKey(key) ? REDACTED : scrubValue(child, seen);
+    }
+    return out;
+  } finally {
+    // Leave the path so a later sibling reference to this object is scrubbed
+    // like any other value instead of being mistaken for a cycle.
+    seen.delete(value);
+  }
+}
+/**
+ * Scrub a message-content bag before it is persisted.
+ *
+ * Produces a new record in which every credential-shaped key or value has been
+ * replaced by `REDACTED`; the argument itself is left untouched. Running it
+ * again over its own output changes nothing, because the sentinel matches no
+ * credential pattern.
+ */
+export function scrubContent(
+  content: Record<string, unknown>,
+): Record<string, unknown> {
+  const visited = new WeakSet<object>();
+  const scrubbed = scrubValue(content, visited);
+  return scrubbed as Record<string, unknown>;
+}
+/**
+ * Scrub the replay `modelMessages` array (AI-SDK `ResponseMessage[]`).
+ *
+ * Every message goes through the same recursive scrub as message content, so a
+ * tool-result part can never carry a credential into the replay history. Each
+ * message is scrubbed with its own cycle-tracking set, independently of its
+ * neighbours.
+ */
+export function scrubModelMessages(
+  messages: Array<Record<string, unknown>>,
+): Array<Record<string, unknown>> {
+  const scrubOne = (message: Record<string, unknown>) =>
+    scrubValue(message, new WeakSet()) as Record<string, unknown>;
+  return messages.map(scrubOne);
+}

package/src/chat/sdk-tools.test.ts ADDED Viewed

@@ -0,0 +1,168 @@
+import { describe, expect, test } from "bun:test";
+import { z } from "zod";
+import type { AuthUser } from "@checkstack/backend-api";
+import type { AiPermissionMode } from "@checkstack/ai-common";
+import type { RegisteredAiTool } from "../tool-registry";
+import {
+  buildAgentSdkTools,
+  type AutoAppliedResult,
+  type ConfirmCardResult,
+} from "./sdk-tools";
+/**
+ * Build a minimal registered tool for the gating tests: the description equals
+ * the name, the input schema is `{ value: string }`, and `execute` resolves to
+ * `{ ok: true }`. Non-read effects additionally get a stub `dryRun`.
+ */
+function tool(
+  name: string,
+  effect: RegisteredAiTool["effect"],
+): RegisteredAiTool {
+  // Only mutate/destructive tools carry a dry-run; read tools get nothing extra.
+  const writeOnlyParts =
+    effect === "read"
+      ? {}
+      : { dryRun: async () => ({ summary: "s", payload: {} }) };
+  return {
+    name,
+    description: name,
+    effect,
+    input: z.object({ value: z.string() }),
+    requiredAccessRules: [],
+    ...writeOnlyParts,
+    execute: async () => ({ ok: true }),
+  };
+}
+/** The caller every test acts as. */
+const principal: AuthUser = { type: "user", id: "u1", accessRules: ["*"] };
+/**
+ * Create a fresh set of recording callbacks. Each callback appends its own
+ * marker to `calls` ("budget", "runRead", "propose", "autoApply") and returns a
+ * canned result, so a test can assert which delegates ran and in what order.
+ */
+function callbacks() {
+  const calls: string[] = [];
+  const enforceBudget = async () => {
+    calls.push("budget");
+  };
+  const runRead = async () => {
+    calls.push("runRead");
+    return { rows: [] };
+  };
+  const propose = async ({ tool: t }: { tool: RegisteredAiTool }) => {
+    calls.push("propose");
+    const card = {
+      __confirm: true,
+      toolName: t.name,
+      effect: t.effect as "mutate" | "destructive",
+      summary: "would do it",
+      token: "propose:abc.def",
+      payload: { value: "x" },
+      expiresAt: new Date().toISOString(),
+      note: "awaiting approval",
+    } satisfies ConfirmCardResult;
+    return card;
+  };
+  const autoApply = async ({ tool: t }: { tool: RegisteredAiTool }) => {
+    calls.push("autoApply");
+    const applied = {
+      __applied: true,
+      toolName: t.name,
+      effect: "mutate",
+      summary: "did it",
+      toolCallId: "tc-1",
+      result: { created: true },
+      note: "applied",
+    } satisfies AutoAppliedResult;
+    return applied;
+  };
+  return { calls, enforceBudget, runRead, propose, autoApply };
+}
+/**
+ * Register one tool named `t.<effect>` under the given permission mode and
+ * return the SDK executor built for it (`undefined` if the tool is missing
+ * from the result or has no `execute`).
+ */
+function build({
+  effect,
+  mode,
+  cb,
+}: {
+  effect: RegisteredAiTool["effect"];
+  mode: AiPermissionMode;
+  cb: ReturnType<typeof callbacks>;
+}) {
+  const toolName = `t.${effect}`;
+  const registered = tool(toolName, effect);
+  const sdkTools = buildAgentSdkTools({
+    tools: [registered],
+    principal,
+    mode,
+    callbacks: cb,
+  });
+  const wrapped = sdkTools[toolName];
+  return wrapped?.execute;
+}
+describe("buildAgentSdkTools — 3-tier gating", () => {
+  // Every case drives the executor with the same schema-valid input and the
+  // same SDK call options; fresh objects are created on each invocation.
+  const invoke = (execute: ReturnType<typeof build>) =>
+    execute?.({ value: "x" }, { toolCallId: "t1", messages: [] });
+
+  test("read tool ALWAYS auto-runs (approve mode)", async () => {
+    const cb = callbacks();
+    const result = await invoke(build({ effect: "read", mode: "approve", cb }));
+    expect(result).toEqual({ rows: [] });
+    expect(cb.calls).toEqual(["budget", "runRead"]);
+  });
+  test("read tool ALWAYS auto-runs (auto mode) — mode never gates reads", async () => {
+    const cb = callbacks();
+    await invoke(build({ effect: "read", mode: "auto", cb }));
+    expect(cb.calls).toEqual(["budget", "runRead"]);
+  });
+  test("mutate tool in APPROVE mode -> propose (confirm card, never commits)", async () => {
+    const cb = callbacks();
+    const card = (await invoke(
+      build({ effect: "mutate", mode: "approve", cb }),
+    )) as ConfirmCardResult;
+    expect(card.__confirm).toBe(true);
+    expect(card.token).toBe("propose:abc.def");
+    expect(cb.calls).toEqual(["budget", "propose"]);
+  });
+  test("mutate tool in AUTO mode -> auto-applies server-side (no confirm card)", async () => {
+    const cb = callbacks();
+    const applied = (await invoke(
+      build({ effect: "mutate", mode: "auto", cb }),
+    )) as AutoAppliedResult;
+    expect(applied.__applied).toBe(true);
+    expect(applied.toolCallId).toBe("tc-1");
+    // It applied; it did NOT return a confirm card.
+    expect(cb.calls).toEqual(["budget", "autoApply"]);
+  });
+  test("destructive tool in APPROVE mode -> propose (confirm card)", async () => {
+    const cb = callbacks();
+    const card = (await invoke(
+      build({ effect: "destructive", mode: "approve", cb }),
+    )) as ConfirmCardResult;
+    expect(card.effect).toBe("destructive");
+    expect(card.__confirm).toBe(true);
+    expect(cb.calls).toEqual(["budget", "propose"]);
+  });
+  test("SECURITY INVARIANT: destructive tool in AUTO mode STILL proposes (never auto-applies)", async () => {
+    const cb = callbacks();
+    const card = (await invoke(
+      build({ effect: "destructive", mode: "auto", cb }),
+    )) as ConfirmCardResult;
+    // AUTO mode does NOT change a destructive tool's disposition: still a card.
+    expect(card.__confirm).toBe(true);
+    expect(card.effect).toBe("destructive");
+    // autoApply was NEVER called for the destructive tool.
+    expect(cb.calls).toEqual(["budget", "propose"]);
+  });
+  test("the model is offered exactly the tools passed in (resolver-allowed only)", () => {
+    const offered = buildAgentSdkTools({
+      tools: [tool("incident.list", "read")],
+      principal,
+      mode: "approve",
+      callbacks: callbacks(),
+    });
+    expect(Object.keys(offered)).toEqual(["incident.list"]);
+  });
+});

package/src/chat/sdk-tools.ts ADDED Viewed

@@ -0,0 +1,181 @@
+import { tool as aiTool, type Tool } from "ai";
+import type { AuthUser } from "@checkstack/backend-api";
+import type { AiPermissionMode, AiFieldDiff } from "@checkstack/ai-common";
+import type { RegisteredAiTool } from "../tool-registry";
+import { decideToolDisposition } from "./permission-mode.logic";
+/**
+ * Result a mutate/destructive tool's `execute` returns to the model in APPROVE
+ * mode (and for ALL destructive tools): it does NOT commit. It runs the propose
+ * dry-run and returns a CONFIRM CARD the human must approve via `applyTool`. The
+ * model can never silently mutate.
+ *
+ * `__confirm` is the literal tag identifying this shape; the sibling results use
+ * `__applied` ({@link AutoAppliedResult}) and `__duplicate`
+ * ({@link DuplicateToolCallResult}) instead.
+ */
+export interface ConfirmCardResult {
+  __confirm: true;
+  toolName: string; // Name of the tool this proposal is for.
+  effect: "mutate" | "destructive"; // Never "read": read tools run without a card.
+  summary: string; // Short description of the proposed change; presumably taken from the tool's dry-run — confirm.
+  /** Opaque single-use proposal token consumed by `applyTool`. */
+  token: string;
+  /** Validated, ready-to-apply payload rendered on the card. */
+  payload: unknown;
+  /** Optional before -> after diff for an update, rendered on the card. */
+  diff?: AiFieldDiff[];
+  expiresAt: string; // Expiry of the proposal as a string. NOTE(review): looks like an ISO-8601 timestamp — confirm.
+  /**
+   * MODEL-FACING guidance (ignored by the UI): tells the agent the proposal was
+   * created and shown, so it STOPS instead of re-proposing the same change. The
+   * dispatcher saw the model fire the same propose three times in a row.
+   */
+  note: string;
+}
+/**
+ * Returned to the model when it proposes/auto-applies the SAME tool with the
+ * SAME arguments again within ONE turn. Carries no `__confirm`/`__applied`, so
+ * the UI renders NO extra card; the model just gets a clear "already handled,
+ * stop" signal. Guards against the model spamming duplicate proposals/tokens
+ * because it thought the first call did not go through.
+ *
+ * Both `propose` and `autoApply` on `AgentToolCallbacks` may resolve to this
+ * shape in place of their normal result.
+ */
+export interface DuplicateToolCallResult {
+  __duplicate: true; // Literal tag; deliberately neither `__confirm` nor `__applied`.
+  toolName: string; // The tool that was called again.
+  note: string; // Model-facing "already handled, stop" message.
+}
+/**
+ * Result a `mutate` tool's `execute` returns to the model in AUTO mode: the
+ * proposal was applied SERVER-SIDE immediately (no human click), under the SAME
+ * `isAllowed` re-check + audit row the human `applyTool` path uses. Surfaced to
+ * the model so it knows the change took effect. ONLY `mutate` tools reach this
+ * (destructive tools always return a `ConfirmCardResult`).
+ *
+ * `__applied` is the literal tag identifying this shape.
+ */
+export interface AutoAppliedResult {
+  __applied: true;
+  toolName: string; // Name of the tool that was applied.
+  effect: "mutate"; // Always "mutate": the type rules out destructive tools here.
+  summary: string; // Short description of the applied change; presumably from the tool's dry-run — confirm.
+  /** The audit row id the apply produced. */
+  toolCallId: string;
+  /** The tool's `execute` result (e.g. the created automation). */
+  result: unknown;
+  /** Optional before -> after diff for an update, shown on the applied card. */
+  diff?: AiFieldDiff[];
+  /** MODEL-FACING guidance (ignored by the UI); see {@link ConfirmCardResult.note}. */
+  note: string;
+}
+/**
+ * Callbacks the SDK tool executors delegate to (kept injectable for testing).
+ *
+ * Every executor `buildAgentSdkTools` creates awaits `enforceBudget` first and
+ * then calls exactly one of `runRead`, `propose`, or `autoApply`, passing the
+ * model-supplied `input` through untouched as `unknown`. A rejection from
+ * `enforceBudget` therefore stops the call before any delegate runs.
+ */
+export interface AgentToolCallbacks {
+  /** Enforce the per-principal tool budget; throws when over budget. */
+  enforceBudget(principal: AuthUser): Promise<void>;
+  /** Run a read tool (re-checks authz, records audit). Returns the result. */
+  runRead(args: {
+    principal: AuthUser;
+    tool: RegisteredAiTool;
+    input: unknown;
+  }): Promise<unknown>;
+  /**
+   * Propose a mutate/destructive tool; returns a confirm card (no commit), or a
+   * {@link DuplicateToolCallResult} if the SAME tool+args was already proposed
+   * this turn (so the model cannot spam duplicate cards/tokens).
+   */
+  propose(args: {
+    principal: AuthUser;
+    tool: RegisteredAiTool;
+    input: unknown;
+  }): Promise<ConfirmCardResult | DuplicateToolCallResult>;
+  /**
+   * AUTO-mode-only: propose AND apply a `mutate` tool SERVER-SIDE in one shot.
+   * Runs through the SAME propose/apply service (same `isAllowed` re-check, same
+   * `ai_tool_calls` audit rows) the human `applyTool` path uses - never a weaker
+   * path. Reached ONLY for `mutate` tools; destructive tools never call this.
+   */
+  autoApply(args: {
+    principal: AuthUser;
+    tool: RegisteredAiTool;
+    input: unknown;
+  }): Promise<AutoAppliedResult | DuplicateToolCallResult>;
+}
+/**
+ * Wrap resolver-allowed Checkstack tools as Vercel-AI-SDK `tool()` definitions
+ * for the agent loop, keyed by tool name. What each wrapped `execute` does is
+ * fixed at build time by the pure `decideToolDisposition` 3-tier model
+ * (Phase 4):
+ *
+ *  - `read` tools ALWAYS auto-run via `runRead`, in BOTH modes (handler authz
+ *    re-checks on execute). The mode never gates reads.
+ *  - `mutate` tools INHERIT the conversation's permission mode: in AUTO they
+ *    auto-apply SERVER-SIDE via `autoApply` (no human click); in APPROVE they
+ *    `propose` and return a CONFIRM CARD the human approves via `applyTool`.
+ *  - `destructive` tools ALWAYS `propose` and return a CONFIRM CARD, in BOTH
+ *    modes - the mode is NEVER consulted, so a destructive tool can never
+ *    auto-apply (the security invariant).
+ *
+ * Every executor awaits `callbacks.enforceBudget(principal)` before delegating.
+ * Any disposition other than "auto-run" or "auto-apply" takes the propose path.
+ *
+ * Only tools the resolver already allowed for the principal are passed in, so
+ * the model is never even offered a forbidden tool; the budget + per-call authz
+ * re-check inside the executors (and inside propose/apply) are the server-side
+ * authority regardless.
+ */
+export function buildAgentSdkTools({
+  tools,
+  principal,
+  mode,
+  callbacks,
+}: {
+  tools: RegisteredAiTool[];
+  principal: AuthUser;
+  /** The conversation's permission mode. Governs the `mutate` branch only. */
+  mode: AiPermissionMode;
+  callbacks: AgentToolCallbacks;
+}): Record<string, Tool> {
+  const byName: Record<string, Tool> = {};
+  for (const registered of tools) {
+    // Resolve, per tool, the description shown to the model and the callback
+    // the executor forwards to once the budget check has passed.
+    let description: string;
+    let delegate: (input: unknown) => Promise<unknown>;
+    switch (decideToolDisposition({ effect: registered.effect, mode })) {
+      case "auto-run":
+        description = registered.description;
+        delegate = (input) =>
+          callbacks.runRead({ principal, tool: registered, input });
+        break;
+      case "auto-apply":
+        // AUTO mode + mutate: apply immediately server-side. Same propose/apply
+        // service (same authz re-check + audit) as a human apply - never weaker.
+        description = `${registered.description} (auto-applied immediately in this conversation's auto mode)`;
+        delegate = (input) =>
+          callbacks.autoApply({ principal, tool: registered, input });
+        break;
+      default:
+        // "propose": mutate-in-APPROVE or ANY destructive tool. The returned
+        // confirm card is what the chat UI renders; nothing is committed until
+        // the human applies.
+        description = `${registered.description} (requires human confirmation before it takes effect)`;
+        delegate = (input) =>
+          callbacks.propose({ principal, tool: registered, input });
+    }
+    byName[registered.name] = aiTool({
+      description,
+      inputSchema: registered.input,
+      execute: async (input: unknown) => {
+        await callbacks.enforceBudget(principal);
+        return delegate(input);
+      },
+    });
+  }
+  return byName;
+}