npm - @checkstack/ai-backend - Versions diffs - 0.1.0 - Mend

@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

package/CHANGELOG.md +97 -0
package/drizzle/0000_productive_jackpot.sql +26 -0
package/drizzle/0001_puzzling_purple_man.sql +26 -0
package/drizzle/0002_sparkling_paper_doll.sql +15 -0
package/drizzle/0003_married_senator_kelly.sql +1 -0
package/drizzle/0004_crazy_miek.sql +2 -0
package/drizzle/0005_tearful_randall_flagg.sql +1 -0
package/drizzle/meta/0000_snapshot.json +232 -0
package/drizzle/meta/0001_snapshot.json +434 -0
package/drizzle/meta/0002_snapshot.json +551 -0
package/drizzle/meta/0003_snapshot.json +557 -0
package/drizzle/meta/0004_snapshot.json +573 -0
package/drizzle/meta/0005_snapshot.json +574 -0
package/drizzle/meta/_journal.json +48 -0
package/drizzle.config.ts +7 -0
package/package.json +42 -0
package/src/agent-runner.test.ts +262 -0
package/src/agent-runner.ts +262 -0
package/src/chat/agent-loop.test.ts +119 -0
package/src/chat/agent-loop.ts +73 -0
package/src/chat/auto-apply.test.ts +237 -0
package/src/chat/chat-handler.ts +111 -0
package/src/chat/chat-service.streamturn.test.ts +417 -0
package/src/chat/chat-service.test.ts +250 -0
package/src/chat/chat-service.ts +923 -0
package/src/chat/classifier-service.ts +64 -0
package/src/chat/classifier.logic.test.ts +92 -0
package/src/chat/classifier.logic.ts +71 -0
package/src/chat/conversation-store.it.test.ts +203 -0
package/src/chat/conversation-store.test.ts +248 -0
package/src/chat/conversation-store.ts +237 -0
package/src/chat/decision.logic.test.ts +45 -0
package/src/chat/decision.logic.ts +54 -0
package/src/chat/llm-provider.test.ts +63 -0
package/src/chat/llm-provider.ts +67 -0
package/src/chat/model-error.logic.test.ts +60 -0
package/src/chat/model-error.logic.ts +65 -0
package/src/chat/normalize-messages.logic.test.ts +101 -0
package/src/chat/normalize-messages.logic.ts +65 -0
package/src/chat/permission-mode.logic.test.ts +70 -0
package/src/chat/permission-mode.logic.ts +45 -0
package/src/chat/read-invoker.ts +72 -0
package/src/chat/replay.test.ts +174 -0
package/src/chat/scrub-content.test.ts +183 -0
package/src/chat/scrub-content.ts +154 -0
package/src/chat/sdk-tools.test.ts +168 -0
package/src/chat/sdk-tools.ts +181 -0
package/src/chat/title-service.test.ts +146 -0
package/src/chat/title-service.ts +111 -0
package/src/chat/title.logic.test.ts +98 -0
package/src/chat/title.logic.ts +102 -0
package/src/extension-points.ts +41 -0
package/src/generated/docs-index.ts +3020 -0
package/src/hardening/handler-authz.test.ts +282 -0
package/src/hardening/no-secret-leak.test.ts +303 -0
package/src/hooks.ts +33 -0
package/src/index.ts +542 -0
package/src/mcp/connection-registry.test.ts +25 -0
package/src/mcp/connection-registry.ts +54 -0
package/src/mcp/mcp-conformance.it.test.ts +128 -0
package/src/mcp/server.test.ts +285 -0
package/src/mcp/server.ts +300 -0
package/src/mcp/tool-invoker.ts +65 -0
package/src/openai-provider.test.ts +64 -0
package/src/openai-provider.ts +146 -0
package/src/projection.test.ts +97 -0
package/src/projection.ts +132 -0
package/src/propose-apply/args-hash.test.ts +26 -0
package/src/propose-apply/args-hash.ts +30 -0
package/src/propose-apply/service.test.ts +423 -0
package/src/propose-apply/service.ts +419 -0
package/src/propose-apply/store.test.ts +136 -0
package/src/propose-apply/store.ts +224 -0
package/src/propose-apply/token.test.ts +52 -0
package/src/propose-apply/token.ts +71 -0
package/src/rate-limit/spend-ledger.it.test.ts +224 -0
package/src/rate-limit/spend-ledger.test.ts +176 -0
package/src/rate-limit/spend-ledger.ts +162 -0
package/src/rate-limit/tool-budget.it.test.ts +173 -0
package/src/rate-limit/tool-budget.test.ts +58 -0
package/src/rate-limit/tool-budget.ts +107 -0
package/src/registry-wiring.test.ts +131 -0
package/src/registry-wiring.ts +68 -0
package/src/resolver.test.ts +156 -0
package/src/resolver.ts +78 -0
package/src/router.test.ts +78 -0
package/src/router.ts +345 -0
package/src/schema.ts +284 -0
package/src/serializer.test.ts +88 -0
package/src/serializer.ts +42 -0
package/src/tool-registry.ts +58 -0
package/src/tools/composite-tools.ts +24 -0
package/src/tools/docs-tools.test.ts +150 -0
package/src/tools/docs-tools.ts +115 -0
package/src/tools/probe-url.test.ts +51 -0
package/src/tools/probe-url.ts +146 -0
package/src/tools/rank-docs.test.ts +153 -0
package/src/tools/rank-docs.ts +209 -0
package/src/tools/script-context-extract.test.ts +93 -0
package/src/tools/script-context-extract.ts +283 -0
package/src/tools/ssrf-guard.test.ts +69 -0
package/src/tools/ssrf-guard.ts +108 -0
package/src/tools/tool-set.e2e.test.ts +64 -0
package/src/user-rpc-client.test.ts +45 -0
package/src/user-rpc-client.ts +60 -0
package/tsconfig.json +26 -0

package/src/chat/agent-loop.ts ADDED Viewed

@@ -0,0 +1,73 @@
+import type { AuthUser } from "@checkstack/backend-api";
+import type { AiToolResolver } from "../resolver";
+import type { RegisteredAiTool } from "../tool-registry";
+/**
+ * Server-side agent-loop CORE (plan §4 / Phase 4) — provider-agnostic and
+ * DOM-free, so the security-critical tool-gating logic is unit-testable without
+ * a model, a browser, or the Vercel AI SDK.
+ *
+ * The loop treats the model as an UNTRUSTED caller (decision §1.5): it may only
+ * invoke tools the resolver allows for the logged-in principal, and it may never
+ * silently mutate. The decision of WHAT a model-requested tool call does is made
+ * here, not by the SDK:
+ *
+ *  - `read` tools AUTO-RUN (handler-side authz still re-checks on execute).
+ *  - `mutate` / `destructive` tools NEVER execute inline; they go through the
+ *    propose/apply gate and surface a CONFIRM CARD the human must approve.
+ *  - a tool the principal cannot see is REFUSED server-side, even if the model
+ *    asks for it (the resolver never offered it, but the model is untrusted).
+ */
+/**
+ * What the agent loop should do with a model-requested tool call.
+ *
+ * - `run`: produced by `disposeAgentTool` for an allowed tool whose `effect`
+ *   is `"read"`; carries the tool to execute.
+ * - `confirm`: produced for an allowed tool with any other effect; carries the
+ *   tool that must go through human confirmation.
+ * - `refused`: produced when the tool name is unknown or the resolver does not
+ *   allow it for the principal; `reason` holds the explanation string.
+ */
+export type AgentToolDisposition =
+  | { kind: "run"; tool: RegisteredAiTool } // read tool: auto-run
+  | { kind: "confirm"; tool: RegisteredAiTool } // mutate/destructive: propose + confirm card
+  | { kind: "refused"; reason: string }; // unknown / not allowed
+/**
+ * Server-side gate deciding how a tool call requested by the model is handled.
+ * It mirrors the MCP `tools/call` gate so that both transports treat the model
+ * the same way.
+ *
+ * Checks run in a fixed order: the tool must exist, then the resolver must
+ * allow it for the principal, and only then does the tool's `effect` choose
+ * between auto-run and human confirmation.
+ *
+ * @param toolName  Name of the tool the model asked for.
+ * @param principal The logged-in caller on whose behalf the model acts.
+ * @param resolver  Authorization source consulted via `isAllowed`.
+ * @param getTool   Lookup from tool name to its registration, if any.
+ * @returns `refused` for unknown or forbidden tools, `run` for `read` tools,
+ *          `confirm` for every other effect.
+ */
+export function disposeAgentTool({
+  toolName,
+  principal,
+  resolver,
+  getTool,
+}: {
+  toolName: string;
+  principal: AuthUser;
+  resolver: AiToolResolver;
+  getTool: (name: string) => RegisteredAiTool | undefined;
+}): AgentToolDisposition {
+  const requested = getTool(toolName);
+  if (!requested) {
+    return { kind: "refused", reason: `Unknown tool: ${toolName}` };
+  }
+
+  // Never trust the model: authorization is re-checked here even though the
+  // resolver only ever offered tools this principal may use.
+  const permitted = resolver.isAllowed({ principal, tool: requested });
+  if (!permitted) {
+    return { kind: "refused", reason: `Forbidden: ${toolName}` };
+  }
+
+  // Only read tools run inline; anything that mutates or destroys must be
+  // confirmed by a human first.
+  return requested.effect === "read"
+    ? { kind: "run", tool: requested }
+    : { kind: "confirm", tool: requested };
+}
+/**
+ * Lists the tools the loop offers the model for a given principal.
+ *
+ * The result is exactly what the resolver returns — this function never adds
+ * to it. Mutating and destructive tools are included so the model can request
+ * them; `disposeAgentTool` answers such requests with `confirm`, never `run`.
+ *
+ * @param principal The logged-in caller whose visible tools are resolved.
+ * @param resolver  Resolver whose `resolveTools` output is returned as-is.
+ * @returns The resolver's tool list for the principal.
+ */
+export function offeredTools(args: {
+  principal: AuthUser;
+  resolver: AiToolResolver;
+}): RegisteredAiTool[] {
+  const { principal, resolver } = args;
+  const visibleTools = resolver.resolveTools(principal);
+  return visibleTools;
+}

package/src/chat/auto-apply.test.ts ADDED Viewed

@@ -0,0 +1,237 @@
+import { describe, expect, test, mock } from "bun:test";
+import { z } from "zod";
+import type { AuthUser } from "@checkstack/backend-api";
+import { createAiToolRegistry } from "../tool-registry";
+import type { RegisteredAiTool } from "../tool-registry";
+import { createAiToolResolver } from "../resolver";
+import {
+  createProposeApplyService,
+  ProposeApplyError,
+} from "../propose-apply/service";
+import { generateProposalNonce } from "../propose-apply/token";
+import type { AiToolCallStore } from "../propose-apply/store";
+import type { AiToolCallRow } from "../schema";
+import { buildChatToolCallbacks, type ChatRecordExecuted } from "./chat-service";
+import type { ChatReadInvoker } from "./read-invoker";
+/**
+ * In-memory `AiToolCallStore` mirroring the atomic single-use consume (same as
+ * the propose/apply service test). Used here to prove the AUTO-mode server-side
+ * auto-apply path runs through the SAME propose/apply service - same `isAllowed`
+ * re-check, same `ai_tool_calls` audit rows - as the human `applyTool` flow.
+ *
+ * Rows are kept in a `Map` keyed by a generated `row-N` id. The map is also
+ * exposed on the returned object as `rows` so tests can inspect what was
+ * recorded.
+ *
+ * @param now Clock used for `createdAt` and as the default timestamp for
+ *            proposals and consumption.
+ */
+function createFakeStore(now: () => Date): AiToolCallStore & {
+  rows: Map<string, AiToolCallRow>;
+} {
+  const rows = new Map<string, AiToolCallRow>();
+  let counter = 0; // incremented per row to build the `row-N` id
+  const baseRow = (over: Partial<AiToolCallRow>): AiToolCallRow => ({
+    id: `row-${++counter}`,
+    principalKind: "user",
+    principalId: "u1",
+    transport: "chat",
+    conversationId: null,
+    toolName: "x",
+    effect: "mutate",
+    argsHash: "h",
+    status: "proposed",
+    proposalNonce: null,
+    proposalExpiresAt: null,
+    resultSnapshot: null,
+    proposedPayload: null,
+    error: null,
+    proposedAt: null,
+    appliedAt: null,
+    appliedByKind: null,
+    appliedById: null,
+    createdAt: now(),
+    ...over, // spread last: caller-supplied fields replace the defaults above
+  });
+  return {
+    rows,
+    async recordExecuted(args) {
+      const row = baseRow({
+        ...args,
+        conversationId: args.conversationId ?? null,
+        effect: "read", // listed after `...args`, so these fixed values take precedence
+        status: "executed",
+        resultSnapshot: args.resultSnapshot ?? null,
+      });
+      rows.set(row.id, row);
+      return row;
+    },
+    async recordFailed(args) {
+      const row = baseRow({
+        ...args,
+        conversationId: args.conversationId ?? null,
+        status: "failed",
+      });
+      rows.set(row.id, row);
+      return row;
+    },
+    async createProposal(args) {
+      const nonce = generateProposalNonce();
+      const row = baseRow({
+        principalKind: args.principal.kind,
+        principalId: args.principal.id,
+        transport: args.transport,
+        conversationId: args.conversationId ?? null,
+        toolName: args.toolName,
+        effect: args.effect,
+        argsHash: args.argsHash,
+        status: "proposed",
+        proposalNonce: nonce,
+        proposalExpiresAt: new Date((args.now ?? now()).getTime() + 600_000), // 600 000 ms = 10 minutes after the proposal time
+        proposedPayload: args.proposedPayload,
+        resultSnapshot: args.resultSnapshot ?? null,
+        proposedAt: args.now ?? now(),
+      });
+      rows.set(row.id, row);
+      return { row, nonce };
+    },
+    async consumeProposal({ rowId, applier, now: at = now() }) {
+      const row = rows.get(rowId);
+      if (!row) return undefined;
+      if (row.status !== "proposed") return undefined; // single use: a row already moved to "applied" cannot be consumed again
+      if (row.proposalExpiresAt && row.proposalExpiresAt.getTime() <= at.getTime()) {
+        return undefined; // expired at or before `at`
+      }
+      const updated: AiToolCallRow = {
+        ...row,
+        status: "applied",
+        appliedAt: at,
+        appliedByKind: applier.kind,
+        appliedById: applier.id,
+      };
+      rows.set(rowId, updated);
+      return updated;
+    },
+    async getProposal(rowId) {
+      return rows.get(rowId);
+    },
+    async expireStaleProposals() {
+      return 0; // this fake never expires rows; it always reports zero
+    },
+  };
+}
+/** Input schema of the demo mutating tool: a single string `value`. */
+const ManageInput = z.object({ value: z.string() });
+/**
+ * Builds the `demo.mutate` tool used by these tests together with the spy that
+ * backs its `execute`, so a test can assert whether the tool body ran.
+ */
+function mutatingTool(): {
+  tool: RegisteredAiTool<{ value: string }, { created: string }>;
+  execute: ReturnType<typeof mock>;
+} {
+  // `execute` is a spy so a test can prove the tool body NEVER ran (e.g. when
+  // the authz gate refuses before any commit). Today the refusal happens in
+  // `propose`, ahead of execute — the spy keeps that no-execute guarantee
+  // regression-proof should the gate order ever change.
+  const execute = mock(async (call: { input: { value: string } }) => {
+    return { created: call.input.value };
+  });
+  return {
+    tool: {
+      name: "demo.mutate",
+      description: "demo mutating tool",
+      effect: "mutate",
+      input: ManageInput,
+      requiredAccessRules: ["demo.demo.manage"],
+      dryRun: async ({ input }) => {
+        const { value } = input;
+        return {
+          summary: `Would create ${value}`,
+          payload: { value },
+        };
+      },
+      execute,
+    },
+    execute,
+  };
+}
+const allowed: AuthUser = { // holds "demo.demo.manage", the rule `demo.mutate` lists in requiredAccessRules
+  type: "user",
+  id: "u1",
+  accessRules: ["demo.demo.manage"],
+};
+const notAllowed: AuthUser = { // does not hold "demo.demo.manage"
+  type: "user",
+  id: "u2",
+  accessRules: ["other.read"],
+};
+/**
+ * Minimal database stub exposing only a `select().from().where()` chain, whose
+ * final step resolves to a single row `{ value: used }`. The result is cast to
+ * `never` so it can be passed wherever a database handle is expected.
+ */
+function budgetDb(used: number) {
+  const whereStub = mock(() => Promise.resolve([{ value: used }]));
+  const fromStub = mock(() => ({ where: whereStub }));
+  const selectStub = mock(() => ({ from: fromStub }));
+  return { select: selectStub } as never;
+}
+/**
+ * Wires a fresh registry, resolver, in-memory store and propose/apply service
+ * around the demo mutating tool, then builds the chat tool callbacks on top.
+ * Returns the pieces a test needs to drive and inspect an auto-apply.
+ */
+function setup() {
+  const toolRegistry = createAiToolRegistry();
+  const demo = mutatingTool();
+  toolRegistry.register(demo.tool);
+  const toolResolver = createAiToolResolver({ registry: toolRegistry });
+  const fakeStore = createFakeStore(() => new Date());
+  const service = createProposeApplyService({
+    registry: toolRegistry,
+    resolver: toolResolver,
+    store: fakeStore,
+  });
+  // Reads are out of scope for these tests: any read invocation rejects.
+  const failingReadInvoker: ChatReadInvoker = {
+    invoke: () => Promise.reject(new Error("read invoker should not run")),
+  };
+  const noopRecordExecuted: ChatRecordExecuted = async () => {};
+  const callbacks = buildChatToolCallbacks({
+    proposeApply: service,
+    readInvoker: failingReadInvoker,
+    recordExecuted: noopRecordExecuted,
+    readRouting: new Map(),
+    db: budgetDb(0),
+    conversationId: "conv-1",
+    forwardHeaders: {},
+    internalUrl: "http://localhost:3000",
+  });
+  return {
+    tool: demo.tool,
+    execute: demo.execute,
+    store: fakeStore,
+    callbacks,
+  };
+}
+describe("AUTO-mode mutate auto-apply path", () => { // drives `callbacks.autoApply` built by `setup()`
+  test("auto-applies server-side through the SAME propose/apply service (audited as applied)", async () => {
+    const { tool, execute, store, callbacks } = setup();
+    const result = await callbacks.autoApply({
+      principal: allowed,
+      tool,
+      input: { value: "alpha" },
+    });
+    // A first call must auto-apply (not be deduped as a duplicate).
+    if (!("__applied" in result)) { // also narrows `result` for the assertions below
+      throw new Error("expected an applied result, got a duplicate");
+    }
+    // The tool's execute actually ran (server-side apply, no human click).
+    expect(result.__applied).toBe(true);
+    expect(result.result).toEqual({ created: "alpha" });
+    expect(execute).toHaveBeenCalledTimes(1);
+    // The audit trail mirrors a HUMAN apply exactly: a row transitioned
+    // proposed -> applied, with the applier stamped. Not a weaker/parallel path.
+    const applied = [...store.rows.values()].filter((r) => r.status === "applied");
+    expect(applied).toHaveLength(1);
+    expect(applied[0]?.toolName).toBe("demo.mutate");
+    expect(applied[0]?.effect).toBe("mutate");
+    expect(applied[0]?.appliedById).toBe("u1"); // id of the `allowed` principal
+    expect(applied[0]?.id).toBe(result.toolCallId);
+  });
+  test("re-checks authz: an unauthorized principal is refused (no apply, no execute)", async () => {
+    const { tool, execute, store, callbacks } = setup();
+    await expect(
+      callbacks.autoApply({
+        principal: notAllowed,
+        tool,
+        input: { value: "beta" },
+      }),
+    ).rejects.toBeInstanceOf(ProposeApplyError);
+    // Nothing was applied — the authz gate (the SAME `isAllowed` re-check the
+    // human path uses) refused before any commit.
+    const applied = [...store.rows.values()].filter((r) => r.status === "applied");
+    expect(applied).toHaveLength(0);
+    // The tool body NEVER ran for the unauthorized principal. Refusal happens in
+    // `propose` (before any execute) today; this assertion makes the no-execute
+    // guarantee regression-proof even if the gate order ever changes.
+    expect(execute).not.toHaveBeenCalled();
+  });
+});

package/src/chat/chat-handler.ts ADDED Viewed

@@ -0,0 +1,111 @@
+import { z } from "zod";
+import type { AuthService } from "@checkstack/backend-api";
+import { extractErrorMessage } from "@checkstack/common";
+import type { ChatService } from "./chat-service";
+import { forwardableAuthHeaders } from "./read-invoker";
+/**
+ * Body of a streaming chat turn POST (a new user message).
+ *
+ * `message` must be a non-empty string; `model` may be omitted.
+ */
+const ChatTurnBodySchema = z.object({
+  conversationId: z.string(),
+  connectionId: z.string(),
+  model: z.string().optional(),
+  message: z.string().min(1),
+});
+/**
+ * Body of a post-confirm-card decision POST: the operator applied or declined a
+ * proposal and we stream the model's acknowledgment. The actual apply runs
+ * separately via `applyTool`; this only carries the proposal token + decision.
+ *
+ * `decision.token` must be non-empty and `decision.kind` is either `"apply"`
+ * or `"decline"`.
+ */
+const ChatDecisionBodySchema = z.object({
+  conversationId: z.string(),
+  connectionId: z.string(),
+  model: z.string().optional(),
+  decision: z.object({
+    token: z.string().min(1),
+    kind: z.enum(["apply", "decline"]),
+  }),
+});
+/**
+ * A /chat POST is either a new user turn or a confirm-card decision turn.
+ *
+ * The request handler tells the two apart by whether the parsed body has a
+ * `decision` property.
+ */
+const ChatRequestBodySchema = z.union([
+  ChatTurnBodySchema,
+  ChatDecisionBodySchema,
+]);
+/**
+ * Raw HTTP handler for the streaming chat turn, mounted at /api/ai/chat. SSE
+ * streaming requires a raw handler (oRPC does not stream), so authentication is
+ * done here via the platform auth strategy — the SAME principal resolution as
+ * every other request. The resolved principal must be a logged-in RealUser
+ * (chat is RealUser-only); the credential never crosses to the browser.
+ *
+ * Responses produced directly by this handler:
+ *  - 405 (with `Allow: POST`) for any method other than POST.
+ *  - 401 when authentication yields no principal.
+ *  - 403 when the principal is not of type `"user"`.
+ *  - 400 when the body is not JSON or fails schema validation.
+ *  - 500 when the chat service throws while starting the stream.
+ * Otherwise the `Response` returned by the chat service is passed through.
+ *
+ * @param chatService Service providing `streamTurn` and `streamDecision`.
+ * @param auth        Auth service used to resolve the request's principal.
+ * @returns An async request handler.
+ */
+export function createChatRequestHandler({
+  chatService,
+  auth,
+}: {
+  chatService: ChatService;
+  auth: AuthService;
+}): (req: Request) => Promise<Response> {
+  return async function handleChatRequest(req: Request): Promise<Response> {
+    if (req.method !== "POST") {
+      // RFC 9110 §15.5.6: a 405 response must carry an Allow header listing
+      // the methods the resource supports.
+      return new Response(null, { status: 405, headers: { Allow: "POST" } });
+    }
+    const principal = await auth.authenticate(req);
+    if (!principal) {
+      return Response.json({ error: "Unauthorized" }, { status: 401 });
+    }
+    if (principal.type !== "user") {
+      // Applications/services use MCP, not the in-app chat.
+      return Response.json(
+        { error: "AI chat is available to logged-in users only." },
+        { status: 403 },
+      );
+    }
+    let body: z.infer<typeof ChatRequestBodySchema>;
+    try {
+      const parsed = ChatRequestBodySchema.safeParse(await req.json());
+      if (!parsed.success) {
+        return Response.json(
+          { error: `Invalid request: ${parsed.error.message}` },
+          { status: 400 },
+        );
+      }
+      body = parsed.data;
+    } catch {
+      // `req.json()` rejected: the body was not parseable JSON.
+      return Response.json({ error: "Invalid JSON body." }, { status: 400 });
+    }
+    const forwardHeaders = forwardableAuthHeaders(req);
+    try {
+      // A decision turn (operator applied/declined a confirm card) vs a normal
+      // user message turn. The union narrows on the presence of `decision`.
+      if ("decision" in body) {
+        return await chatService.streamDecision({
+          principal,
+          conversationId: body.conversationId,
+          connectionId: body.connectionId,
+          model: body.model,
+          forwardHeaders,
+          token: body.decision.token,
+          decision: body.decision.kind,
+        });
+      }
+      return await chatService.streamTurn({
+        principal,
+        conversationId: body.conversationId,
+        connectionId: body.connectionId,
+        model: body.model,
+        forwardHeaders,
+        userText: body.message,
+      });
+    } catch (error) {
+      // Only failures raised before a Response is returned land here.
+      return Response.json(
+        { error: extractErrorMessage(error, "Chat turn failed.") },
+        { status: 500 },
+      );
+    }
+  };
+}