npm - @checkstack/ai-backend - Versions diffs - 0.1.0 - Mend

@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106)

package/CHANGELOG.md +97 -0
package/drizzle/0000_productive_jackpot.sql +26 -0
package/drizzle/0001_puzzling_purple_man.sql +26 -0
package/drizzle/0002_sparkling_paper_doll.sql +15 -0
package/drizzle/0003_married_senator_kelly.sql +1 -0
package/drizzle/0004_crazy_miek.sql +2 -0
package/drizzle/0005_tearful_randall_flagg.sql +1 -0
package/drizzle/meta/0000_snapshot.json +232 -0
package/drizzle/meta/0001_snapshot.json +434 -0
package/drizzle/meta/0002_snapshot.json +551 -0
package/drizzle/meta/0003_snapshot.json +557 -0
package/drizzle/meta/0004_snapshot.json +573 -0
package/drizzle/meta/0005_snapshot.json +574 -0
package/drizzle/meta/_journal.json +48 -0
package/drizzle.config.ts +7 -0
package/package.json +42 -0
package/src/agent-runner.test.ts +262 -0
package/src/agent-runner.ts +262 -0
package/src/chat/agent-loop.test.ts +119 -0
package/src/chat/agent-loop.ts +73 -0
package/src/chat/auto-apply.test.ts +237 -0
package/src/chat/chat-handler.ts +111 -0
package/src/chat/chat-service.streamturn.test.ts +417 -0
package/src/chat/chat-service.test.ts +250 -0
package/src/chat/chat-service.ts +923 -0
package/src/chat/classifier-service.ts +64 -0
package/src/chat/classifier.logic.test.ts +92 -0
package/src/chat/classifier.logic.ts +71 -0
package/src/chat/conversation-store.it.test.ts +203 -0
package/src/chat/conversation-store.test.ts +248 -0
package/src/chat/conversation-store.ts +237 -0
package/src/chat/decision.logic.test.ts +45 -0
package/src/chat/decision.logic.ts +54 -0
package/src/chat/llm-provider.test.ts +63 -0
package/src/chat/llm-provider.ts +67 -0
package/src/chat/model-error.logic.test.ts +60 -0
package/src/chat/model-error.logic.ts +65 -0
package/src/chat/normalize-messages.logic.test.ts +101 -0
package/src/chat/normalize-messages.logic.ts +65 -0
package/src/chat/permission-mode.logic.test.ts +70 -0
package/src/chat/permission-mode.logic.ts +45 -0
package/src/chat/read-invoker.ts +72 -0
package/src/chat/replay.test.ts +174 -0
package/src/chat/scrub-content.test.ts +183 -0
package/src/chat/scrub-content.ts +154 -0
package/src/chat/sdk-tools.test.ts +168 -0
package/src/chat/sdk-tools.ts +181 -0
package/src/chat/title-service.test.ts +146 -0
package/src/chat/title-service.ts +111 -0
package/src/chat/title.logic.test.ts +98 -0
package/src/chat/title.logic.ts +102 -0
package/src/extension-points.ts +41 -0
package/src/generated/docs-index.ts +3020 -0
package/src/hardening/handler-authz.test.ts +282 -0
package/src/hardening/no-secret-leak.test.ts +303 -0
package/src/hooks.ts +33 -0
package/src/index.ts +542 -0
package/src/mcp/connection-registry.test.ts +25 -0
package/src/mcp/connection-registry.ts +54 -0
package/src/mcp/mcp-conformance.it.test.ts +128 -0
package/src/mcp/server.test.ts +285 -0
package/src/mcp/server.ts +300 -0
package/src/mcp/tool-invoker.ts +65 -0
package/src/openai-provider.test.ts +64 -0
package/src/openai-provider.ts +146 -0
package/src/projection.test.ts +97 -0
package/src/projection.ts +132 -0
package/src/propose-apply/args-hash.test.ts +26 -0
package/src/propose-apply/args-hash.ts +30 -0
package/src/propose-apply/service.test.ts +423 -0
package/src/propose-apply/service.ts +419 -0
package/src/propose-apply/store.test.ts +136 -0
package/src/propose-apply/store.ts +224 -0
package/src/propose-apply/token.test.ts +52 -0
package/src/propose-apply/token.ts +71 -0
package/src/rate-limit/spend-ledger.it.test.ts +224 -0
package/src/rate-limit/spend-ledger.test.ts +176 -0
package/src/rate-limit/spend-ledger.ts +162 -0
package/src/rate-limit/tool-budget.it.test.ts +173 -0
package/src/rate-limit/tool-budget.test.ts +58 -0
package/src/rate-limit/tool-budget.ts +107 -0
package/src/registry-wiring.test.ts +131 -0
package/src/registry-wiring.ts +68 -0
package/src/resolver.test.ts +156 -0
package/src/resolver.ts +78 -0
package/src/router.test.ts +78 -0
package/src/router.ts +345 -0
package/src/schema.ts +284 -0
package/src/serializer.test.ts +88 -0
package/src/serializer.ts +42 -0
package/src/tool-registry.ts +58 -0
package/src/tools/composite-tools.ts +24 -0
package/src/tools/docs-tools.test.ts +150 -0
package/src/tools/docs-tools.ts +115 -0
package/src/tools/probe-url.test.ts +51 -0
package/src/tools/probe-url.ts +146 -0
package/src/tools/rank-docs.test.ts +153 -0
package/src/tools/rank-docs.ts +209 -0
package/src/tools/script-context-extract.test.ts +93 -0
package/src/tools/script-context-extract.ts +283 -0
package/src/tools/ssrf-guard.test.ts +69 -0
package/src/tools/ssrf-guard.ts +108 -0
package/src/tools/tool-set.e2e.test.ts +64 -0
package/src/user-rpc-client.test.ts +45 -0
package/src/user-rpc-client.ts +60 -0
package/tsconfig.json +26 -0

package/src/propose-apply/service.ts ADDED Viewed

@@ -0,0 +1,419 @@
+import { extractErrorMessage } from "@checkstack/common";
+import type { AuthUser, EventBus, RpcClient } from "@checkstack/backend-api";
+import type { AiFieldDiff } from "@checkstack/ai-common";
+import type { AiToolResolver } from "../resolver";
+import type { RegisteredAiTool } from "../tool-registry";
+import type { AiToolRegistry } from "../tool-registry";
+import { aiHooks } from "../hooks";
+import { hashToolArgs } from "./args-hash";
+import type { AiToolCallStore, AuditPrincipal } from "./store";
+import {
+  formatProposalToken,
+  nonceMatches,
+  parseProposalToken,
+} from "./token";
+/**
+ * Machine-readable failure codes carried by `ProposeApplyError`. A transport
+ * maps each code to an appropriate status code.
+ */
+export type ProposeApplyErrorCode =
+  | "forbidden" // service principal, or the resolver/authz gate refused
+  | "not_found" // unknown tool / unknown token row / tool no longer registered
+  | "not_proposable" // read-effect tool, or a tool with no dryRun
+  | "invalid_token" // malformed token / nonce mismatch
+  | "expired" // TTL elapsed (or row status is already "expired")
+  | "consumed" // status is no longer "proposed", or a concurrent apply won the consume
+  | "execute_failed"; // invalid arguments at propose, stale stored payload, or execute threw
+/**
+ * Error type thrown by the propose/apply service. `code` is the
+ * machine-readable discriminator; `message` carries the human-readable detail.
+ */
+export class ProposeApplyError extends Error {
+  public readonly code: ProposeApplyErrorCode;
+  constructor(code: ProposeApplyErrorCode, message: string) {
+    super(message);
+    this.code = code;
+    // Replace the inherited "Error" name so this type is identifiable by name.
+    this.name = "ProposeApplyError";
+  }
+}
+/**
+ * What `propose` returns: the single-use token together with the preview data
+ * needed to show the user what applying it would do.
+ */
+export interface ProposeResult {
+  /** Audit row id (== the rowId inside the token). */
+  toolCallId: string;
+  /** Opaque proposal token (`propose:<rowId>.<nonce>`). */
+  token: string;
+  /** Hard expiry of the proposal. */
+  expiresAt: Date;
+  /** One-line, human/model-facing statement of what `apply` will do. */
+  summary: string;
+  /** The validated, ready-to-apply payload (for the chat confirm card). */
+  payload: unknown;
+  /** For an update: optional before -> after diff (shown on the card). */
+  diff?: AiFieldDiff[];
+}
+/** Outcome of a successful `apply`. */
+export interface ApplyResult {
+  /** Whatever the tool's `execute` returned. */
+  result: unknown;
+  /** Id of the audit row that this apply consumed. */
+  toolCallId: string;
+}
+/**
+ * Read-only description of a proposal, resolved from its token WITHOUT consuming
+ * it (the return type of `describeProposal`). Powers the post-confirm-card
+ * acknowledgment turn (the model reacting to a human apply/decline): the caller
+ * needs the tool name + stored summary + the owning conversation + current
+ * status, but must NOT apply anything. The nonce is verified before a
+ * description is produced, so a guessed/forged token reveals nothing.
+ */
+export interface ProposalDescription {
+  rowId: string; // id of the stored proposal row
+  toolName: string; // tool name recorded on the row
+  /**
+   * Lifecycle status: proposed | applied | rejected | expired | failed.
+   * Typed as a plain string here; the value is copied from the row unchanged.
+   */
+  status: string;
+  /**
+   * The chat conversation the proposal was created in (if any); undefined when
+   * the row has none.
+   */
+  conversationId?: string;
+  /**
+   * The one-line summary captured at propose time (resultSnapshot.summary);
+   * undefined when the snapshot is missing or its summary is not a string.
+   */
+  summary?: string;
+}
+/** Collaborators injected into `createProposeApplyService`. */
+interface ProposeApplyDeps {
+  /** Persistence for proposal / audit rows. */
+  store: AiToolCallStore;
+  /** Looks registered tools up by name. */
+  registry: AiToolRegistry;
+  /** Authorization predicate: may this principal use this tool? */
+  resolver: AiToolResolver;
+  /** Optional bus; when present, `ai.toolCalled` is emitted (best-effort). */
+  eventBus?: EventBus;
+}
+/**
+ * Convert an authenticated principal into the `{ kind, id }` identity stored on
+ * audit rows.
+ *
+ * @throws ProposeApplyError with code `forbidden` for a service principal, so
+ *   an audit row never records a "service" kind.
+ */
+function auditPrincipalOf(principal: AuthUser): AuditPrincipal {
+  if (principal.type !== "service") {
+    return { kind: principal.type, id: principal.id };
+  }
+  // Services bypass the registry entirely. The resolver gate refuses them as
+  // well, but rejecting here keeps the audit trail free of service principals.
+  throw new ProposeApplyError(
+    "forbidden",
+    "Service principals cannot drive AI tools.",
+  );
+}
+/**
+ * List the access rules `tool` requires that `principal` does not hold, so a
+ * `forbidden` error can name the missing permission (the assistant relays it to
+ * the user). A principal holding `"*"` is missing nothing, and a principal
+ * without an `accessRules` field is treated as holding no rules. The result
+ * never contains a rule the principal already has.
+ */
+function missingRules({
+  principal,
+  tool,
+}: {
+  principal: AuthUser;
+  tool: RegisteredAiTool;
+}): string[] {
+  const held =
+    "accessRules" in principal ? (principal.accessRules ?? []) : [];
+  const missing: string[] = [];
+  // The `"*"` admin escape satisfies every requirement.
+  if (held.includes("*")) return missing;
+  for (const required of tool.requiredAccessRules) {
+    if (!held.includes(required)) missing.push(required);
+  }
+  return missing;
+}
+/**
+ * Build the text of a `forbidden` error for `tool`. When the missing access
+ * rules can be determined they are appended, comma-separated; otherwise only
+ * the tool name is given.
+ */
+function forbiddenMessage({
+  principal,
+  tool,
+}: {
+  principal: AuthUser;
+  tool: RegisteredAiTool;
+}): string {
+  const lacking = missingRules({ principal, tool });
+  if (lacking.length === 0) {
+    return `Forbidden: ${tool.name}`;
+  }
+  const listed = lacking.join(", ");
+  return `Forbidden: ${tool.name} (missing permission: ${listed})`;
+}
+/**
+ * Publish the `aiHooks.toolCalled` event for a tool call, if a bus is wired in.
+ *
+ * Strictly best-effort: a missing bus is a no-op, and neither a synchronous
+ * throw from `emit` nor a rejection of the promise it returns is allowed to
+ * reach the caller. An audit/notification failure must never block or fail the
+ * tool call itself.
+ *
+ * @param eventBus  Optional bus; nothing is emitted when it is absent.
+ * @param principal Audit identity reported as `principalKind` / `principalId`.
+ * @param transport Channel the tool call arrived on.
+ * @param tool      Supplies the event's `toolName` and `effect`.
+ * @param status    Lifecycle outcome being reported.
+ */
+function emitToolCalled({
+  eventBus,
+  principal,
+  transport,
+  tool,
+  status,
+}: {
+  eventBus?: EventBus;
+  principal: AuditPrincipal;
+  transport: "chat" | "mcp" | "automation";
+  tool: RegisteredAiTool;
+  status: "proposed" | "applied" | "failed";
+}): void {
+  if (!eventBus) return;
+  try {
+    // Fire-and-forget, but with a rejection handler attached. A bare
+    // `void emit(...)` leaves a rejected emit as an unhandled rejection, which
+    // Node treats as fatal by default. `Promise.resolve` normalizes whatever
+    // `emit` returns so `.catch` is always available.
+    void Promise.resolve(
+      eventBus.emit(aiHooks.toolCalled, {
+        principalKind: principal.kind,
+        principalId: principal.id,
+        transport,
+        toolName: tool.name,
+        effect: tool.effect,
+        status,
+      }),
+    ).catch(() => {
+      // Deliberately ignored: emitting is best-effort by contract.
+    });
+  } catch {
+    // `emit` threw synchronously. Ignored for the same reason, so a bus fault
+    // cannot turn a successful tool call into a reported failure.
+  }
+}
+/**
+ * The transport-agnostic two-step propose -> apply service (§8, §13.4).
+ *
+ * `propose` runs the mutating tool's `dryRun` (the mature validateDefinition /
+ * renderConfig pattern), persists a `proposed` audit row, and returns a token.
+ * `apply` parses + validates the token, re-checks authorization (rules may have
+ * changed since propose), and commits via `execute` — atomic single-use.
+ * `describeProposal` resolves a token to its stored metadata without consuming
+ * it, returning undefined instead of throwing.
+ *
+ * Read-effect tools are NOT proposable: they run directly via the transport,
+ * never through this gate.
+ *
+ * Every refusal raised by this service is a `ProposeApplyError`; the checks run
+ * in the order listed, so the first failing one decides the code.
+ *
+ * `propose`:
+ *   1. service principal            -> `forbidden`
+ *   2. tool not in the registry     -> `not_found`
+ *   3. resolver refuses             -> `forbidden` (names missing rules if known)
+ *   4. read effect or no `dryRun`   -> `not_proposable`
+ *   5. input fails the tool schema  -> `execute_failed`
+ *   6. `dryRun`, then `store.createProposal`, then the `proposed` event.
+ * Errors thrown by `dryRun` or `createProposal` are not caught here and
+ * propagate unchanged. `expiresAt` falls back to the current time when the
+ * stored row carries no expiry.
+ *
+ * `apply`:
+ *   1. service principal            -> `forbidden`
+ *   2. token does not parse         -> `invalid_token`
+ *   3. no row / row has no nonce    -> `not_found`
+ *   4. nonce mismatch               -> `invalid_token`
+ *   5. status is not `proposed`     -> `expired` or `consumed`
+ *   6. expiry reached               -> `expired`
+ *   7. tool no longer registered    -> `not_found`
+ *   8. resolver refuses             -> `forbidden`
+ *   9. `consumeProposal` yields nothing (lost the race) -> `consumed`
+ *  10. stored payload fails schema  -> `execute_failed` (+ `failed` event)
+ *  11. `execute` throws             -> `execute_failed` (+ `failed` event)
+ * `apply` executes only the payload stored on the row. Its argument type
+ * accepts a `transport` field, but that field is never read: events use the
+ * transport recorded on the consumed row.
+ * NOTE(review): steps 10 and 11 happen after the row was consumed, and this
+ * function performs no further store write on those paths. Confirm whether a
+ * `failed` status is recorded elsewhere.
+ */
+export function createProposeApplyService({
+  registry,
+  resolver,
+  store,
+  eventBus,
+}: ProposeApplyDeps) {
+  return {
+    async propose({
+      principal,
+      toolName,
+      input,
+      transport,
+      conversationId,
+      rpcClient,
+    }: {
+      principal: AuthUser;
+      toolName: string;
+      input: unknown;
+      transport: "chat" | "mcp";
+      conversationId?: string;
+      /** USER-scoped client (bound to the originating user) for the dry-run. */
+      rpcClient: RpcClient;
+    }): Promise<ProposeResult> {
+      const auditPrincipal = auditPrincipalOf(principal);
+      const tool = registry.getTool(toolName);
+      if (!tool) {
+        throw new ProposeApplyError("not_found", `Unknown tool: ${toolName}`);
+      }
+      // Authz gate (decision 5) — the same predicate the handler enforces.
+      if (!resolver.isAllowed({ principal, tool })) {
+        throw new ProposeApplyError(
+          "forbidden",
+          forbiddenMessage({ principal, tool }),
+        );
+      }
+      // Only mutate/destructive tools with a dryRun are proposable.
+      if (tool.effect === "read" || !tool.dryRun) {
+        throw new ProposeApplyError(
+          "not_proposable",
+          `Tool "${toolName}" is not a proposable mutating tool.`,
+        );
+      }
+      // Validate the input against the tool's own schema BEFORE dry-running so
+      // a malformed call is rejected without side effects.
+      const parsed = tool.input.safeParse(input);
+      if (!parsed.success) {
+        throw new ProposeApplyError(
+          "execute_failed",
+          `Invalid arguments for ${toolName}: ${parsed.error.message}`,
+        );
+      }
+      const argsHash = hashToolArgs(parsed.data);
+      const preview = await tool.dryRun({
+        input: parsed.data,
+        principal,
+        rpcClient,
+      });
+      const { row, nonce } = await store.createProposal({
+        principal: auditPrincipal,
+        transport,
+        conversationId,
+        toolName,
+        effect: tool.effect,
+        argsHash,
+        proposedPayload: preview.payload as Record<string, unknown>,
+        resultSnapshot: { summary: preview.summary },
+      });
+      emitToolCalled({
+        eventBus,
+        principal: auditPrincipal,
+        transport,
+        tool,
+        status: "proposed",
+      });
+      return {
+        token: formatProposalToken({ rowId: row.id, nonce }),
+        summary: preview.summary,
+        payload: preview.payload,
+        diff: preview.diff,
+        toolCallId: row.id,
+        expiresAt: row.proposalExpiresAt ?? new Date(),
+      };
+    },
+    /**
+     * Resolve a proposal token to a read-only description WITHOUT consuming it.
+     * Returns undefined for a malformed token, an unknown row, or a nonce
+     * mismatch (so a forged token leaks nothing). Does NOT check TTL/status -
+     * the caller inspects `status` itself (an applied proposal is expected here).
+     */
+    async describeProposal({
+      token,
+    }: {
+      token: string;
+    }): Promise<ProposalDescription | undefined> {
+      const parsedToken = parseProposalToken(token);
+      if (!parsedToken) return undefined;
+      const row = await store.getProposal(parsedToken.rowId);
+      if (!row || !row.proposalNonce) return undefined;
+      if (
+        !nonceMatches({
+          candidate: parsedToken.nonce,
+          stored: row.proposalNonce,
+        })
+      ) {
+        return undefined;
+      }
+      const snapshot = row.resultSnapshot as { summary?: unknown } | null;
+      const summary =
+        snapshot && typeof snapshot.summary === "string"
+          ? snapshot.summary
+          : undefined;
+      return {
+        rowId: row.id,
+        toolName: row.toolName,
+        status: row.status,
+        conversationId: row.conversationId ?? undefined,
+        summary,
+      };
+    },
+    async apply({
+      principal,
+      token,
+      rpcClient,
+    }: {
+      principal: AuthUser;
+      token: string;
+      transport?: "chat" | "mcp";
+      /** USER-scoped client (bound to the originating user) for the commit. */
+      rpcClient: RpcClient;
+    }): Promise<ApplyResult> {
+      const auditPrincipal = auditPrincipalOf(principal);
+      const parsedToken = parseProposalToken(token);
+      if (!parsedToken) {
+        throw new ProposeApplyError("invalid_token", "Malformed proposal token.");
+      }
+      // Fetch first to validate the nonce + status + TTL with precise errors
+      // (the atomic consume below is the single-use authority, but we want a
+      // constant-time nonce compare and clear 410/409 distinctions).
+      const existing = await store.getProposal(parsedToken.rowId);
+      if (!existing || !existing.proposalNonce) {
+        throw new ProposeApplyError("not_found", "Unknown proposal token.");
+      }
+      if (
+        !nonceMatches({
+          candidate: parsedToken.nonce,
+          stored: existing.proposalNonce,
+        })
+      ) {
+        // Constant-time mismatch — treat as invalid, never reveal which part.
+        throw new ProposeApplyError("invalid_token", "Invalid proposal token.");
+      }
+      if (existing.status !== "proposed") {
+        // Already applied / rejected / expired — single-use.
+        throw new ProposeApplyError(
+          existing.status === "expired" ? "expired" : "consumed",
+          `Proposal is no longer applicable (status: ${existing.status}).`,
+        );
+      }
+      if (
+        existing.proposalExpiresAt &&
+        existing.proposalExpiresAt.getTime() <= Date.now()
+      ) {
+        throw new ProposeApplyError("expired", "Proposal token has expired.");
+      }
+      const tool = registry.getTool(existing.toolName);
+      if (!tool) {
+        throw new ProposeApplyError(
+          "not_found",
+          `Tool "${existing.toolName}" is no longer registered.`,
+        );
+      }
+      // Re-check authz at apply time — the principal's rules may have changed
+      // since propose (narrowing runs live, §6.3).
+      if (!resolver.isAllowed({ principal, tool })) {
+        throw new ProposeApplyError(
+          "forbidden",
+          forbiddenMessage({ principal, tool }),
+        );
+      }
+      // Atomic single-use consume: only one caller wins the proposed -> applied
+      // transition. A concurrent second apply gets `undefined`. The applier
+      // principal is stamped into the row so the audit records WHO applied,
+      // even if it differs from the proposer (P3 review item 1).
+      const consumed = await store.consumeProposal({
+        rowId: parsedToken.rowId,
+        applier: auditPrincipal,
+      });
+      if (!consumed) {
+        throw new ProposeApplyError(
+          "consumed",
+          "Proposal token was already consumed.",
+        );
+      }
+      // Belt-and-suspenders (P3 review item 2): re-parse the SERVER-STORED
+      // payload against the tool's own input schema before executing. `apply`
+      // executes ONLY the server-stored payload captured at propose time — never
+      // any caller-supplied arguments — so this guards against a payload that no
+      // longer satisfies the (possibly evolved) schema.
+      const repared = tool.input.safeParse(consumed.proposedPayload);
+      if (!repared.success) {
+        emitToolCalled({
+          eventBus,
+          principal: auditPrincipal,
+          transport: consumed.transport,
+          tool,
+          status: "failed",
+        });
+        throw new ProposeApplyError(
+          "execute_failed",
+          `Stored proposal payload for ${tool.name} no longer matches its input schema: ${repared.error.message}`,
+        );
+      }
+      try {
+        const result = await tool.execute({
+          input: repared.data,
+          principal,
+          rpcClient,
+        });
+        emitToolCalled({
+          eventBus,
+          principal: auditPrincipal,
+          transport: consumed.transport,
+          tool,
+          status: "applied",
+        });
+        return { toolCallId: consumed.id, result };
+      } catch (error) {
+        emitToolCalled({
+          eventBus,
+          principal: auditPrincipal,
+          transport: consumed.transport,
+          tool,
+          status: "failed",
+        });
+        throw new ProposeApplyError(
+          "execute_failed",
+          extractErrorMessage(error, "Apply failed during execute."),
+        );
+      }
+    },
+  };
+}
+export type ProposeApplyService = ReturnType<typeof createProposeApplyService>; // derived from the factory, so it always matches the object it returns

package/src/propose-apply/store.test.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import { describe, expect, test, mock } from "bun:test";
+import { createAiToolCallStore, PROPOSAL_TTL_MS } from "./store";
+import type { AiToolCallRow } from "../schema";
+/**
+ * Build an `AiToolCallRow` fixture. The defaults describe a `proposed` mutate
+ * call made by user `u1` over chat; any field can be replaced through `over`.
+ */
+function row(over: Partial<AiToolCallRow> = {}): AiToolCallRow {
+  const defaults: AiToolCallRow = {
+    id: "r1",
+    principalKind: "user",
+    principalId: "u1",
+    transport: "chat",
+    conversationId: null,
+    toolName: "demo.mutate",
+    effect: "mutate",
+    argsHash: "h".repeat(64),
+    status: "proposed",
+    proposalNonce: "n".repeat(64),
+    proposalExpiresAt: new Date("2026-06-01T00:10:00Z"),
+    resultSnapshot: null,
+    proposedPayload: { value: "x" },
+    error: null,
+    proposedAt: new Date("2026-06-01T00:00:00Z"),
+    appliedAt: null,
+    appliedByKind: null,
+    appliedById: null,
+    createdAt: new Date("2026-06-01T00:00:00Z"),
+  };
+  // Overrides are spread last so they take precedence over the defaults.
+  return { ...defaults, ...over };
+}
+describe("createAiToolCallStore", () => {
+  test("createProposal inserts a proposed row with nonce + TTL", async () => {
+    const now = new Date("2026-06-01T00:00:00Z");
+    const values = mock((_v: Record<string, unknown>) => ({
+      returning: mock(() => Promise.resolve([row()])),
+    }));
+    const db = { insert: mock(() => ({ values })) };
+    const store = createAiToolCallStore({ db: db as never }); // cast: the stub only implements the calls this test exercises
+    const { row: created, nonce } = await store.createProposal({
+      principal: { kind: "user", id: "u1" },
+      transport: "chat",
+      toolName: "demo.mutate",
+      effect: "mutate",
+      argsHash: "h".repeat(64),
+      proposedPayload: { value: "x" },
+      now, // injected clock so the TTL assertion below is deterministic
+    });
+    const inserted = values.mock.calls[0]?.[0] as {
+      status: string;
+      proposalNonce: string;
+      proposalExpiresAt: Date;
+    };
+    expect(inserted.status).toBe("proposed");
+    expect(inserted.proposalNonce).toMatch(/^[0-9a-f]{64}$/); // 64 lowercase hex characters
+    expect(inserted.proposalExpiresAt.getTime()).toBe(now.getTime() + PROPOSAL_TTL_MS);
+    expect(nonce).toBe(inserted.proposalNonce); // the returned nonce is the one that was persisted
+    expect(created.id).toBe("r1");
+  });
+  test("recordExecuted writes an executed read row (no nonce)", async () => {
+    const values = mock((_v: Record<string, unknown>) => ({
+      returning: mock(() =>
+        Promise.resolve([row({ status: "executed", effect: "read", proposalNonce: null })]),
+      ),
+    }));
+    const db = { insert: mock(() => ({ values })) };
+    const store = createAiToolCallStore({ db: db as never });
+    await store.recordExecuted({
+      principal: { kind: "user", id: "u1" },
+      transport: "mcp",
+      toolName: "incident.list",
+      argsHash: "h".repeat(64),
+    });
+    const inserted = values.mock.calls[0]?.[0] as {
+      status: string;
+      effect: string;
+      proposalNonce?: string;
+    };
+    expect(inserted.status).toBe("executed");
+    expect(inserted.effect).toBe("read");
+    expect(inserted.proposalNonce).toBeUndefined(); // the inserted values carry no nonce at all
+  });
+  test("consumeProposal issues an UPDATE ... WHERE status='proposed' (atomic single-use) and stamps the applier", async () => {
+    const where = mock(() => ({
+      returning: mock(() =>
+        Promise.resolve([
+          row({ status: "applied", appliedByKind: "user", appliedById: "u2" }),
+        ]),
+      ),
+    }));
+    const set = mock(
+      (_v: {
+        status: string;
+        appliedAt: Date;
+        appliedByKind: string;
+        appliedById: string;
+      }) => ({ where }),
+    );
+    const db = { update: mock(() => ({ set })) };
+    const store = createAiToolCallStore({ db: db as never });
+    const consumed = await store.consumeProposal({
+      rowId: "r1",
+      applier: { kind: "user", id: "u2" },
+    });
+    expect(consumed?.status).toBe("applied");
+    // The SET payload must move the row to applied and carry appliedAt + applier.
+    const setArg = set.mock.calls[0]?.[0];
+    expect(setArg?.status).toBe("applied");
+    expect(setArg?.appliedAt).toBeInstanceOf(Date);
+    // P3 review item 1: the applying principal (u2 here, not the proposer u1) is recorded.
+    expect(setArg?.appliedByKind).toBe("user");
+    expect(setArg?.appliedById).toBe("u2");
+    // The atomic guard belongs in the WHERE clause; this mock only proves WHERE was called once, not what it filters on.
+    expect(where).toHaveBeenCalledTimes(1);
+  });
+  test("consumeProposal returns undefined when no row matches (already consumed)", async () => {
+    const where = mock(() => ({
+      returning: mock(() => Promise.resolve([])), // empty result: the UPDATE matched no row
+    }));
+    const set = mock(() => ({ where }));
+    const db = { update: mock(() => ({ set })) };
+    const store = createAiToolCallStore({ db: db as never });
+    expect(
+      await store.consumeProposal({
+        rowId: "r1",
+        applier: { kind: "user", id: "u2" },
+      }),
+    ).toBeUndefined();
+  });
+});
+});