npm - @checkstack/ai-backend - Versions diffs - 0.1.0 - Mend

@checkstack/ai-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106)

package/CHANGELOG.md +97 -0
package/drizzle/0000_productive_jackpot.sql +26 -0
package/drizzle/0001_puzzling_purple_man.sql +26 -0
package/drizzle/0002_sparkling_paper_doll.sql +15 -0
package/drizzle/0003_married_senator_kelly.sql +1 -0
package/drizzle/0004_crazy_miek.sql +2 -0
package/drizzle/0005_tearful_randall_flagg.sql +1 -0
package/drizzle/meta/0000_snapshot.json +232 -0
package/drizzle/meta/0001_snapshot.json +434 -0
package/drizzle/meta/0002_snapshot.json +551 -0
package/drizzle/meta/0003_snapshot.json +557 -0
package/drizzle/meta/0004_snapshot.json +573 -0
package/drizzle/meta/0005_snapshot.json +574 -0
package/drizzle/meta/_journal.json +48 -0
package/drizzle.config.ts +7 -0
package/package.json +42 -0
package/src/agent-runner.test.ts +262 -0
package/src/agent-runner.ts +262 -0
package/src/chat/agent-loop.test.ts +119 -0
package/src/chat/agent-loop.ts +73 -0
package/src/chat/auto-apply.test.ts +237 -0
package/src/chat/chat-handler.ts +111 -0
package/src/chat/chat-service.streamturn.test.ts +417 -0
package/src/chat/chat-service.test.ts +250 -0
package/src/chat/chat-service.ts +923 -0
package/src/chat/classifier-service.ts +64 -0
package/src/chat/classifier.logic.test.ts +92 -0
package/src/chat/classifier.logic.ts +71 -0
package/src/chat/conversation-store.it.test.ts +203 -0
package/src/chat/conversation-store.test.ts +248 -0
package/src/chat/conversation-store.ts +237 -0
package/src/chat/decision.logic.test.ts +45 -0
package/src/chat/decision.logic.ts +54 -0
package/src/chat/llm-provider.test.ts +63 -0
package/src/chat/llm-provider.ts +67 -0
package/src/chat/model-error.logic.test.ts +60 -0
package/src/chat/model-error.logic.ts +65 -0
package/src/chat/normalize-messages.logic.test.ts +101 -0
package/src/chat/normalize-messages.logic.ts +65 -0
package/src/chat/permission-mode.logic.test.ts +70 -0
package/src/chat/permission-mode.logic.ts +45 -0
package/src/chat/read-invoker.ts +72 -0
package/src/chat/replay.test.ts +174 -0
package/src/chat/scrub-content.test.ts +183 -0
package/src/chat/scrub-content.ts +154 -0
package/src/chat/sdk-tools.test.ts +168 -0
package/src/chat/sdk-tools.ts +181 -0
package/src/chat/title-service.test.ts +146 -0
package/src/chat/title-service.ts +111 -0
package/src/chat/title.logic.test.ts +98 -0
package/src/chat/title.logic.ts +102 -0
package/src/extension-points.ts +41 -0
package/src/generated/docs-index.ts +3020 -0
package/src/hardening/handler-authz.test.ts +282 -0
package/src/hardening/no-secret-leak.test.ts +303 -0
package/src/hooks.ts +33 -0
package/src/index.ts +542 -0
package/src/mcp/connection-registry.test.ts +25 -0
package/src/mcp/connection-registry.ts +54 -0
package/src/mcp/mcp-conformance.it.test.ts +128 -0
package/src/mcp/server.test.ts +285 -0
package/src/mcp/server.ts +300 -0
package/src/mcp/tool-invoker.ts +65 -0
package/src/openai-provider.test.ts +64 -0
package/src/openai-provider.ts +146 -0
package/src/projection.test.ts +97 -0
package/src/projection.ts +132 -0
package/src/propose-apply/args-hash.test.ts +26 -0
package/src/propose-apply/args-hash.ts +30 -0
package/src/propose-apply/service.test.ts +423 -0
package/src/propose-apply/service.ts +419 -0
package/src/propose-apply/store.test.ts +136 -0
package/src/propose-apply/store.ts +224 -0
package/src/propose-apply/token.test.ts +52 -0
package/src/propose-apply/token.ts +71 -0
package/src/rate-limit/spend-ledger.it.test.ts +224 -0
package/src/rate-limit/spend-ledger.test.ts +176 -0
package/src/rate-limit/spend-ledger.ts +162 -0
package/src/rate-limit/tool-budget.it.test.ts +173 -0
package/src/rate-limit/tool-budget.test.ts +58 -0
package/src/rate-limit/tool-budget.ts +107 -0
package/src/registry-wiring.test.ts +131 -0
package/src/registry-wiring.ts +68 -0
package/src/resolver.test.ts +156 -0
package/src/resolver.ts +78 -0
package/src/router.test.ts +78 -0
package/src/router.ts +345 -0
package/src/schema.ts +284 -0
package/src/serializer.test.ts +88 -0
package/src/serializer.ts +42 -0
package/src/tool-registry.ts +58 -0
package/src/tools/composite-tools.ts +24 -0
package/src/tools/docs-tools.test.ts +150 -0
package/src/tools/docs-tools.ts +115 -0
package/src/tools/probe-url.test.ts +51 -0
package/src/tools/probe-url.ts +146 -0
package/src/tools/rank-docs.test.ts +153 -0
package/src/tools/rank-docs.ts +209 -0
package/src/tools/script-context-extract.test.ts +93 -0
package/src/tools/script-context-extract.ts +283 -0
package/src/tools/ssrf-guard.test.ts +69 -0
package/src/tools/ssrf-guard.ts +108 -0
package/src/tools/tool-set.e2e.test.ts +64 -0
package/src/user-rpc-client.test.ts +45 -0
package/src/user-rpc-client.ts +60 -0
package/tsconfig.json +26 -0

package/src/chat/normalize-messages.logic.test.ts ADDED Viewed

@@ -0,0 +1,101 @@
+import { describe, expect, test } from "bun:test";
+import type { ModelMessage } from "ai";
+import { normalizeModelMessages } from "./normalize-messages.logic";
describe("normalizeModelMessages", () => {
  // Small fixture builders so every case below reads like a transcript.
  const user = (content: string): ModelMessage => ({ role: "user", content });
  const assistant = (content: string): ModelMessage => ({
    role: "assistant",
    content,
  });

  test("leaves a well-formed alternating history untouched", () => {
    const history = [user("hi"), assistant("hello"), user("list incidents")];

    expect(normalizeModelMessages(history)).toEqual(history);
  });

  test("merges consecutive user messages (the failed-turn cascade)", () => {
    // Retries after a failed turn store one `user` row each, with no assistant
    // reply in between, so the history ends up as a run of user messages.
    const retries = ["first try", "second try", "third try"].map((text) =>
      user(text),
    );

    const normalized = normalizeModelMessages(retries);

    expect(normalized).toEqual([user("first try\n\nsecond try\n\nthird try")]);
  });

  test("merges consecutive assistant messages", () => {
    const history = [user("hi"), assistant("part one"), assistant("part two")];

    const normalized = normalizeModelMessages(history);

    expect(normalized).toEqual([user("hi"), assistant("part one\n\npart two")]);
  });

  test("drops empty / whitespace-only text rows, then merges across the gap", () => {
    const history = [user("before"), assistant("   "), user("after")];

    // Once the blank assistant row is gone the two user rows are adjacent, so
    // they collapse into one.
    expect(normalizeModelMessages(history)).toEqual([user("before\n\nafter")]);
  });

  test("keeps structured (tool-call / tool-result) content verbatim", () => {
    const toolCall: ModelMessage = {
      role: "assistant",
      content: [
        {
          type: "tool-call",
          toolCallId: "c1",
          toolName: "listIncidents",
          input: {},
        },
      ],
    };
    const toolResult: ModelMessage = {
      role: "tool",
      content: [
        {
          type: "tool-result",
          toolCallId: "c1",
          toolName: "listIncidents",
          output: { type: "json", value: { count: 0 } },
        },
      ],
    };
    const history = [
      user("run it"),
      toolCall,
      toolResult,
      assistant("no incidents"),
    ];

    // Rows with structured content must pass through as-is: merging or
    // dropping one would leave the other half of the tool pair orphaned.
    expect(normalizeModelMessages(history)).toEqual(history);
  });

  test("strips a leading non-user message (corruption guard)", () => {
    const history = [assistant("stray leading assistant"), user("real start")];

    expect(normalizeModelMessages(history)).toEqual([user("real start")]);
  });

  test("a trailing user message always survives (never empties the array)", () => {
    const normalized = normalizeModelMessages([assistant(""), user("hello")]);

    expect(normalized.length).toBeGreaterThan(0);
    expect(normalized.at(-1)).toEqual({ role: "user", content: "hello" });
  });
});

package/src/chat/normalize-messages.logic.ts ADDED Viewed

@@ -0,0 +1,65 @@
+import type { ModelMessage } from "ai";
/**
 * Defensive normalization of the model message history before it is sent to the
 * provider. Strict OpenAI-compatible providers (notably DeepSeek) reject a
 * history that does not strictly alternate user/assistant, or that contains an
 * empty-content message, with HTTP 400 `invalid_prompt`.
 *
 * Two real corruptions this guards against:
 *  - A FAILED turn persists no assistant reply (`onFinish` never runs on error),
 *    so each retry appends another `user` row, leaving consecutive `user`
 *    messages. Without this, ONE provider hiccup would brick the whole
 *    conversation permanently (every later message also 400s).
 *  - An assistant turn that produced only a tool call (no text) can persist an
 *    empty-text row.
 *
 * Rules (pure, total - never throws, never mutates the input array):
 *  1. Drop messages whose content is an empty / whitespace-only STRING. Messages
 *     with structured content (tool-call / tool-result parts) are ALWAYS kept -
 *     they arrive as valid assistant+tool pairs from replay and dropping one
 *     half would orphan the other.
 *  2. Merge consecutive same-role STRING messages (user or assistant) into a
 *     single message joined by a blank line, so the roles alternate.
 *  3. Drop any leading non-`user` messages: a valid sequence (the system prompt
 *     is sent separately) must start with a user message.
 *
 * @param messages History in chronological order. The chat turn is expected to
 *   append the new user message last, in which case the result is non-empty.
 * @returns A new array. It is EMPTY when the input contains no `user` message
 *   at all (rule 3 then strips every row) - callers that cannot guarantee a
 *   user message must handle that case.
 */
export function normalizeModelMessages(
  messages: readonly ModelMessage[],
): ModelMessage[] {
  const normalized: ModelMessage[] = [];
  for (const message of messages) {
    // `null` marks structured (non-string) content, which rules 1 and 2 skip.
    const text = typeof message.content === "string" ? message.content : null;
    // Rule 1: drop empty/whitespace-only text rows.
    if (text !== null && text.trim() === "") continue;
    // Rule 2: merge with the previous kept row when both are same-role plain
    // text. The previous row is read AFTER rule 1, so rows on either side of a
    // dropped blank row still merge.
    const tailIndex = normalized.length - 1;
    const tail = tailIndex >= 0 ? normalized[tailIndex] : undefined;
    if (
      tail !== undefined &&
      text !== null &&
      typeof tail.content === "string" &&
      tail.role === message.role &&
      (message.role === "user" || message.role === "assistant")
    ) {
      const merged = `${tail.content}\n\n${text}`;
      // Build a fresh object per role so the input messages are never mutated.
      normalized[tailIndex] =
        message.role === "user"
          ? { role: "user", content: merged }
          : { role: "assistant", content: merged };
      continue;
    }
    normalized.push(message);
  }
  // Rule 3: strip any leading non-user rows (corruption / a dangling assistant)
  // with a single scan instead of repeated `shift()` calls.
  const firstUserIndex = normalized.findIndex((row) => row.role === "user");
  if (firstUserIndex === -1) return [];
  return firstUserIndex === 0 ? normalized : normalized.slice(firstUserIndex);
}

package/src/chat/permission-mode.logic.test.ts ADDED Viewed

@@ -0,0 +1,70 @@
+import { describe, expect, test } from "bun:test";
+import type { AiPermissionMode, AiToolEffect } from "@checkstack/ai-common";
+import { decideToolDisposition } from "./permission-mode.logic";
// Every permission mode / tool effect the suite sweeps over.
const MODES: AiPermissionMode[] = ["approve", "auto"];
const EFFECTS: AiToolEffect[] = ["read", "mutate", "destructive"];

describe("decideToolDisposition — 3-tier gating", () => {
  test("read ALWAYS auto-runs, in BOTH modes (mode never gates reads)", () => {
    for (const mode of MODES) {
      const disposition = decideToolDisposition({ effect: "read", mode });
      expect(disposition).toBe("auto-run");
    }
  });

  test("mutate INHERITS the mode: auto -> auto-apply, approve -> propose", () => {
    const inAutoMode = decideToolDisposition({ effect: "mutate", mode: "auto" });
    const inApproveMode = decideToolDisposition({
      effect: "mutate",
      mode: "approve",
    });

    expect(inAutoMode).toBe("auto-apply");
    expect(inApproveMode).toBe("propose");
  });

  describe("SECURITY INVARIANT: destructive can NEVER auto-apply", () => {
    test("destructive ALWAYS proposes, in BOTH modes", () => {
      for (const mode of MODES) {
        const disposition = decideToolDisposition({
          effect: "destructive",
          mode,
        });
        expect(disposition).toBe("propose");
      }
    });

    test("NO permission-mode value changes a destructive tool's requirement for a human apply", () => {
      // Only the mode differs between the two calls. Should the mode ever leak
      // into the destructive branch, the two results would stop matching.
      const whenApprove = decideToolDisposition({
        effect: "destructive",
        mode: "approve",
      });
      const whenAuto = decideToolDisposition({
        effect: "destructive",
        mode: "auto",
      });

      expect(whenAuto).toBe(whenApprove);
      expect(whenAuto).toBe("propose");
    });

    test("no (effect, mode) pair routes a destructive tool to auto-apply", () => {
      for (const mode of MODES) {
        const disposition = decideToolDisposition({
          effect: "destructive",
          mode,
        });
        expect(disposition).not.toBe("auto-apply");
      }
    });

    test("auto-apply is reachable ONLY via (mutate, auto)", () => {
      // Enumerate the full (effect, mode) grid, effect-major, and keep only the
      // pairs that resolve to auto-apply.
      const autoApplyPairs = EFFECTS.flatMap((effect) =>
        MODES.map((mode) => ({ effect, mode })),
      ).filter((pair) => decideToolDisposition(pair) === "auto-apply");

      expect(autoApplyPairs).toEqual([{ effect: "mutate", mode: "auto" }]);
    });
  });
});

package/src/chat/permission-mode.logic.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import type { AiToolEffect } from "@checkstack/ai-common";
+import type { AiPermissionMode } from "@checkstack/ai-common";
/**
 * The disposition the agent loop bakes into a tool's `execute`, decided purely
 * from the tool's `effect` and the conversation's permission mode. This is the
 * single source of truth for the 3-tier gating model (Phase 4) and is kept
 * DOM-free / dependency-free so it is exhaustively unit-testable.
 *
 *  - `auto-run`    -> run the read directly (read tools, BOTH modes).
 *  - `auto-apply`  -> propose + apply SERVER-SIDE in one shot, no human click
 *                     (mutate tools, `auto` mode ONLY).
 *  - `propose`     -> propose + return a confirm card the human must `applyTool`
 *                     (mutate tools in `approve` mode; ALL destructive tools in
 *                     BOTH modes; any effect this function does not recognise).
 */
export type ToolDisposition = "auto-run" | "auto-apply" | "propose";

/**
 * Decide a tool's disposition from its effect + the conversation's mode.
 *
 * SECURITY INVARIANT (structurally enforced here): `destructive` ALWAYS returns
 * `propose`, regardless of `mode`. The `mode` parameter is consulted ONLY in
 * the explicit `mutate` case, so there is no `(effect, mode)` pair that lets a
 * destructive tool reach `auto-apply`.
 *
 * FAIL-CLOSED: `auto-apply` is returned only for an effect that is literally
 * `"mutate"` with a mode that is literally `"auto"`. An effect value outside
 * the known set (e.g. untyped data reaching this function at runtime) and any
 * mode other than `"auto"` both resolve to `propose`, i.e. a human must apply.
 *
 * @param effect The tool's declared effect tier.
 * @param mode The conversation's permission mode.
 * @returns How the agent loop should execute the tool.
 */
export function decideToolDisposition({
  effect,
  mode,
}: {
  effect: AiToolEffect;
  mode: AiPermissionMode;
}): ToolDisposition {
  switch (effect) {
    // Tier 1: reads ALWAYS auto-run, in BOTH modes. Mode never gates reads.
    case "read":
      return "auto-run";
    // Tier 3: destructive ALWAYS requires a human apply, in BOTH modes. The
    // mode is NEVER consulted here -> destructive can never auto-apply.
    case "destructive":
      return "propose";
    // Tier 2: mutate INHERITS the mode. `auto` auto-applies server-side;
    // `approve` surfaces a confirm card. The ONLY branch the mode governs.
    case "mutate":
      return mode === "auto" ? "auto-apply" : "propose";
    // Unrecognised effect: never fall through to the mode-governed branch;
    // require a human apply instead.
    default:
      return "propose";
  }
}

package/src/chat/read-invoker.ts ADDED Viewed

@@ -0,0 +1,72 @@
+import { createORPCClient } from "@orpc/client";
+import { RPCLink } from "@orpc/client/fetch";
/** Arguments for one loopback read invocation. */
interface ChatReadInvokeArgs {
  /** First-level key selecting the plugin's client on the loopback client. */
  pluginId: string;
  /** Key of the procedure to call on that plugin's client. */
  procedureKey: string;
  /** Value passed to the procedure unchanged. */
  input: unknown;
  /** Auth headers forwarded verbatim from the chat request (cookie/bearer). */
  forwardHeaders: Record<string, string>;
}

/**
 * Executes the SOURCE oRPC procedure behind a chat read-tool by re-entering the
 * live router AS THE LOGGED-IN USER. Handler-side authorization therefore runs
 * exactly as it would for any other caller (decision §1.5 — the model is an
 * untrusted caller that merely picks arguments; it never bypasses authz).
 *
 * This mirrors the MCP `tool-invoker`, except that the chat request's OWN auth
 * (session cookie and/or bearer) is forwarded to the loopback endpoint rather
 * than an OAuth bearer. The API route re-authenticates that request to the
 * SAME principal the chat handler resolved, then runs `autoAuthMiddleware`.
 * The trusted service client is deliberately NOT used (it would skip the
 * user's authz).
 */
export interface ChatReadInvoker {
  invoke(args: ChatReadInvokeArgs): Promise<unknown>;
}
/** Callable shape of a single procedure on the loopback client. */
type LoopbackProcedure = (input: unknown) => Promise<unknown>;
/** The loopback client viewed as plugin id -> procedure key -> procedure. */
type LoopbackClient = Record<string, Record<string, LoopbackProcedure>>;

/**
 * Builds a {@link ChatReadInvoker} that targets `${internalUrl}/api`.
 *
 * Every `invoke` call creates its own `RPCLink` and client carrying that
 * call's `forwardHeaders`, looks up `client[pluginId][procedureKey]`, and calls
 * it with `input`.
 *
 * @param internalUrl Base URL of the loopback endpoint; `/api` is appended.
 * @throws Error (from `invoke`) when the client has no entry for `pluginId`.
 * @throws TypeError (from `invoke`) when the resolved procedure is not callable.
 */
export function createChatReadInvoker({
  internalUrl,
}: {
  internalUrl: string;
}): ChatReadInvoker {
  const apiUrl = `${internalUrl}/api`;

  async function invoke({
    pluginId,
    procedureKey,
    input,
    forwardHeaders,
  }: Parameters<ChatReadInvoker["invoke"]>[0]): Promise<unknown> {
    // A fresh client per call, so the headers always belong to this request.
    const client = createORPCClient(
      new RPCLink({ url: apiUrl, headers: forwardHeaders }),
    ) as LoopbackClient;

    const pluginClient = client[pluginId];
    if (!pluginClient) {
      throw new Error(`No RPC client for plugin "${pluginId}".`);
    }

    const procedure = pluginClient[procedureKey];
    if (typeof procedure === "function") {
      return procedure(input);
    }
    throw new TypeError(
      `Procedure "${pluginId}.${procedureKey}" is not callable.`,
    );
  }

  return { invoke };
}
/**
 * Picks the auth headers of the incoming chat request that may be forwarded to
 * the loopback endpoint: the session `cookie` and/or the bearer
 * `authorization` header. This is a strict allow-list — no other client header
 * is ever copied.
 *
 * @param req The incoming chat request.
 * @returns A plain object holding only the allow-listed headers that are
 *   present with a non-empty value (keys `cookie`, then `authorization`).
 */
export function forwardableAuthHeaders(
  req: Request,
): Record<string, string> {
  const forwarded: Record<string, string> = {};
  for (const name of ["cookie", "authorization"]) {
    const value = req.headers.get(name);
    // Absent (null) and empty-string headers are both skipped.
    if (value) forwarded[name] = value;
  }
  return forwarded;
}

package/src/chat/replay.test.ts ADDED Viewed

@@ -0,0 +1,174 @@
+import { describe, expect, test } from "bun:test";
+import type { ModelMessage } from "ai";
+import { toModelMessages } from "./chat-service";
+/**
+ * TOOL-MESSAGE REPLAY (Phase 6) — a RESUMED multi-turn conversation must replay
+ * the full prior TOOL-CALL history to the model, not just assistant text.
+ *
+ * `toModelMessages` reconstructs a persisted `ai_messages` row into AI-SDK
+ * `ModelMessage`s. When a row carries `modelMessages` (the canonical
+ * `ResponseMessage[]` the SDK produced — assistant tool-call parts + tool-result
+ * parts), they are replayed VERBATIM, so the model sees the prior tool
+ * interaction on the next turn. Legacy text-only rows still replay as text.
+ */
// Identifiers shared by the call part and the result part of the fixture.
const incidentListCallId = "tc1";
const incidentListToolName = "incident.list";

/**
 * Fixture: the model messages persisted for ONE assistant row that performed a
 * complete tool round-trip (what onFinish stores) — an assistant message with
 * text + tool-call, the matching tool-result, then the final assistant text.
 */
const toolRoundTrip: Array<Record<string, unknown>> = [
  {
    role: "assistant",
    content: [
      { type: "text", text: "Let me check open incidents." },
      {
        type: "tool-call",
        toolCallId: incidentListCallId,
        toolName: incidentListToolName,
        input: { status: "open" },
      },
    ],
  },
  {
    role: "tool",
    content: [
      {
        type: "tool-result",
        toolCallId: incidentListCallId,
        toolName: incidentListToolName,
        output: { type: "json", value: { rows: [{ id: 7 }] } },
      },
    ],
  },
  {
    role: "assistant",
    content: [{ type: "text", text: "There is 1 open incident (#7)." }],
  },
];
describe("toModelMessages: full tool-call history replay", () => {
  test("an assistant row WITH modelMessages replays the assistant + tool messages verbatim", () => {
    /** A replayed message whose content is an array of parts. */
    type PartsMessage = ModelMessage & {
      content: Array<Record<string, unknown>>;
    };

    const messages = toModelMessages({
      role: "assistant",
      content: { text: "There is 1 open incident (#7)." },
      modelMessages: toolRoundTrip,
    });

    // The single stored row fans out into assistant -> tool -> assistant, so
    // the earlier tool interaction is rebuilt in full for the model.
    expect(messages).toHaveLength(3);
    const roles = messages.map((message) => message.role);
    expect(roles).toEqual(["assistant", "tool", "assistant"]);

    // The structured parts are still there — not only the text.
    const callMessage = messages[0] as PartsMessage;
    const callPart = callMessage.content[1];
    expect(callPart?.type).toBe("tool-call");
    expect(callPart?.toolCallId).toBe("tc1");

    const resultMessage = messages[1] as PartsMessage;
    const resultPart = resultMessage.content[0];
    expect(resultPart?.type).toBe("tool-result");
    expect(resultPart?.toolCallId).toBe("tc1");
  });

  test("a user row replays as a plain user text message", () => {
    const messages = toModelMessages({
      role: "user",
      content: { text: "what changed in the last hour?" },
      modelMessages: null,
    });

    expect(messages).toEqual([
      { role: "user", content: "what changed in the last hour?" },
    ]);
  });

  test("a LEGACY assistant row (text only, no modelMessages) still replays as text", () => {
    // modelMessages is null on rows written before the replay column existed.
    const messages = toModelMessages({
      role: "assistant",
      content: { text: "a deploy at 14:02" },
      modelMessages: null,
    });

    expect(messages).toEqual([
      { role: "assistant", content: "a deploy at 14:02" },
    ]);
  });

  test("a fully-malformed modelMessages array falls back to the text content", () => {
    const messages = toModelMessages({
      role: "assistant",
      content: { text: "fallback text" },
      // Neither entry has a usable role; with nothing valid left, the row
      // falls back to its text.
      modelMessages: [{ nonsense: true }, { role: "banana", content: [] }],
    });

    expect(messages).toEqual([{ role: "assistant", content: "fallback text" }]);
  });

  test("a PARTIALLY-corrupt modelMessages array falls back to text (all-or-nothing, no dangling pair)", () => {
    // Entries one and two form a valid tool-call / tool-result pair; entry
    // three (the follow-up assistant message) is corrupt. Discarding just the
    // bad entry would leave a dangling call/result pair the provider rejects,
    // so the row has to degrade to its text form as a whole.
    const [validCall, validResult] = toolRoundTrip;
    const corruptFollowUp = { role: 42, content: [] }; // role is not a string

    const messages = toModelMessages({
      role: "assistant",
      content: { text: "There is 1 open incident (#7)." },
      modelMessages: [validCall, validResult, corruptFollowUp],
    });

    // The expectation is text only, NOT a partial [assistant, tool].
    expect(messages).toEqual([
      { role: "assistant", content: "There is 1 open incident (#7)." },
    ]);
  });

  test("a row missing the tool-result half of a pair falls back to text, never an orphaned tool-call", () => {
    const [assistantToolCall] = toolRoundTrip; // the tc1 tool-call
    const roleLessResult = { type: "tool-result" }; // no role -> row is invalid

    const messages = toModelMessages({
      role: "assistant",
      content: { text: "checked incidents" },
      modelMessages: [assistantToolCall, roleLessResult],
    });

    expect(messages).toEqual([
      { role: "assistant", content: "checked incidents" },
    ]);
  });

  test("a standalone tool row with no modelMessages is skipped (no dangling tool result)", () => {
    const messages = toModelMessages({
      role: "tool",
      content: { text: "" },
      modelMessages: null,
    });

    expect(messages).toEqual([]);
  });

  test("a multi-turn transcript reconstructs in order across rows", () => {
    const rows = [
      { role: "user", content: { text: "list incidents" }, modelMessages: null },
      {
        role: "assistant",
        content: { text: "1 open (#7)" },
        modelMessages: toolRoundTrip,
      },
      { role: "user", content: { text: "ack it" }, modelMessages: null },
    ];

    const transcript: ModelMessage[] = rows.flatMap((row) =>
      toModelMessages(row),
    );

    // user, then the inline round-trip (assistant, tool, assistant), then user.
    const roles = transcript.map((message) => message.role);
    expect(roles).toEqual(["user", "assistant", "tool", "assistant", "user"]);
  });
});