npm - @desplega.ai/agent-swarm - Versions diffs - 1.76.2 → 1.77.0 - Mend

@desplega.ai/agent-swarm 1.76.2 → 1.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/openapi.json +9 -2
package/package.json +1 -1
package/src/be/memory/raters/llm.ts +26 -0
package/src/cli.tsx +3 -24
package/src/commands/credential-wait.ts +31 -6
package/src/commands/runner.ts +1045 -1059
package/src/hooks/hook.ts +174 -147
package/src/http/status.ts +8 -0
package/src/providers/claude-adapter.ts +9 -1
package/src/providers/codex-adapter.ts +232 -2
package/src/providers/codex-oauth/storage.ts +21 -0
package/src/providers/pi-mono-extension.ts +114 -77
package/src/telemetry.ts +28 -0
package/src/tests/claude-stop-hook.test.ts +432 -0
package/src/tests/codex-adapter.test.ts +436 -1
package/src/tests/internal-ai/complete-structured.test.ts +276 -0
package/src/tests/internal-ai/credentials.test.ts +264 -0
package/src/tests/internal-ai/schema-parity.test.ts +103 -0
package/src/tests/internal-ai/summarize-session.test.ts +105 -0
package/src/tests/opencode-plugin.test.ts +496 -0
package/src/tests/pi-mono-extension.test.ts +347 -0
package/src/tests/reload-config.test.ts +9 -1
package/src/tests/status.test.ts +4 -0
package/src/tests/telemetry-init.test.ts +137 -1
package/src/tests/template-recommendations.test.ts +1 -0
package/src/utils/internal-ai/complete-structured.ts +296 -0
package/src/utils/internal-ai/credentials.ts +175 -0
package/src/utils/internal-ai/index.ts +31 -0
package/src/utils/internal-ai/models.ts +46 -0
package/src/utils/internal-ai/summarize-session.ts +101 -0

package/src/tests/internal-ai/complete-structured.test.ts ADDED Viewed

@@ -0,0 +1,276 @@
+import { describe, expect, test } from "bun:test";
+import { Type } from "typebox";
+import { z } from "zod";
+import { completeStructured } from "../../utils/internal-ai/complete-structured.js";
+import type { ResolvedCredential } from "../../utils/internal-ai/credentials.js";
+/** Zod schema handed to `completeStructured` as `zodSchema` by the tests below. */
+const ResultZodSchema = z.object({ summary: z.string(), count: z.number() });
+/** TypeBox schema with the same two fields, handed over as `toolSchema`. */
+const ResultToolSchema = Type.Object({ summary: Type.String(), count: Type.Number() });
+/**
+ * Creates a stub assistant message wrapping `content`, meant to be returned
+ * from an injected `_complete`. Every usage and cost counter is zero and the
+ * timestamp is taken at call time.
+ */
+function makeMsg(content: any[]): any {
+  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
+  const zeroUsage = {
+    input: 0,
+    output: 0,
+    cacheRead: 0,
+    cacheWrite: 0,
+    totalTokens: 0,
+    cost: zeroCost,
+  };
+  const message = {
+    role: "assistant",
+    content,
+    api: "responses",
+    provider: "openai",
+    model: "gpt-5.4-mini",
+    usage: zeroUsage,
+    stopReason: "toolUse",
+    timestamp: Date.now(),
+  };
+  return message;
+}
+describe("completeStructured", () => {
+  // The very first response already carries a schema-conforming tool call,
+  // so exactly one completion is requested.
+  test("happy path: tool-call matches schema → returns parsed object, no retries", async () => {
+    let completeCalls = 0;
+    const parsed = await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "sys",
+      userPrompt: "user",
+      _credentialOverride: {
+        kind: "openrouter",
+        apiKey: "test",
+        modelDefault: "openrouter/google/gemini-3-flash-preview",
+      },
+      _complete: async () => {
+        completeCalls += 1;
+        const toolCall = {
+          type: "toolCall",
+          id: "call_1",
+          name: "record_result",
+          arguments: { summary: "ok", count: 7 },
+        };
+        return makeMsg([toolCall]);
+      },
+    });
+    expect(completeCalls).toBe(1);
+    expect(parsed).toEqual({ summary: "ok", count: 7 });
+  });
+  // Every response is plain text, so all three allowed attempts are consumed
+  // and the call resolves to null. console.error is stubbed to count log calls.
+  test("no tool call for 3 attempts → returns null, exactly retries invocations", async () => {
+    const realConsoleError = console.error;
+    let completeCalls = 0;
+    let errorLogCalls = 0;
+    console.error = () => {
+      errorLogCalls += 1;
+    };
+    try {
+      const outcome = await completeStructured({
+        zodSchema: ResultZodSchema,
+        toolSchema: ResultToolSchema,
+        toolName: "record_result",
+        toolDescription: "Record the result.",
+        systemPrompt: "sys",
+        userPrompt: "user",
+        retries: 3,
+        _credentialOverride: {
+          kind: "openrouter",
+          apiKey: "test",
+          modelDefault: "openrouter/google/gemini-3-flash-preview",
+        },
+        _complete: async () => {
+          completeCalls += 1;
+          const textOnly = [{ type: "text", text: "sure, here you go" }];
+          return makeMsg(textOnly);
+        },
+      });
+      expect(completeCalls).toBe(3);
+      expect(outcome).toBeNull();
+      expect(errorLogCalls).toBeGreaterThanOrEqual(1);
+    } finally {
+      // Put the real logger back even when an assertion above throws.
+      console.error = realConsoleError;
+    }
+  });
+  // First response fails schema validation (no `count`); the second one is
+  // valid, so the call succeeds after exactly two completions.
+  test("bad shape then good shape → returns parsed object with 2 invocations", async () => {
+    let attempts = 0;
+    const parsed = await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "sys",
+      userPrompt: "user",
+      _credentialOverride: {
+        kind: "openrouter",
+        apiKey: "test",
+        modelDefault: "openrouter/google/gemini-3-flash-preview",
+      },
+      _complete: async () => {
+        attempts += 1;
+        const toolCall =
+          attempts === 1
+            ? {
+                type: "toolCall",
+                id: "call_1",
+                name: "record_result",
+                arguments: { summary: "ok" /* missing count */ },
+              }
+            : {
+                type: "toolCall",
+                id: "call_2",
+                name: "record_result",
+                arguments: { summary: "fixed", count: 42 },
+              };
+        return makeMsg([toolCall]);
+      },
+    });
+    expect(attempts).toBe(2);
+    expect(parsed).toEqual({ summary: "fixed", count: 42 });
+  });
+  // With a claude-cli credential the injected spawner is used; record what it
+  // was called with and hand back a JSON string as its output.
+  test("claude-cli kind via injected _spawnClaudeCli", async () => {
+    const seen = { calls: 0, prompt: "", model: "" };
+    const parsed = await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "SYSTEM",
+      userPrompt: "USER",
+      _credentialOverride: { kind: "claude-cli", modelDefault: "haiku" } as ResolvedCredential,
+      _spawnClaudeCli: async (prompt, model) => {
+        seen.calls += 1;
+        seen.prompt = prompt;
+        seen.model = model;
+        return JSON.stringify({ summary: "cli result", count: 1 });
+      },
+    });
+    expect(seen.calls).toBe(1);
+    expect(seen.prompt).toStartWith("SYSTEM\n\nUSER");
+    // On this path the prompt also carries the JSON schema after the user text.
+    expect(seen.prompt).toContain('matching this schema:\n{"');
+    expect(seen.model).toBe("haiku");
+    expect(parsed).toEqual({ summary: "cli result", count: 1 });
+  });
+  // The fourth spawner argument is captured and checked to be an object
+  // schema describing `summary` (string) and `count` (number), both required.
+  test("claude-cli kind: receives a JSON schema derived from zodSchema", async () => {
+    const captured: { schema?: object } = {};
+    await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "sys",
+      userPrompt: "user",
+      _credentialOverride: { kind: "claude-cli", modelDefault: "haiku" } as ResolvedCredential,
+      _spawnClaudeCli: async (_prompt, _model, _signal, jsonSchema) => {
+        captured.schema = jsonSchema;
+        return JSON.stringify({ summary: "ok", count: 1 });
+      },
+    });
+    expect(captured.schema).toBeDefined();
+    const jsonSchema = captured.schema as {
+      type: string;
+      properties: { summary: { type: string }; count: { type: string } };
+      required: string[];
+    };
+    const { properties, required } = jsonSchema;
+    expect(jsonSchema.type).toBe("object");
+    expect(properties.summary.type).toBe("string");
+    expect(properties.count.type).toBe("number");
+    expect(required).toEqual(expect.arrayContaining(["summary", "count"]));
+  });
+  // The first two spawner outputs are not JSON; only the third parses, so the
+  // spawner must have been called three times by the time a result comes back.
+  test("claude-cli kind: retries when JSON parse fails", async () => {
+    let spawnCount = 0;
+    const parsed = await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "sys",
+      userPrompt: "user",
+      retries: 3,
+      _credentialOverride: { kind: "claude-cli", modelDefault: "haiku" },
+      _spawnClaudeCli: async () => {
+        spawnCount += 1;
+        const stillFailing = spawnCount < 3;
+        return stillFailing ? "not json" : JSON.stringify({ summary: "third time", count: 99 });
+      },
+    });
+    expect(spawnCount).toBe(3);
+    expect(parsed).toEqual({ summary: "third time", count: 99 });
+  });
+  // The injected resolver yields no credential, so `_complete` must never run.
+  test("cred === null short-circuits and returns null without calling complete", async () => {
+    let completeCalls = 0;
+    const outcome = await completeStructured({
+      zodSchema: ResultZodSchema,
+      toolSchema: ResultToolSchema,
+      toolName: "record_result",
+      toolDescription: "Record the result.",
+      systemPrompt: "sys",
+      userPrompt: "user",
+      _resolveCredential: async () => null,
+      _complete: async () => {
+        completeCalls += 1;
+        const noContent: any[] = [];
+        return makeMsg(noContent);
+      },
+    });
+    expect(completeCalls).toBe(0);
+    expect(outcome).toBeNull();
+  });
+  // console.log is swapped for a collector while the call runs; afterwards one
+  // collected line must mention both the credential kind and the caller tag.
+  test("emits internal-ai: kind=... callerTag=... log on successful credential resolution", async () => {
+    const captured: string[] = [];
+    const realConsoleLog = console.log;
+    console.log = (...parts: unknown[]) => {
+      const line = parts.map(String).join(" ");
+      captured.push(line);
+    };
+    try {
+      await completeStructured({
+        zodSchema: ResultZodSchema,
+        toolSchema: ResultToolSchema,
+        toolName: "record_result",
+        toolDescription: "Record the result.",
+        systemPrompt: "sys",
+        userPrompt: "user",
+        callerTag: "session-summary:test",
+        _credentialOverride: {
+          kind: "openrouter",
+          apiKey: "test",
+          modelDefault: "openrouter/google/gemini-3-flash-preview",
+        },
+        _complete: async () => {
+          const toolCall = {
+            type: "toolCall",
+            id: "1",
+            name: "record_result",
+            arguments: { summary: "ok", count: 1 },
+          };
+          return makeMsg([toolCall]);
+        },
+      });
+    } finally {
+      // Restore the real logger whether or not the call above threw.
+      console.log = realConsoleLog;
+    }
+    const isExpectedLine = (line: string) =>
+      line.includes("internal-ai: kind=openrouter") && line.includes("callerTag=session-summary:test");
+    expect(captured.find(isExpectedLine)).toBeDefined();
+  });
+});

package/src/tests/internal-ai/credentials.test.ts ADDED Viewed

@@ -0,0 +1,264 @@
+import { describe, expect, test } from "bun:test";
+import {
+  type ResolveCredentialOptions,
+  resolveCredential,
+} from "../../utils/internal-ai/credentials.js";
+/**
+ * Test helper: assembles a `ResolveCredentialOptions` in which every hook the
+ * caller did not supply is replaced by a stub that yields nothing (`undefined`
+ * or `null`), `env` defaults to an empty object and `callerTag` to "test".
+ * `apiUrl` and `apiKey` are passed through untouched.
+ */
+function makeOpts(
+  overrides: Partial<ResolveCredentialOptions> & { env?: NodeJS.ProcessEnv } = {},
+): ResolveCredentialOptions {
+  const { apiUrl, apiKey } = overrides;
+  const env = overrides.env ?? {};
+  const _getEnvApiKey = overrides._getEnvApiKey ?? (() => undefined);
+  const _getValidCodexOAuth = overrides._getValidCodexOAuth ?? (async () => null);
+  const _getOAuthApiKey = overrides._getOAuthApiKey ?? (async () => null);
+  const _persistCodexOAuth = overrides._persistCodexOAuth ?? (async () => undefined);
+  const callerTag = overrides.callerTag ?? "test";
+  return {
+    env,
+    _getEnvApiKey,
+    _getValidCodexOAuth,
+    _getOAuthApiKey,
+    _persistCodexOAuth,
+    apiUrl,
+    apiKey,
+    callerTag,
+  };
+}
+describe("resolveCredential", () => {
+  // Only the OpenRouter key is set; expect the openrouter credential.
+  test("OPENROUTER_API_KEY wins", async () => {
+    const env = { OPENROUTER_API_KEY: "or-1" };
+    const resolved = await resolveCredential(makeOpts({ env }));
+    expect(resolved).not.toBeNull();
+    expect(resolved?.kind).toBe("openrouter");
+    // Guard narrows the union so the variant-specific fields type-check.
+    if (resolved?.kind !== "openrouter") return;
+    expect(resolved.apiKey).toBe("or-1");
+    expect(resolved.modelDefault).toBe("openrouter/google/gemini-3-flash-preview");
+  });
+  // Only the Anthropic key is set; expect the anthropic credential.
+  test("ANTHROPIC_API_KEY when no openrouter", async () => {
+    const env = { ANTHROPIC_API_KEY: "sk-ant-1" };
+    const resolved = await resolveCredential(makeOpts({ env }));
+    expect(resolved?.kind).toBe("anthropic");
+    // Guard narrows the union so the variant-specific fields type-check.
+    if (resolved?.kind !== "anthropic") return;
+    expect(resolved.apiKey).toBe("sk-ant-1");
+    expect(resolved.modelDefault).toBe("anthropic/claude-haiku-4-5");
+  });
+  // Only the OpenAI key is set; expect the openai credential.
+  test("OPENAI_API_KEY when no openrouter/anthropic", async () => {
+    const env = { OPENAI_API_KEY: "sk-o-1" };
+    const resolved = await resolveCredential(makeOpts({ env }));
+    expect(resolved?.kind).toBe("openai");
+    // Guard narrows the union so the variant-specific fields type-check.
+    if (resolved?.kind !== "openai") return;
+    expect(resolved.apiKey).toBe("sk-o-1");
+    expect(resolved.modelDefault).toBe("openai/gpt-5.4-mini");
+  });
+  // No env keys, but apiUrl/apiKey plus stubbed OAuth hooks are supplied:
+  // the resolved credential is openai-codex, keyed by the hook's `apiKey`.
+  test("codex OAuth (when apiUrl+apiKey provided)", async () => {
+    const options = makeOpts({
+      env: {},
+      apiUrl: "http://localhost:3013",
+      apiKey: "test-api-key",
+      _getValidCodexOAuth: async () => ({
+        access: "at_codex",
+        refresh: "rt_codex",
+        expires: Date.now() + 3600_000,
+        accountId: "acc-1",
+      }),
+      _getOAuthApiKey: async () => ({
+        newCredentials: {
+          access: "at_codex_refreshed",
+          refresh: "rt_codex_refreshed",
+          expires: Date.now() + 3600_000,
+        },
+        apiKey: "codex-api-key-derived",
+      }),
+    });
+    const resolved = await resolveCredential(options);
+    expect(resolved?.kind).toBe("openai-codex");
+    // Guard narrows the union so the variant-specific fields type-check.
+    if (resolved?.kind !== "openai-codex") return;
+    expect(resolved.apiKey).toBe("codex-api-key-derived");
+    expect(resolved.modelDefault).toBe("openai-codex/gpt-5.4-mini");
+  });
+  test("codex OAuth persists newCredentials when present", async () => {
+    // Collect every payload handed to the persistence hook. An array is used
+    // instead of a `let x = null` that only the callback reassigns: TypeScript
+    // keeps such a variable narrowed to `null` after the await, which is why
+    // the earlier version needed a non-null assertion on every read.
+    type PersistedCreds = { access: string; refresh: string; expires: number; accountId: string };
+    const persistedCalls: PersistedCreds[] = [];
+    await resolveCredential(
+      makeOpts({
+        env: {},
+        apiUrl: "http://localhost:3013",
+        apiKey: "test-api-key",
+        _getValidCodexOAuth: async () => ({
+          access: "at_codex",
+          refresh: "rt_codex",
+          expires: Date.now() + 3600_000,
+          accountId: "acc-1",
+        }),
+        _getOAuthApiKey: async () => ({
+          newCredentials: {
+            access: "at_rotated",
+            refresh: "rt_rotated",
+            expires: 999_999,
+          },
+          apiKey: "codex-derived",
+        }),
+        _persistCodexOAuth: async (_url, _key, creds) => {
+          persistedCalls.push(creds);
+        },
+      }),
+    );
+    // The most recent payload is the one that would have been stored.
+    const persisted = persistedCalls[persistedCalls.length - 1];
+    expect(persisted).toBeDefined();
+    expect(persisted?.access).toBe("at_rotated");
+    expect(persisted?.refresh).toBe("rt_rotated");
+    expect(persisted?.expires).toBe(999_999);
+    // `newCredentials` has no accountId; it comes from the stored credentials.
+    expect(persisted?.accountId).toBe("acc-1");
+  });
+  test("codex OAuth persistence failure does NOT block returning apiKey", async () => {
+    // The injected persistence hook always throws. If that rejection escaped
+    // `resolveCredential`, the `await` below would reject and fail this test,
+    // so reaching the assertions already shows the failure was contained.
+    const cred = await resolveCredential(
+      makeOpts({
+        env: {},
+        apiUrl: "http://localhost:3013",
+        apiKey: "test-api-key",
+        _getValidCodexOAuth: async () => ({
+          access: "at_codex",
+          refresh: "rt_codex",
+          expires: Date.now() + 3600_000,
+          accountId: "acc-1",
+        }),
+        _getOAuthApiKey: async () => ({
+          newCredentials: { access: "a", refresh: "r", expires: 1 },
+          apiKey: "still-usable",
+        }),
+        _persistCodexOAuth: async () => {
+          throw new Error("write failed");
+        },
+      }),
+    );
+    // `env` is empty, so the codex OAuth hooks are the only possible source of
+    // a credential here. Pin the kind and the derived key, as the test title
+    // promises, instead of accepting any truthy value.
+    expect(cred?.kind).toBe("openai-codex");
+    if (cred?.kind === "openai-codex") {
+      expect(cred.apiKey).toBe("still-usable");
+    }
+  });
+  // Only the Claude Code OAuth token is set; expect the claude-cli credential.
+  test("CLAUDE_CODE_OAUTH_TOKEN fallback", async () => {
+    const env = { CLAUDE_CODE_OAUTH_TOKEN: "claude-oauth" };
+    const resolved = await resolveCredential(makeOpts({ env }));
+    expect(resolved?.kind).toBe("claude-cli");
+    // Guard narrows the union before reading the variant's default model.
+    if (resolved?.kind !== "claude-cli") return;
+    expect(resolved.modelDefault).toBe("haiku");
+  });
+  test("AGENT_SWARM_CLAUDE_OAUTH_TOKEN mirror also resolves claude-cli (used in Stop-hook env)", async () => {
+    // Background: the claude CLI removes CLAUDE_CODE_OAUTH_TOKEN from the
+    // environment of hook subprocesses, so claude-adapter.ts mirrors it into
+    // AGENT_SWARM_CLAUDE_OAUTH_TOKEN. The mirror alone must be enough for the
+    // hook to reach the claude-cli fallback.
+    const env = { AGENT_SWARM_CLAUDE_OAUTH_TOKEN: "mirror-oauth" };
+    const resolved = await resolveCredential(makeOpts({ env }));
+    expect(resolved?.kind).toBe("claude-cli");
+    // Guard narrows the union before reading the variant's default model.
+    if (resolved?.kind !== "claude-cli") return;
+    expect(resolved.modelDefault).toBe("haiku");
+  });
+  // Empty env and default (empty) hooks: nothing can be resolved.
+  test("returns null when no creds resolve", async () => {
+    const options = makeOpts({ env: {} });
+    const resolved = await resolveCredential(options);
+    expect(resolved).toBeNull();
+  });
+  // Walks down the precedence chain by blanking out the winning key at each
+  // step, then checks that codex OAuth outranks the Claude Code OAuth token.
+  test("multi-cred precedence: OPENROUTER > ANTHROPIC > OPENAI > codex-OAuth > CLAUDE_CODE_OAUTH_TOKEN", async () => {
+    const allKeys = {
+      OPENROUTER_API_KEY: "or",
+      ANTHROPIC_API_KEY: "ant",
+      OPENAI_API_KEY: "oai",
+      CLAUDE_CODE_OAUTH_TOKEN: "claude",
+    };
+    // Every key present: openrouter is picked.
+    const withAllKeys = await resolveCredential(makeOpts({ env: allKeys }));
+    expect(withAllKeys?.kind).toBe("openrouter");
+    // OpenRouter key blanked: anthropic is next.
+    const withoutOpenRouter = await resolveCredential(
+      makeOpts({ env: { ...allKeys, OPENROUTER_API_KEY: undefined } as NodeJS.ProcessEnv }),
+    );
+    expect(withoutOpenRouter?.kind).toBe("anthropic");
+    // OpenRouter and Anthropic keys blanked: openai is next.
+    const withoutOpenRouterOrAnthropic = await resolveCredential(
+      makeOpts({
+        env: {
+          ...allKeys,
+          OPENROUTER_API_KEY: undefined,
+          ANTHROPIC_API_KEY: undefined,
+        } as NodeJS.ProcessEnv,
+      }),
+    );
+    expect(withoutOpenRouterOrAnthropic?.kind).toBe("openai");
+    // Only the Claude token in env, but codex OAuth is available: codex wins.
+    const codexVersusClaude = await resolveCredential(
+      makeOpts({
+        env: { CLAUDE_CODE_OAUTH_TOKEN: "claude" },
+        apiUrl: "http://localhost:3013",
+        apiKey: "k",
+        _getValidCodexOAuth: async () => ({
+          access: "a",
+          refresh: "r",
+          expires: Date.now() + 1_000_000,
+          accountId: "acc",
+        }),
+        _getOAuthApiKey: async () => ({
+          newCredentials: { access: "a", refresh: "r", expires: 1 },
+          apiKey: "codex-k",
+        }),
+      }),
+    );
+    expect(codexVersusClaude?.kind).toBe("openai-codex");
+  });
+  // Without apiUrl/apiKey the codex OAuth lookup hook must not be invoked at
+  // all; resolution goes straight to the Claude token.
+  test("no apiUrl/apiKey passed → codex OAuth probe is skipped entirely", async () => {
+    const probe = { called: false };
+    const options = makeOpts({
+      env: { CLAUDE_CODE_OAUTH_TOKEN: "claude-token" },
+      _getValidCodexOAuth: async () => {
+        probe.called = true;
+        return null;
+      },
+    });
+    const resolved = await resolveCredential(options);
+    expect(probe.called).toBe(false);
+    expect(resolved?.kind).toBe("claude-cli");
+  });
+  // apiUrl/apiKey are present but the codex lookup returns null, so the
+  // Claude token is used instead.
+  test("with apiUrl/apiKey but codex OAuth not configured → falls through to CLAUDE_CODE_OAUTH_TOKEN", async () => {
+    const options = makeOpts({
+      env: { CLAUDE_CODE_OAUTH_TOKEN: "claude-token" },
+      apiUrl: "http://localhost:3013",
+      apiKey: "k",
+      _getValidCodexOAuth: async () => null,
+    });
+    const resolved = await resolveCredential(options);
+    expect(resolved?.kind).toBe("claude-cli");
+  });
+  // Same fallback as above, resolved with the "claude-stop-hook" caller tag.
+  test("CLAUDE_CODE_OAUTH_TOKEN-only env → claude-cli kind (Phase 4 fallback)", async () => {
+    const options = makeOpts({
+      env: { CLAUDE_CODE_OAUTH_TOKEN: "sk-test-oauth" },
+      callerTag: "claude-stop-hook",
+    });
+    const resolved = await resolveCredential(options);
+    expect(resolved?.kind).toBe("claude-cli");
+    // Guard narrows the union before reading the variant's default model.
+    if (resolved?.kind !== "claude-cli") return;
+    expect(resolved.modelDefault).toBe("haiku");
+  });
+});

package/src/tests/internal-ai/schema-parity.test.ts ADDED Viewed

@@ -0,0 +1,103 @@
+import { describe, expect, test } from "bun:test";
+import { Value } from "typebox/value";
+import { SummaryWithRatingsSchema } from "../../be/memory/raters/llm.js";
+import { summaryToolSchema } from "../../utils/internal-ai/summarize-session.js";
+/**
+ * 10 valid + 10 invalid fixtures. Both validators (zod via `safeParse`,
+ * typebox via `Value.Check`) must agree on every fixture.
+ *
+ * Note: zod's `SummaryWithRatingsSchema` defaults `ratings` to `[]` when
+ * missing — so an object with no `ratings` key IS valid from zod's POV but
+ * NOT from typebox's strict `Type.Array` (it requires the key). To keep the
+ * two validators comparable, every object fixture below includes `ratings`;
+ * the only missing-key fixture omits `summary`, which both validators are
+ * expected to reject.
+ */
+/** Fixtures that both the zod and the typebox schema are expected to accept. */
+const VALID_CASES: unknown[] = [
+  // No ratings at all.
+  { summary: "Learned X", ratings: [] },
+  // A single rating.
+  {
+    summary: "Learned Y",
+    ratings: [{ id: "m1", score: 0.5, reasoning: "ok" }],
+  },
+  // Several ratings in one payload.
+  {
+    summary: "Multiple",
+    ratings: [
+      { id: "m1", score: 0, reasoning: "bad" },
+      { id: "m2", score: 1, reasoning: "great" },
+    ],
+  },
+  // The optional `referencesSource` field is present.
+  {
+    summary: "with refs",
+    ratings: [
+      { id: "m1", score: 0.7, reasoning: "useful", referencesSource: "github:foo/bar#1" },
+    ],
+  },
+  // Empty summary string.
+  { summary: "", ratings: [] },
+  // 2000-character summary.
+  { summary: "x".repeat(2000), ratings: [] },
+  // Score exactly 0.
+  {
+    summary: "boundary-0",
+    ratings: [{ id: "m1", score: 0, reasoning: "min" }],
+  },
+  // Score exactly 1.
+  {
+    summary: "boundary-1",
+    ratings: [{ id: "m1", score: 1, reasoning: "max" }],
+  },
+  // 100-character `referencesSource`.
+  {
+    summary: "long-ref",
+    ratings: [
+      { id: "m1", score: 0.5, reasoning: "ok", referencesSource: "x".repeat(100) },
+    ],
+  },
+  // 500-character `reasoning`.
+  {
+    summary: "max-reason",
+    ratings: [{ id: "m1", score: 0.5, reasoning: "x".repeat(500) }],
+  },
+];
+/** Fixtures that both the zod and the typebox schema are expected to reject. */
+const INVALID_CASES: unknown[] = [
+  // Values that are not objects at all.
+  null,
+  "string",
+  42,
+  // `summary` key absent.
+  { ratings: [] },
+  // `summary` is a number.
+  { summary: 42, ratings: [] },
+  // `ratings` is a string.
+  { summary: "ok", ratings: "not an array" },
+  // Rating without an `id`.
+  {
+    summary: "ok",
+    ratings: [{ score: 0.5, reasoning: "x" }],
+  },
+  // Rating score above 1.
+  {
+    summary: "ok",
+    ratings: [{ id: "m1", score: 1.5, reasoning: "x" }],
+  },
+  // Rating score below 0.
+  {
+    summary: "ok",
+    ratings: [{ id: "m1", score: -0.1, reasoning: "x" }],
+  },
+  // Rating whose `id` is a number.
+  {
+    summary: "ok",
+    ratings: [{ id: 7, score: 0.5, reasoning: "x" }],
+  },
+];
+describe("schema-parity: SummaryWithRatingsSchema (zod) vs summaryToolSchema (typebox)", () => {
+  /** Runs the zod validator, then the typebox validator, on one fixture. */
+  const runBoth = (fixture: unknown) => {
+    const zodOk = SummaryWithRatingsSchema.safeParse(fixture).success;
+    const typeboxOk = Value.Check(summaryToolSchema, fixture);
+    return { zodOk, typeboxOk };
+  };
+  // One test per valid fixture. Both validators must accept: a fixture that
+  // zod accepts but typebox rejects would mean the typebox schema is narrower
+  // than the zod schema it is supposed to mirror.
+  VALID_CASES.forEach((fixture, i) => {
+    test(`valid #${i}: both validators accept`, () => {
+      const { zodOk, typeboxOk } = runBoth(fixture);
+      expect(zodOk).toBe(true);
+      expect(typeboxOk).toBe(true);
+    });
+  });
+  // One test per invalid fixture. Both validators must reject it.
+  INVALID_CASES.forEach((fixture, i) => {
+    test(`invalid #${i}: both validators reject`, () => {
+      const { zodOk, typeboxOk } = runBoth(fixture);
+      expect(zodOk).toBe(false);
+      expect(typeboxOk).toBe(false);
+    });
+  });
+});