npm - @intx/inference-discovery - Versions diffs - 0.1.2 - Mend

@intx/inference-discovery 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +110 -0
package/media/sample.jpg +0 -0
package/media/sample.mp4 +0 -0
package/media/sample.pdf +0 -0
package/media/sample.wav +0 -0
package/package.json +19 -0
package/src/catalog/capability.test.ts +43 -0
package/src/catalog/capability.ts +38 -0
package/src/catalog/index.ts +10 -0
package/src/catalog/intent.test.ts +94 -0
package/src/catalog/intent.ts +297 -0
package/src/catalog/manifest.test.ts +72 -0
package/src/catalog/manifest.ts +12 -0
package/src/catalog/support-matrix.test.ts +79 -0
package/src/catalog/support-matrix.ts +344 -0
package/src/ci-guard.test.ts +36 -0
package/src/ci-guard.ts +9 -0
package/src/cli.test.ts +129 -0
package/src/cli.ts +133 -0
package/src/content-type.test.ts +45 -0
package/src/content-type.ts +20 -0
package/src/env.test.ts +31 -0
package/src/env.ts +36 -0
package/src/index.ts +24 -0
package/src/manifest.test.ts +56 -0
package/src/manifest.ts +29 -0
package/src/plugin.ts +70 -0
package/src/runner.test.ts +508 -0
package/src/runner.ts +242 -0
package/src/write-capture.test.ts +168 -0
package/src/write-capture.ts +83 -0
package/tsconfig.json +4 -0
package/tsconfig.tsbuildinfo +1 -0

package/src/catalog/manifest.test.ts ADDED Viewed

@@ -0,0 +1,72 @@
+import { describe, test, expect } from "bun:test";
+import { type } from "arktype";
+import { FixtureManifest } from "./manifest";
+describe("FixtureManifest validator", () => {
+  // Runs the validator and reports whether it produced an arktype error
+  // set (true) or a parsed manifest (false).
+  const isRejected = (candidate: unknown): boolean =>
+    FixtureManifest(candidate) instanceof type.errors;
+  test("accepts a well-formed manifest", () => {
+    const wellFormed = {
+      provider: "google-genai",
+      model: "gemini-2.5-flash",
+      capability: "plain-text",
+      capturedAt: "2026-05-20T15:00:00Z",
+      schemaVersion: "1",
+    };
+    expect(isRejected(wellFormed)).toBe(false);
+  });
+  test("accepts an explicit null observedModelVersion", () => {
+    const withNullVersion = {
+      provider: "opencode-zen",
+      model: "kimi-k2.6",
+      capability: "reasoning-content",
+      capturedAt: "2026-05-20T15:00:00Z",
+      observedModelVersion: null,
+      schemaVersion: "1",
+    };
+    expect(isRejected(withNullVersion)).toBe(false);
+  });
+  test("accepts a populated observedModelVersion string", () => {
+    const withVersion = {
+      provider: "opencode-zen",
+      model: "kimi-k2.6",
+      capability: "plain-text",
+      capturedAt: "2026-05-20T15:00:00Z",
+      observedModelVersion: "moonshotai/kimi-k2.6-20260420",
+      schemaVersion: "1",
+    };
+    expect(isRejected(withVersion)).toBe(false);
+  });
+  test("rejects a manifest with an unknown capability", () => {
+    const unknownCapability = {
+      provider: "google-genai",
+      model: "gemini-2.5-flash",
+      capability: "not-a-real-capability",
+      capturedAt: "2026-05-20T15:00:00Z",
+      schemaVersion: "1",
+    };
+    expect(isRejected(unknownCapability)).toBe(true);
+  });
+  test("rejects a manifest with the wrong schemaVersion literal", () => {
+    const wrongSchemaVersion = {
+      provider: "google-genai",
+      model: "gemini-2.5-flash",
+      capability: "plain-text",
+      capturedAt: "2026-05-20T15:00:00Z",
+      schemaVersion: "2",
+    };
+    expect(isRejected(wrongSchemaVersion)).toBe(true);
+  });
+  test("rejects a manifest missing required fields", () => {
+    // `model` is deliberately left out.
+    const missingModel = {
+      provider: "google-genai",
+      capability: "plain-text",
+      capturedAt: "2026-05-20T15:00:00Z",
+      schemaVersion: "1",
+    };
+    expect(isRejected(missingModel)).toBe(true);
+  });
+});

package/src/catalog/manifest.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import { type } from "arktype";
+import { Capability } from "./capability";
+/**
+ * Runtime validator (arktype) for a fixture manifest object.
+ *
+ * `provider`, `model`, and `capturedAt` are checked only for being strings;
+ * `capability` must satisfy the imported `Capability` type;
+ * `observedModelVersion` may be omitted, a string, or an explicit null;
+ * `schemaVersion` must be the literal string "1".
+ */
+export const FixtureManifest = type({
+  provider: "string",
+  model: "string",
+  capability: Capability,
+  capturedAt: "string",
+  "observedModelVersion?": "string | null",
+  schemaVersion: "'1'",
+});
+/** Static TypeScript shape inferred from the `FixtureManifest` validator. */
+export type FixtureManifest = typeof FixtureManifest.infer;

package/src/catalog/support-matrix.test.ts ADDED Viewed

@@ -0,0 +1,79 @@
+import { describe, test, expect } from "bun:test";
+import { type } from "arktype";
+import { SUPPORT_MATRIX, SupportEntry, getFixtureDir } from "./support-matrix";
+describe("SUPPORT_MATRIX validation", () => {
+  // Number of rows for `provider` whose outcome is "captured".
+  const capturedCount = (provider: string): number =>
+    SUPPORT_MATRIX.reduce(
+      (total, row) =>
+        row.provider === provider && row.outcome === "captured"
+          ? total + 1
+          : total,
+      0,
+    );
+  test("every entry parses as a SupportEntry", () => {
+    for (const row of SUPPORT_MATRIX) {
+      const parsed = SupportEntry(row);
+      expect(parsed instanceof type.errors).toBe(false);
+    }
+  });
+  test("contains at least 22 google-genai captured entries", () => {
+    expect(capturedCount("google-genai")).toBeGreaterThanOrEqual(22);
+  });
+  test("contains at least 33 opencode-zen captured entries", () => {
+    expect(capturedCount("opencode-zen")).toBeGreaterThanOrEqual(33);
+  });
+  test("contains at least one non-captured opencode-zen entry with notes", () => {
+    const uncaptured = SUPPORT_MATRIX.filter(
+      (row) => row.provider === "opencode-zen" && row.outcome !== "captured",
+    );
+    expect(uncaptured.length).toBeGreaterThanOrEqual(1);
+    // Every non-captured row must explain itself with a non-empty note.
+    for (const row of uncaptured) {
+      expect(typeof row.notes).toBe("string");
+      expect((row.notes ?? "").length).toBeGreaterThan(0);
+    }
+  });
+  test("no duplicate (provider, model, capability) triples", () => {
+    const visited = new Set<string>();
+    for (const row of SUPPORT_MATRIX) {
+      const triple = [row.provider, row.model, row.capability].join("|");
+      expect(visited.has(triple)).toBe(false);
+      visited.add(triple);
+    }
+  });
+});
+describe("getFixtureDir", () => {
+  // captured and misled rows are expected to resolve to
+  // packages/inference-testing/wire/<provider>/<model>/<capability>;
+  // rows with any other outcome are expected to resolve to null.
+  test("returns a wire-relative path for a captured entry", () => {
+    const captured = SUPPORT_MATRIX.find((e) => e.outcome === "captured");
+    expect(captured).toBeDefined();
+    if (captured === undefined) return;
+    const dir = getFixtureDir(captured);
+    expect(dir).toBe(
+      `packages/inference-testing/wire/${captured.provider}/${captured.model}/${captured.capability}`,
+    );
+  });
+  test("returns a wire-relative path for a misled entry", () => {
+    const misled = SUPPORT_MATRIX.find((e) => e.outcome === "misled");
+    // Assert presence first, like the sibling tests, so this test cannot
+    // pass vacuously if the matrix ever loses its misled rows.
+    expect(misled).toBeDefined();
+    if (misled === undefined) return;
+    const dir = getFixtureDir(misled);
+    expect(dir).toBe(
+      `packages/inference-testing/wire/${misled.provider}/${misled.model}/${misled.capability}`,
+    );
+  });
+  test("returns null for an entry without a fixture", () => {
+    const noFixture = SUPPORT_MATRIX.find(
+      (e) => e.outcome !== "captured" && e.outcome !== "misled",
+    );
+    expect(noFixture).toBeDefined();
+    if (noFixture === undefined) return;
+    expect(getFixtureDir(noFixture)).toBeNull();
+  });
+});

package/src/catalog/support-matrix.ts ADDED Viewed

@@ -0,0 +1,344 @@
+import { type } from "arktype";
+import { Capability } from "./capability";
+// Outcome vocabulary:
+//
+// - captured: HTTP succeeded and the response contains the wire shape
+//   the capability's name implies. Fixture on disk; smoke test validates.
+//
+// - misled: HTTP succeeded and the model responded normally, but the
+//   provider's documented contract for the input did not materialize.
+//   The model did not refuse — there is no statement of inability in
+//   the response — the documented behavior just did not fire. Used when
+//   the wire shape is conditional on external state we do not control
+//   (e.g. Anthropic's safety classifier not engaging on the documented
+//   redacted-thinking canary). The fixture on disk documents what was
+//   actually returned; smoke test validates file presence. A future
+//   re-capture may flip the row to captured.
+//
+// - refused: the provider's response contains an explicit refusal of
+//   the requested task. The model told us it would not do the thing —
+//   sometimes via HTTP non-2xx, sometimes via a successful HTTP body
+//   carrying a textual refusal. No fixture by convention; the refusal
+//   detail goes in notes.
+//
+// - http-error: the provider returned a non-2xx HTTP status. No fixture.
+//
+// - unsupported: the provider does not support this capability. No
+//   fixture, no attempt made.
+/**
+ * Runtime validator (arktype) for one row of the support matrix.
+ *
+ * `provider` and `model` are free-form strings, `capability` must satisfy
+ * the imported `Capability` type, `outcome` must be one of the five
+ * literals listed in the vocabulary above, and `notes` is an optional
+ * string.
+ */
+export const SupportEntry = type({
+  provider: "string",
+  model: "string",
+  capability: Capability,
+  outcome: "'captured' | 'misled' | 'refused' | 'http-error' | 'unsupported'",
+  "notes?": "string",
+});
+/** Static TypeScript shape inferred from the `SupportEntry` validator. */
+export type SupportEntry = typeof SupportEntry.infer;
+// Provider identifiers and model names used when the matrix rows are built.
+const ANTHROPIC_PROVIDER = "anthropic";
+const ANTHROPIC_MODELS = [
+  "claude-sonnet-4-5-20250929",
+  "claude-opus-4-1-20250805",
+  "claude-haiku-4-5-20251001",
+] as const;
+const GEMINI_PROVIDER = "google-genai";
+const GEMINI_TEXT_MODEL = "gemini-2.5-flash";
+const GEMINI_IMAGE_MODEL = "gemini-2.5-flash-image";
+const OPENCODE_PROVIDER = "opencode-zen";
+// Capabilities recorded as captured for the Gemini text model.
+const GEMINI_TEXT_CAPABILITIES = [
+  "plain-text",
+  "plain-text-streaming",
+  "function-calling-multi-turn",
+  "function-calling-multi-turn-streaming",
+  "function-calling-with-thinking",
+  "function-calling-with-thinking-streaming",
+  "vision-input",
+  "vision-input-streaming",
+  "audio-input",
+  "audio-input-streaming",
+  "video-input",
+  "video-input-streaming",
+  "document-input",
+  "document-input-streaming",
+  "code-execution",
+  "code-execution-streaming",
+  "grounding",
+  "grounding-streaming",
+  "files-api-reference",
+  "files-api-reference-streaming",
+  "structured-output",
+  "structured-output-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// Capabilities recorded as misled for the Gemini text model.
+const GEMINI_TEXT_MISLED_CAPABILITIES = [
+  "safety-classification",
+  "safety-classification-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// Capabilities recorded as captured for the Gemini image model.
+const GEMINI_IMAGE_CAPABILITIES = [
+  "image-output",
+  "image-output-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// Baseline opencode-zen capability set for models without vision input.
+const OPENCODE_NON_VISION_CAPABILITIES = [
+  "plain-text",
+  "plain-text-streaming",
+  "function-calling",
+  "function-calling-multi-turn",
+  "reasoning-content",
+  "reasoning-content-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// The full set is the non-vision baseline followed by "vision-input".
+const OPENCODE_FULL_CAPABILITIES = [
+  ...OPENCODE_NON_VISION_CAPABILITIES,
+  "vision-input",
+] as const satisfies readonly SupportEntry["capability"][];
+/**
+ * Builds one `captured` row per capability for a google-genai model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @returns The rows, one per capability, without notes.
+ */
+function gemini(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+): SupportEntry[] {
+  const rows: SupportEntry[] = [];
+  for (const capability of capabilities) {
+    rows.push({
+      provider: GEMINI_PROVIDER,
+      model,
+      capability,
+      outcome: "captured",
+    });
+  }
+  return rows;
+}
+/**
+ * Builds one `misled` row per capability for a google-genai model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @param notes - Explanation attached unchanged to every row.
+ * @returns The rows, one per capability.
+ */
+function geminiMisled(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+  notes: string,
+): SupportEntry[] {
+  const toRow = (capability: SupportEntry["capability"]): SupportEntry => ({
+    provider: GEMINI_PROVIDER,
+    model,
+    capability,
+    outcome: "misled",
+    notes,
+  });
+  return Array.from(capabilities, toRow);
+}
+/**
+ * Builds one `captured` row per capability for an opencode-zen model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @returns The rows, one per capability, without notes.
+ */
+function opencode(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+): SupportEntry[] {
+  const rows: SupportEntry[] = [];
+  capabilities.forEach((capability) => {
+    rows.push({
+      provider: OPENCODE_PROVIDER,
+      model,
+      capability,
+      outcome: "captured",
+    });
+  });
+  return rows;
+}
+// Capabilities expanded into `captured` rows for every entry of
+// ANTHROPIC_MODELS when the matrix is built.
+const ANTHROPIC_CAPTURED_CAPABILITIES = [
+  "plain-text",
+  "plain-text-streaming",
+  "function-calling",
+  "function-calling-multi-turn",
+  "function-calling-multi-turn-streaming",
+  "function-calling-with-thinking",
+  "function-calling-with-thinking-streaming",
+  "vision-input",
+  "vision-input-streaming",
+  "document-input",
+  "document-input-streaming",
+  "code-execution",
+  "code-execution-streaming",
+  "reasoning-content",
+  "reasoning-content-streaming",
+  "files-api-reference",
+  "files-api-reference-streaming",
+  "grounding",
+  "grounding-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// Capabilities expanded into `misled` rows for every Anthropic model.
+const ANTHROPIC_MISLED_CAPABILITIES = [
+  "redacted-thinking",
+  "redacted-thinking-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// The three lists below are expanded into `unsupported` rows; they are
+// kept separate because each group is given its own notes text.
+const ANTHROPIC_UNSUPPORTED_INPUT_MODALITIES = [
+  "audio-input",
+  "audio-input-streaming",
+  "video-input",
+  "video-input-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+const ANTHROPIC_UNSUPPORTED_OUTPUT_MODALITIES = [
+  "image-output",
+  "image-output-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+const ANTHROPIC_UNSUPPORTED_STRUCTURED_OUTPUTS = [
+  "structured-output",
+  "structured-output-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+/**
+ * Builds one `captured` row per capability for an Anthropic model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @returns The rows, one per capability, without notes.
+ */
+function anthropic(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+): SupportEntry[] {
+  const toRow = (capability: SupportEntry["capability"]): SupportEntry => ({
+    provider: ANTHROPIC_PROVIDER,
+    model,
+    capability,
+    outcome: "captured",
+  });
+  return Array.from(capabilities, toRow);
+}
+/**
+ * Builds one `unsupported` row per capability for an Anthropic model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @param notes - Explanation attached unchanged to every row.
+ * @returns The rows, one per capability.
+ */
+function anthropicUnsupported(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+  notes: string,
+): SupportEntry[] {
+  const rows: SupportEntry[] = [];
+  for (const capability of capabilities) {
+    rows.push({
+      provider: ANTHROPIC_PROVIDER,
+      model,
+      capability,
+      outcome: "unsupported",
+      notes,
+    });
+  }
+  return rows;
+}
+/**
+ * Builds one `misled` row per capability for an Anthropic model.
+ *
+ * @param model - Model name stored on every row.
+ * @param capabilities - Capabilities to expand, in output order.
+ * @param notes - Explanation attached unchanged to every row.
+ * @returns The rows, one per capability.
+ */
+function anthropicMisled(
+  model: string,
+  capabilities: readonly SupportEntry["capability"][],
+  notes: string,
+): SupportEntry[] {
+  const rows: SupportEntry[] = [];
+  capabilities.forEach((capability) => {
+    rows.push({
+      provider: ANTHROPIC_PROVIDER,
+      model,
+      capability,
+      outcome: "misled",
+      notes,
+    });
+  });
+  return rows;
+}
+// deepseek-v4-pro and mimo-v2-omni each carry identical note text on their
+// streaming and non-streaming structured-output rows, so each text is
+// written once here.
+const DEEPSEEK_STRUCTURED_OUTPUT_NOTE =
+  "Probe against /zen/v1 returned HTTP 401 with body {type:'error',error:{type:'ModelError',message:'Model deepseek-v4-pro is not supported'}}. The HTTP status is the relay's chosen code for the routing miss, not an auth failure; deepseek-v4-pro's reasoning-content captures live on the older /zen/go/v1 tier and the v1 tier does not route the model.";
+const MIMO_STRUCTURED_OUTPUT_NOTE =
+  "Probe against /zen/v1 returned HTTP 401 with body {type:'error',error:{type:'ModelError',message:'Model mimo-v2-omni is not supported'}}. The HTTP status is the relay's chosen code for the routing miss, not an auth failure; mimo-v2-omni's other captures live on the older /zen/go/v1 tier and the v1 tier does not route the model.";
+// The non-streaming / streaming structured-output pair, in row order.
+const STRUCTURED_OUTPUT_CAPABILITIES = [
+  "structured-output",
+  "structured-output-streaming",
+] as const satisfies readonly SupportEntry["capability"][];
+// Builds the `http-error` rows for an opencode-zen model across both
+// structured-output capabilities, attaching the same notes to each.
+function opencodeStructuredOutputHttpError(
+  model: string,
+  notes: string,
+): SupportEntry[] {
+  return STRUCTURED_OUTPUT_CAPABILITIES.map(
+    (capability): SupportEntry => ({
+      provider: OPENCODE_PROVIDER,
+      model,
+      capability,
+      outcome: "http-error",
+      notes,
+    }),
+  );
+}
+// Row order: Anthropic (captured, misled, unsupported groups), then Gemini,
+// then opencode-zen captured rows followed by its individual exceptions.
+const MATRIX: SupportEntry[] = [
+  ...ANTHROPIC_MODELS.flatMap((claudeModel) =>
+    anthropic(claudeModel, ANTHROPIC_CAPTURED_CAPABILITIES),
+  ),
+  ...ANTHROPIC_MODELS.flatMap((claudeModel) =>
+    anthropicMisled(
+      claudeModel,
+      ANTHROPIC_MISLED_CAPABILITIES,
+      "Anthropic's documentation describes the canary prompt as a deterministic trigger for a redacted_thinking content block. On capture day the safety classifier did not fire on any first-party model; the assistant response carries a regular thinking block instead. The fixture on disk documents what the wire actually returned for the documented input. The plug-in and SSE parser already accept redacted_thinking blocks, so a future re-capture on a day the classifier does fire will flip this row to captured without code changes.",
+    ),
+  ),
+  ...ANTHROPIC_MODELS.flatMap((claudeModel) =>
+    anthropicUnsupported(
+      claudeModel,
+      ANTHROPIC_UNSUPPORTED_INPUT_MODALITIES,
+      "Anthropic's first-party Claude models do not accept audio or video inputs; the Messages API content array only permits text, image, document, tool_use, and tool_result blocks. No equivalent server-side ingestion path exists today.",
+    ),
+  ),
+  ...ANTHROPIC_MODELS.flatMap((claudeModel) =>
+    anthropicUnsupported(
+      claudeModel,
+      ANTHROPIC_UNSUPPORTED_OUTPUT_MODALITIES,
+      "Anthropic's first-party Claude models do not emit images; the Messages API surface is text-only on the output side and has no responseModalities-style toggle.",
+    ),
+  ),
+  ...ANTHROPIC_MODELS.flatMap((claudeModel) =>
+    anthropicUnsupported(
+      claudeModel,
+      ANTHROPIC_UNSUPPORTED_STRUCTURED_OUTPUTS,
+      "Anthropic's Messages API has no native structured-outputs surface. The internal adapter rejects responseFormat values of json and json-schema at the marshaling boundary rather than synthesizing a hidden tool-input wrapper; callers needing structured output route through a provider with native support.",
+    ),
+  ),
+  ...gemini(GEMINI_TEXT_MODEL, GEMINI_TEXT_CAPABILITIES),
+  ...gemini(GEMINI_IMAGE_MODEL, GEMINI_IMAGE_CAPABILITIES),
+  ...geminiMisled(
+    GEMINI_TEXT_MODEL,
+    GEMINI_TEXT_MISLED_CAPABILITIES,
+    'Probe prompt did not engage Gemini\'s structured safety classifier on capture day. The model self-refused via response text content but `safetyRatings`, `promptFeedback`, and `finishReason: "SAFETY"` are all absent from the response. The fixture on disk documents what the wire actually returned for the documented probe input. A future re-capture (different prompt, different classifier thresholds, or different model behavior) may flip this row to captured without code changes once a structured safety signal materializes.',
+  ),
+  ...opencode("kimi-k2.6", OPENCODE_FULL_CAPABILITIES),
+  ...opencode("mimo-v2-omni", OPENCODE_FULL_CAPABILITIES),
+  ...opencode("qwen3.6-plus", OPENCODE_FULL_CAPABILITIES),
+  ...opencode("glm-5.1", OPENCODE_NON_VISION_CAPABILITIES),
+  ...opencode("deepseek-v4-pro", OPENCODE_NON_VISION_CAPABILITIES),
+  ...opencode("gpt-5.4-mini", STRUCTURED_OUTPUT_CAPABILITIES),
+  ...opencode("kimi-k2.6", STRUCTURED_OUTPUT_CAPABILITIES),
+  ...opencode("glm-5.1", STRUCTURED_OUTPUT_CAPABILITIES),
+  ...opencode("qwen3.6-plus", STRUCTURED_OUTPUT_CAPABILITIES),
+  ...opencodeStructuredOutputHttpError(
+    "deepseek-v4-pro",
+    DEEPSEEK_STRUCTURED_OUTPUT_NOTE,
+  ),
+  ...opencodeStructuredOutputHttpError(
+    "mimo-v2-omni",
+    MIMO_STRUCTURED_OUTPUT_NOTE,
+  ),
+  {
+    provider: OPENCODE_PROVIDER,
+    model: "glm-5.1",
+    capability: "vision-input",
+    outcome: "refused",
+    notes:
+      "Probe returned HTTP 200 with the textual refusal 'Please provide an image so I can describe it for you' rather than a real image description; recorded as 'refused' here so no capture is attempted.",
+  },
+  {
+    provider: OPENCODE_PROVIDER,
+    model: "deepseek-v4-pro",
+    capability: "vision-input",
+    outcome: "http-error",
+    notes:
+      "OpenAI-style multimodal messages[].content elicits HTTP 400 invalid_request_error \"unknown variant 'image_url', expected 'text'\"; recorded as 'http-error' here so no capture is attempted.",
+  },
+];
+// Public, read-only view of the matrix.
+export const SUPPORT_MATRIX: readonly SupportEntry[] = MATRIX;
+const FIXTURE_ROOT = "packages/inference-testing/wire";
+/**
+ * Resolves the fixture directory for a support-matrix row.
+ *
+ * @param entry - The row to resolve.
+ * @returns `<FIXTURE_ROOT>/<provider>/<model>/<capability>` for rows whose
+ *   outcome is "captured" or "misled"; `null` for every other outcome.
+ */
+export function getFixtureDir(entry: SupportEntry): string | null {
+  switch (entry.outcome) {
+    // Only these two outcomes carry a fixture on disk.
+    case "captured":
+    case "misled":
+      return [
+        FIXTURE_ROOT,
+        entry.provider,
+        entry.model,
+        entry.capability,
+      ].join("/");
+    // refused / http-error / unsupported rows have no fixture.
+    default:
+      return null;
+  }
+}

package/src/ci-guard.test.ts ADDED Viewed

@@ -0,0 +1,36 @@
+import { describe, test, expect } from "bun:test";
+import { assertNotCI } from "./ci-guard";
+describe("assertNotCI", () => {
+  // Fragment of the error message every refusal is expected to contain.
+  const CI_REFUSAL = /must not run in CI/;
+  test("does nothing when CI is unset", () => {
+    const run = () => assertNotCI({});
+    expect(run).not.toThrow();
+  });
+  test("does nothing when CI is an empty string", () => {
+    const run = () => assertNotCI({ CI: "" });
+    expect(run).not.toThrow();
+  });
+  test("throws when CI is 'true'", () => {
+    const run = () => assertNotCI({ CI: "true" });
+    expect(run).toThrow(CI_REFUSAL);
+  });
+  test("throws when CI is '1'", () => {
+    const run = () => assertNotCI({ CI: "1" });
+    expect(run).toThrow(CI_REFUSAL);
+  });
+  test("reads process.env by default", () => {
+    // Remember the ambient value so the real environment is restored even
+    // if an assertion fails.
+    const saved = process.env.CI;
+    try {
+      delete process.env.CI;
+      expect(() => assertNotCI()).not.toThrow();
+      process.env.CI = "yes";
+      expect(() => assertNotCI()).toThrow(CI_REFUSAL);
+    } finally {
+      if (saved !== undefined) {
+        process.env.CI = saved;
+      } else {
+        delete process.env.CI;
+      }
+    }
+  });
+});

package/src/ci-guard.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Refuses to continue when the environment indicates a CI run.
+ *
+ * @param env - Environment to inspect; defaults to `process.env`.
+ * @throws Error when `env.CI` is set to any non-empty string.
+ */
+export function assertNotCI(env: NodeJS.ProcessEnv = process.env): void {
+  const { CI: ciFlag } = env;
+  // Unset and empty-string values both count as "not in CI".
+  const runningInCI = ciFlag !== undefined && ciFlag !== "";
+  if (!runningInCI) return;
+  throw new Error(
+    "Discovery runs make live network calls and must not run in CI. Unset the CI environment variable to proceed.",
+  );
+}

package/src/cli.test.ts ADDED Viewed

@@ -0,0 +1,129 @@
+import { describe, test, expect } from "bun:test";
+import { parseCLI } from "./cli";
+describe("parseCLI", () => {
+  // Asserts that `argv` is rejected and, when a pattern is supplied, that
+  // the error message matches it.
+  function expectError(argv: string[], pattern?: RegExp): void {
+    const outcome = parseCLI(argv);
+    expect(outcome.kind).toBe("error");
+    if (pattern !== undefined && outcome.kind === "error") {
+      expect(outcome.message).toMatch(pattern);
+    }
+  }
+  test("returns help for --help", () => {
+    const outcome = parseCLI(["--help"]);
+    expect(outcome.kind).toBe("help");
+    if (outcome.kind === "help") {
+      expect(outcome.message).toMatch(/Usage: discover/);
+    }
+  });
+  test("returns help for -h", () => {
+    const outcome = parseCLI(["-h"]);
+    expect(outcome.kind).toBe("help");
+  });
+  test("returns error when --provider missing", () => {
+    expectError(["--all"], /--provider/);
+  });
+  test("returns run for --provider X --all", () => {
+    const outcome = parseCLI(["--provider", "google-genai", "--all"]);
+    expect(outcome.kind).toBe("run");
+    if (outcome.kind === "run") {
+      expect(outcome.provider).toBe("google-genai");
+      expect(outcome.all).toBe(true);
+      expect(outcome.models).toEqual([]);
+      expect(outcome.capabilities).toEqual([]);
+    }
+  });
+  test("collects repeated --model flags", () => {
+    const argv = ["--provider", "p", "--model", "m1", "--model", "m2"];
+    const outcome = parseCLI(argv);
+    expect(outcome.kind).toBe("run");
+    if (outcome.kind === "run") {
+      expect(outcome.models).toEqual(["m1", "m2"]);
+    }
+  });
+  test("collects repeated --only flags", () => {
+    const argv = [
+      "--provider",
+      "p",
+      "--only",
+      "plain-text",
+      "--only",
+      "plain-text-streaming",
+    ];
+    const outcome = parseCLI(argv);
+    expect(outcome.kind).toBe("run");
+    if (outcome.kind === "run") {
+      expect(outcome.capabilities).toEqual([
+        "plain-text",
+        "plain-text-streaming",
+      ]);
+    }
+  });
+  test("rejects --all combined with --model", () => {
+    expectError(
+      ["--provider", "p", "--all", "--model", "m"],
+      /mutually exclusive/,
+    );
+  });
+  test("rejects --all combined with --only", () => {
+    expectError(["--provider", "p", "--all", "--only", "plain-text"]);
+  });
+  test("rejects no scope flags without --all", () => {
+    expectError(["--provider", "p"], /--all|--model|--only/);
+  });
+  test("rejects unknown flag", () => {
+    expectError(["--provider", "p", "--bogus"], /Unknown argument: --bogus/);
+  });
+  test("rejects --provider without value", () => {
+    expectError(["--provider"], /requires a value/);
+  });
+  test("rejects duplicate --provider", () => {
+    expectError(
+      ["--provider", "a", "--provider", "b", "--all"],
+      /only be specified once/,
+    );
+  });
+  test("rejects --model without value", () => {
+    expectError(["--provider", "p", "--model"]);
+  });
+  test("rejects --model followed by another flag as value", () => {
+    expectError(["--provider", "p", "--model", "--all"]);
+  });
+});