npm - @checklabs/core - Versions diffs - 0.2.1 - Mend

@checklabs/core 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/package.json +31 -0
package/src/adapters/index.ts +136 -0
package/src/assertions/expect.ts +218 -0
package/src/config.ts +89 -0
package/src/discovery.ts +57 -0
package/src/env.ts +35 -0
package/src/generate/index.ts +103 -0
package/src/generate/templates.ts +225 -0
package/src/index.ts +93 -0
package/src/judge/index.ts +158 -0
package/src/pricing.ts +56 -0
package/src/registry.ts +23 -0
package/src/reporters/colors.ts +36 -0
package/src/reporters/console.ts +154 -0
package/src/reporters/html.ts +189 -0
package/src/reporters/index.ts +4 -0
package/src/reporters/json.ts +11 -0
package/src/runner/compare.ts +84 -0
package/src/runner/runner.ts +144 -0
package/src/types.ts +197 -0

package/package.json ADDED Viewed

@@ -0,0 +1,31 @@
+{
+  "name": "@checklabs/core",
+  "version": "0.2.1",
+  "type": "module",
+  "description": "CheckAI core: assertions, scored LLM judge, adapters, runner, comparison and reporters.",
+  "license": "MIT",
+  "author": "MaxDanchenko",
+  "homepage": "https://github.com/MaxDanchenko/check-ai#readme",
+  "bugs": "https://github.com/MaxDanchenko/check-ai/issues",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/MaxDanchenko/check-ai.git",
+    "directory": "packages/checkai-core"
+  },
+  "keywords": ["ai", "testing", "regression-testing", "agents", "llm", "evals"],
+  "engines": {
+    "node": ">=18"
+  },
+  "main": "src/index.ts",
+  "types": "src/index.ts",
+  "exports": {
+    ".": "./src/index.ts"
+  },
+  "files": ["src", "dist"],
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsc -p tsconfig.json --outDir dist"
+  }
+}

package/src/adapters/index.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import { pathToFileURL } from "node:url";
+import { basename, dirname } from "node:path";
+import type { AgentAdapter, AgentResponse, AgentSource } from "../types";
+import { estimateUsage, estimateCost } from "../pricing";
+/**
+ * Complete an agent response with token usage and cost.
+ *
+ * Values already present on `res` are kept. A missing `usage` is obtained from
+ * `estimateUsage(input, res.output)`; a missing `costUsd` is obtained from
+ * `estimateCost(usage, res.model)` using the (possibly estimated) usage.
+ *
+ * @param input - The prompt that produced `res`.
+ * @param res - The response as returned by the agent.
+ * @returns A shallow copy of `res` with `usage` and `costUsd` populated.
+ */
+export function finalizeResponse(input: string, res: AgentResponse): AgentResponse {
+  const usage = res.usage ?? estimateUsage(input, res.output);
+  return {
+    ...res,
+    usage,
+    costUsd: res.costUsd ?? estimateCost(usage, res.model),
+  };
+}
+/**
+ * Turn a plain `runAgent(input)` function into an {@link AgentAdapter}.
+ *
+ * Every response is passed through `finalizeResponse`, so usage and cost are
+ * always populated. The adapter's `model` starts as `opts.model` (or `""`) and,
+ * while still empty, is set from the first response that reports a model.
+ *
+ * @param run - Function that produces an AgentResponse for an input.
+ * @param opts - Optional display name (default `"agent"`) and model id.
+ * @returns An adapter whose `run` delegates to the supplied function.
+ */
+export function functionAdapter(
+  run: (input: string) => Promise<AgentResponse>,
+  opts: { name?: string; model?: string } = {}
+): AgentAdapter {
+  // Last known model id, exposed read-only through the getter below.
+  let knownModel = opts.model ?? "";
+  const adapter: AgentAdapter = {
+    name: opts.name ?? "agent",
+    get model() {
+      return knownModel;
+    },
+    async run(input: string): Promise<AgentResponse> {
+      const raw = await run(input);
+      const finalized = finalizeResponse(input, raw);
+      if (!knownModel && finalized.model) {
+        knownModel = finalized.model;
+      }
+      return finalized;
+    },
+  };
+  return adapter;
+}
+/** Options accepted by `httpAdapter`. */
+export interface HttpAdapterOptions {
+  /** Endpoint the request is sent to. */
+  url: string;
+  /** Adapter display name. Default: `"http-agent"`. */
+  name?: string;
+  /** Model id used when the mapped response does not report one. */
+  model?: string;
+  /** HTTP method. Default: `"POST"`. */
+  method?: string;
+  /** Extra request headers, merged over the default JSON `content-type`. */
+  headers?: Record<string, string>;
+  /** Build the request body from the user input. Default: `{ input }`. */
+  body?: (input: string) => unknown;
+  /** Map the JSON response to an AgentResponse. Default tries common fields. */
+  map?: (json: any, input: string) => Partial<AgentResponse> & { output: string };
+}
+/**
+ * Default JSON → AgentResponse mapping used when no `map` option is given.
+ *
+ * The reply text is the first non-nullish of `output`, `text`, `reply`,
+ * `message`, `content` (empty string if none). Strings are used as-is and other
+ * primitives are stringified. An object value (e.g. a chat-style
+ * `{ role, content }` message) contributes its string `content` when present
+ * and is otherwise JSON-encoded, instead of collapsing to "[object Object]".
+ *
+ * `toolsUsed` is taken from `toolsUsed`/`tools` only when that value is an
+ * array; anything else yields `[]` so array operations on it stay valid.
+ *
+ * @param json - Parsed response body (any shape, including null).
+ * @returns The mapped fields; `model` and `usage` are passed through untouched.
+ */
+function defaultMap(json: any): Partial<AgentResponse> & { output: string } {
+  const candidate =
+    json?.output ?? json?.text ?? json?.reply ?? json?.message ?? json?.content ?? "";
+  let output: string;
+  if (typeof candidate === "string") {
+    output = candidate;
+  } else if (typeof candidate === "object") {
+    // `candidate` is never null here: the `??` chain above skips nullish values.
+    output =
+      typeof candidate.content === "string" ? candidate.content : JSON.stringify(candidate);
+  } else {
+    output = String(candidate);
+  }
+  const tools = json?.toolsUsed ?? json?.tools;
+  return {
+    output,
+    toolsUsed: Array.isArray(tools) ? tools : [],
+    model: json?.model,
+    usage: json?.usage,
+  };
+}
+/**
+ * Connect an agent exposed over an HTTP endpoint.
+ *
+ * Each `run` sends one JSON request (`opts.body(input)` or `{ input }`), maps
+ * the JSON reply with `opts.map` or the default mapping, and finalizes the
+ * result (usage/cost). Latency defaults to the wall-clock time of the call.
+ * The adapter's `model` is filled from the first response while still empty.
+ *
+ * @param opts - Endpoint and mapping options.
+ * @returns An adapter backed by the endpoint.
+ * @throws Error when the response status is not OK, or when the response body
+ *   is not valid JSON (the message names the URL in both cases).
+ */
+export function httpAdapter(opts: HttpAdapterOptions): AgentAdapter {
+  let model = opts.model ?? "";
+  return {
+    name: opts.name ?? "http-agent",
+    get model() {
+      return model;
+    },
+    async run(input: string): Promise<AgentResponse> {
+      const start = Date.now();
+      const resp = await fetch(opts.url, {
+        method: opts.method ?? "POST",
+        headers: { "content-type": "application/json", ...(opts.headers ?? {}) },
+        body: JSON.stringify(opts.body ? opts.body(input) : { input }),
+      });
+      if (!resp.ok) {
+        throw new Error(`HTTP ${resp.status} ${resp.statusText} from ${opts.url}`);
+      }
+      // A 2xx reply can still carry a non-JSON body (HTML error page, empty
+      // body). Report that with the URL instead of a bare parser error.
+      let json: Awaited<ReturnType<Response["json"]>>;
+      try {
+        json = await resp.json();
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        throw new Error(`Invalid JSON response from ${opts.url}: ${reason}`);
+      }
+      const mapped = opts.map ? opts.map(json, input) : defaultMap(json);
+      const res = finalizeResponse(input, {
+        output: mapped.output,
+        toolsUsed: mapped.toolsUsed ?? [],
+        latencyMs: mapped.latencyMs ?? Date.now() - start,
+        model: mapped.model ?? opts.model ?? "http",
+        usage: mapped.usage,
+        costUsd: mapped.costUsd,
+        raw: json,
+      });
+      if (!model && res.model) model = res.model;
+      return res;
+    },
+  };
+}
+/**
+ * Derive a display name for an agent from its module path.
+ *
+ * Uses the path segment just before the last `src` segment when there is one
+ * (`.../my-agent/src/agent.ts` → `my-agent`), otherwise the name of the file's
+ * parent directory. An empty candidate — e.g. `src` sitting directly under the
+ * filesystem root — falls through to `fallback` instead of yielding "".
+ *
+ * @param agentPath - Path to the agent module (either separator style).
+ * @param fallback - Name to use when nothing usable can be derived.
+ */
+function deriveName(agentPath: string, fallback: string): string {
+  const parts = agentPath.split(/[\\/]/);
+  const srcIdx = parts.lastIndexOf("src");
+  const candidate = srcIdx > 0 ? parts[srcIdx - 1] : basename(dirname(agentPath));
+  return candidate || fallback;
+}
+/**
+ * Type guard: true when `value` is a non-null object exposing a `run` function.
+ * Only the presence of `run` is checked; `name`/`model` are not inspected.
+ */
+function isAdapter(value: unknown): value is AgentAdapter {
+  if (typeof value !== "object" || value === null) return false;
+  return typeof (value as AgentAdapter).run === "function";
+}
+/**
+ * Turn an {@link AgentSource} into a ready-to-use {@link AgentAdapter}.
+ *
+ * An inline adapter is returned unchanged. A string is treated as a module
+ * path and imported; the module's `adapter`, `default` or `agent` export is
+ * used when it is an adapter. Otherwise `runAgent` (or a function found under
+ * one of those exports) is wrapped with `functionAdapter`, taking the name from
+ * the module's `name` export or from the path, and the model from `model`.
+ *
+ * @param source - Module path or inline adapter.
+ * @param fallbackName - Name used when none can be derived from the path.
+ * @throws Error when `source` is neither a string nor an adapter, or when the
+ *   module exports no adapter and no callable agent function.
+ */
+export async function loadAgentSource(
+  source: AgentSource,
+  fallbackName: string
+): Promise<AgentAdapter> {
+  if (isAdapter(source)) {
+    return source;
+  }
+  if (typeof source !== "string") {
+    throw new Error("Agent source must be a module path or an AgentAdapter object.");
+  }
+  const moduleUrl = pathToFileURL(source).href;
+  const mod = await import(moduleUrl);
+  const exported = mod.adapter ?? mod.default ?? mod.agent;
+  if (isAdapter(exported)) {
+    return exported;
+  }
+  // Prefer an explicit `runAgent`; fall back to a bare exported function.
+  let runAgent = mod.runAgent;
+  if (runAgent == null && typeof exported === "function") {
+    runAgent = exported;
+  }
+  if (typeof runAgent !== "function") {
+    throw new Error(
+      `Agent module "${source}" must export runAgent(input) or an AgentAdapter.`
+    );
+  }
+  const name = mod.name ?? deriveName(source, fallbackName);
+  const model = mod.model ?? "";
+  return functionAdapter(runAgent, { name, model });
+}

package/src/assertions/expect.ts ADDED Viewed

@@ -0,0 +1,218 @@
+import type { AgentResponse, AssertionResult } from "../types";
+import { judge, getJudgeThreshold } from "../judge/index";
+/**
+ * Assertion library — the `expect(result).toX()` surface.
+ *
+ * Synchronous matchers throw immediately on failure (Jest-style). The async
+ * `toSatisfyBehavior` returns a Promise the test author awaits. Every matcher
+ * supports `.not`. Each attempted assertion is recorded into the active sink so
+ * reports can show exactly what was checked.
+ */
+/**
+ * Error thrown by a failing matcher. The message is a three-line summary
+ * (matcher call, expected, actual) and the full recorded entry is kept on
+ * `result` for reporters.
+ */
+export class CheckAIAssertionError extends Error {
+  readonly result: AssertionResult;
+  constructor(result: AssertionResult) {
+    super(
+      [
+        `expect(...).${result.matcher}()`,
+        `  Expected: ${result.expected}`,
+        `  Actual:   ${result.actual}`,
+      ].join("\n")
+    );
+    this.result = result;
+    this.name = "CheckAIAssertionError";
+  }
+}
+// Active assertion sink, set by the runner around each test.
+// While it is non-null, every matcher appends its AssertionResult to it.
+let sink: AssertionResult[] | null = null;
+/**
+ * Install the array that receives recorded assertions.
+ *
+ * @param s - Target array, or `null` to stop recording.
+ */
+export function setAssertionSink(s: AssertionResult[] | null): void {
+  sink = s;
+}
+// Tracks in-flight async (judge) assertions so the runner can await them even
+// when a test author forgets to `await` toSatisfyBehavior.
+let pendingSink: Promise<unknown>[] | null = null;
+/**
+ * Install the array that collects tracking promises for async assertions.
+ *
+ * @param s - Target array, or `null` to stop tracking.
+ */
+export function setPendingSink(s: Promise<unknown>[] | null): void {
+  pendingSink = s;
+}
+// Keyword heuristics behind the synchronous behavior matchers. All are
+// case-insensitive and unanchored: they match anywhere in the reply text.
+// Courtesy markers; toBePolite requires at least one of these.
+const POLITE = /(please|thank|sorry|apolog|happy to|glad to|of course|certainly|i understand|i'd be happy|appreciate|i can help|anything else (i can )?help|no problem|my pleasure)/i;
+// Discourteous phrases; any hit makes toBePolite fail even if POLITE matched.
+const RUDE = /(stupid|idiot|shut up|whatever|not my problem|deal with it|that'?s your fault|calm down|get over it)/i;
+// "approve…refund" / "refund…approve" within 40 non-period characters, plus fixed approval phrases.
+const APPROVES_REFUND = /(approv\w*[^.]{0,40}refund|refund[^.]{0,40}approv\w*|issued your refund|processed your refund|granted your refund|refund (has been|was) (issued|processed|granted))/i;
+// Hand-off wording accepted by toEscalate as an alternative to the tool call.
+const ESCALATION_WORDS = /(escalat|senior (support )?specialist|connect you (with|to) (a )?(human|specialist|agent|team)|transfer(ring)? you|hand(ing)? (this|it|you) (off|over)|a (human|team member|specialist) will (reach|get|contact))/i;
+// Signs that the reply requests something (a question mark or request phrasing); used by toAskFor.
+const ASK_INDICATORS = /(\?|could you|can you|would you|please (provide|share|confirm|tell|let me know)|what(?:'s| is) your|may i (have|ask)|i(?:'ll| will) need|so i can (look|pull|find))/i;
+// "#123", "order 123" (3+ digits) or the phrases "order number/id/#"; used by toReferenceOrder.
+const ORDER_REF = /#\s*\d{3,}|order\s*#?\s*\d{3,}|order (number|id|#)/i;
+// Policy vocabulary; used by toReferencePolicy.
+const POLICY_REF = /policy|terms|return window|30[- ]?day|eligib|guideline|per our|warranty|coverage/i;
+/** Per-call options for `toSatisfyBehavior`. */
+export interface SatisfyOptions {
+  /** Minimum judge score (0..1) required to pass. Defaults to config threshold. */
+  threshold?: number;
+}
+/**
+ * Fluent assertion object returned by `expect(result)`.
+ *
+ * Synchronous matchers evaluate immediately, record an AssertionResult into the
+ * active sink (when one is installed) and throw CheckAIAssertionError on
+ * failure; on success they return `this` so calls can be chained. `.not`
+ * yields a new Expectation with the pass/fail sense inverted.
+ */
+class Expectation {
+  constructor(
+    private readonly result: AgentResponse,
+    private readonly negated: boolean = false
+  ) {}
+  /** Negate the next matcher. */
+  get not(): Expectation {
+    return new Expectation(this.result, !this.negated);
+  }
+  /**
+   * Tools reported by the agent. Falls back to an empty list so that a
+   * hand-written agent that omits `toolsUsed` fails the assertion cleanly
+   * instead of raising a TypeError inside the matcher.
+   */
+  private get tools(): string[] {
+    return this.result.toolsUsed ?? [];
+  }
+  /** Whitespace-collapsed, quoted excerpt of the output (at most 140 chars). */
+  private snippet(): string {
+    const o = this.result.output.replace(/\s+/g, " ").trim();
+    return o.length > 140 ? `"${o.slice(0, 137)}..."` : `"${o}"`;
+  }
+  /**
+   * Record the outcome of one matcher and throw if it failed.
+   *
+   * @param rawPass - Matcher outcome before `.not` is applied.
+   * @param matcher - Matcher name (prefixed with "not." when negated).
+   * @param expected - Human-readable expectation.
+   * @param actual - Human-readable observation.
+   * @param extra - Judge score/threshold, for scored assertions.
+   * @throws CheckAIAssertionError when the (possibly negated) outcome is a failure.
+   */
+  private record(
+    rawPass: boolean,
+    matcher: string,
+    expected: string,
+    actual: string,
+    extra?: { score?: number; threshold?: number }
+  ): this {
+    const pass = this.negated ? !rawPass : rawPass;
+    const entry: AssertionResult = {
+      matcher: (this.negated ? "not." : "") + matcher,
+      negated: this.negated,
+      pass,
+      expected: this.negated ? `NOT — ${expected}` : expected,
+      actual,
+      score: extra?.score,
+      threshold: extra?.threshold,
+    };
+    // Recorded before throwing so reports also list the failing assertion.
+    sink?.push(entry);
+    if (!pass) throw new CheckAIAssertionError(entry);
+    return this;
+  }
+  // --- text + tools --------------------------------------------------------
+  /** Pass when the output contains `text` (case-insensitive). */
+  toContainText(text: string): this {
+    const pass = this.result.output.toLowerCase().includes(text.toLowerCase());
+    return this.record(
+      pass,
+      "toContainText",
+      `output contains "${text}"`,
+      pass ? `found "${text}"` : `not found in ${this.snippet()}`
+    );
+  }
+  /** Pass when `name` appears in the reported tool list (exact match). */
+  toUseTool(name: string): this {
+    const tools = this.tools;
+    const pass = tools.includes(name);
+    return this.record(
+      pass,
+      "toUseTool",
+      `agent uses tool "${name}"`,
+      `toolsUsed = [${tools.join(", ")}]`
+    );
+  }
+  // --- behavior ------------------------------------------------------------
+  /** Pass when the output mentions `thing` and also contains request phrasing. */
+  toAskFor(thing: string): this {
+    const out = this.result.output;
+    const mentions = out.toLowerCase().includes(thing.toLowerCase());
+    const asks = ASK_INDICATORS.test(out);
+    const pass = mentions && asks;
+    return this.record(
+      pass,
+      "toAskFor",
+      `agent asks the user for "${thing}"`,
+      !mentions
+        ? `never mentions "${thing}" — ${this.snippet()}`
+        : !asks
+          ? `mentions "${thing}" but does not request it — ${this.snippet()}`
+          : `asks for "${thing}"`
+    );
+  }
+  /**
+   * Pass when the agent called the escalation tool or used hand-off wording.
+   *
+   * @param toolName - Name of the escalation tool to look for. Defaults to
+   *   `"escalateToHuman"`, the name this matcher previously hard-coded.
+   */
+  toEscalate(toolName: string = "escalateToHuman"): this {
+    const usedTool = this.tools.includes(toolName);
+    const says = ESCALATION_WORDS.test(this.result.output);
+    return this.record(
+      usedTool || says,
+      "toEscalate",
+      "agent escalates to a human",
+      `${toolName} ${usedTool ? "called" : "not called"}; wording ${says ? "present" : "absent"}`
+    );
+  }
+  /** Pass when a courtesy marker is present and no discourteous phrase is. */
+  toBePolite(): this {
+    const polite = POLITE.test(this.result.output);
+    const rude = RUDE.test(this.result.output);
+    return this.record(
+      polite && !rude,
+      "toBePolite",
+      "reply is polite and professional",
+      rude
+        ? `contains discourteous language — ${this.snippet()}`
+        : polite
+          ? "polite"
+          : `no courtesy markers found — ${this.snippet()}`
+    );
+  }
+  /** Pass when the output contains refund-approval wording. */
+  toApproveRefund(): this {
+    const pass = APPROVES_REFUND.test(this.result.output);
+    return this.record(
+      pass,
+      "toApproveRefund",
+      "agent approves the refund",
+      pass ? "approval detected" : `no approval (pending/declined/asked) — ${this.snippet()}`
+    );
+  }
+  /** Pass when the output references an order number or id. */
+  toReferenceOrder(): this {
+    const pass = ORDER_REF.test(this.result.output);
+    return this.record(
+      pass,
+      "toReferenceOrder",
+      "reply references the order (number/id)",
+      pass ? "order reference found" : `no order reference — ${this.snippet()}`
+    );
+  }
+  /** Pass when the output uses policy vocabulary. */
+  toReferencePolicy(): this {
+    const pass = POLICY_REF.test(this.result.output);
+    return this.record(
+      pass,
+      "toReferencePolicy",
+      "reply references a policy (terms/window/eligibility)",
+      pass ? "policy reference found" : `no policy reference — ${this.snippet()}`
+    );
+  }
+  // --- LLM judge (async, scored) ------------------------------------------
+  /**
+   * Ask the judge whether the reply satisfies `behavior`.
+   *
+   * @param behavior - Natural-language description of the expected behavior.
+   * @param opts - Optional score threshold override.
+   * @returns A promise that resolves to `this` on pass and rejects with
+   *   CheckAIAssertionError on fail.
+   */
+  toSatisfyBehavior(behavior: string, opts: SatisfyOptions = {}): Promise<this> {
+    const p = this.evaluateBehavior(behavior, opts);
+    // Register so the runner awaits it even if the caller forgets to. The
+    // tracking promise never rejects (the verdict is recorded in the sink).
+    pendingSink?.push(p.then(() => undefined, () => undefined));
+    return p;
+  }
+  /** Run the judge and record its verdict, score and threshold. */
+  private async evaluateBehavior(behavior: string, opts: SatisfyOptions): Promise<this> {
+    const threshold = opts.threshold ?? getJudgeThreshold();
+    const verdict = await judge(
+      { output: this.result.output, toolsUsed: this.tools, behavior },
+      threshold
+    );
+    return this.record(
+      verdict.pass,
+      "toSatisfyBehavior",
+      `behavior "${behavior}" (score ≥ ${threshold.toFixed(2)})`,
+      `score ${verdict.score.toFixed(2)} — ${verdict.reasoning}`,
+      { score: verdict.score, threshold }
+    );
+  }
+}
+/**
+ * Entry point: `expect(result)`.
+ *
+ * @param result - The agent response to assert on.
+ * @returns A non-negated Expectation bound to `result`.
+ */
+export function expect(result: AgentResponse): Expectation {
+  return new Expectation(result);
+}
+// The class itself stays private; only its type is exported.
+export type { Expectation };

package/src/config.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { existsSync } from "node:fs";
+import { dirname, isAbsolute, resolve } from "node:path";
+import { pathToFileURL } from "node:url";
+import type { AgentSource, CheckAIConfig, ResolvedConfig } from "./types";
+/**
+ * Identity helper so config files get full types + autocomplete.
+ *
+ * @param config - The user's configuration object.
+ * @returns The same object, unchanged.
+ */
+export function defineConfig(config: CheckAIConfig): CheckAIConfig {
+  return config;
+}
+// Config file names probed in each directory, in priority order: the first
+// one that exists wins.
+const CONFIG_NAMES = [
+  "checkai.config.ts",
+  "checkai.config.mts",
+  "checkai.config.js",
+  "checkai.config.mjs",
+];
+/**
+ * Locate the nearest CheckAI config file at or above `startDir`.
+ *
+ * Each directory is probed for the known config names in order; the search
+ * then moves to the parent and stops at the filesystem root.
+ *
+ * @param startDir - Directory to start from (resolved to an absolute path).
+ * @returns Absolute path of the first config file found, or `null`.
+ */
+export function findConfigFile(startDir: string): string | null {
+  for (let dir = resolve(startDir); ; ) {
+    const hit = CONFIG_NAMES.map((name) => resolve(dir, name)).find((candidate) =>
+      existsSync(candidate)
+    );
+    if (hit !== undefined) return hit;
+    const parent = dirname(dir);
+    // dirname() of the root is the root itself: nothing left to search.
+    if (parent === dir) return null;
+    dir = parent;
+  }
+}
+// Fallbacks applied by loadConfig for options the config file leaves unset.
+const DEFAULTS = {
+  // Resolved relative to the directory containing the config file.
+  testDir: "checkai",
+  judgeModel: "gpt-4.1-mini",
+  judgeThreshold: 0.8,
+};
+/**
+ * Make a path-style agent source absolute.
+ *
+ * Relative strings are resolved against `rootDir`; absolute strings and
+ * non-string sources (inline adapters) are returned as given.
+ */
+function resolveSource(src: AgentSource, rootDir: string): AgentSource {
+  if (typeof src === "string" && !isAbsolute(src)) {
+    return resolve(rootDir, src);
+  }
+  return src;
+}
+/**
+ * Load and normalize the CheckAI config, resolving paths to absolute.
+ *
+ * The config module's `default` export is preferred, then `config`, then the
+ * module namespace itself. A single `agent` is used (under the name `default`)
+ * only when `agents` is absent or empty. Relative agent paths and `testDir` are
+ * resolved against the directory containing the config file.
+ *
+ * @param startDir - Directory to start the upward search from (default: cwd).
+ * @returns The resolved configuration.
+ * @throws Error when no config file is found, when no agent is configured, or
+ *   when `defaultAgent` does not name a configured agent.
+ */
+export async function loadConfig(startDir: string = process.cwd()): Promise<ResolvedConfig> {
+  const configPath = findConfigFile(startDir);
+  if (!configPath) {
+    throw new Error(
+      `Could not find a checkai.config.ts (searched upward from ${startDir}). ` +
+        `Run "checkai init" to create one.`
+    );
+  }
+  const rootDir = dirname(configPath);
+  const mod = await import(pathToFileURL(configPath).href);
+  const raw: CheckAIConfig = mod.default ?? mod.config ?? mod;
+  const agentsInput: Record<string, AgentSource> = { ...(raw.agents ?? {}) };
+  if (raw.agent && Object.keys(agentsInput).length === 0) {
+    agentsInput.default = raw.agent;
+  }
+  if (Object.keys(agentsInput).length === 0) {
+    throw new Error("CheckAI config must define at least one agent (via `agents` or `agent`).");
+  }
+  const agents: Record<string, AgentSource> = {};
+  for (const [name, src] of Object.entries(agentsInput)) {
+    agents[name] = resolveSource(src, rootDir);
+  }
+  // Own-property check: a plain lookup would also accept inherited keys such
+  // as "constructor" or "toString" and let an invalid defaultAgent through.
+  const isConfigured = (name: string): boolean =>
+    Object.prototype.hasOwnProperty.call(agents, name);
+  if (raw.defaultAgent && !isConfigured(raw.defaultAgent)) {
+    throw new Error(
+      `defaultAgent "${raw.defaultAgent}" is not one of the configured agents: ${Object.keys(
+        agents
+      ).join(", ")}`
+    );
+  }
+  return {
+    configPath,
+    rootDir,
+    testDir: raw.testDir
+      ? isAbsolute(raw.testDir)
+        ? raw.testDir
+        : resolve(rootDir, raw.testDir)
+      : resolve(rootDir, DEFAULTS.testDir),
+    judgeModel: raw.judgeModel ?? DEFAULTS.judgeModel,
+    judgeThreshold: raw.judgeThreshold ?? DEFAULTS.judgeThreshold,
+    agents,
+    defaultAgent: raw.defaultAgent ?? Object.keys(agents)[0],
+  };
+}

package/src/discovery.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import { readdirSync, statSync, existsSync } from "node:fs";
+import { join } from "node:path";
+import { pathToFileURL } from "node:url";
+import { setCurrentFile, getTests } from "./registry";
+import type { TestCase } from "./types";
+// Recognised test-file suffixes; hoisted so the pattern is built once.
+const TEST_FILE_PATTERN = /\.test\.(ts|mts|js|mjs)$/;
+/**
+ * Recursively collect every `*.test.{ts,mts,js,mjs}` file under `dir`.
+ *
+ * `node_modules` and dot-prefixed entries are skipped at every level. Entries
+ * that cannot be stat'ed because they no longer exist (dangling symlinks, files
+ * removed during the walk) are ignored rather than aborting discovery.
+ *
+ * @param dir - Root directory to search.
+ * @returns Sorted list of matching file paths; empty when `dir` does not exist.
+ */
+export function findTestFiles(dir: string): string[] {
+  if (!existsSync(dir)) return [];
+  const out: string[] = [];
+  const walk = (current: string): void => {
+    for (const entry of readdirSync(current)) {
+      if (entry === "node_modules" || entry.startsWith(".")) continue;
+      const full = join(current, entry);
+      // throwIfNoEntry: false makes statSync return undefined instead of throwing ENOENT.
+      const stats = statSync(full, { throwIfNoEntry: false });
+      if (!stats) continue;
+      if (stats.isDirectory()) walk(full);
+      else if (TEST_FILE_PATTERN.test(entry)) out.push(full);
+    }
+  };
+  walk(dir);
+  return out.sort();
+}
+/**
+ * Import every test file under `testDir` so its `test(...)` calls register,
+ * then return the registered tests. Single-shot per process, because ESM
+ * caches modules and a second import would not re-run them.
+ *
+ * @param testDir - Directory searched for test files.
+ * @returns All registered test cases.
+ * @throws Error when two tests share the same (file, name) pair, or when test
+ *   files exist but nothing registered (the classic two-instances /
+ *   preserve-symlinks trap).
+ */
+export async function discoverTests(testDir: string): Promise<TestCase[]> {
+  const files = findTestFiles(testDir);
+  for (const file of files) {
+    // Tell the registry which file the upcoming registrations belong to.
+    setCurrentFile(file);
+    const url = pathToFileURL(file).href;
+    await import(url);
+  }
+  const tests = getTests();
+  // Reject duplicate (file, name) pairs — compare aligns results by test identity,
+  // so a collision would silently mis-classify regressions. Fail loudly instead.
+  const seen = new Set<string>();
+  for (const { file, name } of tests) {
+    const key = `${file}::${name}`;
+    if (!seen.has(key)) {
+      seen.add(key);
+      continue;
+    }
+    throw new Error(
+      `Duplicate test "${name}" in ${file}. Test names must be unique within a file.`
+    );
+  }
+  const nothingRegistered = tests.length === 0 && files.length > 0;
+  if (nothingRegistered) {
+    throw new Error(
+      `Found ${files.length} test file(s) under ${testDir} but none registered any tests.\n` +
+        `This usually means two copies of "@checklabs/core" were loaded (e.g. Node's ` +
+        `--preserve-symlinks is enabled). Disable it so the package resolves once.`
+    );
+  }
+  return tests;
+}

package/src/env.ts ADDED Viewed

@@ -0,0 +1,35 @@
+import { existsSync, readFileSync } from "node:fs";
+import { resolve } from "node:path";
+/**
+ * Tiny .env loader (no dependency). Loads `<rootDir>/.env` into process.env
+ * without overriding variables already present in the environment.
+ *
+ * Supported syntax: `KEY=value` and shell-style `export KEY=value`, one per
+ * line; blank lines and lines starting with `#` are ignored; a value wrapped
+ * in matching single or double quotes has the quotes removed. Lines without
+ * `=` are skipped. Does nothing when the file does not exist.
+ *
+ * @param rootDir - Directory expected to contain the `.env` file.
+ */
+export function loadEnv(rootDir: string): void {
+  const file = resolve(rootDir, ".env");
+  if (!existsSync(file)) return;
+  for (const rawLine of readFileSync(file, "utf8").split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (!line || line.startsWith("#")) continue;
+    const eq = line.indexOf("=");
+    if (eq === -1) continue;
+    // Accept `export KEY=value` so a file that is also sourced by a shell
+    // does not define a variable literally named "export KEY".
+    const key = line.slice(0, eq).trim().replace(/^export\s+/, "");
+    let value = line.slice(eq + 1).trim();
+    if (
+      value.length >= 2 &&
+      ((value.startsWith('"') && value.endsWith('"')) ||
+        (value.startsWith("'") && value.endsWith("'")))
+    ) {
+      value = value.slice(1, -1);
+    }
+    if (key && process.env[key] === undefined) process.env[key] = value;
+  }
+}
+/**
+ * Pick the backend to use.
+ *
+ * `CHECKAI_BACKEND` (case-insensitive) forces `"mock"` or `"openai"`. Any other
+ * value, or none, selects `"openai"` when `OPENAI_API_KEY` is set to a
+ * non-empty value and `"mock"` otherwise.
+ */
+export function resolveBackend(): "openai" | "mock" {
+  const requested = (process.env.CHECKAI_BACKEND ?? "auto").toLowerCase();
+  switch (requested) {
+    case "mock":
+      return "mock";
+    case "openai":
+      return "openai";
+    default:
+      return process.env.OPENAI_API_KEY ? "openai" : "mock";
+  }
+}

package/src/generate/index.ts ADDED Viewed

@@ -0,0 +1,103 @@
+import { TEMPLATES, type TestTemplate, type GeneratedScenario } from "./templates";
+export type { TestTemplate, GeneratedScenario };
+export { TEMPLATES };
+/** Inputs that control which template is used and how many scenarios are emitted. */
+export interface GenerateOptions {
+  /** Explicit template key (support | ecommerce | helpdesk | hr | billing). */
+  template?: string;
+  /** Free-text agent description (used to infer a template when none is given). */
+  description?: string;
+  /** Cap the number of scenarios emitted. */
+  count?: number;
+}
+/** Result of `generateTests`. */
+export interface GenerateOutput {
+  /** The template that was selected. */
+  template: TestTemplate;
+  /** The scenarios that were rendered (after any `count` cap). */
+  scenarios: GeneratedScenario[];
+  /** Source text of the generated test file. */
+  code: string;
+}
+/**
+ * Summarize the built-in templates.
+ *
+ * @returns One entry per template: its key, title, description and the number
+ *   of scenarios it contains.
+ */
+export function listTemplates(): { key: string; title: string; description: string; scenarios: number }[] {
+  return Object.values(TEMPLATES).map(({ key, title, description, scenarios }) => ({
+    key,
+    title,
+    description,
+    scenarios: scenarios.length,
+  }));
+}
+/**
+ * Guess the best-fitting template from a free-text agent description.
+ *
+ * Keyword groups are checked in a fixed order (ecommerce, helpdesk, hr,
+ * billing) and the first hit wins; `support` is the fallback. Short tokens
+ * are matched as whole words so they do not fire inside unrelated words
+ * ("credit"/"visit" for `it`, "crypto" for `pto`, "explain" for `plan`,
+ * "feedback" for `fee`). The acronym "IT" is matched case-sensitively on the
+ * original text, or in lower case only when followed by a word such as
+ * "support", so the pronoun "it" is not mistaken for it.
+ *
+ * @param description - Free-text description of the agent.
+ */
+function inferTemplate(description: string): TestTemplate {
+  const d = description.toLowerCase();
+  if (/cart|checkout|discount|coupon|shipping|store|ecommerce|e-commerce|return/.test(d)) return TEMPLATES.ecommerce;
+  const mentionsIt =
+    /\bIT\b/.test(description) || /\bit[ -](support|helpdesk|department|team|admin)\b/.test(d);
+  if (mentionsIt || /password|vpn|laptop|helpdesk|access|account|device|software/.test(d)) return TEMPLATES.helpdesk;
+  if (/\bpto\b|vacation|onboarding|benefit|payroll|employee|\bhr\b|human resources|leave/.test(d)) return TEMPLATES.hr;
+  if (/invoice|billing|subscription|charge|payment|\bplans?\b|\bfees?\b/.test(d)) return TEMPLATES.billing;
+  return TEMPLATES.support;
+}
+/**
+ * Choose the template to generate from.
+ *
+ * An explicit `opts.template` key takes precedence and must name a built-in
+ * template; otherwise the template is inferred from `opts.description`; with
+ * neither, the `support` template is used.
+ *
+ * @param opts - Generation options.
+ * @returns The selected template.
+ * @throws Error when `opts.template` is not a known template key.
+ */
+export function selectTemplate(opts: GenerateOptions): TestTemplate {
+  if (opts.template) {
+    // Own-property lookup: a plain index would also resolve inherited keys
+    // such as "constructor" and return something that is not a template.
+    const found = Object.prototype.hasOwnProperty.call(TEMPLATES, opts.template)
+      ? TEMPLATES[opts.template]
+      : undefined;
+    if (!found) {
+      throw new Error(
+        `Unknown template "${opts.template}". Available: ${Object.keys(TEMPLATES).join(", ")}`
+      );
+    }
+    return found;
+  }
+  if (opts.description) return inferTemplate(opts.description);
+  return TEMPLATES.support;
+}
+/**
+ * Render scenarios as the source text of a CheckAI test file.
+ *
+ * The output is a comment header, the `test`/`expect` import, and one
+ * `test(...)` block per scenario, grouped by category in order of first
+ * appearance. Scenario names and prompts are embedded with JSON.stringify.
+ *
+ * @param template - Template the scenarios came from (key/title go in the header).
+ * @param scenarios - Scenarios to render.
+ * @param description - Optional agent description echoed into the header,
+ *   flattened to one line and truncated to 200 characters.
+ * @returns The file contents, ending with a single trailing newline.
+ */
+function renderTestFile(
+  template: TestTemplate,
+  scenarios: GeneratedScenario[],
+  description?: string
+): string {
+  // Group by category, preserving first-seen order (Map keeps insertion order).
+  const byCat = new Map<string, GeneratedScenario[]>();
+  for (const sc of scenarios) {
+    const group = byCat.get(sc.category);
+    if (group) group.push(sc);
+    else byCat.set(sc.category, [sc]);
+  }
+  // The description lands inside a `//` comment. Every JavaScript line
+  // terminator (not just \n) would end that comment and let the rest of the
+  // text be parsed as code, so all of them are flattened to spaces.
+  const flatDescription = description
+    ? description.replace(/\r\n|[\r\n\u2028\u2029]/g, " ").slice(0, 200)
+    : null;
+  const header = [
+    `// Generated by \`checkai generate\` — template: ${template.key} (${template.title}).`,
+    flatDescription ? `// From description: ${flatDescription}` : null,
+    `//`,
+    `// These tests are a STARTING POINT. Review, edit, and keep the ones that fit your`,
+    `// agent — then commit them. CheckAI never regenerates this file automatically; you`,
+    `// own it from here. (${scenarios.length} scenarios across ${byCat.size} categories.)`,
+    ``,
+    `import { test, expect } from "@checklabs/checkai";`,
+    ``,
+  ]
+    .filter((line) => line !== null)
+    .join("\n");
+  let body = "\n";
+  for (const [category, group] of byCat) {
+    body += `// --- ${category} ---\n\n`;
+    for (const sc of group) {
+      const asserts = sc.assertions.map((a) => `  ${a}`).join("\n");
+      body += `test(${JSON.stringify(sc.name)}, async ({ agent }) => {\n`;
+      body += `  const result = await agent.run(${JSON.stringify(sc.prompt)});\n`;
+      body += `${asserts}\n`;
+      body += `});\n\n`;
+    }
+  }
+  return header + body.trimEnd() + "\n";
+}
+/**
+ * Generate a CheckAI test file from a template (and/or description).
+ *
+ * @param opts - Template key, description and optional scenario cap. The cap
+ *   applies only when it is positive and smaller than the template's scenario
+ *   count; the first `count` scenarios are kept.
+ * @returns The selected template, the scenarios used, and the rendered code.
+ */
+export function generateTests(opts: GenerateOptions): GenerateOutput {
+  const template = selectTemplate(opts);
+  const limit = opts.count;
+  const shouldTruncate = !!limit && limit > 0 && limit < template.scenarios.length;
+  const scenarios = shouldTruncate ? template.scenarios.slice(0, limit) : template.scenarios;
+  return {
+    template,
+    scenarios,
+    code: renderTestFile(template, scenarios, opts.description),
+  };
+}
+}