npm - unrag - Versions diffs - 0.2.8 → 0.2.9 - Mend

unrag 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/cli/index.js +982 -59
package/package.json +1 -1
package/registry/core/ingest.ts +35 -8
package/registry/core/types.ts +8 -1
package/registry/docs/unrag.md +3 -1
package/registry/eval/dataset.ts +224 -0
package/registry/eval/index.ts +39 -0
package/registry/eval/metrics.ts +85 -0
package/registry/eval/report.ts +342 -0
package/registry/eval/runner.ts +450 -0
package/registry/manifest.json +9 -0
package/registry/store/drizzle-postgres-pgvector/schema.ts +1 -1
package/registry/store/drizzle-postgres-pgvector/store.ts +40 -44
package/registry/store/prisma-postgres-pgvector/store.ts +25 -20
package/registry/store/raw-sql-postgres-pgvector/store.ts +24 -22

package/dist/cli/index.js CHANGED

@@ -396,6 +396,10 @@ async function copyRegistryFiles(selection) {
       src: path2.join(selection.registryRoot, "core/retrieve.ts"),
       dest: path2.join(installBaseAbs, "core/retrieve.ts")
     },
+    {
+      src: path2.join(selection.registryRoot, "core/rerank.ts"),
+      dest: path2.join(installBaseAbs, "core/rerank.ts")
+    },
     {
       src: path2.join(selection.registryRoot, "embedding/_shared.ts"),
       dest: path2.join(installBaseAbs, "embedding/_shared.ts")
@@ -707,6 +711,9 @@ function isPresetPayloadV1(x) {
     return false;
   if (!Array.isArray(o.modules.extractors) || !Array.isArray(o.modules.connectors))
     return false;
+  if ("batteries" in o.modules && o.modules.batteries != null && !Array.isArray(o.modules.batteries)) {
+    return false;
+  }
   return true;
 }
 function toPresetUrl(input) {
@@ -879,6 +886,7 @@ function depsForBattery(battery) {
     deps["ai"] = "^6.0.3";
     deps["@ai-sdk/cohere"] = "^3.0.1";
   }
+  if (battery === "eval") {}
   return { deps, devDeps };
 }
 function installCmd(pm) {
@@ -983,6 +991,314 @@ async function patchTsconfigPaths(params) {
   return { changed: true, file: configFile };
 }
+// cli/commands/init.ts
+import { writeFile as writeFile5 } from "node:fs/promises";
+// cli/lib/evalBatteryScaffold.ts
+var EVAL_SAMPLE_DATASET_V1 = {
+  version: "1",
+  id: "sample",
+  description: "Tiny dataset to validate retrieval changes.",
+  defaults: {
+    topK: 10,
+    scopePrefix: "eval:sample:",
+    mode: "retrieve",
+    thresholds: { min: { recallAtK: 0.75 } }
+  },
+  documents: [
+    {
+      sourceId: "eval:sample:doc:refund-policy",
+      content: "Refunds are available within 30 days of purchase, provided you have a receipt."
+    },
+    {
+      sourceId: "eval:sample:doc:contact-support",
+      content: "Contact support by emailing support@example.com. Response times are typically under 24 hours."
+    }
+  ],
+  queries: [
+    {
+      id: "q_refund_window",
+      query: "How long do I have to request a refund?",
+      relevant: { sourceIds: ["eval:sample:doc:refund-policy"] }
+    },
+    {
+      id: "q_contact_support",
+      query: "How do I contact support?",
+      relevant: { sourceIds: ["eval:sample:doc:contact-support"] }
+    }
+  ]
+};
+var EVAL_CONFIG_DEFAULT = {
+  thresholds: { min: { recallAtK: 0.75 } },
+  cleanup: "none",
+  ingest: true
+};
+var EVAL_PACKAGE_JSON_SCRIPTS = {
+  "unrag:eval": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json`,
+  "unrag:eval:ci": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json --ci`
+};
+function renderEvalRunnerScript(opts) {
+  const installImportBase = `../${opts.installDir.replace(/\\/g, "/")}`;
+  return `/**
+ * Unrag eval runner entrypoint (generated).
+ *
+ * You own this file — customize it freely.
+ */
+import path from "node:path";
+import { access, readFile } from "node:fs/promises";
+import { createUnragEngine } from "../unrag.config";
+import {
+  runEval,
+  readEvalReportFromFile,
+  writeEvalReport,
+  writeEvalSummaryMd,
+  diffEvalReports,
+  writeEvalDiffJson,
+  writeEvalDiffMd,
+  type EvalMode,
+  type EvalThresholds,
+  type EvalCleanupPolicy,
+} from "${installImportBase}/eval";
+type CliArgs = {
+  dataset?: string;
+  baseline?: string;
+  outputDir?: string;
+  mode?: EvalMode;
+  topK?: number;
+  rerankTopK?: number;
+  scopePrefix?: string;
+  ingest?: boolean;
+  cleanup?: EvalCleanupPolicy;
+  thresholds?: Partial<EvalThresholds>;
+  ci?: boolean;
+  allowAssets?: boolean;
+  allowNonEvalPrefix?: boolean;
+  yes?: boolean;
+  includeNdcg?: boolean;
+};
+async function fileExists(p: string): Promise<boolean> {
+  try {
+    await access(p);
+    return true;
+  } catch {
+    return false;
+  }
+}
+async function loadEnvFilesBestEffort(projectRoot: string) {
+  const nodeEnv = process.env.NODE_ENV ?? "development";
+  const candidates = [
+    ".env",
+    ".env.local",
+    \`.env.\${nodeEnv}\`,
+    \`.env.\${nodeEnv}.local\`,
+  ];
+  for (const rel of candidates) {
+    const abs = path.join(projectRoot, rel);
+    if (!(await fileExists(abs))) continue;
+    try {
+      const raw = await readFile(abs, "utf8");
+      for (const line of raw.split(/\\r?\\n/)) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) continue;
+        const eq = trimmed.indexOf("=");
+        if (eq < 0) continue;
+        const key = trimmed.slice(0, eq).trim();
+        const value = trimmed.slice(eq + 1).trim().replace(/^"|"$/g, "");
+        if (!key) continue;
+        if (process.env[key] == null) process.env[key] = value;
+      }
+    } catch {
+      // ignore
+    }
+  }
+}
+function parseThresholdExpr(expr: string): Partial<EvalThresholds> {
+  // Accept both:
+  // - "min.recallAtK=0.75"
+  // - "recallAtK=0.75" (shorthand for min)
+  const [lhsRaw, rhsRaw] = String(expr ?? "").split("=");
+  const lhs = (lhsRaw ?? "").trim();
+  const rhs = Number(String(rhsRaw ?? "").trim());
+  if (!lhs || Number.isNaN(rhs)) return {};
+  const parts = lhs.split(".").map((p) => p.trim()).filter(Boolean);
+  const level = parts.length === 2 ? parts[0] : "min";
+  const metric = parts.length === 2 ? parts[1] : parts[0];
+  if (level !== "min") return {};
+  const allowed = new Set(["hitAtK", "precisionAtK", "recallAtK", "mrrAtK", "ndcgAtK"]);
+  if (!allowed.has(metric)) return {};
+  return { min: { [metric]: rhs } } as any;
+}
+function mergeThresholds(
+  a: Partial<EvalThresholds> | undefined,
+  b: Partial<EvalThresholds> | undefined
+): Partial<EvalThresholds> | undefined {
+  if (!a && !b) return undefined;
+  const out: any = { ...(a ?? {}) };
+  if (b?.min) out.min = { ...(out.min ?? {}), ...(b.min as any) };
+  return out;
+}
+function parseArgs(argv: string[]): CliArgs {
+  const out: CliArgs = {};
+  const thresholds: Partial<EvalThresholds>[] = [];
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === "--dataset") out.dataset = argv[++i];
+    else if (a === "--baseline") out.baseline = argv[++i];
+    else if (a === "--outputDir" || a === "--output-dir") out.outputDir = argv[++i];
+    else if (a === "--mode") out.mode = argv[++i] as EvalMode;
+    else if (a === "--topK" || a === "--top-k") out.topK = Number(argv[++i]);
+    else if (a === "--rerankTopK" || a === "--rerank-top-k") out.rerankTopK = Number(argv[++i]);
+    else if (a === "--scopePrefix" || a === "--scope-prefix") out.scopePrefix = argv[++i];
+    else if (a === "--no-ingest") out.ingest = false;
+    else if (a === "--cleanup") out.cleanup = argv[++i] as EvalCleanupPolicy;
+    else if (a === "--threshold") thresholds.push(parseThresholdExpr(argv[++i] ?? ""));
+    else if (a === "--ci") out.ci = true;
+    else if (a === "--allow-assets") out.allowAssets = true;
+    else if (a === "--allow-non-eval-prefix" || a === "--allow-custom-prefix") out.allowNonEvalPrefix = true;
+    else if (a === "--yes" || a === "-y") out.yes = true;
+    else if (a === "--include-ndcg") out.includeNdcg = true;
+    else if (a === "--help" || a === "-h") {
+      printHelp();
+      process.exit(0);
+    }
+  }
+  for (const t of thresholds) out.thresholds = mergeThresholds(out.thresholds ?? {}, t);
+  return out;
+}
+function printHelp() {
+  console.log(
+    [
+      "unrag-eval — retrieval eval harness",
+      "",
+      "Usage:",
+      "  bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json",
+      "",
+      "Options:",
+      "  --dataset <path>                 Dataset JSON path (required)",
+      "  --baseline <report.json>         Baseline report for diffing",
+      "  --output-dir <dir>               Output dir (default: .unrag/eval/runs/<ts>-<datasetId>)",
+      "  --mode retrieve|retrieve+rerank   Override mode",
+      "  --top-k <n>                      Override topK",
+      "  --rerank-top-k <n>               In rerank mode, retrieve N candidates before reranking (default: topK*3)",
+      "  --scope-prefix <prefix>          Override scopePrefix",
+      "  --no-ingest                       Skip dataset document ingest",
+      "  --cleanup none|on-success|always  Cleanup policy when ingesting",
+      "  --threshold <k=v>                 Repeatable thresholds (e.g. min.recallAtK=0.75)",
+      "  --ci                              CI mode (non-interactive)",
+      "  --yes, -y                         Allow dangerous operations when explicitly enabled",
+      "  --allow-assets                    Allow documents[].assets ingestion (advanced)",
+      "  --allow-custom-prefix             Allow scopePrefix outside eval:* (dangerous)",
+      "  --include-ndcg                    Compute nDCG@k (optional)",
+    ].join("\\n")
+  );
+}
+async function readConfigFile(projectRoot: string): Promise<any | null> {
+  const abs = path.join(projectRoot, ".unrag/eval/config.json");
+  if (!(await fileExists(abs))) return null;
+  const raw = await readFile(abs, "utf8");
+  try {
+    return JSON.parse(raw);
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    throw new Error(\`Failed to parse .unrag/eval/config.json: \${msg}\`);
+  }
+}
+function sanitizeMode(v: any): EvalMode | undefined {
+  if (v === "retrieve" || v === "retrieve+rerank") return v;
+  return undefined;
+}
+function sanitizeCleanup(v: any): EvalCleanupPolicy | undefined {
+  if (v === "none" || v === "on-success" || v === "always") return v;
+  return undefined;
+}
+async function main() {
+  const projectRoot = path.join(process.cwd());
+  await loadEnvFilesBestEffort(projectRoot);
+  const cli = parseArgs(process.argv.slice(2));
+  const cfg = await readConfigFile(projectRoot);
+  const datasetPath = cli.dataset ?? cfg?.dataset ?? ".unrag/eval/datasets/sample.json";
+  if (!datasetPath) throw new Error("--dataset is required");
+  const engine = createUnragEngine();
+  const mode = sanitizeMode(cli.mode ?? cfg?.mode) ?? undefined;
+  const cleanup = sanitizeCleanup(cli.cleanup ?? cfg?.cleanup) ?? undefined;
+  const result = await runEval({
+    engine,
+    datasetPath,
+    mode,
+    topK: typeof cli.topK === "number" ? cli.topK : undefined,
+    rerankTopK: typeof cli.rerankTopK === "number" ? cli.rerankTopK : undefined,
+    scopePrefix: typeof cli.scopePrefix === "string" ? cli.scopePrefix : undefined,
+    ingest: typeof cli.ingest === "boolean" ? cli.ingest : (typeof cfg?.ingest === "boolean" ? cfg.ingest : undefined),
+    cleanup,
+    thresholds: mergeThresholds(cfg?.thresholds, cli.thresholds),
+    ci: Boolean(cli.ci),
+    allowAssets: Boolean(cli.allowAssets),
+    allowNonEvalPrefix: Boolean(cli.allowNonEvalPrefix),
+    yes: Boolean(cli.yes),
+    includeNdcg: Boolean(cli.includeNdcg),
+  });
+  const outputDir = cli.outputDir ?? cfg?.outputDir ?? result.outputDir;
+  const reportPath = await writeEvalReport(outputDir, result.report);
+  const summaryPath = await writeEvalSummaryMd(outputDir, result.report);
+  let diffPaths: { json: string; md: string } | null = null;
+  const baselinePath = cli.baseline ?? cfg?.baseline;
+  if (baselinePath) {
+    const baseline = await readEvalReportFromFile(baselinePath);
+    const diff = diffEvalReports({ baseline, candidate: result.report, baselinePath, candidatePath: reportPath });
+    const diffJson = await writeEvalDiffJson(outputDir, diff);
+    const diffMd = await writeEvalDiffMd(outputDir, diff);
+    diffPaths = { json: diffJson, md: diffMd };
+  }
+  console.log(
+    [
+      \`[unrag:eval] Wrote report: \${reportPath}\`,
+      \`[unrag:eval] Wrote summary: \${summaryPath}\`,
+      diffPaths ? \`[unrag:eval] Wrote diff: \${diffPaths.json} (+ \${diffPaths.md})\` : "",
+      result.thresholdFailures.length > 0
+        ? \`[unrag:eval] Threshold failures:\\n- \${result.thresholdFailures.join("\\n- ")}\`
+        : \`[unrag:eval] Thresholds: pass\`,
+    ]
+      .filter(Boolean)
+      .join("\\n")
+  );
+  process.exitCode = result.exitCode;
+}
+main().catch((err) => {
+  const msg = err instanceof Error ? err.stack ?? err.message : String(err);
+  console.error(\`[unrag:eval] Error: \${msg}\`);
+  process.exitCode = 2;
+});
+`;
+}
 // cli/commands/init.ts
 var CONFIG_FILE = "unrag.json";
 var CONFIG_VERSION = 1;
@@ -1069,6 +1385,7 @@ var parseInitArgs = (args) => {
 };
 var toExtractors = (xs) => (Array.isArray(xs) ? xs : []).map((s) => String(s).trim()).filter(Boolean);
 var toConnectors = (xs) => (Array.isArray(xs) ? xs : []).map((s) => String(s).trim()).filter(Boolean);
+var toBatteries = (xs) => (Array.isArray(xs) ? xs : []).map((s) => String(s).trim()).filter(Boolean);
 async function initCommand(args) {
   const root = await tryFindProjectRoot(process.cwd());
   if (!root) {
@@ -1291,7 +1608,34 @@ async function initCommand(args) {
     Object.assign(connectorDeps, r.deps);
     Object.assign(connectorDevDeps, r.devDeps);
   }
-  const merged = mergeDeps(pkg, { ...deps, ...embeddingDeps.deps, ...extractorDeps, ...connectorDeps }, { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, ...connectorDevDeps });
+  const batteriesFromPreset = preset ? Array.from(new Set(toBatteries(preset.modules?.batteries))).sort() : [];
+  const availableBatteryIds = new Set((manifest.batteries ?? []).filter((b) => b.status === "available").map((b) => String(b.id)));
+  if (preset) {
+    const unknown = batteriesFromPreset.filter((b) => !availableBatteryIds.has(b));
+    if (unknown.length > 0) {
+      throw new Error(`Preset contains unknown/unavailable batteries: ${unknown.join(", ")}`);
+    }
+  }
+  if (batteriesFromPreset.length > 0) {
+    for (const battery of batteriesFromPreset) {
+      await copyBatteryFiles({
+        projectRoot: root,
+        registryRoot,
+        installDir,
+        battery,
+        yes: nonInteractive,
+        overwrite: overwritePolicy
+      });
+    }
+  }
+  const batteryDeps = {};
+  const batteryDevDeps = {};
+  for (const b of batteriesFromPreset) {
+    const r = depsForBattery(b);
+    Object.assign(batteryDeps, r.deps);
+    Object.assign(batteryDevDeps, r.devDeps);
+  }
+  const merged = mergeDeps(pkg, { ...deps, ...embeddingDeps.deps, ...extractorDeps, ...connectorDeps, ...batteryDeps }, { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, ...connectorDevDeps, ...batteryDevDeps });
   if (merged.changes.length > 0) {
     await writePackageJson(root, merged.pkg);
     if (!noInstall) {
@@ -1308,9 +1652,42 @@ async function initCommand(args) {
     extractors: Array.from(new Set([
       ...existing?.extractors ?? [],
       ...richMediaEnabled ? selectedExtractors : []
-    ])).sort()
+    ])).sort(),
+    batteries: Array.from(new Set([...existing?.batteries ?? [], ...batteriesFromPreset])).sort()
   };
   await writeJsonFile(path6.join(root, CONFIG_FILE), config);
+  const writeTextFile = async (absPath, content) => {
+    await ensureDir(path6.dirname(absPath));
+    await writeFile5(absPath, content, "utf8");
+  };
+  const writeIfMissing = async (absPath, content) => {
+    if (await exists(absPath))
+      return false;
+    await writeTextFile(absPath, content);
+    return true;
+  };
+  if (batteriesFromPreset.includes("eval")) {
+    const datasetAbs = path6.join(root, ".unrag/eval/datasets/sample.json");
+    const evalConfigAbs = path6.join(root, ".unrag/eval/config.json");
+    const scriptAbs = path6.join(root, "scripts/unrag-eval.ts");
+    await writeIfMissing(datasetAbs, JSON.stringify(EVAL_SAMPLE_DATASET_V1, null, 2) + `
+`);
+    await writeIfMissing(evalConfigAbs, JSON.stringify(EVAL_CONFIG_DEFAULT, null, 2) + `
+`);
+    await writeIfMissing(scriptAbs, renderEvalRunnerScript({ installDir }));
+    const pkg2 = await readPackageJson(root);
+    const existingScripts = pkg2.scripts ?? {};
+    const toAdd = {};
+    for (const [name, cmd] of Object.entries(EVAL_PACKAGE_JSON_SCRIPTS)) {
+      if (!(name in existingScripts)) {
+        toAdd[name] = cmd;
+      }
+    }
+    if (Object.keys(toAdd).length > 0) {
+      pkg2.scripts = { ...existingScripts, ...toAdd };
+      await writePackageJson(root, pkg2);
+    }
+  }
   const pm = await detectPackageManager(root);
   const installLine = merged.changes.length === 0 ? "Dependencies already satisfied." : noInstall ? `Next: run \`${installCmd(pm)}\`` : "Dependencies installed.";
   const isNext = Boolean((merged.pkg.dependencies ?? {})["next"]) || Boolean((merged.pkg.devDependencies ?? {})["next"]);
@@ -1437,12 +1814,92 @@ async function initCommand(args) {
 }
 // cli/commands/add.ts
-import { outro as outro2 } from "@clack/prompts";
+import { cancel as cancel3, confirm as confirm3, isCancel as isCancel3, outro as outro2, select as select2, text as text2 } from "@clack/prompts";
+import { writeFile as writeFile6 } from "node:fs/promises";
 import path7 from "node:path";
 import { fileURLToPath as fileURLToPath2 } from "node:url";
 var CONFIG_FILE2 = "unrag.json";
 var __filename3 = fileURLToPath2(import.meta.url);
 var __dirname3 = path7.dirname(__filename3);
+var writeTextFile = async (absPath, content) => {
+  await ensureDir(path7.dirname(absPath));
+  await writeFile6(absPath, content, "utf8");
+};
+var shouldWriteFile = async (absPath, projectRoot, nonInteractive) => {
+  if (!await exists(absPath))
+    return true;
+  if (nonInteractive)
+    return false;
+  const answer = await confirm3({
+    message: `Overwrite ${path7.relative(projectRoot, absPath)}?`,
+    initialValue: false
+  });
+  if (isCancel3(answer)) {
+    cancel3("Cancelled.");
+    return false;
+  }
+  return Boolean(answer);
+};
+var addPackageJsonScripts = async (args) => {
+  const existing = args.pkg.scripts ?? {};
+  const desired = args.scripts;
+  const conflicting = Object.keys(desired).filter((k) => (k in existing));
+  const toAdd = { ...desired };
+  if (conflicting.length > 0 && args.nonInteractive) {
+    for (const k of conflicting)
+      delete toAdd[k];
+  }
+  if (conflicting.length > 0 && !args.nonInteractive) {
+    for (const scriptName of conflicting) {
+      const action = await select2({
+        message: `Script "${scriptName}" already exists. What would you like to do?`,
+        options: [
+          { value: "keep", label: "Keep existing", hint: existing[scriptName] },
+          { value: "overwrite", label: "Overwrite", hint: desired[scriptName] },
+          { value: "rename", label: "Add with different name", hint: `${scriptName}:new` }
+        ],
+        initialValue: "keep"
+      });
+      if (isCancel3(action)) {
+        cancel3("Cancelled.");
+        return { added: [], kept: Object.keys(desired) };
+      }
+      if (action === "keep") {
+        delete toAdd[scriptName];
+        continue;
+      }
+      if (action === "rename") {
+        const newName = await text2({
+          message: `New script name for ${scriptName}`,
+          initialValue: `${scriptName}:new`,
+          validate: (v) => {
+            const s = String(v).trim();
+            if (!s)
+              return "Script name is required";
+            if (s in existing || s in toAdd)
+              return "Script name already exists";
+            return;
+          }
+        });
+        if (isCancel3(newName)) {
+          cancel3("Cancelled.");
+          return { added: [], kept: Object.keys(desired) };
+        }
+        const nextName = String(newName).trim();
+        const value = toAdd[scriptName];
+        delete toAdd[scriptName];
+        toAdd[nextName] = value;
+      }
+    }
+  }
+  const added = Object.keys(toAdd);
+  if (added.length > 0) {
+    args.pkg.scripts = { ...existing, ...toAdd };
+    await writePackageJson(args.projectRoot, args.pkg);
+  }
+  const kept = conflicting.filter((k) => !(k in toAdd));
+  return { added, kept };
+};
 var parseAddArgs = (args) => {
   const out = {};
   for (let i = 0;i < args.length; i++) {
@@ -1539,6 +1996,339 @@ Available batteries: ${Array.from(availableBatteries).join(", ")}`);
     }
     const batteries = Array.from(new Set([...config.batteries ?? [], battery])).sort();
     await writeJsonFile(configPath, { ...config, batteries });
+    if (battery === "eval") {
+      const datasetAbs = path7.join(root, ".unrag/eval/datasets/sample.json");
+      const configAbs = path7.join(root, ".unrag/eval/config.json");
+      const scriptAbs = path7.join(root, "scripts/unrag-eval.ts");
+      const sampleDataset = {
+        version: "1",
+        id: "sample",
+        description: "Tiny dataset to validate retrieval changes.",
+        defaults: {
+          topK: 10,
+          scopePrefix: "eval:sample:",
+          mode: "retrieve",
+          thresholds: { min: { recallAtK: 0.75 } }
+        },
+        documents: [
+          {
+            sourceId: "eval:sample:doc:refund-policy",
+            content: "Refunds are available within 30 days of purchase, provided you have a receipt."
+          },
+          {
+            sourceId: "eval:sample:doc:contact-support",
+            content: "Contact support by emailing support@example.com. Response times are typically under 24 hours."
+          }
+        ],
+        queries: [
+          {
+            id: "q_refund_window",
+            query: "How long do I have to request a refund?",
+            relevant: { sourceIds: ["eval:sample:doc:refund-policy"] }
+          },
+          {
+            id: "q_contact_support",
+            query: "How do I contact support?",
+            relevant: { sourceIds: ["eval:sample:doc:contact-support"] }
+          }
+        ]
+      };
+      const evalConfig = {
+        thresholds: { min: { recallAtK: 0.75 } },
+        cleanup: "none",
+        ingest: true
+      };
+      const installImportBase = `../${config.installDir.replace(/\\/g, "/")}`;
+      const script = `/**
+ * Unrag eval runner entrypoint (generated).
+ *
+ * You own this file — customize it freely.
+ */
+import path from "node:path";
+import { access, readFile } from "node:fs/promises";
+import { createUnragEngine } from "../unrag.config";
+import {
+  runEval,
+  readEvalReportFromFile,
+  writeEvalReport,
+  writeEvalSummaryMd,
+  diffEvalReports,
+  writeEvalDiffJson,
+  writeEvalDiffMd,
+  type EvalMode,
+  type EvalThresholds,
+  type EvalCleanupPolicy,
+} from "${installImportBase}/eval";
+type CliArgs = {
+  dataset?: string;
+  baseline?: string;
+  outputDir?: string;
+  mode?: EvalMode;
+  topK?: number;
+  rerankTopK?: number;
+  scopePrefix?: string;
+  ingest?: boolean;
+  cleanup?: EvalCleanupPolicy;
+  thresholds?: Partial<EvalThresholds>;
+  ci?: boolean;
+  allowAssets?: boolean;
+  allowNonEvalPrefix?: boolean;
+  yes?: boolean;
+  includeNdcg?: boolean;
+};
+async function fileExists(p: string): Promise<boolean> {
+  try {
+    await access(p);
+    return true;
+  } catch {
+    return false;
+  }
+}
+async function loadEnvFilesBestEffort(projectRoot: string) {
+  const nodeEnv = process.env.NODE_ENV ?? "development";
+  const candidates = [
+    ".env",
+    ".env.local",
+    \`.env.\${nodeEnv}\`,
+    \`.env.\${nodeEnv}.local\`,
+  ];
+  for (const rel of candidates) {
+    const abs = path.join(projectRoot, rel);
+    if (!(await fileExists(abs))) continue;
+    const raw = await readFile(abs, "utf8").catch(() => "");
+    for (const line of raw.split(/\\r?\\n/)) {
+      const s = line.trim();
+      if (!s || s.startsWith("#")) continue;
+      const eq = s.indexOf("=");
+      if (eq < 0) continue;
+      const key = s.slice(0, eq).trim();
+      const value = s.slice(eq + 1).trim().replace(/^"|"$/g, "");
+      if (!key) continue;
+      if (process.env[key] === undefined) process.env[key] = value;
+    }
+  }
+}
+function parseThresholdExpr(expr: string): Partial<EvalThresholds> {
+  const s = String(expr ?? "").trim();
+  const eq = s.indexOf("=");
+  if (eq < 0) throw new Error(\`Invalid --threshold: "\${s}" (expected key=value)\`);
+  const key = s.slice(0, eq).trim();
+  const value = Number(s.slice(eq + 1).trim());
+  if (!Number.isFinite(value)) throw new Error(\`Invalid --threshold value: "\${s}"\`);
+  const out: Partial<EvalThresholds> = {};
+  if (key === "min.hitAtK") out.min = { hitAtK: value };
+  else if (key === "min.recallAtK") out.min = { recallAtK: value };
+  else if (key === "min.mrrAtK") out.min = { mrrAtK: value };
+  else if (key === "max.p95TotalMs") out.max = { p95TotalMs: value };
+  else throw new Error(\`Unknown threshold key: "\${key}"\`);
+  return out;
+}
+function mergeThresholds(a: Partial<EvalThresholds>, b: Partial<EvalThresholds>): Partial<EvalThresholds> {
+  return {
+    min: { ...(a.min ?? {}), ...(b.min ?? {}) },
+    max: { ...(a.max ?? {}), ...(b.max ?? {}) },
+  };
+}
+function parseArgs(argv: string[]): CliArgs {
+  const out: CliArgs = {};
+  const thresholds: Partial<EvalThresholds>[] = [];
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === "--dataset") out.dataset = argv[++i];
+    else if (a === "--baseline") out.baseline = argv[++i];
+    else if (a === "--outputDir" || a === "--output-dir") out.outputDir = argv[++i];
+    else if (a === "--mode") out.mode = argv[++i] as EvalMode;
+    else if (a === "--topK" || a === "--top-k") out.topK = Number(argv[++i]);
+    else if (a === "--rerankTopK" || a === "--rerank-top-k") out.rerankTopK = Number(argv[++i]);
+    else if (a === "--scopePrefix" || a === "--scope-prefix") out.scopePrefix = argv[++i];
+    else if (a === "--no-ingest") out.ingest = false;
+    else if (a === "--cleanup") out.cleanup = argv[++i] as EvalCleanupPolicy;
+    else if (a === "--threshold") thresholds.push(parseThresholdExpr(argv[++i] ?? ""));
+    else if (a === "--ci") out.ci = true;
+    else if (a === "--allow-assets") out.allowAssets = true;
+    else if (a === "--allow-non-eval-prefix" || a === "--allow-custom-prefix") out.allowNonEvalPrefix = true;
+    else if (a === "--yes" || a === "-y") out.yes = true;
+    else if (a === "--include-ndcg") out.includeNdcg = true;
+    else if (a === "--help" || a === "-h") {
+      printHelp();
+      process.exit(0);
+    }
+  }
+  for (const t of thresholds) out.thresholds = mergeThresholds(out.thresholds ?? {}, t);
+  return out;
+}
+function printHelp() {
+  console.log(
+    [
+      "unrag-eval — retrieval eval harness",
+      "",
+      "Usage:",
+      "  bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json",
+      "",
+      "Options:",
+      "  --dataset <path>                 Dataset JSON path (required)",
+      "  --baseline <report.json>         Baseline report for diffing",
+      "  --output-dir <dir>               Output dir (default: .unrag/eval/runs/<ts>-<datasetId>)",
+      "  --mode retrieve|retrieve+rerank   Override mode",
+      "  --top-k <n>                      Override topK",
+      "  --rerank-top-k <n>               In rerank mode, retrieve N candidates before reranking (default: topK*3)",
+      "  --scope-prefix <prefix>          Override scopePrefix",
+      "  --no-ingest                       Skip dataset document ingest",
+      "  --cleanup none|on-success|always  Cleanup policy when ingesting",
+      "  --threshold <k=v>                 Repeatable thresholds (e.g. min.recallAtK=0.75)",
+      "  --ci                              CI mode (non-interactive)",
+      "  --yes, -y                         Allow dangerous operations when explicitly enabled",
+      "  --allow-assets                    Allow documents[].assets ingestion (advanced)",
+      "  --allow-custom-prefix             Allow scopePrefix outside eval:* (dangerous)",
+      "  --include-ndcg                    Compute nDCG@k (optional)",
+    ].join("\\n")
+  );
+}
+async function readConfigFile(projectRoot: string): Promise<any | null> {
+  const abs = path.join(projectRoot, ".unrag/eval/config.json");
+  if (!(await fileExists(abs))) return null;
+  const raw = await readFile(abs, "utf8");
+  try {
+    return JSON.parse(raw);
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    throw new Error(\`Failed to parse .unrag/eval/config.json: \${msg}\`);
+  }
+}
+function sanitizeMode(v: any): EvalMode | undefined {
+  if (v === "retrieve" || v === "retrieve+rerank") return v;
+  return undefined;
+}
+function sanitizeCleanup(v: any): EvalCleanupPolicy | undefined {
+  if (v === "none" || v === "on-success" || v === "always") return v;
+  return undefined;
+}
+async function main() {
+  const projectRoot = path.join(process.cwd());
+  await loadEnvFilesBestEffort(projectRoot);
+  const cli = parseArgs(process.argv.slice(2));
+  const cfg = await readConfigFile(projectRoot);
+  const datasetPath = cli.dataset ?? cfg?.dataset ?? ".unrag/eval/datasets/sample.json";
+  if (!datasetPath) throw new Error("--dataset is required");
+  const engine = createUnragEngine();
+  const thresholds: Partial<EvalThresholds> = mergeThresholds(cfg?.thresholds ?? {}, cli.thresholds ?? {});
+  const result = await runEval({
+    engine,
+    datasetPath,
+    mode: cli.mode ?? sanitizeMode(cfg?.mode),
+    topK: cli.topK ?? (typeof cfg?.topK === "number" ? cfg.topK : undefined),
+    rerankTopK: cli.rerankTopK ?? (typeof cfg?.rerankTopK === "number" ? cfg.rerankTopK : undefined),
+    scopePrefix: cli.scopePrefix ?? (typeof cfg?.scopePrefix === "string" ? cfg.scopePrefix : undefined),
+    ingest: cli.ingest ?? (typeof cfg?.ingest === "boolean" ? cfg.ingest : undefined),
+    cleanup: cli.cleanup ?? sanitizeCleanup(cfg?.cleanup) ?? "none",
+    includeNdcg: cli.includeNdcg ?? Boolean(cfg?.includeNdcg),
+    allowAssets: cli.allowAssets ?? Boolean(cfg?.allowAssets),
+    allowNonEvalPrefix: cli.allowNonEvalPrefix ?? Boolean(cfg?.allowNonEvalPrefix),
+    confirmedDangerousDelete: Boolean(cli.yes),
+    thresholds,
+  });
+  const ts = new Date().toISOString().replace(/[:.]/g, "-");
+  const outputDir =
+    cli.outputDir ??
+    cfg?.outputDir ??
+    path.join(".unrag/eval/runs", \`\${ts}-\${result.report.dataset.id}\`);
+  const reportPath = await writeEvalReport(outputDir, result.report);
+  const summaryPath = await writeEvalSummaryMd(outputDir, result.report);
+  let diffPaths: { json: string; md: string } | null = null;
+  const baselinePath = cli.baseline ?? cfg?.baseline;
+  if (baselinePath) {
+    const baseline = await readEvalReportFromFile(baselinePath);
+    const diff = diffEvalReports({ baseline, candidate: result.report, baselinePath, candidatePath: reportPath });
+    const diffJson = await writeEvalDiffJson(outputDir, diff);
+    const diffMd = await writeEvalDiffMd(outputDir, diff);
+    diffPaths = { json: diffJson, md: diffMd };
+  }
+  console.log(
+    [
+      \`[unrag:eval] Wrote report: \${reportPath}\`,
+      \`[unrag:eval] Wrote summary: \${summaryPath}\`,
+      diffPaths ? \`[unrag:eval] Wrote diff: \${diffPaths.json} (+ \${diffPaths.md})\` : "",
+      result.thresholdFailures.length > 0
+        ? \`[unrag:eval] Threshold failures:\\n- \${result.thresholdFailures.join("\\n- ")}\`
+        : \`[unrag:eval] Thresholds: pass\`,
+    ]
+      .filter(Boolean)
+      .join("\\n")
+  );
+  process.exitCode = result.exitCode;
+}
+main().catch((err) => {
+  const msg = err instanceof Error ? err.stack ?? err.message : String(err);
+  console.error(\`[unrag:eval] Error: \${msg}\`);
+  process.exitCode = 2;
+});
+`;
+      if (await shouldWriteFile(datasetAbs, root, nonInteractive)) {
+        await writeTextFile(datasetAbs, JSON.stringify(sampleDataset, null, 2) + `
+`);
+      }
+      if (await shouldWriteFile(configAbs, root, nonInteractive)) {
+        await writeTextFile(configAbs, JSON.stringify(evalConfig, null, 2) + `
+`);
+      }
+      if (await shouldWriteFile(scriptAbs, root, nonInteractive)) {
+        await writeTextFile(scriptAbs, script);
+      }
+      const scriptsToAdd = {
+        "unrag:eval": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json`,
+        "unrag:eval:ci": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json --ci`
+      };
+      const scriptsResult = await addPackageJsonScripts({
+        projectRoot: root,
+        pkg,
+        scripts: scriptsToAdd,
+        nonInteractive
+      });
+      outro2([
+        `Installed battery: ${battery}.`,
+        "",
+        `- Code: ${path7.join(config.installDir, "eval")}`,
+        "",
+        `- Dataset: ${path7.relative(root, datasetAbs)}`,
+        `- Script: ${path7.relative(root, scriptAbs)}`,
+        "",
+        scriptsResult.added.length > 0 ? `Added scripts: ${scriptsResult.added.join(", ")}` : "Added scripts: none",
+        scriptsResult.kept.length > 0 ? `Kept existing scripts: ${scriptsResult.kept.join(", ")}` : "",
+        "",
+        "Next:",
+        "  bun run unrag:eval",
+        "  bun run unrag:eval:ci"
+      ].filter(Boolean).join(`
+`));
+      return;
+    }
     const wiringSnippet = battery === "reranker" ? [
       "",
       "Next steps:",
@@ -2776,14 +3566,15 @@ async function runDbChecks(state, options) {
     summary: `Using ${dbUrlResult.source}`,
     details: [redactConnectionString(dbUrlResult.url)]
   });
-  let client = null;
+  let end;
   try {
     const pg = await import("pg");
     const Pool = pg.default?.Pool ?? pg.Pool;
     const pool = new Pool({ connectionString: dbUrlResult.url });
-    client = {
+    end = () => pool.end();
+    const client = {
       query: (sql, params) => pool.query(sql, params),
-      end: () => pool.end()
+      end
     };
     const connectivityResult = await checkConnectivity(client);
     results.push(connectivityResult);
@@ -2796,6 +3587,10 @@ async function runDbChecks(state, options) {
     const tableNames = await inferTableNames(installDirFull ?? "", state.storeAdapter);
     const schemaResults = await checkSchema(client, options.schema, tableNames);
     results.push(...schemaResults);
+    const uniquenessResult = await checkSourceIdUniqueness(client, options.schema, tableNames);
+    results.push(uniquenessResult);
+    const duplicatesResult = await checkDuplicateSourceIds(client, options.schema, tableNames);
+    results.push(duplicatesResult);
     const indexResults = await checkIndexes(client, options.schema, tableNames);
     results.push(...indexResults);
     const dimensionResults = await checkDimensionConsistency(client, options.schema, tableNames, options.scope);
@@ -2814,9 +3609,8 @@ async function runDbChecks(state, options) {
       ]
     });
   } finally {
-    if (client) {
-      await client.end().catch(() => {});
-    }
+    if (end)
+      await end().catch(() => {});
   }
   return results;
 }
@@ -3083,6 +3877,135 @@ async function checkForeignKeys(client, schema, tableNames) {
     };
   }
 }
+async function checkSourceIdUniqueness(client, schema, tableNames) {
+  try {
+    const uniqueConstraintResult = await client.query(`SELECT con.conname as constraint_name
+       FROM pg_constraint con
+       JOIN pg_class t ON t.oid = con.conrelid
+       JOIN pg_namespace n ON n.oid = t.relnamespace
+       WHERE n.nspname = $1
+         AND t.relname = $2
+         AND con.contype = 'u'
+         AND array_length(con.conkey, 1) = 1
+         AND (
+           SELECT a.attname
+           FROM pg_attribute a
+           WHERE a.attrelid = t.oid AND a.attnum = con.conkey[1]
+         ) = 'source_id'`, [schema, tableNames.documents]);
+    if (uniqueConstraintResult.rows.length > 0) {
+      return {
+        id: "db-sourceid-unique",
+        title: "documents.source_id uniqueness",
+        status: "pass",
+        summary: "UNIQUE constraint exists on documents.source_id.",
+        details: [`Constraint: ${uniqueConstraintResult.rows[0].constraint_name}`]
+      };
+    }
+    const uniqueIndexResult = await client.query(`SELECT i.relname as indexname, pg_get_indexdef(i.oid) as indexdef
+       FROM pg_index ix
+       JOIN pg_class t ON t.oid = ix.indrelid
+       JOIN pg_class i ON i.oid = ix.indexrelid
+       JOIN pg_namespace n ON n.oid = t.relnamespace
+       WHERE n.nspname = $1
+         AND t.relname = $2
+         AND ix.indisunique = true
+         AND ix.indexprs IS NULL
+         AND ix.indpred IS NULL
+         -- Ensure key columns are exactly (source_id). This also allows INCLUDE columns.
+         AND pg_get_indexdef(i.oid) ~* '\\\\(\\\\s*"?source_id"?\\\\s*\\\\)'`, [schema, tableNames.documents]);
+    if (uniqueIndexResult.rows.length > 0) {
+      return {
+        id: "db-sourceid-unique",
+        title: "documents.source_id uniqueness",
+        status: "pass",
+        summary: "UNIQUE index exists on documents.source_id.",
+        details: [`Index: ${uniqueIndexResult.rows[0].indexname}`]
+      };
+    }
+    return {
+      id: "db-sourceid-unique",
+      title: "documents.source_id uniqueness",
+      status: "fail",
+      summary: "Missing UNIQUE constraint on documents.source_id.",
+      details: [
+        "Unrag requires a unique constraint on documents.source_id for idempotent ingestion.",
+        "Without this constraint, concurrent ingests for the same sourceId may create duplicates."
+      ],
+      fixHints: [
+        `ALTER TABLE ${schema}.${tableNames.documents} ADD CONSTRAINT ${tableNames.documents}_source_id_key UNIQUE (source_id);`,
+        "-- Or create a unique index:",
+        `CREATE UNIQUE INDEX ${tableNames.documents}_source_id_unique_idx ON ${schema}.${tableNames.documents}(source_id);`
+      ],
+      docsLink: docsUrl("/docs/getting-started/database#schema-requirements")
+    };
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    return {
+      id: "db-sourceid-unique",
+      title: "documents.source_id uniqueness",
+      status: "fail",
+      summary: `Could not check uniqueness constraint: ${message}`
+    };
+  }
+}
+async function checkDuplicateSourceIds(client, schema, tableNames) {
+  try {
+    const countResult = await client.query(`SELECT COUNT(*) as duplicate_count
+       FROM (
+         SELECT source_id
+         FROM ${schema}.${tableNames.documents}
+         GROUP BY source_id
+         HAVING COUNT(*) > 1
+       ) duplicates`);
+    const duplicateCount = parseInt(countResult.rows[0]?.duplicate_count ?? "0", 10);
+    if (duplicateCount === 0) {
+      return {
+        id: "db-sourceid-duplicates",
+        title: "documents.source_id duplicates",
+        status: "pass",
+        summary: "No duplicate source_id values found."
+      };
+    }
+    const sampleResult = await client.query(`SELECT source_id, COUNT(*) as count
+       FROM ${schema}.${tableNames.documents}
+       GROUP BY source_id
+       HAVING COUNT(*) > 1
+       ORDER BY COUNT(*) DESC
+       LIMIT 5`);
+    const samples = sampleResult.rows.map((r) => `"${r.source_id}" (${r.count} copies)`);
+    return {
+      id: "db-sourceid-duplicates",
+      title: "documents.source_id duplicates",
+      status: "fail",
+      summary: `Found ${duplicateCount} source_id value(s) with duplicates.`,
+      details: [
+        "Duplicate source_id values must be resolved before adding a unique constraint.",
+        "",
+        "Sample duplicates:",
+        ...samples,
+        duplicateCount > 5 ? `... and ${duplicateCount - 5} more` : ""
+      ].filter(Boolean),
+      fixHints: [
+        "-- Find all duplicates:",
+        `SELECT source_id, COUNT(*), array_agg(id) as document_ids`,
+        `FROM ${schema}.${tableNames.documents}`,
+        `GROUP BY source_id HAVING COUNT(*) > 1;`,
+        "",
+        "-- Resolve duplicates by deleting extra rows for a given source_id.",
+        "-- (Exact strategy depends on your app; pick which document_id to keep and delete the rest.)"
+      ],
+      docsLink: docsUrl("/docs/getting-started/database#resolving-duplicates")
+    };
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    return {
+      id: "db-sourceid-duplicates",
+      title: "documents.source_id duplicates",
+      status: "warn",
+      summary: `Could not check for duplicates: ${message}`
+    };
+  }
+}
 async function checkIndexes(client, schema, tableNames) {
   const results = [];
   try {
@@ -3500,14 +4423,14 @@ function resolveConfigPath(projectRoot, configPath) {
 // cli/commands/doctor-setup.ts
 import path14 from "node:path";
 import {
-  cancel as cancel3,
-  confirm as confirm3,
-  isCancel as isCancel3,
+  cancel as cancel4,
+  confirm as confirm4,
+  isCancel as isCancel4,
   multiselect,
   outro as outro3,
-  select as select2,
+  select as select3,
   spinner,
-  text as text2
+  text as text3
 } from "@clack/prompts";
 var DEFAULT_CONFIG_PATH = ".unrag/doctor.json";
 function parseSetupArgs(args) {
@@ -3582,7 +4505,7 @@ async function doctorSetupCommand(args) {
   });
   const tableNames = state.installDir ? await inferTableNames(path14.join(projectRoot, state.installDir), state.storeAdapter) : { documents: "documents", chunks: "chunks", embeddings: "embeddings" };
   s.stop("Configuration detected.");
-  const configPathAnswer = parsed.configPath ? parsed.configPath : nonInteractive ? DEFAULT_CONFIG_PATH : await text2({
+  const configPathAnswer = parsed.configPath ? parsed.configPath : nonInteractive ? DEFAULT_CONFIG_PATH : await text3({
     message: "Config file path",
     initialValue: DEFAULT_CONFIG_PATH,
     validate: (v) => {
@@ -3593,20 +4516,20 @@ async function doctorSetupCommand(args) {
       return;
     }
   });
-  if (isCancel3(configPathAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(configPathAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const configPath = String(configPathAnswer).trim();
   const configFullPath = path14.isAbsolute(configPath) ? configPath : path14.join(projectRoot, configPath);
   if (await exists(configFullPath)) {
     if (nonInteractive) {} else {
-      const overwrite = await confirm3({
+      const overwrite = await confirm4({
         message: `Config file ${configPath} already exists. Overwrite?`,
         initialValue: false
       });
-      if (isCancel3(overwrite)) {
-        cancel3("Cancelled.");
+      if (isCancel4(overwrite)) {
+        cancel4("Cancelled.");
         return;
       }
       if (!overwrite) {
@@ -3615,7 +4538,7 @@ async function doctorSetupCommand(args) {
       }
     }
   }
-  const installDirAnswer = nonInteractive ? state.installDir ?? "lib/unrag" : await text2({
+  const installDirAnswer = nonInteractive ? state.installDir ?? "lib/unrag" : await text3({
     message: "Unrag install directory",
     initialValue: state.installDir ?? "lib/unrag",
     validate: (v) => {
@@ -3624,8 +4547,8 @@ async function doctorSetupCommand(args) {
       return;
     }
   });
-  if (isCancel3(installDirAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(installDirAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const installDir = String(installDirAnswer).trim();
@@ -3648,13 +4571,13 @@ async function doctorSetupCommand(args) {
     initialValues: DEFAULT_ENV_LOAD_FILES,
     required: false
   });
-  if (isCancel3(envFilesAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(envFilesAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const envFiles = envFilesAnswer;
   const dbEnvVarDefault = state.inferredDbEnvVar ?? "DATABASE_URL";
-  const dbEnvVarAnswer = nonInteractive ? dbEnvVarDefault : await text2({
+  const dbEnvVarAnswer = nonInteractive ? dbEnvVarDefault : await text3({
     message: "Database URL environment variable name",
     initialValue: dbEnvVarDefault,
     validate: (v) => {
@@ -3665,80 +4588,80 @@ async function doctorSetupCommand(args) {
       return;
     }
   });
-  if (isCancel3(dbEnvVarAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(dbEnvVarAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const databaseUrlEnv = String(dbEnvVarAnswer).trim();
-  const schemaAnswer = nonInteractive ? "public" : await text2({
+  const schemaAnswer = nonInteractive ? "public" : await text3({
     message: "Database schema name",
     initialValue: "public"
   });
-  if (isCancel3(schemaAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(schemaAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const schema = String(schemaAnswer).trim() || "public";
-  const documentsTableAnswer = nonInteractive ? tableNames.documents : await text2({
+  const documentsTableAnswer = nonInteractive ? tableNames.documents : await text3({
     message: "Documents table name",
     initialValue: tableNames.documents
   });
-  if (isCancel3(documentsTableAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(documentsTableAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const documentsTable = String(documentsTableAnswer).trim() || "documents";
-  const chunksTableAnswer = nonInteractive ? tableNames.chunks : await text2({
+  const chunksTableAnswer = nonInteractive ? tableNames.chunks : await text3({
     message: "Chunks table name",
     initialValue: tableNames.chunks
   });
-  if (isCancel3(chunksTableAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(chunksTableAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const chunksTable = String(chunksTableAnswer).trim() || "chunks";
-  const embeddingsTableAnswer = nonInteractive ? tableNames.embeddings : await text2({
+  const embeddingsTableAnswer = nonInteractive ? tableNames.embeddings : await text3({
     message: "Embeddings table name",
     initialValue: tableNames.embeddings
   });
-  if (isCancel3(embeddingsTableAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(embeddingsTableAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const embeddingsTable = String(embeddingsTableAnswer).trim() || "embeddings";
-  const scopeAnswer = nonInteractive ? "" : await text2({
+  const scopeAnswer = nonInteractive ? "" : await text3({
     message: "Default scope prefix for dimension checks (optional, press enter to skip)",
     initialValue: ""
   });
-  if (isCancel3(scopeAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(scopeAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const defaultScope = String(scopeAnswer).trim() || null;
-  const strictAnswer = nonInteractive ? false : await confirm3({
+  const strictAnswer = nonInteractive ? false : await confirm4({
     message: "Enable strict mode by default? (treat warnings as failures)",
     initialValue: false
   });
-  if (isCancel3(strictAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(strictAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const strictDefault = Boolean(strictAnswer);
-  const ciIncludeDbAnswer = nonInteractive ? true : await confirm3({
+  const ciIncludeDbAnswer = nonInteractive ? true : await confirm4({
     message: "Should CI script include database checks (--db)?",
     initialValue: true
   });
-  if (isCancel3(ciIncludeDbAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(ciIncludeDbAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const ciIncludeDb = Boolean(ciIncludeDbAnswer);
-  const ciStrictAnswer = nonInteractive ? true : await confirm3({
+  const ciStrictAnswer = nonInteractive ? true : await confirm4({
     message: "Should CI script use strict mode (--strict)?",
     initialValue: true
   });
-  if (isCancel3(ciStrictAnswer)) {
-    cancel3("Cancelled.");
+  if (isCancel4(ciStrictAnswer)) {
+    cancel4("Cancelled.");
     return;
   }
   const ciStrict = Boolean(ciStrictAnswer);
@@ -3791,7 +4714,7 @@ async function doctorSetupCommand(args) {
   let scriptsToAdd = scripts;
   if (conflictingScripts.length > 0 && !nonInteractive) {
     for (const scriptName of conflictingScripts) {
-      const action = await select2({
+      const action = await select3({
         message: `Script "${scriptName}" already exists. What would you like to do?`,
         options: [
           { value: "keep", label: "Keep existing", hint: existingScripts[scriptName] },
@@ -3804,14 +4727,14 @@ async function doctorSetupCommand(args) {
         ],
         initialValue: "keep"
       });
-      if (isCancel3(action)) {
-        cancel3("Cancelled.");
+      if (isCancel4(action)) {
+        cancel4("Cancelled.");
         return;
       }
       if (action === "keep") {
         delete scriptsToAdd[scriptName];
       } else if (action === "rename") {
-        const newName = await text2({
+        const newName = await text3({
           message: `New script name for ${scriptName}`,
           initialValue: `${scriptName}:new`,
           validate: (v) => {
@@ -3822,8 +4745,8 @@ async function doctorSetupCommand(args) {
             return;
           }
         });
-        if (isCancel3(newName)) {
-          cancel3("Cancelled.");
+        if (isCancel4(newName)) {
+          cancel4("Cancelled.");
           return;
         }
         const value = scriptsToAdd[scriptName];
@@ -4147,7 +5070,7 @@ function renderHelp() {
     "  --alias <@name>      Import alias base (e.g. @unrag)",
     "  --preset <id|url>    Install from a web-generated preset (non-interactive)",
     "  --overwrite <mode>   skip | force (when files already exist)",
-    "  --rich-media         Enable rich media setup (also enables multimodal embeddings)",
+    "  --rich-media         Enable rich media setup (extractors + assetProcessing flags)",
     "  --no-rich-media      Disable rich media setup",
     "  --extractors <list>  Comma-separated extractors (implies --rich-media)",
     "  --no-install         Skip automatic dependency installation",