npm - @remnic/cli - Versions diffs - 1.0.4 → 1.0.5 - Mend

@remnic/cli 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +24 -0
package/dist/assets/download-datasets.sh +182 -0
package/dist/chunk-GAZ3DFWX.js +12027 -0
package/dist/dist-7DCVQLUB.js +292 -0
package/dist/index.js +1191 -20
package/package.json +5 -3
package/dist/chunk-U4MQO3IF.js +0 -1144
package/dist/dist-B67STFFX.js +0 -48

package/dist/index.js CHANGED Viewed

@@ -1,9 +1,24 @@
 import {
+  buildBenchmarkPublishFeed,
   checkRegression,
+  compareResults,
+  defaultBenchmarkBaselineDir,
+  defaultBenchmarkPublishPath,
+  deleteBenchmarkResults,
+  discoverAllProviders,
+  getBenchmarkLowerIsBetter,
+  listBenchmarkBaselines,
+  listBenchmarkResults,
   loadBaseline,
+  loadBenchmarkBaseline,
+  loadBenchmarkResult,
+  renderBenchmarkResultExport,
+  resolveBenchmarkResultReference,
   runBenchSuite,
-  runExplain
-} from "./chunk-U4MQO3IF.js";
+  runExplain,
+  saveBenchmarkBaseline,
+  writeBenchmarkPublishFeed
+} from "./chunk-GAZ3DFWX.js";
 // src/index.ts
 import fs from "fs";
@@ -87,6 +102,186 @@ import {
   resolveExtensionsRoot,
   coerceInstallExtension
 } from "@remnic/core";
+import {
+  convertMemoriesToRecords,
+  getTrainingExportAdapter as getTrainingExportAdapter2,
+  listTrainingExportAdapters,
+  parseStrictCliDate
+} from "@remnic/core";
+// ../export-weclone/dist/index.js
+import {
+  getTrainingExportAdapter,
+  registerTrainingExportAdapter
+} from "@remnic/core";
/**
 * Training-export adapter registered under the name "weclone".
 *
 * `formatRecords` keeps only the Alpaca-style `instruction` / `input` /
 * `output` fields of each record and returns them as a pretty-printed
 * (2-space indented) JSON array string.
 */
var wecloneExportAdapter = {
  name: "weclone",
  fileExtension: ".json",
  formatRecords(records) {
    // Project each record down to the three fields the export format carries.
    const toAlpaca = ({ instruction, input, output }) => ({ instruction, input, output });
    return JSON.stringify(records.map(toAlpaca), null, 2);
  }
};
/** Pairs generated per record when `options.maxPairsPerRecord` is not given. */
var DEFAULT_MAX_PAIRS = 1;
/**
 * Question templates grouped by template family. The `{topic}` placeholder is
 * substituted with the topic extracted from the record's instruction.
 */
var QUESTION_TEMPLATES = {
  preferences: [
    "What kind of {topic} do you like?",
    "What's your preference for {topic}?",
    "What are your favorite {topic}?"
  ],
  opinions: [
    "What do you think about {topic}?",
    "How do you feel about {topic}?",
    "What's your opinion on {topic}?"
  ],
  expertise: [
    "Tell me about {topic}.",
    "What do you know about {topic}?",
    "Can you explain {topic}?"
  ],
  personal: [
    "Can you tell me about your {topic}?",
    "Tell me about your {topic}.",
    "What can you share about your {topic}?"
  ]
};
/** Templates used when a record's category maps to no template family. */
var DEFAULT_TEMPLATES = [
  "Tell me about {topic}.",
  "What can you share about {topic}?"
];
/** Maps a lower-cased record category to a key of QUESTION_TEMPLATES. */
var CATEGORY_TO_TEMPLATE = {
  preference: "preferences",
  fact: "expertise",
  entity: "expertise",
  skill: "expertise",
  correction: "opinions",
  decision: "opinions",
  principle: "opinions",
  rule: "opinions",
  personal: "personal",
  relationship: "personal",
  commitment: "personal",
  moment: "personal"
};
/**
 * Expands each record into up to `maxPairsPerRecord` question/answer pairs.
 *
 * For record `i`, pair `j` uses template `(i + j) % templates.length`, so
 * consecutive records rotate through the templates of their family. The
 * number of pairs per record is capped at the number of available templates.
 *
 * @param {Array<object>} records - Records with `instruction`, `output`,
 *   `category`, `confidence` and `sourceIds` fields.
 * @param {object} [options]
 * @param {number} [options.maxPairsPerRecord] - Defaults to DEFAULT_MAX_PAIRS.
 * @param {object} [options.styleMarkers] - When `usesLowercase` is truthy,
 *   string outputs are lower-cased.
 * @returns {Array<object>} New records whose `instruction` is the generated
 *   question and whose `input` is the empty string.
 */
function synthesizeTrainingPairs(records, options) {
  const maxPairs = options?.maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;
  const lowercaseOutput = Boolean(options?.styleMarkers?.usesLowercase);
  const result = [];
  records.forEach((record, i) => {
    const templates = QUESTION_TEMPLATES[resolveTemplateKey(record.category)] ?? DEFAULT_TEMPLATES;
    const topic = extractTopic(record.instruction);
    const pairCount = Math.min(maxPairs, templates.length);
    for (let j = 0; j < pairCount; j++) {
      const template = templates[(i + j) % templates.length];
      // Use a replacer function so `$&`, `$1`, `$$` … inside the topic are
      // inserted literally instead of being read as replacement patterns.
      const question = template.replace("{topic}", () => topic);
      // Only strings can be lower-cased; leave any other output untouched.
      const output = lowercaseOutput && typeof record.output === "string"
        ? record.output.toLowerCase()
        : record.output;
      result.push({
        instruction: question,
        input: "",
        output,
        category: record.category,
        confidence: record.confidence,
        sourceIds: record.sourceIds
      });
    }
  });
  return result;
}
/**
 * Resolves a record category to a QUESTION_TEMPLATES key.
 *
 * @param {string} [category] - Matched case-insensitively.
 * @returns {string} The template family key, or "" when the category is
 *   missing, not a string, or unknown.
 */
function resolveTemplateKey(category) {
  if (typeof category !== "string" || category === "") return "";
  const key = category.toLowerCase();
  // Own-property check keeps inherited names such as "constructor" from
  // resolving to Object.prototype members.
  return Object.prototype.hasOwnProperty.call(CATEGORY_TO_TEMPLATE, key) ? CATEGORY_TO_TEMPLATE[key] : "";
}
/**
 * Pulls the topic out of the first non-nested parenthesised group of an
 * instruction, trimmed and lower-cased.
 *
 * @param {string} instruction
 * @returns {string} The topic, or "this" when there is no such group or the
 *   instruction is not a string.
 */
function extractTopic(instruction) {
  if (typeof instruction !== "string") return "this";
  const tagMatch = instruction.match(/\(([^()]+)\)/);
  return tagMatch ? tagMatch[1].trim().toLowerCase() : "this";
}
/**
 * PII detectors applied in order by sweepPii. Every regex is global so that
 * all occurrences in a field are replaced.
 */
var PII_PATTERNS = [
  {
    // Email: user@domain.tld
    name: "email",
    regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g
  },
  {
    // SSN: 123-45-6789 (exactly 3-2-4 digit groups)
    name: "ssn",
    regex: /\b\d{3}-\d{2}-\d{4}\b/g
  },
  {
    // Credit card: 4 groups of 4 digits separated by dashes or spaces
    name: "credit_card",
    regex: /\b\d{4}[-\s]\d{4}[-\s]\d{4}[-\s]\d{4}\b/g
  },
  {
    // IP address: four octets 0-255
    name: "ip_address",
    regex: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g
  },
  {
    // Phone: optional +1- prefix, then 3-3-4 with dashes, dots, or spaces
    // Also matches (555) 123-4567 format
    name: "phone",
    regex: /(?:\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}\b/g
  }
];
/** Record fields that sweepPii scans. */
var SCANNED_FIELDS = [
  "instruction",
  "input",
  "output"
];
/**
 * Replaces PII matches in the scanned fields of each record with "[REDACTED]".
 *
 * Input records are not mutated; each is shallow-copied. Fields that are
 * empty or not strings are left as they are.
 *
 * @param {Array<object>} records
 * @returns {{cleanRecords: Array<object>, redactedCount: number,
 *   redactionDetails: Array<{index: number, field: string, pattern: string}>}}
 *   `redactedCount` is the number of records with at least one redaction;
 *   `redactionDetails` has one entry per (record, field, pattern) that matched.
 */
function sweepPii(records) {
  const redactionDetails = [];
  const redactedIndexes = new Set();
  const cleanRecords = records.map((record, idx) => {
    const cleaned = { ...record };
    for (const field of SCANNED_FIELDS) {
      const original = record[field];
      // Only non-empty strings can carry textual PII; other values (arrays,
      // numbers, …) have no `.replace` and are passed through unchanged.
      if (typeof original !== "string" || original === "") continue;
      let value = original;
      for (const pattern of PII_PATTERNS) {
        let matched = false;
        // A single global replace both detects and redacts. `replace` resets
        // `lastIndex` on global regexes itself, so no manual reset is needed.
        value = value.replace(pattern.regex, () => {
          matched = true;
          return "[REDACTED]";
        });
        if (!matched) continue;
        redactedIndexes.add(idx);
        redactionDetails.push({
          index: idx,
          field,
          pattern: pattern.name
        });
      }
      cleaned[field] = value;
    }
    return cleaned;
  });
  return {
    cleanRecords,
    redactedCount: redactedIndexes.size,
    redactionDetails
  };
}
/**
 * Registers the weclone export adapter unless an adapter with the same name
 * is already registered.
 *
 * @returns {boolean} `true` when this call performed the registration,
 *   `false` when an adapter was already present.
 */
function ensureWecloneExportAdapterRegistered() {
  const alreadyRegistered = getTrainingExportAdapter(wecloneExportAdapter.name) !== void 0;
  if (!alreadyRegistered) {
    registerTrainingExportAdapter(wecloneExportAdapter);
  }
  return !alreadyRegistered;
}
try {
  ensureWecloneExportAdapterRegistered();
} catch {
  // Best-effort registration at module load: failures are ignored here.
}
 // src/service-candidates.ts
 function firstSuccessfulResult(candidates, attempt) {
@@ -143,7 +338,7 @@ function collectBenchmarks(argv) {
   const benchmarks = [];
   for (let index = 0; index < argv.length; index += 1) {
     const arg = argv[index];
-    if (arg === "--dataset-dir") {
+    if (arg === "--dataset-dir" || arg === "--results-dir" || arg === "--baselines-dir" || arg === "--threshold" || arg === "--custom" || arg === "--format" || arg === "--output" || arg === "--target") {
       index += 1;
       continue;
     }
@@ -155,7 +350,7 @@ function collectBenchmarks(argv) {
 }
 function parseBenchActionArgs(argv) {
   const [first, ...rest] = argv;
-  const action = first === "list" || first === "run" || first === "check" || first === "report" ? first : first === void 0 || first === "--help" || first === "-h" ? "help" : "run";
+  const action = first === "list" || first === "run" || first === "datasets" || first === "runs" || first === "compare" || first === "ui" || first === "results" || first === "baseline" || first === "export" || first === "providers" || first === "publish" || first === "check" || first === "report" ? first : first === void 0 || first === "--help" || first === "-h" ? "help" : "run";
   return {
     action,
     args: action === "run" && action !== first ? argv : rest
@@ -163,15 +358,72 @@ function parseBenchActionArgs(argv) {
 }
 function parseBenchArgs(argv) {
   const { action, args } = parseBenchActionArgs(argv);
-  const benchmarks = collectBenchmarks(args);
+  const baselineAction = action === "baseline" ? args[0] === "save" || args[0] === "list" ? args[0] : void 0 : void 0;
+  const datasetAction = action === "datasets" ? args[0] === "download" || args[0] === "status" ? args[0] : void 0 : void 0;
+  const providerAction = action === "providers" ? args[0] === "discover" ? args[0] : void 0 : void 0;
+  const runAction = action === "runs" ? args[0] === "list" || args[0] === "show" || args[0] === "delete" ? args[0] : void 0 : void 0;
+  if (action === "baseline" && baselineAction === void 0) {
+    throw new Error("ERROR: baseline requires a subcommand: save or list.");
+  }
+  if (action === "datasets" && datasetAction === void 0) {
+    throw new Error("ERROR: datasets requires a subcommand: download or status.");
+  }
+  if (action === "providers" && providerAction === void 0) {
+    throw new Error("ERROR: providers requires a subcommand: discover.");
+  }
+  if (action === "runs" && runAction === void 0) {
+    throw new Error("ERROR: runs requires a subcommand: list, show, or delete.");
+  }
+  const benchmarkArgs = action === "baseline" || action === "datasets" || action === "providers" || action === "runs" ? args.slice(1) : args;
+  const benchmarks = collectBenchmarks(benchmarkArgs);
   const datasetDir = readBenchOptionValue(args, "--dataset-dir");
+  const resultsDir = readBenchOptionValue(args, "--results-dir");
+  const baselinesDir = readBenchOptionValue(args, "--baselines-dir");
+  const thresholdRaw = readBenchOptionValue(args, "--threshold");
+  const customRaw = readBenchOptionValue(args, "--custom");
+  const formatRaw = readBenchOptionValue(args, "--format");
+  const output = readBenchOptionValue(args, "--output");
+  const targetRaw = readBenchOptionValue(args, "--target");
+  let threshold;
+  if (thresholdRaw !== void 0) {
+    threshold = Number(thresholdRaw);
+    if (!Number.isFinite(threshold) || threshold < 0) {
+      throw new Error("ERROR: --threshold must be a non-negative number.");
+    }
+  }
+  let format;
+  if (formatRaw !== void 0) {
+    if (formatRaw !== "json" && formatRaw !== "csv" && formatRaw !== "html") {
+      throw new Error('ERROR: --format must be "json", "csv", or "html".');
+    }
+    format = formatRaw;
+  }
+  let target;
+  if (targetRaw !== void 0) {
+    if (targetRaw !== "remnic-ai") {
+      throw new Error('ERROR: --target must be "remnic-ai".');
+    }
+    target = targetRaw;
+  }
   return {
     action,
     benchmarks,
     quick: args.includes("--quick"),
     all: args.includes("--all"),
     json: args.includes("--json"),
-    datasetDir: datasetDir ? path.resolve(expandTilde(datasetDir)) : void 0
+    detail: args.includes("--detail"),
+    datasetDir: datasetDir ? path.resolve(expandTilde(datasetDir)) : void 0,
+    resultsDir: resultsDir ? path.resolve(expandTilde(resultsDir)) : void 0,
+    baselinesDir: baselinesDir ? path.resolve(expandTilde(baselinesDir)) : void 0,
+    threshold,
+    custom: customRaw ? path.resolve(expandTilde(customRaw)) : void 0,
+    baselineAction,
+    datasetAction,
+    providerAction,
+    runAction,
+    format,
+    output: output ? path.resolve(expandTilde(output)) : void 0,
+    target
   };
 }
@@ -296,12 +548,29 @@ var BENCHMARK_CATALOG = [
 ];
 var BENCHMARK_IDS = new Set(BENCHMARK_CATALOG.map((entry) => entry.id));
 function getBenchUsageText() {
-  return `Usage: remnic bench <list|run> [options] [benchmark...]
-       remnic benchmark <list|run|check|report> [options] [benchmark...]
+  return `Usage: remnic bench <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers> [options] [benchmark...]
+       remnic benchmark <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers|check|report> [options] [benchmark...]
 Commands:
   list                     List published benchmark packs
   run [benchmark...]       Run one or more benchmark packs
+  datasets download [benchmark...]
+                           Download local datasets for supported published benchmarks
+  datasets status          Show local dataset availability for supported benchmarks
+  runs list                List stored benchmark runs
+  runs show <run>          Show one stored benchmark run
+  runs delete <run...>     Delete one or more stored benchmark runs
+  compare <base> <cand>    Compare two stored benchmark runs by id or file path
+  results [run]            List stored runs or inspect a stored run
+  baseline save <name> [run]
+                           Save a stored run as a named baseline
+  baseline list            List saved baselines
+  export <run> --format <json|csv|html>
+                           Export one stored run as JSON, aggregate-metrics CSV, or static HTML
+  publish --target remnic-ai
+                           Generate the Remnic.ai benchmark feed from stored runs
+  ui                       Launch the local benchmark overview UI
+  providers discover       Auto-detect available local provider backends
   check                    Legacy latency regression gate (compatibility)
   report                   Legacy latency report generator (compatibility)
@@ -309,12 +578,36 @@ Options:
   --quick                  Run a lightweight quick pass (maps to --lightweight --limit 1)
   --all                    Run every published benchmark
   --dataset-dir <path>     Override the benchmark dataset directory for full runs
+  --custom <path>          Run a YAML-defined custom benchmark file
+  --results-dir <path>     Override the stored benchmark results directory
+  --baselines-dir <path>   Override the named baseline directory
+  --threshold <value>      Regression threshold for compare (default: 0.05)
+  --detail                 Include per-task details for bench results
+  --format <json|csv|html> Output format for bench export
+  --output <path>          Write bench export output to a file
+  --target <name>          Publish target for bench publish (remnic-ai)
   --json                   Output JSON for \`list\`
 Examples:
   remnic bench list
+  remnic bench datasets status
+  remnic bench datasets download longmemeval
+  remnic bench datasets download --all
+  remnic bench runs list
+  remnic bench runs show candidate-run --detail
+  remnic bench runs delete candidate-run
   remnic bench run --quick longmemeval
   remnic bench run longmemeval --dataset-dir ~/datasets/longmemeval
+  remnic bench compare base-run candidate-run
+  remnic bench results
+  remnic bench results candidate-run --detail
+  remnic bench baseline save main candidate-run
+  remnic bench baseline list
+  remnic bench export candidate-run --format csv --output ./candidate.csv
+  remnic bench export candidate-run --format html --output ./report.html
+  remnic bench publish --target remnic-ai
+  remnic bench providers discover
+  remnic bench run --custom ./my-bench.yaml
   remnic benchmark run --quick longmemeval`;
 }
 function buildBenchRunnerArgs(parsed, benchmarkId) {
@@ -347,7 +640,7 @@ async function listBenchmarksFromPackage() {
 }
 async function loadBenchDefinitionsFromPackage() {
   try {
-    const benchModule = await import("./dist-B67STFFX.js");
+    const benchModule = await import("./dist-7DCVQLUB.js");
     if (!benchModule.listBenchmarks) return void 0;
     const result = benchModule.listBenchmarks();
     return Array.isArray(result) ? result : void 0;
@@ -395,6 +688,154 @@ async function runBenchViaFallback(parsed, benchmarkId) {
 /** Returns the stored-results directory: `<home>/.remnic/bench/results`. */
 function resolveBenchOutputDir() {
   const relativeSegments = [".remnic", "bench", "results"];
   return path2.join(resolveHomeDir(), ...relativeSegments);
 }
/** Benchmark ids whose datasets can be fetched by the download script. */
var DOWNLOADABLE_BENCHMARK_DATASETS = ["ama-bench", "memory-arena", "amemgym", "longmemeval", "locomo"];
/**
 * Per-benchmark evidence that a dataset directory is populated:
 * `anyOf` lists file names of which at least one must exist as a regular
 * file; `ext` requires at least one directory entry ending in that suffix.
 */
var DOWNLOADED_DATASET_MARKERS = {
  "ama-bench": {
    anyOf: ["open_end_qa_set.jsonl"]
  },
  longmemeval: {
    anyOf: ["longmemeval_oracle.json", "longmemeval_s_cleaned.json", "longmemeval.json"]
  },
  amemgym: {
    anyOf: ["amemgym-v1-base.json", "amemgym-tasks.json", "data.json"]
  },
  locomo: {
    anyOf: ["locomo10.json", "locomo.json"]
  },
  "memory-arena": {
    ext: ".jsonl"
  }
};
/**
 * Reports whether `datasetPath` looks like a downloaded dataset for
 * `benchmarkId`.
 *
 * The path must be an existing directory. Benchmarks with a marker entry are
 * checked against it; benchmarks without one only need a non-empty directory.
 * Any filesystem error is treated as "not downloaded".
 *
 * @param {string} datasetPath
 * @param {string} benchmarkId
 * @returns {boolean}
 */
function isDatasetDownloaded(datasetPath, benchmarkId) {
  // Both helpers turn filesystem errors into `null`.
  const statOrNull = (target) => {
    try {
      return fs.statSync(target);
    } catch {
      return null;
    }
  };
  const listEntriesOrNull = () => {
    try {
      return fs.readdirSync(datasetPath);
    } catch {
      return null;
    }
  };

  const rootStats = statOrNull(datasetPath);
  if (rootStats === null || !rootStats.isDirectory()) {
    return false;
  }

  const marker = DOWNLOADED_DATASET_MARKERS[benchmarkId];
  if (!marker) {
    const entries = listEntriesOrNull();
    return entries !== null && entries.length > 0;
  }
  if (marker.anyOf) {
    for (const name of marker.anyOf) {
      const fileStats = statOrNull(path2.join(datasetPath, name));
      if (fileStats !== null && fileStats.isFile()) {
        return true;
      }
    }
    return false;
  }
  if (marker.ext) {
    const entries = listEntriesOrNull();
    return entries !== null && entries.some((name) => name.endsWith(marker.ext));
  }
  return false;
}
/**
 * Starts the bench UI dev server (`pnpm exec vite --host 127.0.0.1`) inside
 * `<repo>/packages/bench-ui` and waits for it to exit.
 *
 * The results directory is passed to the child via the
 * `REMNIC_BENCH_RESULTS_DIR` environment variable. Exits the process with
 * code 1 when the bench-ui package is not present in the checkout.
 *
 * @param {string} resultsDir
 * @returns {Promise<void>} Resolves on exit code 0 or on SIGINT/SIGTERM;
 *   rejects on spawn error or any other exit.
 */
async function launchBenchUi(resultsDir) {
  const onWindows = process.platform === "win32";
  const benchUiDir = path2.join(CLI_REPO_ROOT, "packages", "bench-ui");
  const pnpmCmd = onWindows ? "pnpm.cmd" : "pnpm";
  const manifestPath = path2.join(benchUiDir, "package.json");
  if (!fs.existsSync(manifestPath)) {
    console.error("ERROR: @remnic/bench-ui is not available in this checkout.");
    process.exit(1);
  }
  console.log(`Launching bench UI with results from ${resultsDir}`);
  console.log("Press Ctrl+C to stop the local server.");
  const spawnOptions = {
    cwd: benchUiDir,
    stdio: "inherit",
    shell: onWindows,
    env: {
      ...process.env,
      REMNIC_BENCH_RESULTS_DIR: resultsDir
    }
  };
  const child = childProcess.spawn(pnpmCmd, ["exec", "vite", "--host", "127.0.0.1"], spawnOptions);
  // Being interrupted by the user counts as a normal shutdown.
  const cleanSignals = ["SIGINT", "SIGTERM"];
  await new Promise((resolve, reject) => {
    child.on("error", reject);
    child.on("close", (code, signal) => {
      const exitedCleanly = code === 0 || cleanSignals.includes(signal);
      if (!exitedCleanly) {
        reject(new Error(`bench UI exited with code ${code ?? "unknown"}`));
        return;
      }
      resolve();
    });
  });
}
/** Returns the default directory for named baselines, as reported by `defaultBenchmarkBaselineDir()`. */
function resolveBenchBaselineDir() {
  const baselineDir = defaultBenchmarkBaselineDir();
  return baselineDir;
}
/**
 * Returns the dataset root: `<repo>/evals/datasets` when running from a
 * repository checkout, otherwise `<home>/.remnic/bench/datasets`.
 */
function resolveRepoDatasetRoot() {
  return isRepoCheckout()
    ? path2.join(CLI_REPO_ROOT, "evals", "datasets")
    : path2.join(resolveHomeDir(), ".remnic", "bench", "datasets");
}
/** Returns a fresh copy of the downloadable benchmark ids, so callers cannot mutate the shared list. */
function listDownloadableBenchmarks() {
  return Array.from(DOWNLOADABLE_BENCHMARK_DATASETS);
}
/**
 * Locates `download-datasets.sh`: the copy bundled under the CLI module's
 * `assets` directory when it exists, otherwise the repository copy under
 * `evals/scripts` (returned without checking that it exists).
 */
function resolveDatasetDownloadScriptPath() {
  const scriptName = "download-datasets.sh";
  const bundled = path2.join(CLI_MODULE_DIR, "assets", scriptName);
  return fs.existsSync(bundled)
    ? bundled
    : path2.join(CLI_REPO_ROOT, "evals", "scripts", scriptName);
}
/**
 * Reports whether CLI_REPO_ROOT looks like a repository checkout: it must
 * contain both `pnpm-workspace.yaml` and `evals/scripts/download-datasets.sh`.
 */
function isRepoCheckout() {
  const requiredPaths = [
    path2.join(CLI_REPO_ROOT, "pnpm-workspace.yaml"),
    path2.join(CLI_REPO_ROOT, "evals", "scripts", "download-datasets.sh")
  ];
  return requiredPaths.every((requiredPath) => fs.existsSync(requiredPath));
}
/**
 * Runs the dataset download script synchronously for one benchmark.
 *
 * The script receives `--benchmark <id>` and the target root through the
 * `DATASETS_DIR` environment variable. In JSON mode the child's stdout is
 * routed to this process's stderr (presumably so that stdout carries only the
 * JSON the caller prints). On Windows the script is run through `bash`, which
 * must be installed.
 *
 * @param {string} scriptPath
 * @param {string} benchmarkId
 * @param {string} datasetRoot
 * @param {boolean} jsonMode
 * @throws {Error} When bash is unavailable on Windows, or when the script fails.
 */
function runDatasetDownloadScript(scriptPath, benchmarkId, datasetRoot, jsonMode) {
  const execOptions = {
    cwd: CLI_REPO_ROOT,
    stdio: jsonMode ? ["inherit", process.stderr, "inherit"] : "inherit",
    env: { ...process.env, DATASETS_DIR: datasetRoot }
  };
  const scriptArgs = ["--benchmark", benchmarkId];
  if (process.platform === "win32") {
    // Probe for bash first so the user gets an actionable message.
    const bashProbe = childProcess.spawnSync("bash", ["--version"], { stdio: "ignore" });
    if (bashProbe.error || bashProbe.status !== 0) {
      throw new Error(
        "bench datasets download requires bash on Windows (Git Bash or WSL). Install bash or run this command from a Unix shell."
      );
    }
    childProcess.execFileSync("bash", [scriptPath, ...scriptArgs], execOptions);
    return;
  }
  childProcess.execFileSync(scriptPath, scriptArgs, execOptions);
}
/**
 * Determines which datasets a `datasets download` invocation should fetch.
 *
 * `--all` selects every downloadable benchmark. Otherwise the de-duplicated
 * ids from the command line are used. Exits the process with code 1 when no
 * id was given or when any id is not downloadable.
 *
 * @param {{all: boolean, benchmarks: string[]}} parsed
 * @returns {string[]}
 */
function resolveSelectedDatasetDownloads(parsed) {
  const supported = listDownloadableBenchmarks();
  if (parsed.all) {
    return supported;
  }
  const requested = parsed.benchmarks;
  if (requested.length === 0) {
    console.error(
      "ERROR: datasets download requires at least one benchmark id or --all. Usage: remnic bench datasets download <benchmark...> [--all] [--json]"
    );
    process.exit(1);
  }
  const selected = Array.from(new Set(requested));
  const unsupported = [];
  for (const benchmarkId of selected) {
    if (!supported.includes(benchmarkId)) {
      unsupported.push(benchmarkId);
    }
  }
  if (unsupported.length > 0) {
    console.error(
      `ERROR: unsupported downloadable benchmark dataset(s): ${unsupported.join(", ")}. Supported datasets: ${supported.join(", ")}.`
    );
    process.exit(1);
  }
  return selected;
}
 function resolveBenchDatasetDir(benchmarkId, quick, datasetDirOverride) {
   if (datasetDirOverride) {
     return datasetDirOverride;
@@ -402,14 +843,13 @@ function resolveBenchDatasetDir(benchmarkId, quick, datasetDirOverride) {
   if (quick) {
     return void 0;
   }
-  const repoDatasetDir = path2.join(CLI_REPO_ROOT, "evals", "datasets", benchmarkId);
-  try {
-    return fs.statSync(repoDatasetDir).isDirectory() ? repoDatasetDir : void 0;
-  } catch {
-    return void 0;
+  const datasetDir = path2.join(resolveRepoDatasetRoot(), benchmarkId);
+  if (isDatasetDownloaded(datasetDir, benchmarkId)) {
+    return datasetDir;
   }
+  return void 0;
 }
-function printBenchPackageSummary(result, outputPath) {
+function printBenchPackageSummary(result, outputPath, outputLabel = "Results saved") {
   console.log(`Benchmark: ${result.meta.benchmark}`);
   console.log(`Mode: ${result.meta.mode}`);
   console.log(`Tasks: ${result.results.tasks.length}`);
@@ -417,12 +857,426 @@ function printBenchPackageSummary(result, outputPath) {
   for (const [metric, aggregate] of Object.entries(result.results.aggregates).sort()) {
     console.log(`  ${metric.padEnd(20)} ${aggregate.mean.toFixed(4)}`);
   }
-  console.log(`Results saved: ${outputPath}`);
+  console.log(`${outputLabel}: ${outputPath}`);
+}
/**
 * Prints the standard benchmark summary for a stored run, labelling its path
 * "Stored result", followed by the run id.
 */
function printStoredBenchResultSummary(result, summary) {
  const storedPath = summary.path;
  printBenchPackageSummary(result, storedPath, "Stored result");
  const runIdLine = `Run id: ${summary.id}`;
  console.log(runIdLine);
}
/**
 * Prints the stored-run summary followed by one line per task: the task id,
 * its latency in milliseconds (1 decimal), and its scores sorted by metric
 * name (4 decimals). Prints "Tasks: none" when the run has no tasks.
 */
function printStoredBenchResultDetails(result, summary) {
  printStoredBenchResultSummary(result, summary);
  const { tasks } = result.results;
  if (tasks.length === 0) {
    console.log("Tasks: none");
    return;
  }
  const formatScores = (scores) => {
    const sortedEntries = Object.entries(scores).sort(([left], [right]) => left.localeCompare(right));
    return sortedEntries.map(([metric, value]) => `${metric}=${value.toFixed(4)}`).join(", ");
  };
  console.log("Task breakdown:");
  for (const task of tasks) {
    const scores = formatScores(task.scores);
    const line = `  ${task.taskId}: ${task.latencyMs.toFixed(1)}ms${scores.length > 0 ? ` [${scores}]` : ""}`;
    console.log(line);
  }
}
/**
 * Prints a human-readable comparison of two stored benchmark runs.
 *
 * Output: benchmark name, baseline and candidate (id and path), the verdict,
 * then one line per metric (sorted by name) with baseline -> candidate, the
 * signed delta, the percent change and the effect size, plus an optional
 * 95% CI line when `ciOnDelta` is present.
 *
 * @param {object} comparison - Has `benchmark`, `verdict` and `metricDeltas`
 *   (metric name -> `{ baseline, candidate, delta, percentChange,
 *   effectSize: { cohensD, interpretation }, ciOnDelta? }`).
 * @param {{id: string, path: string}} baseline
 * @param {{id: string, path: string}} candidate
 */
function printBenchComparisonSummary(comparison, baseline, candidate) {
  // An undefined ratio (NaN, e.g. 0/0, or a missing value) is shown as "n/a"
  // rather than being mislabelled as an infinite decrease.
  const formatPercent = (ratio) => {
    if (typeof ratio !== "number" || Number.isNaN(ratio)) {
      return "n/a";
    }
    if (Number.isFinite(ratio)) {
      return `${(ratio * 100).toFixed(2)}%`;
    }
    return ratio > 0 ? "+Infinity%" : "-Infinity%";
  };

  console.log(`Benchmark: ${comparison.benchmark}`);
  console.log(`Baseline: ${baseline.id} (${baseline.path})`);
  console.log(`Candidate: ${candidate.id} (${candidate.path})`);
  console.log(`Verdict: ${comparison.verdict}`);
  const metrics = Object.entries(comparison.metricDeltas).sort(
    ([left], [right]) => left.localeCompare(right)
  );
  if (metrics.length === 0) {
    console.log("No overlapping metrics were found between the two results.");
    return;
  }
  console.log("Metrics:");
  for (const [metric, delta] of metrics) {
    const percent = formatPercent(delta.percentChange);
    // Negative deltas already carry their "-" from toFixed.
    const direction = delta.delta >= 0 ? "+" : "";
    console.log(
      `  ${metric.padEnd(18)} ${delta.baseline.toFixed(4)} -> ${delta.candidate.toFixed(4)} (${direction}${delta.delta.toFixed(4)}, ${percent}, d=${delta.effectSize.cohensD.toFixed(3)} ${delta.effectSize.interpretation})`
    );
    if (delta.ciOnDelta) {
      console.log(
        `    CI95 delta: [${delta.ciOnDelta.lower.toFixed(4)}, ${delta.ciOnDelta.upper.toFixed(4)}]`
      );
    }
  }
}
/**
 * Implements `bench compare <baseline> <candidate>`.
 *
 * Resolves both references inside the results directory, loads the runs,
 * requires them to come from the same benchmark, and prints the comparison
 * as JSON (`--json`) or as a text summary. The regression threshold defaults
 * to 0.05. Exits with code 1 on usage errors, unresolved references, a
 * benchmark mismatch, or a "regression" verdict.
 *
 * @param {object} parsed - Parsed bench arguments.
 */
async function compareBenchPackageResults(parsed) {
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  const refs = parsed.benchmarks;
  if (refs.length !== 2) {
    fail(
      "ERROR: compare requires exactly two stored result references. Usage: remnic bench compare <baseline> <candidate> [--results-dir <path>] [--threshold <value>] [--json]"
    );
  }
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const [baselineRef, candidateRef] = refs;
  const baselineSummary = await resolveBenchmarkResultReference(resultsDir, baselineRef);
  const candidateSummary = await resolveBenchmarkResultReference(resultsDir, candidateRef);
  const resolved = [
    [baselineSummary, baselineRef],
    [candidateSummary, candidateRef]
  ];
  for (const [summary, ref] of resolved) {
    if (!summary) {
      fail(`ERROR: benchmark result not found: ${ref}`);
    }
  }
  const baseline = await loadBenchmarkResult(baselineSummary.path);
  const candidate = await loadBenchmarkResult(candidateSummary.path);
  if (baseline.meta.benchmark !== candidate.meta.benchmark) {
    fail(
      `ERROR: benchmark mismatch: ${baseline.meta.benchmark} vs ${candidate.meta.benchmark}. Compare runs from the same benchmark.`
    );
  }
  const threshold = parsed.threshold ?? 0.05;
  const lowerIsBetter = getBenchmarkLowerIsBetter(candidate.meta.benchmark);
  const comparison = compareResults(baseline, candidate, threshold, lowerIsBetter);
  if (!parsed.json) {
    printBenchComparisonSummary(comparison, baselineSummary, candidateSummary);
  } else {
    const payload = {
      benchmark: comparison.benchmark,
      baseline: baselineSummary,
      candidate: candidateSummary,
      comparison
    };
    console.log(JSON.stringify(payload, null, 2));
  }
  // A regression fails the command so it can gate scripts.
  if (comparison.verdict === "regression") {
    process.exit(1);
  }
}
/**
 * Implements `bench results [run]`.
 *
 * With no reference, lists the stored runs (as JSON with `--json`). With one
 * reference, prints that run: raw JSON with `--json`, the per-task breakdown
 * with `--detail`, otherwise the summary. Exits with code 1 when more than
 * one reference is given or the reference cannot be resolved.
 *
 * @param {object} parsed - Parsed bench arguments.
 */
async function showBenchPackageResults(parsed) {
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const references = parsed.benchmarks;
  if (references.length === 0) {
    const summaries = await listBenchmarkResults(resultsDir);
    if (parsed.json) {
      console.log(JSON.stringify(summaries, null, 2));
    } else if (summaries.length === 0) {
      console.log(`No stored benchmark runs found in ${resultsDir}`);
    } else {
      console.log("Stored benchmark runs:");
      for (const entry of summaries) {
        const row = `  ${entry.id.padEnd(24)} ${entry.benchmark.padEnd(16)} ${entry.mode.padEnd(5)} ${entry.timestamp}`;
        console.log(row);
      }
    }
    return;
  }
  if (references.length !== 1) {
    console.error(
      "ERROR: results accepts at most one stored result reference. Usage: remnic bench results [run] [--detail] [--results-dir <path>] [--json]"
    );
    process.exit(1);
  }
  const reference = references[0];
  const summary = await resolveBenchmarkResultReference(resultsDir, reference);
  if (!summary) {
    console.error(`ERROR: benchmark result not found: ${reference}`);
    process.exit(1);
  }
  const result = await loadBenchmarkResult(summary.path);
  if (parsed.json) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }
  const print = parsed.detail ? printStoredBenchResultDetails : printStoredBenchResultSummary;
  print(result, summary);
}
/**
 * Implements `bench baseline <save|list>`.
 *
 * `list` prints the saved baselines (as JSON with `--json`). `save <name>
 * [run]` stores a run as a named baseline; without an explicit run reference
 * it uses the first entry returned by `listBenchmarkResults`. Exits with
 * code 1 on usage errors, when no source run is found, or when saving fails.
 *
 * @param {object} parsed - Parsed bench arguments.
 */
async function manageBenchBaselines(parsed) {
  const baselineDir = parsed.baselinesDir ?? resolveBenchBaselineDir();
  if (parsed.baselineAction === "list") {
    const baselines = await listBenchmarkBaselines(baselineDir);
    if (parsed.json) {
      console.log(JSON.stringify(baselines, null, 2));
    } else if (baselines.length === 0) {
      console.log(`No saved baselines found in ${baselineDir}`);
    } else {
      console.log("Saved baselines:");
      for (const entry of baselines) {
        const row = `  ${entry.name.padEnd(20)} ${entry.benchmark.padEnd(16)} ${entry.mode.padEnd(5)} ${entry.timestamp}`;
        console.log(row);
      }
    }
    return;
  }
  if (parsed.baselineAction !== "save") {
    console.error("ERROR: baseline requires a subcommand: save or list.");
    process.exit(1);
  }
  const positionalCount = parsed.benchmarks.length;
  if (positionalCount < 1 || positionalCount > 2) {
    console.error(
      "ERROR: baseline save requires a name and optionally one stored result reference. Usage: remnic bench baseline save <name> [run] [--results-dir <path>] [--baselines-dir <path>] [--json]"
    );
    process.exit(1);
  }
  const [name, explicitReference] = parsed.benchmarks;
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  let sourceSummary;
  if (explicitReference) {
    sourceSummary = await resolveBenchmarkResultReference(resultsDir, explicitReference);
  } else {
    // No run named: fall back to the first stored run in the listing.
    const storedRuns = await listBenchmarkResults(resultsDir);
    sourceSummary = storedRuns[0];
  }
  if (!sourceSummary) {
    const message = explicitReference
      ? `ERROR: benchmark result not found: ${explicitReference}`
      : `ERROR: no stored benchmark runs found in ${resultsDir}`;
    console.error(message);
    process.exit(1);
  }
  const result = await loadBenchmarkResult(sourceSummary.path);
  let writtenPath;
  try {
    const source = { id: sourceSummary.id, path: sourceSummary.path };
    writtenPath = await saveBenchmarkBaseline(baselineDir, name, result, source);
  } catch (error) {
    console.error(error instanceof Error ? error.message : String(error));
    process.exit(1);
  }
  if (!parsed.json) {
    console.log(`Saved baseline "${name}" to ${writtenPath}`);
    console.log(`  Source run: ${sourceSummary.id}`);
    console.log(`  Benchmark: ${result.meta.benchmark}`);
    return;
  }
  // JSON mode reports what was actually persisted by re-reading the file.
  const baseline = await loadBenchmarkBaseline(writtenPath);
  const report = {
    name: baseline.name,
    path: writtenPath,
    source: baseline.source,
    benchmark: baseline.result.meta.benchmark,
    timestamp: baseline.savedAt
  };
  console.log(JSON.stringify(report, null, 2));
}
/**
 * Implements `bench export <run> --format <json|csv|html>`.
 *
 * Renders one stored run in the requested format and either writes it to
 * `--output` (creating parent directories) or streams it to stdout. Exits
 * with code 1 when the reference count is not exactly one, when `--format`
 * is missing, or when the reference cannot be resolved.
 *
 * @param {object} parsed - Parsed bench arguments.
 */
async function exportBenchPackageResult(parsed) {
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  if (parsed.benchmarks.length !== 1) {
    fail(
      "ERROR: export requires exactly one stored result reference. Usage: remnic bench export <run> --format <json|csv|html> [--output <path>] [--results-dir <path>]"
    );
  }
  if (!parsed.format) {
    fail("ERROR: export requires --format json, csv, or html.");
  }
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const reference = parsed.benchmarks[0];
  const summary = await resolveBenchmarkResultReference(resultsDir, reference);
  if (!summary) {
    fail(`ERROR: benchmark result not found: ${reference}`);
  }
  const result = await loadBenchmarkResult(summary.path);
  const rendered = renderBenchmarkResultExport(result, parsed.format);
  if (!parsed.output) {
    process.stdout.write(rendered);
    return;
  }
  const outputDir = path2.dirname(parsed.output);
  fs.mkdirSync(outputDir, { recursive: true });
  fs.writeFileSync(parsed.output, rendered);
  console.log(`Exported ${summary.id} as ${parsed.format} to ${parsed.output}`);
}
/**
 * Implements `bench datasets <status|download>`.
 *
 * `status` reports, for every downloadable benchmark, whether its dataset
 * directory under the dataset root looks downloaded. `download` runs the
 * download script once per selected benchmark, in order, and then reports
 * the target paths. Both honour `--json`. Exits with code 1 on usage errors
 * or when the download script is missing.
 *
 * @param {object} parsed - Parsed bench arguments.
 */
async function manageBenchDatasets(parsed) {
  const datasetRoot = resolveRepoDatasetRoot();
  const supported = listDownloadableBenchmarks();
  const datasetPathFor = (benchmarkId) => path2.join(datasetRoot, benchmarkId);

  if (parsed.datasetAction === "status") {
    if (parsed.benchmarks.length > 0 || parsed.all) {
      console.error(
        "ERROR: datasets status does not accept benchmark names or --all. Usage: remnic bench datasets status [--json]"
      );
      process.exit(1);
    }
    const status = [];
    for (const benchmarkId of supported) {
      const datasetPath = datasetPathFor(benchmarkId);
      status.push({
        benchmark: benchmarkId,
        downloaded: isDatasetDownloaded(datasetPath, benchmarkId),
        path: datasetPath
      });
    }
    if (parsed.json) {
      console.log(JSON.stringify(status, null, 2));
      return;
    }
    console.log("Downloadable benchmark datasets:");
    for (const { benchmark, downloaded, path: datasetPath } of status) {
      const state = downloaded ? "downloaded" : "missing";
      console.log(`  ${benchmark.padEnd(16)} ${state}  ${datasetPath}`);
    }
    console.log("");
    console.log(
      "Only the script-backed published datasets are managed here. Other benchmark fixtures remain repo-managed or manual."
    );
    return;
  }

  if (parsed.datasetAction !== "download") {
    console.error("ERROR: datasets requires a subcommand: download or status.");
    process.exit(1);
  }
  const scriptPath = resolveDatasetDownloadScriptPath();
  if (!fs.existsSync(scriptPath)) {
    console.error(`ERROR: dataset download script not found: ${scriptPath}`);
    process.exit(1);
  }
  const selected = resolveSelectedDatasetDownloads(parsed);
  const jsonMode = parsed.json === true;
  const downloaded = [];
  for (const benchmarkId of selected) {
    // Synchronous: a failing script throws and stops the remaining downloads.
    runDatasetDownloadScript(scriptPath, benchmarkId, datasetRoot, jsonMode);
    downloaded.push({
      benchmark: benchmarkId,
      path: datasetPathFor(benchmarkId)
    });
  }
  if (parsed.json) {
    console.log(JSON.stringify(downloaded, null, 2));
    return;
  }
  console.log("Downloaded benchmark datasets:");
  for (const entry of downloaded) {
    console.log(`  ${entry.benchmark}  ${entry.path}`);
  }
}
+/**
+ * Implements `remnic bench runs <list|show|delete>`.
+ *
+ * `list` and `show` delegate to `showBenchPackageResults()`; `delete` calls
+ * `deleteBenchmarkResults()` and reports which references were removed and
+ * which were missing. Usage errors, an unknown subcommand, and a delete with
+ * missing references all end in `process.exit(1)`.
+ *
+ * @param {object} parsed Parsed bench arguments; reads `runAction`,
+ *   `benchmarks`, `all`, `json` and `resultsDir`.
+ * @returns {Promise<void>}
+ */
+async function manageBenchRuns(parsed) {
+  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
+  switch (parsed.runAction) {
+    case "list": {
+      if (parsed.benchmarks.length > 0 || parsed.all) {
+        console.error(
+          "ERROR: runs list does not accept benchmark names or --all. Usage: remnic bench runs list [--results-dir <path>] [--json]"
+        );
+        process.exit(1);
+      }
+      // Listing reuses the `results` view with an empty selection.
+      const listArgs = { ...parsed, action: "results", benchmarks: [] };
+      await showBenchPackageResults(listArgs);
+      return;
+    }
+    case "show": {
+      if (parsed.benchmarks.length !== 1 || parsed.all) {
+        console.error(
+          "ERROR: runs show requires exactly one stored result reference. Usage: remnic bench runs show <run> [--detail] [--results-dir <path>] [--json]"
+        );
+        process.exit(1);
+      }
+      await showBenchPackageResults(parsed);
+      return;
+    }
+    case "delete": {
+      if (parsed.benchmarks.length === 0 || parsed.all) {
+        console.error(
+          "ERROR: runs delete requires at least one stored result reference. Usage: remnic bench runs delete <run...> [--results-dir <path>] [--json]"
+        );
+        process.exit(1);
+      }
+      const outcome = await deleteBenchmarkResults(resultsDir, parsed.benchmarks);
+      if (parsed.json) {
+        console.log(JSON.stringify(outcome, null, 2));
+      } else {
+        if (outcome.deleted.length > 0) {
+          console.log("Deleted benchmark runs:");
+          for (const removed of outcome.deleted) {
+            console.log(`  ${removed.id}  ${removed.path}`);
+          }
+        } else {
+          console.log("No benchmark runs were deleted.");
+        }
+        if (outcome.missing.length > 0) {
+          console.log("Missing benchmark runs:");
+          for (const unresolved of outcome.missing) {
+            console.log(`  ${unresolved}`);
+          }
+        }
+      }
+      // Unresolved references make the command fail, in JSON mode as well.
+      if (outcome.missing.length > 0) {
+        process.exit(1);
+      }
+      return;
+    }
+    default: {
+      console.error("ERROR: runs requires a subcommand: list, show, or delete.");
+      process.exit(1);
+    }
+  }
+}
+/**
+ * Implements `remnic bench providers discover`.
+ *
+ * Prints whatever `discoverAllProviders()` returns, either as JSON or as an
+ * indented provider/model listing with the optional per-model details.
+ *
+ * @param {object} parsed Parsed bench arguments; reads `benchmarks` and `json`.
+ * @returns {Promise<void>}
+ */
+async function discoverBenchProviders(parsed) {
+  const hasPositionals = parsed.benchmarks.length > 0;
+  if (hasPositionals) {
+    console.error(
+      "ERROR: providers discover does not accept positional arguments. Usage: remnic bench providers discover [--json]"
+    );
+    process.exit(1);
+  }
+  const discovered = await discoverAllProviders();
+  if (parsed.json) {
+    console.log(JSON.stringify(discovered, null, 2));
+    return;
+  }
+  if (discovered.length === 0) {
+    console.log("No local bench providers were discovered.");
+    return;
+  }
+  console.log("Discovered bench providers:");
+  for (const { provider, models } of discovered) {
+    console.log(`  ${provider}`);
+    for (const model of models) {
+      const capabilities = model.capabilities.join(", ");
+      // Only details that are actually present are shown.
+      const details = [];
+      if (model.contextLength > 0) {
+        details.push(`context=${model.contextLength}`);
+      }
+      if (model.parameterCount) {
+        details.push(`params=${model.parameterCount}`);
+      }
+      if (model.quantization) {
+        details.push(`quant=${model.quantization}`);
+      }
+      if (capabilities.length > 0) {
+        details.push(`caps=${capabilities}`);
+      }
+      const suffix = details.length > 0 ? ` (${details.join(", ")})` : "";
+      console.log(`    - ${model.id}${suffix}`);
+    }
+  }
+}
+/**
+ * Implements `remnic bench publish --target remnic-ai`.
+ *
+ * Builds a publish feed from the results directory, writes it with
+ * `writeBenchmarkPublishFeed()`, and reports the written path. Positional
+ * arguments, any target other than `remnic-ai`, and an empty feed are
+ * reported on stderr and followed by `process.exit(1)`.
+ *
+ * @param {object} parsed Parsed bench arguments; reads `benchmarks`, `target`,
+ *   `resultsDir`, `output` and `json`.
+ * @returns {Promise<void>}
+ */
+async function publishBenchPackageResults(parsed) {
+  const hasPositionals = parsed.benchmarks.length > 0;
+  if (hasPositionals) {
+    console.error(
+      "ERROR: publish does not accept positional result references. Usage: remnic bench publish --target remnic-ai [--results-dir <path>] [--output <path>] [--json]"
+    );
+    process.exit(1);
+  }
+  if (parsed.target !== "remnic-ai") {
+    console.error("ERROR: publish requires --target remnic-ai.");
+    process.exit(1);
+  }
+  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
+  const feed = await buildBenchmarkPublishFeed(resultsDir, parsed.target);
+  if (feed.benchmarks.length === 0) {
+    console.error(
+      `ERROR: no publishable benchmark results found in ${resultsDir}. remnic-ai requires stored full runs for published benchmarks.`
+    );
+    process.exit(1);
+  }
+  // An explicit --output wins over the target's default publish path.
+  const destination = parsed.output ?? defaultBenchmarkPublishPath(parsed.target);
+  const writtenPath = await writeBenchmarkPublishFeed(feed, destination);
+  if (!parsed.json) {
+    console.log(
+      `Published ${feed.benchmarks.length} benchmark entries for ${parsed.target} to ${writtenPath}`
+    );
+    return;
+  }
+  const report = {
+    target: parsed.target,
+    outputPath: writtenPath,
+    benchmarkCount: feed.benchmarks.length,
+    feed
+  };
+  console.log(JSON.stringify(report, null, 2));
 }
 async function runBenchViaPackage(parsed, benchmarkId) {
   let benchModule;
   try {
-    benchModule = await import("./dist-B67STFFX.js");
+    benchModule = await import("./dist-7DCVQLUB.js");
   } catch {
     return false;
   }
@@ -430,6 +1284,11 @@ async function runBenchViaPackage(parsed, benchmarkId) {
   if (!definition?.runnerAvailable || !benchModule.runBenchmark || !benchModule.writeBenchmarkResult) {
     return false;
   }
+  if (definition.meta?.category === "ingestion") {
+    throw new Error(
+      `Benchmark "${benchmarkId}" requires an ingestion adapter which is not yet available via the CLI. Run ingestion benchmarks programmatically by passing an ingestionAdapter to runBenchmark().`
+    );
+  }
   const createAdapter = parsed.quick ? benchModule.createLightweightAdapter : benchModule.createRemnicAdapter;
   if (!createAdapter) {
     return false;
@@ -442,7 +1301,7 @@ async function runBenchViaPackage(parsed, benchmarkId) {
   );
   if (!parsed.quick && !datasetDir) {
     throw new Error(
-      `full benchmark runs for "${benchmarkId}" require dataset files. Pass --dataset-dir <path> or run from a Remnic repo checkout with evals/datasets/${benchmarkId}.`
+      `full benchmark runs for "${benchmarkId}" require dataset files. Run "remnic bench datasets download ${benchmarkId}" or pass --dataset-dir <path>.`
     );
   }
   const system = await createAdapter();
@@ -466,6 +1325,41 @@ async function runBenchViaPackage(parsed, benchmarkId) {
     await system.destroy();
   }
 }
+/**
+ * Runs a custom benchmark file (`parsed.custom`) through the bundled bench
+ * runtime.
+ *
+ * @param {object} parsed Parsed bench arguments; reads `custom`, `quick` and `json`.
+ * @returns {Promise<boolean>} `true` once the benchmark ran and its result was
+ *   written; `false` when the bench module cannot be imported or lacks the
+ *   required exports, so the caller can report that the runner is missing.
+ */
+async function runCustomBenchViaPackage(parsed) {
+  let benchModule;
+  try {
+    benchModule = await import("./dist-7DCVQLUB.js");
+  } catch {
+    // A failed import is reported to the caller as "not handled".
+    return false;
+  }
+  const hasRunner = Boolean(
+    benchModule.runCustomBenchmarkFile && benchModule.writeBenchmarkResult
+  );
+  if (!hasRunner) {
+    return false;
+  }
+  let createAdapter;
+  if (parsed.quick) {
+    createAdapter = benchModule.createLightweightAdapter;
+  } else {
+    createAdapter = benchModule.createRemnicAdapter;
+  }
+  if (!createAdapter) {
+    return false;
+  }
+  const outputDir = resolveBenchOutputDir();
+  const system = await createAdapter();
+  try {
+    // Quick mode limits the run to one item on the lightweight adapter.
+    const runOptions = parsed.quick
+      ? { mode: "quick", outputDir, limit: 1, adapterMode: "lightweight", system }
+      : { mode: "full", outputDir, limit: void 0, adapterMode: "direct", system };
+    const result = await benchModule.runCustomBenchmarkFile(parsed.custom, runOptions);
+    const writtenPath = await benchModule.writeBenchmarkResult(result, outputDir);
+    if (parsed.json) {
+      console.log(JSON.stringify(result, null, 2));
+    } else {
+      printBenchPackageSummary(result, writtenPath);
+    }
+    return true;
+  } finally {
+    // The adapter is torn down whether the run succeeded or threw.
+    await system.destroy();
+  }
+}
 function resolveConfigPath(cliPath) {
   if (cliPath) return path2.resolve(cliPath);
   const envPath = readCompatEnv("REMNIC_CONFIG_PATH", "ENGRAM_CONFIG_PATH");
@@ -628,6 +1522,7 @@ async function cmdQuery(queryText, json, explain) {
   const config = parseConfig(remnicCfg);
   const orchestrator = new Orchestrator(config);
   await orchestrator.initialize();
+  await orchestrator.deferredReady;
   const service = new EngramAccessService(orchestrator);
   if (explain) {
     const result2 = await runExplain(service, queryText);
@@ -808,6 +1703,7 @@ async function cmdEnrich(rest) {
     ];
     const orchestrator2 = new Orchestrator(config);
     await orchestrator2.initialize();
+    await orchestrator2.deferredReady;
     const searchBackend2 = orchestrator2.qmd;
     const searchFn2 = searchBackend2.isAvailable() ? async (query) => {
       const results2 = await searchBackend2.search(query, void 0, 10);
@@ -843,6 +1739,7 @@ Registered providers:`);
   }
   const orchestrator = new Orchestrator(config);
   await orchestrator.initialize();
+  await orchestrator.deferredReady;
   const storage = await orchestrator.getStorage(config.defaultNamespace);
   const entityFiles = await storage.readAllEntityFiles();
   let targets = entityFiles;
@@ -1925,6 +2822,42 @@ async function cmdBench(rest) {
     await cmdLegacyBenchmark(parsed.action, benchAction.args, parsed.json);
     return;
   }
+  if (parsed.action === "compare") {
+    await compareBenchPackageResults(parsed);
+    return;
+  }
+  if (parsed.action === "results") {
+    await showBenchPackageResults(parsed);
+    return;
+  }
+  if (parsed.action === "baseline") {
+    await manageBenchBaselines(parsed);
+    return;
+  }
+  if (parsed.action === "export") {
+    await exportBenchPackageResult(parsed);
+    return;
+  }
+  if (parsed.action === "datasets") {
+    await manageBenchDatasets(parsed);
+    return;
+  }
+  if (parsed.action === "runs") {
+    await manageBenchRuns(parsed);
+    return;
+  }
+  if (parsed.action === "publish") {
+    await publishBenchPackageResults(parsed);
+    return;
+  }
+  if (parsed.action === "ui") {
+    await launchBenchUi(parsed.resultsDir ?? resolveBenchOutputDir());
+    return;
+  }
+  if (parsed.action === "providers") {
+    await discoverBenchProviders(parsed);
+    return;
+  }
   if (parsed.action === "list") {
     const catalog = await listBenchmarksFromPackage() ?? BENCHMARK_CATALOG;
     if (parsed.json) {
@@ -1937,6 +2870,20 @@ async function cmdBench(rest) {
     }
     return;
   }
+  if (parsed.custom) {
+    if (parsed.all || parsed.benchmarks.length > 0) {
+      console.error("ERROR: --custom cannot be combined with benchmark names or --all.");
+      process.exit(1);
+    }
+    const handledByPackage = await runCustomBenchViaPackage(parsed);
+    if (!handledByPackage) {
+      console.error(
+        "Benchmark runner not found. Expected a phase-1 @remnic/bench runtime export for custom benchmarks."
+      );
+      process.exit(1);
+    }
+    return;
+  }
   const selectedBenchmarks = parsed.all ? await resolveAllBenchmarks() : parsed.benchmarks;
   if (selectedBenchmarks.length === 0) {
     console.error(
@@ -2789,6 +3736,180 @@ Usage:
       break;
   }
 }
+/**
+ * Reads the value of a flag that must carry one whenever it is present.
+ *
+ * @param {string[]} args Argument list to inspect.
+ * @param {string} flag Flag name, e.g. `--format`.
+ * @returns {string | undefined} The flag's value, or `undefined` when the flag
+ *   is absent.
+ * @throws {Error} When the flag is present but `resolveFlagStrict()` yields no
+ *   value for it.
+ */
+function resolveRequiredValueFlag(args, flag) {
+  if (hasFlag(args, flag)) {
+    const value = resolveFlagStrict(args, flag);
+    if (value !== void 0) {
+      return value;
+    }
+    throw new Error(
+      `${flag} requires a value. Provide it as \`${flag} <value>\`, not as a bare flag.`
+    );
+  }
+  return void 0;
+}
+/**
+ * Parses the arguments of `remnic training:export` into an options object.
+ *
+ * @param {string[]} rest Arguments that follow the command name.
+ * @param {string} defaultMemoryDir Memory directory used when `--memory-dir`
+ *   is not given.
+ * @returns {object} Options with `format`, `output` (empty string when no
+ *   output flag was given, which is only allowed with `--dry-run`),
+ *   `memoryDir`, `since`, `until`, `minConfidence`, `categories`,
+ *   `includeEntities`, `synthesize`, `maxPairsPerRecord`, `privacySweep` and
+ *   `dryRun`.
+ * @throws {Error} When `--format` is missing, when neither an output path nor
+ *   `--dry-run` is given, when a value flag is passed bare, or when
+ *   `--min-confidence` / `--max-pairs-per-record` is not a valid number.
+ */
+function parseTrainingExportArgs(rest, defaultMemoryDir) {
+  const format = resolveRequiredValueFlag(rest, "--format");
+  if (!format) {
+    throw new Error(
+      "--format <name> is required. Run `remnic training:export --help` for the list of registered adapters."
+    );
+  }
+  const dryRun = hasFlag(rest, "--dry-run");
+  // `--out` is accepted as a short alias of `--output`.
+  const outputRaw = resolveRequiredValueFlag(rest, "--output") ?? resolveRequiredValueFlag(rest, "--out");
+  if (!outputRaw && !dryRun) {
+    throw new Error(
+      "--output <path> (or --out <path>) is required for training:export. Use --dry-run to print statistics without writing a file."
+    );
+  }
+  const output = outputRaw ? expandTilde(outputRaw) : "";
+  const memoryDirFlag = resolveRequiredValueFlag(rest, "--memory-dir");
+  const memoryDir = expandTilde(memoryDirFlag ?? defaultMemoryDir);
+  const since = resolveRequiredValueFlag(rest, "--since");
+  const until = resolveRequiredValueFlag(rest, "--until");
+  const minConfidenceRaw = resolveRequiredValueFlag(rest, "--min-confidence");
+  let minConfidence;
+  if (minConfidenceRaw !== void 0) {
+    // Number("") and Number("   ") both evaluate to 0, so a blank value would
+    // otherwise be accepted silently as a confidence floor of 0.
+    const isBlank = String(minConfidenceRaw).trim().length === 0;
+    const n = Number(minConfidenceRaw);
+    if (isBlank || !Number.isFinite(n) || n < 0 || n > 1) {
+      throw new Error(
+        `Invalid --min-confidence value "${minConfidenceRaw}": expected a number in [0, 1].`
+      );
+    }
+    minConfidence = n;
+  }
+  const categoriesRaw = resolveRequiredValueFlag(rest, "--categories");
+  // Comma-separated list; surrounding whitespace and empty entries are dropped.
+  const categories = categoriesRaw ? categoriesRaw.split(",").map((c) => c.trim()).filter((c) => c.length > 0) : void 0;
+  const maxPairsRaw = resolveRequiredValueFlag(rest, "--max-pairs-per-record");
+  let maxPairsPerRecord;
+  if (maxPairsRaw !== void 0) {
+    const n = Number(maxPairsRaw);
+    if (!Number.isInteger(n) || n < 1) {
+      throw new Error(
+        `Invalid --max-pairs-per-record value "${maxPairsRaw}": expected a positive integer.`
+      );
+    }
+    maxPairsPerRecord = n;
+  }
+  const includeEntities = hasFlag(rest, "--include-entities");
+  const synthesize = hasFlag(rest, "--synthesize");
+  // The privacy sweep is on unless explicitly disabled.
+  const privacySweep = !hasFlag(rest, "--no-privacy-sweep");
+  return {
+    format,
+    output,
+    memoryDir,
+    since,
+    until,
+    minConfidence,
+    categories,
+    includeEntities,
+    synthesize,
+    maxPairsPerRecord,
+    privacySweep,
+    dryRun
+  };
+}
+/**
+ * Executes a training export described by already-parsed options.
+ *
+ * Steps: look up the adapter for `args.format`, validate `args.memoryDir`,
+ * convert memories to records, optionally synthesize pairs and run the PII
+ * sweep, then either print dry-run statistics or write the formatted
+ * dataset. The dataset is written to a temporary sibling file and renamed
+ * onto `args.output`; if writing or renaming fails, the temporary file is
+ * removed before the error is rethrown.
+ *
+ * @param {object} args Options as produced by `parseTrainingExportArgs`.
+ * @param {{ write(chunk: string): unknown }} [stdout=process.stdout] Sink for
+ *   progress and statistics output.
+ * @returns {Promise<{recordsRead: number, recordsWritten: number, redactedCount: number, outputPath: string | null}>}
+ *   Export statistics; `recordsWritten` is 0 and `outputPath` is `null` for a
+ *   dry run.
+ * @throws {Error} When the format is unknown, the memory directory is missing
+ *   or not a directory, `output` is empty outside dry-run mode, or a
+ *   filesystem operation fails.
+ */
+async function runTrainingExport(args, stdout = process.stdout) {
+  ensureWecloneExportAdapterRegistered();
+  const adapter = getTrainingExportAdapter2(args.format);
+  if (!adapter) {
+    const registered = listTrainingExportAdapters();
+    const validList = registered.length > 0 ? `Valid formats: [${registered.join(", ")}]` : "No adapters are currently registered.";
+    throw new Error(
+      `Unknown training-export format "${args.format}". ${validList}`
+    );
+  }
+  if (!fs.existsSync(args.memoryDir)) {
+    throw new Error(
+      `--memory-dir "${args.memoryDir}" does not exist. Provide the path to an existing memory directory.`
+    );
+  }
+  if (!fs.statSync(args.memoryDir).isDirectory()) {
+    throw new Error(
+      `--memory-dir "${args.memoryDir}" is not a directory. Provide the path to a memory directory, not a file.`
+    );
+  }
+  let since;
+  if (args.since) since = parseStrictCliDate(args.since, "--since");
+  let until;
+  if (args.until) until = parseStrictCliDate(args.until, "--until");
+  const convertOptions = {
+    memoryDir: args.memoryDir,
+    since,
+    until,
+    minConfidence: args.minConfidence,
+    categories: args.categories,
+    includeEntities: args.includeEntities
+  };
+  let records = await convertMemoriesToRecords(convertOptions);
+  // Captured before synthesis and the privacy sweep replace `records`.
+  const recordsRead = records.length;
+  if (args.synthesize) {
+    records = synthesizeTrainingPairs(records, {
+      maxPairsPerRecord: args.maxPairsPerRecord
+    });
+  }
+  let redactedCount = 0;
+  if (args.privacySweep) {
+    const swept = sweepPii(records);
+    records = swept.cleanRecords;
+    redactedCount = swept.redactedCount;
+  }
+  if (args.dryRun) {
+    stdout.write("Training export dry run\n");
+    stdout.write(`Format: ${adapter.name}\n`);
+    stdout.write(`Records read: ${recordsRead}\n`);
+    stdout.write(`Records to write: ${records.length}\n`);
+    if (args.privacySweep) {
+      stdout.write(`Redacted records: ${redactedCount}\n`);
+    }
+    // Per-category counts, printed in alphabetical order.
+    const cats = /* @__PURE__ */ new Map();
+    for (const r of records) {
+      const c = r.category ?? "unknown";
+      cats.set(c, (cats.get(c) ?? 0) + 1);
+    }
+    const sortedCats = [...cats.entries()].sort(
+      (a, b) => a[0].localeCompare(b[0])
+    );
+    for (const [cat, count] of sortedCats) {
+      stdout.write(`  ${cat}: ${count}\n`);
+    }
+    return {
+      recordsRead,
+      recordsWritten: 0,
+      redactedCount,
+      outputPath: null
+    };
+  }
+  if (!args.output) {
+    throw new Error(
+      "runTrainingExport: `output` is required when dryRun is false. Pass dryRun: true to skip file I/O."
+    );
+  }
+  const formatted = adapter.formatRecords(records);
+  const outDir = path2.dirname(args.output);
+  fs.mkdirSync(outDir, { recursive: true });
+  // Write to a sibling temp file first, then rename it onto the target, so the
+  // target path never holds a partially written dataset.
+  const tmpPath = `${args.output}.tmp-${process.pid}-${Date.now()}`;
+  try {
+    fs.writeFileSync(tmpPath, formatted, "utf-8");
+    fs.renameSync(tmpPath, args.output);
+  } catch (err) {
+    // Do not leave an orphaned temp file behind when the write or rename fails.
+    try {
+      fs.rmSync(tmpPath, { force: true });
+    } catch {
+      // Cleanup is best-effort; the original failure below is the useful error.
+    }
+    throw err;
+  }
+  stdout.write(
+    `Exported ${records.length} records to ${args.output} (${adapter.name} format)\n`
+  );
+  if (args.privacySweep && redactedCount > 0) {
+    stdout.write(`Privacy sweep redacted PII in ${redactedCount} record(s).\n`);
+  }
+  return {
+    recordsRead,
+    recordsWritten: records.length,
+    redactedCount,
+    outputPath: args.output
+  };
+}
 async function main(argv = process.argv.slice(2)) {
   const [command, ...rest] = argv;
   if (command !== "migrate") {
@@ -3047,6 +4168,51 @@ Options:
       await cmdExtensions(action, rest.slice(1));
       break;
     }
+    case "training:export": {
+      if (rest.includes("--help") || rest.includes("-h")) {
+        console.log(`
+remnic training:export \u2014 Export Remnic memories as fine-tuning datasets (issue #459)
+Usage:
+  remnic training:export --format <name> --output <path> [options]
+Required:
+  --format <name>              Registered adapter name (e.g. weclone)
+  --output <path> | --out      Path to write the dataset file
+Filters:
+  --memory-dir <path>          Memory directory (defaults to resolved memoryDir)
+  --since <YYYY-MM-DD[T...]>   Only include memories created at or after this date
+  --until <YYYY-MM-DD[T...]>   Only include memories created before this date (exclusive)
+  --min-confidence <0..1>      Inclusive lower bound on memory confidence
+  --categories <list>          Comma-separated category filter (fact,preference,...)
+  --include-entities           Also read from entities/ (off by default)
+Adapter options:
+  --synthesize                 Generate conversational Q/A pairs (WeClone-optimised)
+  --max-pairs-per-record <n>   When --synthesize, max pairs emitted per memory
+  --no-privacy-sweep           Skip the final PII redaction pass (default: on)
+Other:
+  --dry-run                    Print statistics only; do not write the file
+`);
+        break;
+      }
+      let parsed;
+      try {
+        parsed = parseTrainingExportArgs(rest, resolveMemoryDir());
+      } catch (err) {
+        console.error(err instanceof Error ? err.message : String(err));
+        process.exit(1);
+      }
+      try {
+        await runTrainingExport(parsed);
+      } catch (err) {
+        console.error(err instanceof Error ? err.message : String(err));
+        process.exit(1);
+      }
+      break;
+    }
     case "openclaw": {
       const subAction = rest[0] ?? "help";
       if (subAction === "install") {
@@ -3103,9 +4269,9 @@ Usage:
   remnic extensions <list|show|validate|reload>  Manage memory extensions
   remnic space <list|switch|create|delete|push|pull|share|promote|audit>  Manage spaces
     create accepts --parent <id> to set parent-child relationship
-  remnic bench <list|run> [benchmark...] [--quick] [--all] [--dataset-dir <path>] [--json]
+  remnic bench <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers> [benchmark...] [--quick] [--all] [--dataset-dir <path>] [--results-dir <path>] [--baselines-dir <path>] [--threshold <value>] [--detail] [--format <json|csv|html>] [--output <path>] [--target remnic-ai] [--json]
     benchmark is kept as a compatibility alias. check/report remain under that alias.
-  remnic benchmark <list|run|check|report> [queries...] [--explain] [--baseline=<path>] [--report=<path>]
+  remnic benchmark <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers|check|report> [queries...] [--explain] [--baseline=<path>] [--report=<path>]
   remnic briefing [--since <window>] [--focus <filter>] [--save] [--format markdown|json]
     Daily context briefing. Windows: yesterday, today, NNh, NNd, NNw.
     Focus: person:<name>, project:<name>, topic:<name>.
@@ -3126,6 +4292,9 @@ Usage:
   remnic enrich --dry-run        Preview what would be enriched
   remnic enrich audit            Show recent enrichment audit log
   remnic enrich providers        List registered providers and their status
+  remnic training:export --format <name> --output <path> [options]
+    Export memories as a fine-tuning dataset (issue #459). Run
+    'remnic training:export --help' for the full option list.
 Options:
   --json    Output in JSON format
@@ -3152,7 +4321,9 @@ export {
   main,
   parseBenchArgs,
   parseConnectorConfig,
+  parseTrainingExportArgs,
   resolveFlag,
+  runTrainingExport,
   stripConfigArgv,
   stripResolveFlags
 };