npm - @remnic/bench - Versions diffs - 9.3.674 → 9.3.676 - Mend

@remnic/bench 9.3.674 → 9.3.676

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js CHANGED

@@ -10562,12 +10562,315 @@ function isPlainObject(value) {
 // src/benchmark.ts
 import fs2 from "fs";
-import path31 from "path";
+import path32 from "path";
+import { createHash as createHash9 } from "crypto";
+import { expandTildePath as expandTildePath3 } from "@remnic/core";
+// src/judges/judge-cache.ts
+import { createHash as createHash6, randomBytes as randomBytes2 } from "crypto";
+import {
+  mkdir as mkdir8,
+  readFile as readFile9,
+  rename as rename2,
+  rm as rm3,
+  writeFile as writeFile8
+} from "fs/promises";
+import path11 from "path";
+var JUDGE_CACHE_PROTOCOL_VERSION = "judge-protocol-v1";
+function stableStringify2(value) {
+  if (Array.isArray(value)) {
+    return `[${value.map((item) => stableStringify2(item)).join(",")}]`;
+  }
+  if (value !== null && typeof value === "object") {
+    const record = value;
+    const keys = Object.keys(record).sort();
+    const body = keys.map((key) => `${JSON.stringify(key)}:${stableStringify2(record[key])}`).join(",");
+    return `{${body}}`;
+  }
+  return JSON.stringify(value) ?? "null";
+}
+var JudgeCache = class {
+  dir;
+  // Per-key write serialization so concurrent writers never race a temp-file
+  // rename into place for the same key. Cached entries are read straight from
+  // disk, so reads remain lock-free.
+  writeQueues = /* @__PURE__ */ new Map();
+  // PR #1591 round-8 (cursor thread): in-memory layer for fire-and-forget
+  // writes. putSafely chains cache.put onto a pendingWrites promise
+  // without awaiting, so a second benchmark iteration can call get()
+  // before the disk rename lands. The inflight map is populated
+  // synchronously inside put() (before the first await) and cleared in
+  // the finally after the write settles — closing the gap without
+  // changing the byte-identical baseline (a fresh process has no
+  // inflight entries).
+  inflight = /* @__PURE__ */ new Map();
+  cachedDirExists = false;
+  constructor(options) {
+    this.dir = path11.resolve(options.dir);
+  }
+  /** Compute the sha256-hex key for a set of parts. Pure, sync, side-effect-free. */
+  computeKey(parts) {
+    const fieldDigest = (value) => createHash6("sha256").update(value).digest();
+    return createHash6("sha256").update(fieldDigest(parts.benchmarkId)).update(fieldDigest(parts.datasetVersion)).update(fieldDigest(parts.questionId)).update(fieldDigest(parts.answerText)).update(fieldDigest(parts.judgePromptHash)).update(fieldDigest(parts.judgeModelId)).update(fieldDigest(parts.judgeParamsHash)).digest("hex");
+  }
+  /**
+   * Read a previously-stored verdict. Returns `undefined` on miss, corrupted
+   * entry, missing required field, or read error — never throws, never
+   * fabricates.
+   */
+  async get(parts) {
+    const key = this.computeKey(parts);
+    const inflightHit = this.inflight.get(key);
+    if (inflightHit !== void 0) {
+      return {
+        cacheHit: true,
+        verdict: inflightHit.verdict,
+        storedAt: inflightHit.storedAt
+      };
+    }
+    const filePath = this.entryPath(key);
+    let raw;
+    try {
+      raw = await readFile9(filePath, "utf8");
+    } catch {
+      return void 0;
+    }
+    const envelope = parseEnvelope(raw);
+    if (envelope === void 0) return void 0;
+    return {
+      cacheHit: true,
+      verdict: envelope.verdict,
+      storedAt: envelope.storedAt
+    };
+  }
+  /**
+   * Persist a verdict atomically: write to a temp file then rename into
+   * place. Concurrent writes for the same key serialize via an in-memory
+   * chain so the temp-file never lands on top of a sibling rename.
+   */
+  async put(parts, verdict) {
+    const key = this.computeKey(parts);
+    const envelope = {
+      storedAt: (/* @__PURE__ */ new Date()).toISOString(),
+      key,
+      verdict
+    };
+    this.inflight.set(key, envelope);
+    const prior = this.writeQueues.get(key) ?? Promise.resolve();
+    const next = prior.then(() => this.writeOne(key, envelope));
+    const tracked = next.catch(() => void 0);
+    this.writeQueues.set(key, tracked);
+    try {
+      await next;
+    } finally {
+      if (this.writeQueues.get(key) === tracked) {
+        this.writeQueues.delete(key);
+      }
+      if (this.inflight.get(key) === envelope) {
+        this.inflight.delete(key);
+      }
+    }
+  }
+  /** Number of in-flight per-key write chains (diagnostic/test seam). */
+  pendingWriteCount() {
+    return this.writeQueues.size;
+  }
+  async writeOne(key, envelope) {
+    if (!this.cachedDirExists) {
+      await mkdir8(this.dir, { recursive: true });
+      this.cachedDirExists = true;
+    }
+    const filePath = this.entryPath(key);
+    const tempPath = path11.join(
+      this.dir,
+      `.${key}.${randomBytes2(6).toString("hex")}.tmp`
+    );
+    await writeFile8(tempPath, `${JSON.stringify(envelope)}
+`, "utf8");
+    try {
+      await rename2(tempPath, filePath);
+    } catch (error) {
+      await rm3(tempPath, { force: true }).catch(() => void 0);
+      throw error;
+    }
+  }
+  entryPath(key) {
+    return path11.join(this.dir, `${key}.json`);
+  }
+};
+function runJudgeWithCache(options) {
+  const { judge, cache } = options;
+  const keyExtras = options.keyExtras ?? {};
+  const counters = {
+    modelCalls: 0,
+    cacheHits: 0,
+    cacheMisses: 0,
+    cacheWriteFailures: 0
+  };
+  let pendingWrites = Promise.resolve();
+  const putSafely = (parts, verdict, control) => {
+    if (!cache) return;
+    if (control?.signal?.aborted) return;
+    const write = cache.put(parts, verdict).catch(() => {
+      counters.cacheWriteFailures += 1;
+    });
+    pendingWrites = pendingWrites.then(() => write);
+  };
+  const CACHE_READ_BUDGET_MS = 250;
+  async function readCacheWithAbort(cache2, parts, control) {
+    if (control?.signal?.aborted) return void 0;
+    const read = cache2.get(parts);
+    const readBudget = new Promise((resolveBudget) => {
+      setTimeout(() => {
+        resolveBudget(void 0);
+      }, CACHE_READ_BUDGET_MS);
+    });
+    return Promise.race([read, readBudget]);
+  }
+  const cachedVerdict = (stored) => ({
+    score: stored.score,
+    tokens: { input: 0, output: 0 },
+    latencyMs: 0,
+    ...stored.model !== void 0 ? { model: stored.model } : {}
+  });
+  const wrapper = {
+    counters,
+    cache,
+    drainPendingWrites: () => pendingWrites,
+    async score(question, predicted, expected, control) {
+      const detailed = await wrapper.scoreWithMetrics(
+        question,
+        predicted,
+        expected,
+        control
+      );
+      return detailed.score;
+    },
+    async scoreWithMetrics(question, predicted, expected, control) {
+      const answerText = `${predicted}${expected}`;
+      const parts = {
+        benchmarkId: keyExtras.benchmarkId ?? "unknown-benchmark",
+        datasetVersion: keyExtras.datasetVersion ?? "unknown-version",
+        questionId: question,
+        answerText,
+        judgePromptHash: keyExtras.judgePromptHash ?? "unknown-prompt",
+        judgeModelId: keyExtras.judgeModelId ?? "unknown-judge",
+        judgeParamsHash: keyExtras.judgeParamsHash ?? "unknown-params"
+      };
+      if (cache) {
+        let hit;
+        try {
+          hit = await readCacheWithAbort(cache, parts, control);
+        } catch {
+          hit = void 0;
+        }
+        if (hit) {
+          counters.cacheHits += 1;
+          return cachedVerdict(hit.verdict);
+        }
+        counters.cacheMisses += 1;
+      }
+      if (!judge.scoreWithMetrics) {
+        counters.modelCalls += 1;
+        const scoreStartedAt = Date.now();
+        const scoreValue = judge.score ? await judge.score(question, predicted, expected, control) : 0;
+        const synthesized = {
+          score: scoreValue,
+          tokens: { input: 0, output: 0 },
+          latencyMs: Date.now() - scoreStartedAt,
+          model: keyExtras.judgeModelId ?? void 0
+        };
+        putSafely(parts, synthesized, control);
+        return synthesized;
+      }
+      counters.modelCalls += 1;
+      const fresh = await judge.scoreWithMetrics(
+        question,
+        predicted,
+        expected,
+        control
+      );
+      putSafely(parts, fresh, control);
+      return fresh;
+    }
+  };
+  if (typeof judge.scoreBinaryPrompt === "function") {
+    Object.defineProperty(wrapper, "scoreBinaryPrompt", {
+      configurable: true,
+      enumerable: true,
+      writable: false,
+      value: async function scoreBinaryPrompt(prompt, control) {
+        const parts = {
+          benchmarkId: keyExtras.benchmarkId ?? "unknown-benchmark",
+          datasetVersion: keyExtras.datasetVersion ?? "unknown-version",
+          // Binary prompts are content-sensitive: two distinct prompts of
+          // the same character length would collide on the previous
+          // `binary:N` key, so key on a sha256 prefix of the prompt body.
+          questionId: `binary:${createHash6("sha256").update(prompt).digest("hex").slice(0, 16)}`,
+          answerText: prompt,
+          judgePromptHash: keyExtras.judgePromptHash ?? "unknown-prompt",
+          judgeModelId: keyExtras.judgeModelId ?? "unknown-judge",
+          judgeParamsHash: keyExtras.judgeParamsHash ?? "unknown-params"
+        };
+        if (cache) {
+          let hit;
+          try {
+            hit = await readCacheWithAbort(cache, parts, control);
+          } catch {
+            hit = void 0;
+          }
+          if (hit) {
+            counters.cacheHits += 1;
+            return cachedVerdict(hit.verdict);
+          }
+          counters.cacheMisses += 1;
+        }
+        counters.modelCalls += 1;
+        const fresh = await judge.scoreBinaryPrompt(prompt, control);
+        putSafely(parts, fresh, control);
+        return fresh;
+      }
+    });
+  }
+  return wrapper;
+}
+function parseEnvelope(raw) {
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return void 0;
+  }
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    return void 0;
+  }
+  const candidate = parsed;
+  if (typeof candidate.storedAt !== "string") return void 0;
+  if (typeof candidate.key !== "string") return void 0;
+  if (!isBenchJudgeResult(candidate.verdict)) return void 0;
+  return candidate;
+}
+function isBenchJudgeResult(value) {
+  if (value === null || typeof value !== "object" || Array.isArray(value)) {
+    return false;
+  }
+  const v = value;
+  if (typeof v.score !== "number" || !Number.isFinite(v.score)) return false;
+  if (v.tokens === null || typeof v.tokens !== "object" || Array.isArray(v.tokens)) {
+    return false;
+  }
+  const tokens = v.tokens;
+  if (typeof tokens.input !== "number" || !Number.isFinite(tokens.input)) return false;
+  if (typeof tokens.output !== "number" || !Number.isFinite(tokens.output)) return false;
+  if (typeof v.latencyMs !== "number" || !Number.isFinite(v.latencyMs)) return false;
+  if (v.model !== void 0 && typeof v.model !== "string") return false;
+  return true;
+}
 // src/benchmarks/published/ama-bench/runner.ts
 import { randomUUID as randomUUID2 } from "crypto";
-import { readFile as readFile9 } from "fs/promises";
-import path11 from "path";
+import { readFile as readFile10 } from "fs/promises";
+import path12 from "path";
 // src/benchmarks/published/ama-bench/fixture.ts
 var AMA_BENCH_SMOKE_FIXTURE = [
@@ -11142,10 +11445,10 @@ async function loadDataset(mode, datasetDir, limit) {
     return episodes;
   };
   if (datasetDir) {
-    const filePath = path11.join(datasetDir, "open_end_qa_set.jsonl");
+    const filePath = path12.join(datasetDir, "open_end_qa_set.jsonl");
     let raw;
     try {
-      raw = await readFile9(filePath, "utf8");
+      raw = await readFile10(filePath, "utf8");
     } catch (error) {
       throw new Error(
         `AMA-Bench dataset not found at ${filePath}: ${error instanceof Error ? error.message : String(error)}`
@@ -11437,8 +11740,8 @@ function isValidQaPairs(value) {
 // src/benchmarks/published/amemgym/runner.ts
 import { randomUUID as randomUUID3 } from "crypto";
-import { readFile as readFile10 } from "fs/promises";
-import path12 from "path";
+import { readFile as readFile11 } from "fs/promises";
+import path13 from "path";
 // src/benchmarks/published/amemgym/fixture.ts
 var AMEMGYM_SMOKE_FIXTURE = [
@@ -11967,7 +12270,7 @@ async function loadDataset2(mode, datasetDir, limit) {
     const datasetErrors = [];
     for (const filename of DATASET_FILENAMES) {
       try {
-        const raw = await readFile10(path12.join(datasetDir, filename), "utf8");
+        const raw = await readFile11(path13.join(datasetDir, filename), "utf8");
         const parsed = parseDataset(raw, filename, normalizedLimit);
         return ensureDatasetProfiles(parsed);
       } catch (error) {
@@ -12141,8 +12444,8 @@ function normalizeRole(role) {
 // src/benchmarks/published/memory-arena/runner.ts
 import { randomUUID as randomUUID4 } from "crypto";
-import { readFile as readFile11, readdir as readdir5, stat as stat3 } from "fs/promises";
-import path13 from "path";
+import { readFile as readFile12, readdir as readdir5, stat as stat3 } from "fs/promises";
+import path14 from "path";
 import { expandTildePath as expandTildePath2 } from "@remnic/core";
 // src/benchmarks/published/memory-arena/fixture.ts
@@ -12469,7 +12772,7 @@ async function loadDataset3(mode, datasetDir, limit) {
       if (remainingLimit2 === 0) {
         break;
       }
-      const raw = await readFile11(path13.join(datasetDir, filename), "utf8");
+      const raw = await readFile12(path14.join(datasetDir, filename), "utf8");
       const parsedTasks = [];
       raw.split("\n").forEach((line, lineIndex) => {
         if (line.trim().length === 0) {
@@ -12805,7 +13108,7 @@ async function loadMemoryArenaWebshopProductCatalog(datasetDir) {
       `MemoryArena WebShop product sidecar is ${sourceStat.size} bytes; provide a compact JSON/JSONL sidecar smaller than ${MEMORY_ARENA_WEBSHOP_PRODUCTS_MAX_BYTES} bytes instead of the full WebShop catalog.`
     );
   }
-  const raw = await readFile11(sourcePath, "utf8");
+  const raw = await readFile12(sourcePath, "utf8");
   const records = parseMemoryArenaWebshopSidecarRecords(raw, sourcePath);
   const byAsin = /* @__PURE__ */ new Map();
   for (const record of records) {
@@ -12825,14 +13128,14 @@ async function loadMemoryArenaWebshopProductCatalog(datasetDir) {
 async function resolveMemoryArenaWebshopProductCatalogPath(datasetDir) {
   const configuredPath = process.env[MEMORY_ARENA_WEBSHOP_PRODUCTS_ENV]?.trim();
   if (configuredPath && configuredPath.length > 0) {
-    return path13.resolve(expandTildePath2(configuredPath));
+    return path14.resolve(expandTildePath2(configuredPath));
   }
   if (datasetDir === void 0) {
     return void 0;
   }
   const candidatePaths = [
     ...MEMORY_ARENA_WEBSHOP_PRODUCT_SIDECAR_FILENAMES
-  ].map((filename) => path13.join(datasetDir, filename));
+  ].map((filename) => path14.join(datasetDir, filename));
   for (const candidatePath of candidatePaths) {
     try {
       const candidateStat = await stat3(candidatePath);
@@ -14254,8 +14557,8 @@ function scoreSubtaskSuccess(scores) {
 import { collectTemporalLexicalCues } from "@remnic/core";
 // src/benchmarks/published/dataset-loader.ts
-import { readFile as readFile12 } from "fs/promises";
-import path14 from "path";
+import { readFile as readFile13 } from "fs/promises";
+import path15 from "path";
 // src/benchmarks/published/longmemeval/fixture.ts
 var LONG_MEM_EVAL_SMOKE_FIXTURE = [
@@ -14358,10 +14661,10 @@ async function loadDataset4(options) {
   const errors = [];
   if (options.datasetDir) {
     for (const filename of options.filenames) {
-      const abs = path14.join(options.datasetDir, filename);
+      const abs = path15.join(options.datasetDir, filename);
       let raw;
       try {
-        raw = await readFile12(abs, "utf8");
+        raw = await readFile13(abs, "utf8");
       } catch (error) {
         errors.push(
           `${filename}: ${error instanceof Error ? error.message : String(error)}`
@@ -16098,7 +16401,7 @@ function normalizeQaArray(value, location) {
 import { randomUUID as randomUUID6 } from "crypto";
 import { createReadStream as createReadStream2 } from "fs";
 import { readdir as readdir6 } from "fs/promises";
-import path15 from "path";
+import path16 from "path";
 import { createInterface } from "readline/promises";
 import {
   asyncBufferFromFile,
@@ -16569,8 +16872,8 @@ async function listBeamDatasetFiles(datasetDir) {
     return directFiles;
   }
   try {
-    const nestedFilenames = await readdir6(path15.join(datasetDir, "data"));
-    return nestedFilenames.filter((filename) => isBeamDatasetFilename(filename)).map((filename) => path15.join("data", filename));
+    const nestedFilenames = await readdir6(path16.join(datasetDir, "data"));
+    return nestedFilenames.filter((filename) => isBeamDatasetFilename(filename)).map((filename) => path16.join("data", filename));
   } catch {
     return [];
   }
@@ -16597,7 +16900,7 @@ async function* iterateDatasetFiles(datasetDir, datasetFiles, limit) {
   let remainingLimit = limit;
   for (const filename of datasetFiles) {
     const scale = inferScaleFromFilename(filename);
-    const filePath = path15.join(datasetDir, filename);
+    const filePath = path16.join(datasetDir, filename);
     const conversations = filename.endsWith(".jsonl") ? streamJsonlDataset(filePath, filename, remainingLimit) : filename.endsWith(".parquet") ? streamParquetDataset(filePath, filename, remainingLimit) : streamJsonDataset(filePath, filename, remainingLimit);
     for await (const conversation of conversations) {
       yield {
@@ -17608,9 +17911,9 @@ var StructuredLiteralParser = class {
 };
 // src/benchmarks/published/personamem/runner.ts
-import { createHash as createHash6, randomUUID as randomUUID7 } from "crypto";
-import { readFile as readFile13, realpath as realpath4 } from "fs/promises";
-import path16 from "path";
+import { createHash as createHash7, randomUUID as randomUUID7 } from "crypto";
+import { readFile as readFile14, realpath as realpath4 } from "fs/promises";
+import path17 from "path";
 // src/benchmarks/published/personamem/fixture.ts
 var PERSONAMEM_SMOKE_FIXTURE = [
@@ -17886,10 +18189,10 @@ async function loadDataset8(mode, datasetDir, limit) {
   if (datasetDir) {
     const datasetErrors = [];
     for (const relativePath of DATASET_FILE_CANDIDATES) {
-      const datasetPath = path16.join(datasetDir, relativePath);
+      const datasetPath = path17.join(datasetDir, relativePath);
       let raw;
       try {
-        raw = await readFile13(datasetPath, "utf8");
+        raw = await readFile14(datasetPath, "utf8");
       } catch (error) {
         datasetErrors.push(
           `${relativePath}: ${error instanceof Error ? error.message : String(error)}`
@@ -17947,7 +18250,7 @@ async function hydrateSample(row, datasetRoot) {
     datasetRoot,
     row.chat_history_32k_link
   );
-  const chatHistoryRaw = await readFile13(chatHistoryPath, "utf8");
+  const chatHistoryRaw = await readFile14(chatHistoryPath, "utf8");
   const chatHistory = parseChatHistory(
     chatHistoryRaw,
     row.chat_history_32k_link
@@ -18080,12 +18383,12 @@ function parseCsv(raw, limit) {
   return rows;
 }
 async function resolveDatasetFilePath(datasetRoot, relativePath) {
-  const rootPath = path16.resolve(datasetRoot);
+  const rootPath = path17.resolve(datasetRoot);
   const rootRealPath = await realpath4(rootPath);
-  const candidatePath = path16.resolve(rootPath, relativePath);
+  const candidatePath = path17.resolve(rootPath, relativePath);
   const candidateRealPath = await realpath4(candidatePath);
-  const relativeToRoot = path16.relative(rootRealPath, candidateRealPath);
-  if (relativeToRoot.startsWith("..") || path16.isAbsolute(relativeToRoot)) {
+  const relativeToRoot = path17.relative(rootRealPath, candidateRealPath);
+  if (relativeToRoot.startsWith("..") || path17.isAbsolute(relativeToRoot)) {
     throw new Error(
       `PersonaMem-v2 dataset file reference "${relativePath}" must stay within datasetDir.`
     );
@@ -18213,7 +18516,7 @@ function buildMcqPrompt(sample, seed) {
 function deterministicShuffle(values, seedMaterial) {
   return values.map((value, index) => ({
     value,
-    key: createHash6("sha256").update(`${seedMaterial}:${index}:${value}`).digest("hex"),
+    key: createHash7("sha256").update(`${seedMaterial}:${index}:${value}`).digest("hex"),
     index
   })).sort((left, right) => {
     const byKey = left.key.localeCompare(right.key);
@@ -18413,8 +18716,8 @@ function applyLimit6(items, limit) {
 // src/benchmarks/published/membench/runner.ts
 import { randomUUID as randomUUID8 } from "crypto";
-import { readFile as readFile14, readdir as readdir7 } from "fs/promises";
-import path17 from "path";
+import { readFile as readFile15, readdir as readdir7 } from "fs/promises";
+import path18 from "path";
 // src/benchmarks/published/membench/fixture.ts
 var MEMBENCH_SMOKE_FIXTURE = [
@@ -18675,7 +18978,7 @@ async function loadDataset9(mode, datasetDir, limit) {
     let remainingLimit = normalizedLimit;
     for (const filename of filenames) {
       try {
-        const raw = await readFile14(path17.join(datasetDir, filename), "utf8");
+        const raw = await readFile15(path18.join(datasetDir, filename), "utf8");
         const parsed = filename.endsWith(".jsonl") ? parseJsonlDataset(raw, filename) : parseJsonDataset(raw, filename);
         const limitedCases = remainingLimit === 0 ? [] : applyLimit7(parsed, remainingLimit);
         if (limitedCases.length > 0) {
@@ -19542,8 +19845,8 @@ function isPlainObject2(value) {
 // src/benchmarks/published/memoryagentbench/runner.ts
 import { randomUUID as randomUUID9 } from "crypto";
-import { access, readFile as readFile15 } from "fs/promises";
-import path18 from "path";
+import { access, readFile as readFile16 } from "fs/promises";
+import path19 from "path";
 // src/benchmarks/published/memoryagentbench/fixture.ts
 var MEMORY_AGENT_BENCH_SMOKE_FIXTURE = [
@@ -20565,7 +20868,7 @@ async function loadRecSysEntityMapping(datasetDir) {
     }
     let parsed;
     try {
-      parsed = JSON.parse(await readFile15(candidate, "utf8"));
+      parsed = JSON.parse(await readFile16(candidate, "utf8"));
     } catch (error) {
       console.error(
         `  [WARN] MemoryAgentBench ReDial entity mapping ${candidate} is invalid JSON; trying the next candidate: ${error instanceof Error ? error.message : String(error)}`
@@ -20622,21 +20925,21 @@ function recsysEntityMappingCandidates(datasetDir) {
   if (!datasetDir) {
     return [];
   }
-  const absoluteDatasetDir = path18.resolve(datasetDir);
+  const absoluteDatasetDir = path19.resolve(datasetDir);
   const roots = [
     absoluteDatasetDir,
-    path18.dirname(absoluteDatasetDir)
+    path19.dirname(absoluteDatasetDir)
   ];
   const canonicalSuffixes = [
-    path18.join("processed_data", "Recsys_Redial", "entity2id.json"),
-    path18.join("Recsys_Redial", "entity2id.json")
+    path19.join("processed_data", "Recsys_Redial", "entity2id.json"),
+    path19.join("Recsys_Redial", "entity2id.json")
   ];
   const looseSuffixes = ["entity2id.json"];
   return [
     ...roots.flatMap(
-      (root) => canonicalSuffixes.map((suffix) => path18.join(root, suffix))
+      (root) => canonicalSuffixes.map((suffix) => path19.join(root, suffix))
     ),
-    ...looseSuffixes.map((suffix) => path18.join(absoluteDatasetDir, suffix))
+    ...looseSuffixes.map((suffix) => path19.join(absoluteDatasetDir, suffix))
   ];
 }
 async function fileExists(filePath) {
@@ -20673,7 +20976,7 @@ async function loadDataset10(mode, datasetDir, limit) {
     const datasetErrors = [];
     for (const filename of DATASET_BUNDLE_CANDIDATES) {
       const parsed = await tryReadDatasetFile(
-        path18.join(datasetDir, filename),
+        path19.join(datasetDir, filename),
         filename,
         datasetErrors
       );
@@ -20690,7 +20993,7 @@ async function loadDataset10(mode, datasetDir, limit) {
       let splitData;
       for (const filename of splitConfig.candidates) {
         try {
-          splitData = await readDatasetFile(path18.join(datasetDir, filename), filename);
+          splitData = await readDatasetFile(path19.join(datasetDir, filename), filename);
           break;
         } catch (error) {
           if (!isFileNotFoundError2(error)) {
@@ -20728,7 +21031,7 @@ async function loadDataset10(mode, datasetDir, limit) {
   return ensureDatasetItems(applyLimit8(MEMORY_AGENT_BENCH_SMOKE_FIXTURE, normalizedLimit));
 }
 async function readDatasetFile(filePath, filename) {
-  const raw = await readFile15(filePath, "utf8");
+  const raw = await readFile16(filePath, "utf8");
   const parsed = filename.endsWith(".jsonl") ? parseJsonLines(raw, filename) : parseJsonArray(raw, filename);
   return parsed.map(
     (item, index) => parseMemoryAgentBenchItem(item, `${filename} item ${index + 1}`)
@@ -21339,7 +21642,7 @@ function loadCases(mode, limit) {
 // src/benchmarks/remnic/extraction-judge-calibration/runner.ts
 import { randomUUID as randomUUID11 } from "crypto";
 import os4 from "os";
-import path19 from "path";
+import path20 from "path";
 import {
   createVerdictCache,
   judgeFactDurability,
@@ -21449,8 +21752,8 @@ var extractionJudgeCalibrationDefinition = {
 async function runExtractionJudgeCalibrationBenchmark(options) {
   const cases = loadCases2(options.mode, options.limit);
   const config = parseConfig2({
-    memoryDir: path19.join(os4.tmpdir(), "remnic-bench-extraction-judge"),
-    workspaceDir: path19.join(os4.tmpdir(), "remnic-bench-extraction-judge-workspace"),
+    memoryDir: path20.join(os4.tmpdir(), "remnic-bench-extraction-judge"),
+    workspaceDir: path20.join(os4.tmpdir(), "remnic-bench-extraction-judge-workspace"),
     openaiApiKey: "bench-test-key",
     extractionJudgeEnabled: true,
     extractionJudgeBatchSize: 4,
@@ -21999,9 +22302,9 @@ function constantAggregate2(value) {
 // src/benchmarks/remnic/entity-consolidation/runner.ts
 import os5 from "os";
-import path20 from "path";
+import path21 from "path";
 import { randomUUID as randomUUID13 } from "crypto";
-import { mkdtemp as mkdtemp3, rm as rm3 } from "fs/promises";
+import { mkdtemp as mkdtemp3, rm as rm4 } from "fs/promises";
 import { StorageManager } from "@remnic/core";
 // src/benchmarks/remnic/entity-consolidation/fixture.ts
@@ -22162,7 +22465,7 @@ function loadCases4(mode, limit) {
   return limited;
 }
 async function executeCase(sample) {
-  const tmpDir = await mkdtemp3(path20.join(os5.tmpdir(), "remnic-bench-entity-consolidation-"));
+  const tmpDir = await mkdtemp3(path21.join(os5.tmpdir(), "remnic-bench-entity-consolidation-"));
   try {
     const storage = new StorageManager(tmpDir);
     await storage.ensureDirectories();
@@ -22170,7 +22473,7 @@ async function executeCase(sample) {
     const rawEntity = await storage.readEntity(canonicalName);
     return summarizeEntity(rawEntity, canonicalName);
   } finally {
-    await rm3(tmpDir, { recursive: true, force: true });
+    await rm4(tmpDir, { recursive: true, force: true });
   }
 }
 async function applyScenario(storage, sample) {
@@ -22341,9 +22644,9 @@ function parseNonNegativeInt(rawValue) {
 // src/benchmarks/remnic/page-versioning/runner.ts
 import { randomUUID as randomUUID14 } from "crypto";
-import { mkdir as mkdir8, mkdtemp as mkdtemp4, readFile as readFile16, rm as rm4, writeFile as writeFile8 } from "fs/promises";
+import { mkdir as mkdir9, mkdtemp as mkdtemp4, readFile as readFile17, rm as rm5, writeFile as writeFile9 } from "fs/promises";
 import os6 from "os";
-import path21 from "path";
+import path22 from "path";
 import {
   createVersion,
   diffVersions,
@@ -22507,21 +22810,21 @@ function loadCases5(mode, limit) {
   return limited;
 }
 async function executeCase2(sample, dependencies) {
-  const tmpDir = await mkdtemp4(path21.join(os6.tmpdir(), "remnic-bench-page-versioning-"));
+  const tmpDir = await mkdtemp4(path22.join(os6.tmpdir(), "remnic-bench-page-versioning-"));
   try {
-    const factsDir = path21.join(tmpDir, "facts");
-    const pagePath = path21.join(factsDir, `${sample.id}.md`);
-    await mkdir8(factsDir, { recursive: true });
+    const factsDir = path22.join(tmpDir, "facts");
+    const pagePath = path22.join(factsDir, `${sample.id}.md`);
+    await mkdir9(factsDir, { recursive: true });
     const config = versioningConfig();
     switch (sample.scenario) {
       case "revert-flow": {
-        await writeFile8(pagePath, "original content", "utf-8");
+        await writeFile9(pagePath, "original content", "utf-8");
         await dependencies.createVersion(pagePath, "original content", "write", config, void 0, void 0, tmpDir);
-        await writeFile8(pagePath, "modified content", "utf-8");
+        await writeFile9(pagePath, "modified content", "utf-8");
         await dependencies.createVersion(pagePath, "modified content", "write", config, void 0, void 0, tmpDir);
         await dependencies.revertToVersion(pagePath, "1", config, void 0, tmpDir);
         const history = await dependencies.listVersions(pagePath, config, tmpDir);
-        const pageContent = await readFile16(pagePath, "utf-8");
+        const pageContent = await readFile17(pagePath, "utf-8");
         const observed = await dependencies.getVersion(pagePath, "3", config, tmpDir);
         return {
           versionIds: history.versions.map((version) => version.versionId),
@@ -22534,11 +22837,11 @@ async function executeCase2(sample, dependencies) {
         const pruningConfig = versioningConfig({ maxVersionsPerPage: 2 });
         for (let index = 1; index <= 4; index += 1) {
           const content = `content v${index}`;
-          await writeFile8(pagePath, content, "utf-8");
+          await writeFile9(pagePath, content, "utf-8");
           await dependencies.createVersion(pagePath, content, "write", pruningConfig, void 0, void 0, tmpDir);
         }
         const history = await dependencies.listVersions(pagePath, pruningConfig, tmpDir);
-        const pageContent = await readFile16(pagePath, "utf-8");
+        const pageContent = await readFile17(pagePath, "utf-8");
         const prunedIds = [];
         for (const versionId of ["1", "2"]) {
           try {
@@ -22558,7 +22861,7 @@ async function executeCase2(sample, dependencies) {
         };
       }
       case "diff-output": {
-        await writeFile8(pagePath, "line 1\nline 2\nline 3", "utf-8");
+        await writeFile9(pagePath, "line 1\nline 2\nline 3", "utf-8");
         await dependencies.createVersion(
           pagePath,
           "line 1\nline 2\nline 3",
@@ -22568,7 +22871,7 @@ async function executeCase2(sample, dependencies) {
           void 0,
           tmpDir
         );
-        await writeFile8(pagePath, "line 1\nline 2 changed\nline 3\nline 4", "utf-8");
+        await writeFile9(pagePath, "line 1\nline 2 changed\nline 3\nline 4", "utf-8");
         await dependencies.createVersion(
           pagePath,
           "line 1\nline 2 changed\nline 3\nline 4",
@@ -22579,7 +22882,7 @@ async function executeCase2(sample, dependencies) {
           tmpDir
         );
         const history = await dependencies.listVersions(pagePath, config, tmpDir);
-        const pageContent = await readFile16(pagePath, "utf-8");
+        const pageContent = await readFile17(pagePath, "utf-8");
         const diff = await dependencies.diffVersions(pagePath, "1", "2", config, tmpDir);
         const observedLines = normalizeDiffChangedLines(diff);
         return {
@@ -22591,7 +22894,7 @@ async function executeCase2(sample, dependencies) {
       }
     }
   } finally {
-    await rm4(tmpDir, { recursive: true, force: true });
+    await rm5(tmpDir, { recursive: true, force: true });
   }
 }
 function isMissingPageVersionError(error, pagePath, versionId) {
@@ -24864,9 +25167,9 @@ function loadCases9(mode, limit) {
 // src/benchmarks/remnic/procedural-recall/runner.ts
 import { randomUUID as randomUUID21 } from "crypto";
-import { mkdtemp as mkdtemp5, rm as rm5 } from "fs/promises";
+import { mkdtemp as mkdtemp5, rm as rm6 } from "fs/promises";
 import os7 from "os";
-import path22 from "path";
+import path23 from "path";
 import {
   StorageManager as StorageManager2,
   parseConfig as parseConfig3,
@@ -24996,7 +25299,7 @@ async function runProceduralRecallBenchmark(options) {
   }
   for (const sample of e2eCases) {
     const startedAt = performance.now();
-    const dir = await mkdtemp5(path22.join(os7.tmpdir(), "remnic-bench-procedural-recall-"));
+    const dir = await mkdtemp5(path23.join(os7.tmpdir(), "remnic-bench-procedural-recall-"));
     let section = null;
     try {
       const storage = new StorageManager2(dir);
@@ -25011,7 +25314,7 @@ ${body}`,
       );
       const config = parseConfig3({
         memoryDir: dir,
-        workspaceDir: path22.join(dir, "ws"),
+        workspaceDir: path23.join(dir, "ws"),
         openaiApiKey: "bench-key",
         procedural: {
           enabled: sample.proceduralEnabled !== false,
@@ -25020,7 +25323,7 @@ ${body}`,
       });
       section = await buildProcedureRecallSection(storage, sample.prompt, config);
     } finally {
-      await rm5(dir, { recursive: true, force: true });
+      await rm6(dir, { recursive: true, force: true });
     }
     const latencyMs = Math.round(performance.now() - startedAt);
     const nonNull = section !== null && section.length > 0;
@@ -25081,9 +25384,9 @@ ${body}`,
 // src/benchmarks/remnic/ingestion-entity-recall/runner.ts
 import { randomUUID as randomUUID22 } from "crypto";
-import { mkdtemp as mkdtemp6, writeFile as writeFile9, rm as rm6, mkdir as mkdir9, realpath as realpath5 } from "fs/promises";
+import { mkdtemp as mkdtemp6, writeFile as writeFile10, rm as rm7, mkdir as mkdir10, realpath as realpath5 } from "fs/promises";
 import { tmpdir as tmpdir2 } from "os";
-import path23 from "path";
+import path24 from "path";
 // src/ingestion-scorer.ts
 function normalize(value) {
@@ -25585,13 +25888,13 @@ async function runIngestionEntityRecallBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp6(path23.join(tmpdir2(), "bench-email-"));
+  const fixtureDir = await mkdtemp6(path24.join(tmpdir2(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path23.join(fixtureDir, file.relativePath);
-      await mkdir9(path23.dirname(filePath), { recursive: true });
-      await writeFile9(filePath, file.content, "utf8");
+      const filePath = path24.join(fixtureDir, file.relativePath);
+      await mkdir10(path24.dirname(filePath), { recursive: true });
+      await writeFile10(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
       async () => options.ingestionAdapter.ingest(await realpath5(fixtureDir))
@@ -25672,7 +25975,7 @@ async function runIngestionEntityRecallBenchmark(options) {
     ];
     return buildResult(options, tasks, durationMs);
   } finally {
-    await rm6(fixtureDir, { recursive: true, force: true });
+    await rm7(fixtureDir, { recursive: true, force: true });
   }
 }
 async function buildResult(options, tasks, totalLatencyMs) {
@@ -25718,9 +26021,9 @@ async function buildResult(options, tasks, totalLatencyMs) {
 // src/benchmarks/remnic/ingestion-schema-completeness/runner.ts
 import { randomUUID as randomUUID23 } from "crypto";
-import { mkdtemp as mkdtemp7, writeFile as writeFile10, rm as rm7, mkdir as mkdir10, realpath as realpath6 } from "fs/promises";
+import { mkdtemp as mkdtemp7, writeFile as writeFile11, rm as rm8, mkdir as mkdir11, realpath as realpath6 } from "fs/promises";
 import { tmpdir as tmpdir3 } from "os";
-import path24 from "path";
+import path25 from "path";
 var ingestionSchemaCompletenessDefinition = {
   id: "ingestion-schema-completeness",
   title: "Ingestion: Schema Completeness",
@@ -25739,13 +26042,13 @@ async function runIngestionSchemaCompletenessBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp7(path24.join(tmpdir3(), "bench-email-"));
+  const fixtureDir = await mkdtemp7(path25.join(tmpdir3(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path24.join(fixtureDir, file.relativePath);
-      await mkdir10(path24.dirname(filePath), { recursive: true });
-      await writeFile10(filePath, file.content, "utf8");
+      const filePath = path25.join(fixtureDir, file.relativePath);
+      await mkdir11(path25.dirname(filePath), { recursive: true });
+      await writeFile11(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
       async () => options.ingestionAdapter.ingest(await realpath6(fixtureDir))
@@ -25885,15 +26188,15 @@ async function runIngestionSchemaCompletenessBenchmark(options) {
       }
     };
   } finally {
-    await rm7(fixtureDir, { recursive: true, force: true });
+    await rm8(fixtureDir, { recursive: true, force: true });
   }
 }
 // src/benchmarks/remnic/ingestion-backlink-f1/runner.ts
 import { randomUUID as randomUUID24 } from "crypto";
-import { mkdtemp as mkdtemp8, writeFile as writeFile11, rm as rm8, mkdir as mkdir11, realpath as realpath7 } from "fs/promises";
+import { mkdtemp as mkdtemp8, writeFile as writeFile12, rm as rm9, mkdir as mkdir12, realpath as realpath7 } from "fs/promises";
 import { tmpdir as tmpdir4 } from "os";
-import path25 from "path";
+import path26 from "path";
 var ingestionBacklinkF1Definition = {
   id: "ingestion-backlink-f1",
   title: "Ingestion: Backlink F1",
@@ -25912,13 +26215,13 @@ async function runIngestionBacklinkF1Benchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp8(path25.join(tmpdir4(), "bench-email-"));
+  const fixtureDir = await mkdtemp8(path26.join(tmpdir4(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path25.join(fixtureDir, file.relativePath);
-      await mkdir11(path25.dirname(filePath), { recursive: true });
-      await writeFile11(filePath, file.content, "utf8");
+      const filePath = path26.join(fixtureDir, file.relativePath);
+      await mkdir12(path26.dirname(filePath), { recursive: true });
+      await writeFile12(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
       async () => options.ingestionAdapter.ingest(await realpath7(fixtureDir))
@@ -25986,15 +26289,15 @@ async function runIngestionBacklinkF1Benchmark(options) {
       }
     };
   } finally {
-    await rm8(fixtureDir, { recursive: true, force: true });
+    await rm9(fixtureDir, { recursive: true, force: true });
   }
 }
 // src/benchmarks/remnic/ingestion-setup-friction/runner.ts
 import { randomUUID as randomUUID25 } from "crypto";
-import { mkdtemp as mkdtemp9, writeFile as writeFile12, rm as rm9, mkdir as mkdir12, realpath as realpath8 } from "fs/promises";
+import { mkdtemp as mkdtemp9, writeFile as writeFile13, rm as rm10, mkdir as mkdir13, realpath as realpath8 } from "fs/promises";
 import { tmpdir as tmpdir5 } from "os";
-import path26 from "path";
+import path27 from "path";
 var INGESTION_SETUP_FRICTION_LOWER_IS_BETTER = /* @__PURE__ */ new Set(["setup_friction", "commands_count", "prompts_count", "errors_count"]);
 var ingestionSetupFrictionDefinition = {
   id: "ingestion-setup-friction",
@@ -26014,13 +26317,13 @@ async function runIngestionSetupFrictionBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp9(path26.join(tmpdir5(), "bench-friction-"));
+  const fixtureDir = await mkdtemp9(path27.join(tmpdir5(), "bench-friction-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path26.join(fixtureDir, file.relativePath);
-      await mkdir12(path26.dirname(filePath), { recursive: true });
-      await writeFile12(filePath, file.content, "utf8");
+      const filePath = path27.join(fixtureDir, file.relativePath);
+      await mkdir13(path27.dirname(filePath), { recursive: true });
+      await writeFile13(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
       async () => options.ingestionAdapter.ingest(await realpath8(fixtureDir))
@@ -26092,15 +26395,15 @@ async function runIngestionSetupFrictionBenchmark(options) {
       }
     };
   } finally {
-    await rm9(fixtureDir, { recursive: true, force: true });
+    await rm10(fixtureDir, { recursive: true, force: true });
   }
 }
 // src/benchmarks/remnic/ingestion-citation-accuracy/runner.ts
 import { randomUUID as randomUUID26 } from "crypto";
-import { mkdtemp as mkdtemp10, writeFile as writeFile13, rm as rm10, mkdir as mkdir13, realpath as realpath9 } from "fs/promises";
+import { mkdtemp as mkdtemp10, writeFile as writeFile14, rm as rm11, mkdir as mkdir14, realpath as realpath9 } from "fs/promises";
 import { tmpdir as tmpdir6 } from "os";
-import path27 from "path";
+import path28 from "path";
 var CITATION_SUPPORT_THRESHOLD = 0.72;
 var ingestionCitationAccuracyDefinition = {
   id: "ingestion-citation-accuracy",
@@ -26159,10 +26462,10 @@ function resolveCitedSources(sourceRefs, seeAlso, pageRef, sourceContentMap) {
     return "";
   }
   for (const ref of normalizedRefs) {
-    const refBase = path27.basename(ref).toLowerCase();
+    const refBase = path28.basename(ref).toLowerCase();
     let matched = false;
     for (const [relativePath, content] of sourceContentMap) {
-      if (relativePath === ref || relativePath.endsWith(ref) || path27.basename(relativePath).toLowerCase() === refBase) {
+      if (relativePath === ref || relativePath.endsWith(ref) || path28.basename(relativePath).toLowerCase() === refBase) {
         resolved.push(content);
         matched = true;
         break;
@@ -26178,9 +26481,9 @@ function resolveCitedSources(sourceRefs, seeAlso, pageRef, sourceContentMap) {
   if (normalizedRefs.length > 0) {
     return "";
   }
-  const pageBase = path27.basename(pageRef).toLowerCase();
+  const pageBase = path28.basename(pageRef).toLowerCase();
   for (const [relativePath, content] of sourceContentMap) {
-    if (path27.basename(relativePath).toLowerCase() === pageBase) {
+    if (path28.basename(relativePath).toLowerCase() === pageBase) {
       return content;
     }
   }
@@ -26191,13 +26494,13 @@ async function runIngestionCitationAccuracyBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp10(path27.join(tmpdir6(), "bench-citation-"));
+  const fixtureDir = await mkdtemp10(path28.join(tmpdir6(), "bench-citation-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path27.join(fixtureDir, file.relativePath);
-      await mkdir13(path27.dirname(filePath), { recursive: true });
-      await writeFile13(filePath, file.content, "utf8");
+      const filePath = path28.join(fixtureDir, file.relativePath);
+      await mkdir14(path28.dirname(filePath), { recursive: true });
+      await writeFile14(filePath, file.content, "utf8");
     }
     const benchmarkStart = performance.now();
     const { result: ingestionLog, durationMs: ingestionDurationMs } = await timed(
@@ -26394,7 +26697,7 @@ async function runIngestionCitationAccuracyBenchmark(options) {
       }
     };
   } finally {
-    await rm10(fixtureDir, { recursive: true, force: true });
+    await rm11(fixtureDir, { recursive: true, force: true });
   }
 }
 function citationSupportScore(claim, citedSources) {
@@ -26582,7 +26885,7 @@ var ASSISTANT_MORNING_BRIEF_SMOKE_SCENARIOS = ASSISTANT_MORNING_BRIEF_SCENARIOS.
 // src/benchmarks/remnic/_assistant-common/runner.ts
 import { randomUUID as randomUUID27 } from "crypto";
-import path29 from "path";
+import path30 from "path";
 // src/run-seeds.ts
 function buildBenchmarkRunSeeds(runCount, baseSeed) {
@@ -26672,9 +26975,9 @@ function pairedDeltaConfidenceInterval(candidateValues, baselineValues, options
 }
 // src/judges/sealed-rubric.ts
-import { createHash as createHash7 } from "crypto";
+import { createHash as createHash8 } from "crypto";
 import { appendFileSync, mkdirSync } from "fs";
-import path28 from "path";
+import path29 from "path";
 // src/judges/sealed-prompts/assistant-rubric-v1.ts
 var ASSISTANT_RUBRIC_V1 = `# Assistant rubric v1 (sealed)
@@ -26781,7 +27084,7 @@ function loadSealedRubric(id = DEFAULT_ASSISTANT_RUBRIC_ID, options = {}) {
   if (typeof prompt !== "string" || prompt.length === 0) {
     throw new Error(`sealed rubric not found in registry: ${id}`);
   }
-  const sha256 = createHash7("sha256").update(prompt, "utf8").digest("hex");
+  const sha256 = createHash8("sha256").update(prompt, "utf8").digest("hex");
   const version = parseVersionFromId(id);
   return { id, version, prompt, sha256 };
 }
@@ -26954,7 +27257,7 @@ function createSpotCheckFileLogger(options) {
     return { log() {
     } };
   }
-  const logPath = path28.join(directory, `${runId}.jsonl`);
+  const logPath = path29.join(directory, `${runId}.jsonl`);
   let written = 0;
   let warnedOnWriteFailure = false;
   const cap = typeof sampleSize === "number" && sampleSize > 0 ? sampleSize : 5;
@@ -27043,7 +27346,7 @@ async function runAssistantBenchmark(definition, scenarios, resolved, runnerOpti
   const runId = buildRunId(definition.id);
   const spotCheckLogger = createSpotCheckFileLogger({
     runId,
-    directory: runnerOptions.spotCheckDir ?? path29.join(process.cwd(), "benchmarks", "results", "spot-checks"),
+    directory: runnerOptions.spotCheckDir ?? path30.join(process.cwd(), "benchmarks", "results", "spot-checks"),
     sampleRate: 0.35,
     sampleSize: 5
   });
@@ -27692,9 +27995,9 @@ async function runAssistantSynthesisBenchmark(options) {
 // src/benchmarks/remnic/buffer-surprise-trigger/runner.ts
 import { randomUUID as randomUUID28 } from "crypto";
-import path30 from "path";
+import path31 from "path";
 import os8 from "os";
-import { mkdir as mkdir14, rm as rm11 } from "fs/promises";
+import { mkdir as mkdir15, rm as rm12 } from "fs/promises";
 import {
   SmartBuffer,
   computeSurprise,
@@ -27923,11 +28226,11 @@ function hasExplicitTopicPivotCue(text) {
 }
 async function runBufferSurpriseTriggerBenchmark(options) {
   const cases = loadCases10(options.mode, options.limit);
-  const tmpRoot = path30.join(
+  const tmpRoot = path31.join(
     os8.tmpdir(),
     `remnic-bench-buffer-surprise-${randomUUID28()}`
   );
-  await mkdir14(tmpRoot, { recursive: true });
+  await mkdir15(tmpRoot, { recursive: true });
   const tasks = [];
   const startedAt = performance.now();
   try {
@@ -27945,7 +28248,7 @@ async function runBufferSurpriseTriggerBenchmark(options) {
       tasks.push(buildTaskResult(caseDef, control, candidate));
     }
   } finally {
-    await rm11(tmpRoot, { recursive: true, force: true });
+    await rm12(tmpRoot, { recursive: true, force: true });
   }
   const totalLatencyMs = Math.round(performance.now() - startedAt);
   const aggregates = buildAggregates2(tasks);
@@ -27992,12 +28295,12 @@ async function runBufferSurpriseTriggerBenchmark(options) {
   };
 }
 async function runSingleCase(caseDef, options) {
-  const memoryDir = path30.join(
+  const memoryDir = path31.join(
     options.tmpRoot,
     `${caseDef.id}-${options.label}`
   );
-  const workspaceDir = path30.join(memoryDir, "workspace");
-  await mkdir14(workspaceDir, { recursive: true });
+  const workspaceDir = path31.join(memoryDir, "workspace");
+  await mkdir15(workspaceDir, { recursive: true });
   const config = parseConfig4({
     memoryDir,
     workspaceDir,
@@ -29151,8 +29454,8 @@ function finalizeBenchmarkResultConfig(result, options) {
 }
 // src/benchmark.ts
-var DEFAULT_BASELINE_PATH = path31.join(process.cwd(), "benchmarks", "baseline.json");
-var DEFAULT_REPORT_PATH = path31.join(process.cwd(), "benchmarks", "report.json");
+var DEFAULT_BASELINE_PATH = path32.join(process.cwd(), "benchmarks", "baseline.json");
+var DEFAULT_REPORT_PATH = path32.join(process.cwd(), "benchmarks", "report.json");
 var BASELINE_VERSION = 1;
 var DEFAULT_TOLERANCE = 10;
 var DEFAULT_FULL_RUN_COUNT = 5;
@@ -29214,7 +29517,33 @@ async function runBenchmark(benchmarkId, options) {
   const log = (message) => {
     console.error(`  ${message}`);
   };
-  const system = !shouldGuardSystem ? options.system : createTimeoutGuardedAdapter(options.system, {
+  const originalSystemJudge = options.system.judge;
+  let systemJudgeMutatedInPlace = false;
+  let judgeCacheCounters;
+  let cachedCrossJudge;
+  let crossJudgeCacheCounters;
+  let primaryDrainPendingWrites;
+  let crossDrainPendingWrites;
+  const cacheWiring = (() => {
+    if (options.noJudgeCache) {
+      return void 0;
+    }
+    const willWrapPrimary = options.system.judge !== void 0 && (options.judgeProvider ?? null) !== null;
+    const willWrapCross = options.amaBenchCrossJudge !== void 0 && (options.amaBenchCrossJudgeProvider ?? null) !== null;
+    if (!willWrapPrimary && !willWrapCross) {
+      return void 0;
+    }
+    const cacheDir = options.judgeCacheDir ? path32.resolve(expandTildePath3(options.judgeCacheDir)) : options.outputDir ? path32.join(path32.resolve(expandTildePath3(options.outputDir)), "judge-cache") : void 0;
+    if (cacheDir === void 0) {
+      return void 0;
+    }
+    return {
+      cache: new JudgeCache({ dir: cacheDir }),
+      willWrapPrimary,
+      willWrapCross
+    };
+  })();
+  let system = !shouldGuardSystem ? options.system : createTimeoutGuardedAdapter(options.system, {
     benchmarkId,
     ...timeoutMs !== void 0 ? { timeoutMs } : {},
     ...options.drainTimeoutMs !== void 0 ? { drainTimeoutMs: options.drainTimeoutMs } : {},
@@ -29245,18 +29574,118 @@ async function runBenchmark(benchmarkId, options) {
   }) : rawIngestionAdapter;
   let result;
   try {
+    if (cacheWiring?.willWrapPrimary && system.judge !== void 0) {
+      const primary = wrapJudgeWithCache({
+        role: "primary",
+        judge: system.judge,
+        benchmarkId,
+        datasetVersion: definition.meta.version,
+        amaBenchJudgeProtocol: options.amaBenchJudgeProtocol ?? "default",
+        provider: options.judgeProvider ?? null,
+        cache: cacheWiring.cache
+      });
+      judgeCacheCounters = primary.counters;
+      primaryDrainPendingWrites = primary.drainPendingWrites;
+      try {
+        system.judge = primary.judge;
+        systemJudgeMutatedInPlace = system === options.system;
+      } catch {
+        system = createJudgeOverrideProxy(system, primary.judge);
+      }
+    }
+    if (cacheWiring?.willWrapCross) {
+      const wrapped = wrapJudgeWithCache({
+        role: "cross",
+        judge: options.amaBenchCrossJudge,
+        benchmarkId,
+        datasetVersion: definition.meta.version,
+        amaBenchJudgeProtocol: options.amaBenchJudgeProtocol ?? "default",
+        provider: options.amaBenchCrossJudgeProvider ?? null,
+        cache: cacheWiring.cache
+      });
+      cachedCrossJudge = wrapped.judge;
+      crossJudgeCacheCounters = wrapped.counters;
+      crossDrainPendingWrites = wrapped.drainPendingWrites;
+    }
     result = await registeredBenchmark.run({
       ...options,
       system,
+      // PR #1591 P2 (thread #10): when caching is on AND a cross judge is
+      // configured, hand the cached cross judge to the runner so AMA-Bench
+      // cross-judge calls participate in the same content-keyed cache as
+      // the primary system judge. Without this override, the runner kept
+      // calling the unwrapped cross judge on every iteration.
+      ...cachedCrossJudge ? { amaBenchCrossJudge: cachedCrossJudge } : {},
       ...ingestionAdapter ? { ingestionAdapter } : {},
       mode: options.mode ?? "quick",
       benchmark: definition
     });
   } finally {
-    await destroyOwnedIngestionAdapter();
+    try {
+      await destroyOwnedIngestionAdapter();
+    } finally {
+      if (systemJudgeMutatedInPlace) {
+        options.system.judge = originalSystemJudge;
+      }
+    }
+    if (primaryDrainPendingWrites) {
+      await primaryDrainPendingWrites();
+    }
+    if (crossDrainPendingWrites) {
+      await crossDrainPendingWrites();
+    }
+  }
+  const primaryCalls = judgeCacheCounters?.modelCalls ?? 0;
+  const crossCalls = crossJudgeCacheCounters?.modelCalls ?? 0;
+  if (judgeCacheCounters !== void 0 || crossJudgeCacheCounters !== void 0) {
+    result.cost.judgeModelCalls = primaryCalls + crossCalls;
   }
   return finalizeBenchmarkResultConfig(result, options);
 }
+function wrapJudgeWithCache(args) {
+  const crossJudgeIdSuffix = args.role === "cross" ? "-cross" : "";
+  const wrapped = runJudgeWithCache({
+    judge: args.judge,
+    cache: args.cache,
+    keyExtras: {
+      benchmarkId: `${args.benchmarkId}${crossJudgeIdSuffix}`,
+      datasetVersion: args.datasetVersion,
+      // Protocol identity: bench judge protocol version + the selected
+      // judge protocol variant, suffixed by role so primary vs cross
+      // differentiator is part of the prompt hash. Bumping
+      // JUDGE_CACHE_PROTOCOL_VERSION invalidates verdicts when judge
+      // prompt/parse semantics change (PR #1591, High).
+      judgePromptHash: createHash9("sha256").update(JUDGE_CACHE_PROTOCOL_VERSION).update("").update(args.amaBenchJudgeProtocol).update("").update(args.role).digest("hex"),
+      judgeModelId: args.provider?.model !== void 0 && args.provider.model.length > 0 ? `${args.provider.model}${crossJudgeIdSuffix}` : `unknown-${args.role}-judge`,
+      // Full judge configuration, deterministically serialized (sorted
+      // keys) so provider/base-url/retry changes produce fresh cache
+      // keys. `role` is included so primary and cross judges never
+      // share a paramsHash.
+      judgeParamsHash: createHash9("sha256").update(
+        stableStringify2({
+          role: args.role,
+          provider: args.provider
+        })
+      ).digest("hex")
+    }
+  });
+  return {
+    judge: wrapped,
+    counters: wrapped.counters,
+    drainPendingWrites: wrapped.drainPendingWrites
+  };
+}
+function createJudgeOverrideProxy(adapter, judge) {
+  return new Proxy(adapter, {
+    get(target, prop) {
+      if (prop === "judge") {
+        return judge;
+      }
+      const value = Reflect.get(target, prop, target);
+      return typeof value === "function" ? value.bind(target) : value;
+    }
+  });
+}
 function benchmarkDefinition(id) {
   const definition = getBenchmark(id);
   if (!definition) {
@@ -29293,7 +29722,7 @@ function loadBaseline(baselinePath) {
   return raw;
 }
 function saveBaseline(baselinePath, baseline) {
-  fs2.mkdirSync(path31.dirname(baselinePath), { recursive: true });
+  fs2.mkdirSync(path32.dirname(baselinePath), { recursive: true });
   fs2.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}
 `);
 }
@@ -29523,7 +29952,7 @@ function generateReport(results, reportPath) {
     totalDurationMs: results.reduce((sum, result) => sum + result.totalDurationMs, 0)
   };
   if (reportPath) {
-    fs2.mkdirSync(path31.dirname(reportPath), { recursive: true });
+    fs2.mkdirSync(path32.dirname(reportPath), { recursive: true });
     fs2.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}
 `);
   }
@@ -29670,7 +30099,7 @@ function getBenchmarkLowerIsBetter(benchmarkId) {
 }
 // src/integrity/sealed-qrels.ts
-import { readFile as readFile17 } from "fs/promises";
+import { readFile as readFile18 } from "fs/promises";
 function isSealedQrelsArtifact(value) {
   if (!value || typeof value !== "object") {
     return false;
@@ -29740,7 +30169,7 @@ function parseSealedQrels(raw, options = {}) {
   };
 }
 async function loadSealedQrels(filePath, options = {}) {
-  const raw = await readFile17(filePath, "utf8");
+  const raw = await readFile18(filePath, "utf8");
   return parseSealedQrels(raw, options);
 }
 function serializeSealedQrels(artifact) {
@@ -29860,7 +30289,7 @@ function selectFixtureVariant(variants, seed) {
 }
 // src/benchmarks/custom/loader.ts
-import { readFile as readFile18 } from "fs/promises";
+import { readFile as readFile19 } from "fs/promises";
 import { parse as parseYaml } from "yaml";
 var CUSTOM_SCORING_VALUES = /* @__PURE__ */ new Set([
   "exact_match",
@@ -29880,7 +30309,7 @@ function parseCustomBenchmark(source) {
 async function loadCustomBenchmarkFile(filePath) {
   let source;
   try {
-    source = await readFile18(filePath, "utf8");
+    source = await readFile19(filePath, "utf8");
   } catch (error) {
     throw new Error(
       `Failed to read custom benchmark file ${filePath}: ${formatError(error)}`
@@ -29988,15 +30417,60 @@ function formatError(error) {
 // src/benchmarks/custom/runner.ts
 import { randomUUID as randomUUID31 } from "crypto";
-import path32 from "path";
+import path33 from "path";
+import { expandTildePath as expandTildePath4 } from "@remnic/core";
 async function runCustomBenchmarkFile(filePath, options) {
   const spec = await loadCustomBenchmarkFile(filePath);
   const benchmark = createCustomBenchmarkDefinition(spec, filePath);
-  return runCustomBenchmark(spec, {
+  const runOptions = {
     ...options,
     mode: options.mode ?? "quick",
     benchmark
-  });
+  };
+  let cacheRestore;
+  let cacheCounters;
+  if (spec.scoring === "llm_judge" && runOptions.system.judge !== void 0 && !runOptions.noJudgeCache && (runOptions.judgeProvider ?? null) !== null) {
+    const cacheDir = runOptions.judgeCacheDir ? path33.resolve(expandTildePath4(runOptions.judgeCacheDir)) : runOptions.outputDir ? path33.join(path33.resolve(expandTildePath4(runOptions.outputDir)), "judge-cache") : void 0;
+    if (cacheDir !== void 0) {
+      const originalJudge = runOptions.system.judge;
+      const wrapped = wrapJudgeWithCache({
+        role: "primary",
+        judge: originalJudge,
+        benchmarkId: benchmark.id,
+        datasetVersion: benchmark.meta.version,
+        amaBenchJudgeProtocol: runOptions.amaBenchJudgeProtocol ?? "default",
+        provider: runOptions.judgeProvider ?? null,
+        cache: new JudgeCache({ dir: cacheDir })
+      });
+      let systemJudgeMutatedInPlace = false;
+      try {
+        runOptions.system.judge = wrapped.judge;
+        systemJudgeMutatedInPlace = true;
+      } catch {
+        runOptions.system = createJudgeOverrideProxy(runOptions.system, wrapped.judge);
+      }
+      cacheCounters = wrapped.counters;
+      const needRestore = systemJudgeMutatedInPlace;
+      cacheRestore = async () => {
+        if (needRestore) {
+          runOptions.system.judge = originalJudge;
+        }
+        await wrapped.drainPendingWrites();
+      };
+    }
+  }
+  let result;
+  try {
+    result = await runCustomBenchmark(spec, runOptions);
+  } finally {
+    if (cacheRestore) {
+      await cacheRestore();
+    }
+  }
+  if (cacheCounters) {
+    result.cost.judgeModelCalls = cacheCounters.modelCalls;
+  }
+  return result;
 }
 async function runCustomBenchmark(spec, options) {
   if (spec.scoring === "llm_judge" && !options.system.judge) {
@@ -30157,7 +30631,7 @@ async function scoreTask(scoring, options, question, actual, expected) {
   }
 }
 function createCustomBenchmarkDefinition(benchmark, filePath) {
-  const id = `custom:${slugify(path32.basename(filePath, path32.extname(filePath)) || benchmark.name)}`;
+  const id = `custom:${slugify(path33.basename(filePath, path33.extname(filePath)) || benchmark.name)}`;
   return {
     id,
     title: benchmark.name,
@@ -31027,9 +31501,9 @@ var chatFixture = {
 };
 // src/benchmarks/remnic/procedural-recall/ablation.ts
-import { mkdir as mkdir15, mkdtemp as mkdtemp11, rm as rm12, writeFile as writeFile14, readFile as readFile19 } from "fs/promises";
+import { mkdir as mkdir16, mkdtemp as mkdtemp11, rm as rm13, writeFile as writeFile15, readFile as readFile20 } from "fs/promises";
 import os9 from "os";
-import path33 from "path";
+import path34 from "path";
 import {
   StorageManager as StorageManager3,
   parseConfig as parseConfig5,
@@ -31060,7 +31534,7 @@ async function runSide(scenarios, proceduralEnabled) {
   const observed = [];
   for (const scenario of scenarios) {
     const dir = await mkdtemp11(
-      path33.join(os9.tmpdir(), "remnic-bench-proc-ablation-")
+      path34.join(os9.tmpdir(), "remnic-bench-proc-ablation-")
     );
     try {
       const storage = new StorageManager3(dir);
@@ -31075,7 +31549,7 @@ ${body}`,
       );
       const config = parseConfig5({
         memoryDir: dir,
-        workspaceDir: path33.join(dir, "ws"),
+        workspaceDir: path34.join(dir, "ws"),
         openaiApiKey: "bench-key",
         procedural: {
           enabled: proceduralEnabled,
@@ -31089,7 +31563,7 @@ ${body}`,
       );
       observed.push(section !== null && section.length > 0);
     } finally {
-      await rm12(dir, { recursive: true, force: true });
+      await rm13(dir, { recursive: true, force: true });
     }
   }
   return observed;
@@ -31150,7 +31624,7 @@ async function runProceduralAblation(options) {
   };
 }
 async function loadAblationFixture(fixturePath) {
-  const raw = await readFile19(fixturePath, "utf8");
+  const raw = await readFile20(fixturePath, "utf8");
   let parsed;
   try {
     parsed = JSON.parse(raw);
@@ -31246,9 +31720,9 @@ async function runProceduralAblationCli(args) {
     random: args.random,
     seed: args.seed
   });
-  const outDir = path33.dirname(path33.resolve(args.outPath));
-  await mkdir15(outDir, { recursive: true });
-  await writeFile14(args.outPath, JSON.stringify(artifact, null, 2) + "\n", "utf8");
+  const outDir = path34.dirname(path34.resolve(args.outPath));
+  await mkdir16(outDir, { recursive: true });
+  await writeFile15(args.outPath, JSON.stringify(artifact, null, 2) + "\n", "utf8");
   return artifact;
 }