npm - @remnic/bench - Versions diffs - 1.0.0 → 1.0.1 - Mend

@remnic/bench 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,99 @@
+# @remnic/bench
+Benchmark suite and CI regression gates for [Remnic](https://github.com/joshuaswarren/remnic) memory pipelines. Ships the runners, adapters, and results store that the `remnic bench` CLI surface drives.
+`@remnic/bench` is an **optional companion** to [`@remnic/cli`](https://www.npmjs.com/package/@remnic/cli). Install it only when you need to run benchmarks, compare runs, or publish results. Memory-only users do not need it.
+## Install
+```bash
+# Alongside the CLI:
+npm install -g @remnic/cli @remnic/bench
+# Or in a project that drives benchmarks programmatically:
+pnpm add @remnic/bench
+```
+The CLI loads `@remnic/bench` via a computed-specifier dynamic import. If it's not installed, `remnic bench *` prints a clear install hint; the rest of the CLI keeps working.
+## What it does
+- **Benchmark runners** for a growing set of memory-oriented evals, including `longmemeval`, `locomo`, `memory-arena`, `amemgym`, and `ama-bench`, plus a lightweight smoke fixture.
+- **Stored-run management** — every `remnic bench run *` writes a timestamped JSON result under `~/.remnic/bench/results/`; `remnic bench runs list|show|delete` let you browse, inspect, and prune.
+- **Baselines + regression gates** — save a run as a named baseline, compare candidates against it, gate CI on threshold violations.
+- **Result export** — `remnic bench export <run> --format json|csv|html`.
+- **Published feed** — `remnic bench publish --target remnic-ai` builds the tamper-evident integrity manifest consumed by remnic.ai.
+- **Provider discovery** — `remnic bench providers discover` enumerates local OpenAI / Anthropic / Ollama / LiteLLM providers for adapter wiring.
+## CLI quick reference
+```bash
+# List available benchmarks:
+remnic bench list
+# Download a dataset for a full run:
+remnic bench datasets download longmemeval
+# Full run on the downloaded dataset:
+remnic bench run longmemeval
+# 60-second smoke run on the bundled fixture:
+remnic bench run --quick longmemeval
+# Browse stored runs:
+remnic bench runs list
+remnic bench runs show <run-id> --detail
+# Compare two runs:
+remnic bench compare base-run candidate-run
+# Save a baseline (archives the run under ~/.remnic/bench/baselines):
+remnic bench baseline save dashboard-v1 candidate-run
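+# Gate CI against a stored run with a 2% threshold. `compare` takes run
+# ids / paths, not baseline names — use `baseline save` for archival,
+# then reference the underlying run id in `compare`. Here `candidate-run`
+# (archived above as `dashboard-v1`) is the base and `nightly-run` is the
+# run being checked: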
+remnic bench compare candidate-run nightly-run --threshold 0.02
+# Ship results to remnic.ai:
+remnic bench publish --target remnic-ai
+```
+Dataset markers match the runner's accepted filenames, so `datasets status` reports "downloaded" exactly when the runner will load successfully.
+## Programmatic API
+```ts
+import {
+  listBenchmarks,
+  runBenchmark,
+  writeBenchmarkResult,
+  createLightweightAdapter,
+  createRemnicAdapter,
+  compareResults,
+  saveBenchmarkBaseline,
+  listBenchmarkResults,
+  deleteBenchmarkResults,
+  buildBenchmarkPublishFeed,
+  discoverAllProviders,
+  type BenchmarkResult,
+  type ComparisonResult,
+  type BenchmarkDefinition,
+} from "@remnic/bench";
+```
+Each runner accepts a `system` adapter — `createRemnicAdapter()` talks to a live `@remnic/core` Orchestrator; `createLightweightAdapter()` is a minimal in-memory stand-in used for CI smoke runs. Results conform to the `BenchmarkResult` schema (see `dist/index.d.ts`).
+## Agent note
+If you're an AI agent extending a Remnic-based stack: **do not** import `@remnic/bench` from a base install surface (CLI, core, plugin). Optional companion packages must be loaded via computed-specifier dynamic imports with an install-hint fallback. See `packages/remnic-cli/src/optional-bench.ts` in the repo for the canonical pattern, and the à-la-carte invariant in the repo's `AGENTS.md` §44 / `CLAUDE.md` gotcha #57.
+## Related
+- [`@remnic/cli`](https://www.npmjs.com/package/@remnic/cli) — the CLI that drives `remnic bench *`
+- [`@remnic/core`](https://www.npmjs.com/package/@remnic/core) — the memory engine bench adapters talk to
+- Source + issues: <https://github.com/joshuaswarren/remnic>
+## License
+MIT. See the root [LICENSE](https://github.com/joshuaswarren/remnic/blob/main/LICENSE) file.

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { GatewayConfig, EngramAccessService } from '@remnic/core';
+import { GatewayConfig, FallbackLlmRuntimeContext, FallbackLlmClient, EngramAccessService } from '@remnic/core';
 /**
  * Types for the ingestion benchmark tier.
@@ -907,6 +907,9 @@ declare function clampScore(value: number): number;
 interface GatewayResponderOptions {
     gatewayConfig?: GatewayConfig;
     agentId?: string;
+    agentDir?: string;
+    workspaceDir?: string;
+    llmFactory?: (gatewayConfig: GatewayConfig, runtimeContext: FallbackLlmRuntimeContext) => Pick<FallbackLlmClient, "chatCompletion">;
 }
 declare function createResponderFromProvider(provider: LlmProvider): BenchResponder;
 declare function createProviderBackedResponder(config: ProviderFactoryConfig, providerInstance?: LlmProvider): BenchResponder;

package/dist/index.js CHANGED Viewed

@@ -731,8 +731,9 @@ var OpenAiCompatibleProvider = class {
       })
     });
     if (!response.ok) {
+      const errorBody = await readErrorBody(response);
       throw new Error(
-        `OpenAI-compatible completion failed: ${response.status} ${response.statusText}`
+        `OpenAI-compatible completion failed: ${response.status} ${response.statusText}${errorBody ? ` \u2014 ${errorBody}` : ""}`
       );
     }
     const payload = await response.json();
@@ -801,6 +802,17 @@ var OpenAiCompatibleProvider = class {
     return `${normalizedBase}/${normalizedPath}`;
   }
 };
+async function readErrorBody(response) {
+  try {
+    const text = (await response.text()).trim();
+    if (text.length === 0) {
+      return "";
+    }
+    return text.replace(/\s+/g, " ").slice(0, 400);
+  } catch {
+    return "";
+  }
+}
 function readMessageText(payload) {
   const content = payload.choices?.[0]?.message?.content;
   if (typeof content === "string") {
@@ -1113,7 +1125,11 @@ function createGatewayResponder(options) {
   if (!options.gatewayConfig) {
     throw new Error("gateway responder requires gatewayConfig");
   }
-  const llm = new FallbackLlmClient(options.gatewayConfig);
+  const runtimeContext = {
+    ...options.agentDir ? { agentDir: options.agentDir } : {},
+    ...options.workspaceDir ? { workspaceDir: options.workspaceDir } : {}
+  };
+  const llm = options.llmFactory?.(options.gatewayConfig, runtimeContext) ?? new FallbackLlmClient(options.gatewayConfig, runtimeContext);
   return {
     async respond(question, recalledText) {
       const startedAt = performance.now();
@@ -1241,6 +1257,7 @@ function clampNormalizedScore(value) {
 }
 // src/runtime-profiles.ts
+import path2 from "path";
 import { readFile } from "fs/promises";
 import {
   resolveRemnicPluginEntry
@@ -1421,7 +1438,8 @@ async function resolveBenchRuntimeProfile(options) {
   const fastGatewayAgentId = options.fastGatewayAgentId ?? asNonEmptyString(openclawRuntime.remnicConfig.fastGatewayAgentId);
   const gatewayResponder = createGatewayResponder({
     gatewayConfig,
-    agentId: gatewayAgentId
+    agentId: gatewayAgentId,
+    ...openclawRuntime.runtimeContext
   });
   const persistedRemnicConfig = sanitizePersistedConfig(
     {
@@ -1485,7 +1503,15 @@ async function loadOpenclawRuntimeConfig(filePath) {
   return {
     remnicConfig,
     gatewayConfig,
-    persistedGatewayConfig: sanitizeGatewayConfig(gatewayConfig)
+    persistedGatewayConfig: sanitizeGatewayConfig(gatewayConfig),
+    runtimeContext: deriveOpenclawRuntimeContext(filePath)
+  };
+}
+function deriveOpenclawRuntimeContext(configPath) {
+  const rootDir = path2.dirname(path2.resolve(configPath));
+  return {
+    agentDir: path2.join(rootDir, "agents", "main", "agent"),
+    workspaceDir: path2.join(rootDir, "workspace")
   };
 }
 async function loadJsonObject(filePath, label) {
@@ -1604,12 +1630,12 @@ function isPlainObject(value) {
 // src/benchmark.ts
 import fs from "fs";
-import path23 from "path";
+import path24 from "path";
 // src/benchmarks/published/ama-bench/runner.ts
 import { randomUUID } from "crypto";
 import { readFile as readFile3 } from "fs/promises";
-import path3 from "path";
+import path4 from "path";
 // src/benchmarks/published/ama-bench/fixture.ts
 var AMA_BENCH_SMOKE_FIXTURE = [
@@ -1818,7 +1844,7 @@ function longestCommonSubsequence(left, right) {
 // src/reporter.ts
 import { execSync } from "child_process";
 import { mkdir as mkdir2, readFile as readFile2, writeFile } from "fs/promises";
-import path2 from "path";
+import path3 from "path";
 function sanitizeFilenameSegment(value) {
   const sanitized = value.trim().replace(/[^a-zA-Z0-9._-]/g, "_");
   return sanitized.length > 0 ? sanitized : "unknown";
@@ -1827,7 +1853,7 @@ async function writeBenchmarkResult(result, outputDir) {
   await mkdir2(outputDir, { recursive: true });
   const safeRemnicVersion = sanitizeFilenameSegment(result.meta.remnicVersion);
   const timestamp = result.meta.timestamp.replace(/[:.]/g, "-");
-  const filePath = path2.join(
+  const filePath = path3.join(
     outputDir,
     `${result.meta.benchmark}-v${safeRemnicVersion}-${timestamp}.json`
   );
@@ -1838,7 +1864,7 @@ async function getRemnicVersion() {
   try {
     const packageJson = JSON.parse(
       await readFile2(
-        path2.resolve(import.meta.dirname, "../../../package.json"),
+        path3.resolve(import.meta.dirname, "../../../package.json"),
         "utf8"
       )
     );
@@ -1990,7 +2016,7 @@ async function loadDataset(mode, datasetDir, limit) {
     return episodes;
   };
   if (datasetDir) {
-    const filePath = path3.join(datasetDir, "open_end_qa_set.jsonl");
+    const filePath = path4.join(datasetDir, "open_end_qa_set.jsonl");
     let raw;
     try {
       raw = await readFile3(filePath, "utf8");
@@ -2063,12 +2089,10 @@ function parseEpisode(line, lineNumber) {
   if (!Number.isFinite(record.num_turns) || !Number.isFinite(record.total_tokens)) {
     throw new Error(`${location} must include numeric num_turns and total_tokens fields.`);
   }
-  if (!isValidTrajectory(record.trajectory)) {
-    throw new Error(`${location} must include a trajectory array with action/observation turns.`);
-  }
   if (!isValidQaPairs(record.qa_pairs)) {
     throw new Error(`${location} must include a qa_pairs array with question/answer/type/question_uuid strings.`);
   }
+  const trajectory = normalizeTrajectory(record.trajectory, location);
   return {
     episode_id: record.episode_id,
     task: record.task,
@@ -2077,14 +2101,45 @@ function parseEpisode(line, lineNumber) {
     success: record.success,
     num_turns: record.num_turns,
     total_tokens: record.total_tokens,
-    trajectory: record.trajectory,
+    trajectory,
     qa_pairs: record.qa_pairs
   };
 }
-function isValidTrajectory(value) {
-  return Array.isArray(value) && value.every(
-    (turn) => !!turn && typeof turn === "object" && !Array.isArray(turn) && Number.isInteger(turn.turn_idx) && typeof turn.action === "string" && typeof turn.observation === "string"
-  );
+function normalizeTrajectory(value, location) {
+  if (!Array.isArray(value)) {
+    throw new Error(`${location} must include a trajectory array with action/observation turns.`);
+  }
+  return value.map((turn, index) => {
+    if (!turn || typeof turn !== "object" || Array.isArray(turn)) {
+      throw new Error(`${location} trajectory[${index}] must be an object.`);
+    }
+    const record = turn;
+    if (!Number.isInteger(record.turn_idx)) {
+      throw new Error(`${location} trajectory[${index}] must include an integer turn_idx.`);
+    }
+    if (!("action" in record) || !("observation" in record)) {
+      throw new Error(
+        `${location} must include a trajectory array with action/observation turns.`
+      );
+    }
+    return {
+      turn_idx: record.turn_idx,
+      action: normalizeTrajectoryText(record.action, `${location} trajectory[${index}].action`),
+      observation: normalizeTrajectoryText(
+        record.observation,
+        `${location} trajectory[${index}].observation`
+      )
+    };
+  });
+}
+function normalizeTrajectoryText(value, field) {
+  if (typeof value === "string") {
+    return value;
+  }
+  if (value === null) {
+    return "";
+  }
+  throw new Error(`${field} must be a string or null.`);
 }
 function isValidQaPairs(value) {
   return Array.isArray(value) && value.every(
@@ -2095,7 +2150,7 @@ function isValidQaPairs(value) {
 // src/benchmarks/published/amemgym/runner.ts
 import { randomUUID as randomUUID2 } from "crypto";
 import { readFile as readFile4 } from "fs/promises";
-import path4 from "path";
+import path5 from "path";
 // src/benchmarks/published/amemgym/fixture.ts
 var AMEMGYM_SMOKE_FIXTURE = [
@@ -2333,7 +2388,7 @@ async function loadDataset2(mode, datasetDir, limit) {
     const datasetErrors = [];
     for (const filename of DATASET_FILENAMES) {
       try {
-        const raw = await readFile4(path4.join(datasetDir, filename), "utf8");
+        const raw = await readFile4(path5.join(datasetDir, filename), "utf8");
         const parsed = parseDataset(raw, filename);
         return ensureDatasetProfiles(applyLimit2(parsed, normalizedLimit));
       } catch (error) {
@@ -2418,7 +2473,7 @@ function normalizeRole(role) {
 // src/benchmarks/published/memory-arena/runner.ts
 import { randomUUID as randomUUID3 } from "crypto";
 import { readFile as readFile5, readdir } from "fs/promises";
-import path5 from "path";
+import path6 from "path";
 // src/benchmarks/published/memory-arena/fixture.ts
 var MEMORY_ARENA_SMOKE_FIXTURE = [
@@ -2610,7 +2665,7 @@ async function loadDataset3(mode, datasetDir, limit) {
       if (remainingLimit2 === 0) {
         break;
       }
-      const raw = await readFile5(path5.join(datasetDir, filename), "utf8");
+      const raw = await readFile5(path6.join(datasetDir, filename), "utf8");
       const parsedTasks = [];
       raw.split("\n").forEach((line, lineIndex) => {
         if (line.trim().length === 0) {
@@ -2686,9 +2741,7 @@ function parseTask(line, filename, lineNumber) {
   if (!Number.isInteger(record.id)) {
     throw new Error(`${location} must include an integer id.`);
   }
-  if (typeof record.category !== "string") {
-    throw new Error(`${location} must include a string category.`);
-  }
+  const category = normalizeCategory(record.category, filename);
   if (!Array.isArray(record.questions) || record.questions.some((question) => typeof question !== "string")) {
     throw new Error(`${location} must include a questions array of strings.`);
   }
@@ -2701,11 +2754,23 @@ function parseTask(line, filename, lineNumber) {
   }
   return {
     id: record.id,
-    category: record.category,
+    category,
     questions: record.questions,
     answers: record.answers
   };
 }
+function normalizeCategory(value, filename) {
+  if (typeof value === "string" && value.trim().length > 0) {
+    return value;
+  }
+  const inferred = filename.replace(/\.jsonl$/i, "").trim();
+  if (inferred.length > 0) {
+    return inferred;
+  }
+  throw new Error(
+    `MemoryArena dataset file ${filename} must include a string category or use a filename that can be inferred as the category.`
+  );
+}
 function answerToString(answer) {
   if (typeof answer === "string") {
     return answer;
@@ -2753,7 +2818,7 @@ function isValidArenaAnswerObject(answer) {
 // src/benchmarks/published/longmemeval/runner.ts
 import { randomUUID as randomUUID4 } from "crypto";
 import { readFile as readFile6 } from "fs/promises";
-import path6 from "path";
+import path7 from "path";
 // src/benchmarks/published/longmemeval/fixture.ts
 var LONG_MEM_EVAL_SMOKE_FIXTURE = [
@@ -2928,7 +2993,7 @@ async function loadDataset4(mode, datasetDir, limit) {
       "longmemeval.json"
     ]) {
       try {
-        const raw = await readFile6(path6.join(datasetDir, filename), "utf8");
+        const raw = await readFile6(path7.join(datasetDir, filename), "utf8");
         const parsed = JSON.parse(raw);
         return ensureDatasetItems(limit ? parsed.slice(0, limit) : parsed);
       } catch (error) {
@@ -2957,7 +3022,7 @@ async function loadDataset4(mode, datasetDir, limit) {
 // src/benchmarks/published/locomo/runner.ts
 import { randomUUID as randomUUID5 } from "crypto";
 import { readFile as readFile7 } from "fs/promises";
-import path7 from "path";
+import path8 from "path";
 // src/benchmarks/published/locomo/fixture.ts
 var LOCOMO_SMOKE_FIXTURE = [
@@ -3186,7 +3251,7 @@ async function loadDataset5(mode, datasetDir, limit) {
     const datasetErrors = [];
     for (const filename of ["locomo10.json", "locomo.json"]) {
       try {
-        const raw = await readFile7(path7.join(datasetDir, filename), "utf8");
+        const raw = await readFile7(path8.join(datasetDir, filename), "utf8");
         const parsed = parseDataset2(raw, filename);
         return ensureDatasetConversations(
           applyLimit4(parsed, normalizedLimit)
@@ -3231,29 +3296,69 @@ function parseConversation(entry, filename, index) {
   if (!record.conversation || typeof record.conversation !== "object" || Array.isArray(record.conversation)) {
     throw new Error(`${location} must include a conversation object.`);
   }
-  if (!isValidQaArray(record.qa)) {
-    throw new Error(
-      `${location} must include a qa array with question/answer/evidence/category fields.`
-    );
-  }
+  const qa = normalizeQaArray(record.qa, location);
   return {
     sample_id: record.sample_id,
     conversation: record.conversation,
-    qa: record.qa,
+    qa,
     event_summary: record.event_summary,
     observation: record.observation,
     session_summary: record.session_summary
   };
 }
-function isValidQaArray(value) {
-  return Array.isArray(value) && value.every(isValidQa);
+function normalizeQaArray(value, location) {
+  if (!Array.isArray(value)) {
+    throw new Error(
+      `${location} must include a qa array with question/answer/evidence/category fields.`
+    );
+  }
+  return value.map(
+    (entry, index) => normalizeQa(entry, `${location} qa[${index}]`)
+  );
 }
-function isValidQa(value) {
+function normalizeQa(value, location) {
   if (!value || typeof value !== "object" || Array.isArray(value)) {
-    return false;
+    throw new Error(`${location} must be an object.`);
   }
   const record = value;
-  return typeof record.question === "string" && typeof record.answer === "string" && Number.isInteger(record.category) && Array.isArray(record.evidence) && record.evidence.every((item) => typeof item === "string");
+  if (typeof record.question !== "string" || record.question.trim().length === 0) {
+    throw new Error(`${location} must include a non-empty question string.`);
+  }
+  if (!Number.isInteger(record.category)) {
+    throw new Error(`${location} must include an integer category.`);
+  }
+  if (!Array.isArray(record.evidence) || record.evidence.some((item) => typeof item !== "string")) {
+    throw new Error(`${location} must include an evidence array of strings.`);
+  }
+  const answer = normalizeQaAnswer(record.answer, record.adversarial_answer, location);
+  return {
+    question: record.question,
+    answer,
+    evidence: record.evidence,
+    category: record.category
+  };
+}
+function normalizeQaAnswer(answer, adversarialAnswer, location) {
+  const direct = normalizeScalarAnswer(answer);
+  if (direct !== void 0) {
+    return direct;
+  }
+  const adversarial = normalizeScalarAnswer(adversarialAnswer);
+  if (adversarial !== void 0) {
+    return adversarial;
+  }
+  throw new Error(
+    `${location} must include a string or numeric answer, or an adversarial_answer fallback.`
+  );
+}
+function normalizeScalarAnswer(value) {
+  if (typeof value === "string" && value.trim().length > 0) {
+    return value;
+  }
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return String(value);
+  }
+  return void 0;
 }
 function normalizeLimit4(limit) {
   if (limit === void 0) {
@@ -3276,7 +3381,7 @@ function applyLimit4(items, limit) {
 // src/benchmarks/published/beam/runner.ts
 import { randomUUID as randomUUID6 } from "crypto";
 import { readFile as readFile8, readdir as readdir2 } from "fs/promises";
-import path8 from "path";
+import path9 from "path";
 // src/benchmarks/published/beam/fixture.ts
 var BEAM_SMOKE_FIXTURE = [
@@ -3574,7 +3679,7 @@ async function loadDataset6(mode, datasetDir, limit) {
       if (remainingLimit === 0) {
         break;
       }
-      const raw = await readFile8(path8.join(datasetDir, filename), "utf8");
+      const raw = await readFile8(path9.join(datasetDir, filename), "utf8");
       const scale = inferScaleFromFilename(filename);
       const conversations = filename.endsWith(".jsonl") ? parseJsonlDataset(raw, filename) : parseJsonDataset(raw, filename);
       const limitedConversations = applyLimit5(conversations, remainingLimit);
@@ -4018,7 +4123,7 @@ var StructuredLiteralParser = class {
 // src/benchmarks/published/personamem/runner.ts
 import { randomUUID as randomUUID7 } from "crypto";
 import { readFile as readFile9, realpath } from "fs/promises";
-import path9 from "path";
+import path10 from "path";
 // src/benchmarks/published/personamem/fixture.ts
 var PERSONAMEM_SMOKE_FIXTURE = [
@@ -4220,7 +4325,7 @@ async function loadDataset7(mode, datasetDir, limit) {
   if (datasetDir) {
     const datasetErrors = [];
     for (const relativePath of DATASET_FILE_CANDIDATES) {
-      const datasetPath = path9.join(datasetDir, relativePath);
+      const datasetPath = path10.join(datasetDir, relativePath);
       try {
         const raw = await readFile9(datasetPath, "utf8");
         const rows = parseCsvRows(raw, relativePath, normalizedLimit);
@@ -4403,12 +4508,12 @@ function parseCsv(raw, limit) {
   return rows;
 }
 async function resolveDatasetFilePath(datasetRoot, relativePath) {
-  const rootPath = path9.resolve(datasetRoot);
+  const rootPath = path10.resolve(datasetRoot);
   const rootRealPath = await realpath(rootPath);
-  const candidatePath = path9.resolve(rootPath, relativePath);
+  const candidatePath = path10.resolve(rootPath, relativePath);
   const candidateRealPath = await realpath(candidatePath);
-  const relativeToRoot = path9.relative(rootRealPath, candidateRealPath);
-  if (relativeToRoot.startsWith("..") || path9.isAbsolute(relativeToRoot)) {
+  const relativeToRoot = path10.relative(rootRealPath, candidateRealPath);
+  if (relativeToRoot.startsWith("..") || path10.isAbsolute(relativeToRoot)) {
     throw new Error(
       `PersonaMem-v2 dataset file reference "${relativePath}" must stay within datasetDir.`
     );
@@ -4528,7 +4633,7 @@ function applyLimit6(items, limit) {
 // src/benchmarks/published/membench/runner.ts
 import { randomUUID as randomUUID8 } from "crypto";
 import { readFile as readFile10, readdir as readdir3 } from "fs/promises";
-import path10 from "path";
+import path11 from "path";
 // src/benchmarks/published/membench/fixture.ts
 var MEMBENCH_SMOKE_FIXTURE = [
@@ -4714,7 +4819,7 @@ async function loadDataset8(mode, datasetDir, limit) {
         break;
       }
       try {
-        const raw = await readFile10(path10.join(datasetDir, filename), "utf8");
+        const raw = await readFile10(path11.join(datasetDir, filename), "utf8");
         const parsed = filename.endsWith(".jsonl") ? parseJsonlDataset2(raw, filename) : parseJsonDataset2(raw, filename);
         const limitedCases = applyLimit7(parsed, remainingLimit);
         cases.push(...limitedCases);
@@ -5084,7 +5189,7 @@ function isPlainObject2(value) {
 // src/benchmarks/published/memoryagentbench/runner.ts
 import { randomUUID as randomUUID9 } from "crypto";
 import { readFile as readFile11 } from "fs/promises";
-import path11 from "path";
+import path12 from "path";
 // src/benchmarks/published/memoryagentbench/fixture.ts
 var MEMORY_AGENT_BENCH_SMOKE_FIXTURE = [
@@ -5354,7 +5459,7 @@ async function loadDataset9(mode, datasetDir, limit) {
     const datasetErrors = [];
     for (const filename of DATASET_BUNDLE_CANDIDATES) {
       const parsed = await tryReadDatasetFile(
-        path11.join(datasetDir, filename),
+        path12.join(datasetDir, filename),
         filename,
         datasetErrors
       );
@@ -5371,7 +5476,7 @@ async function loadDataset9(mode, datasetDir, limit) {
       let splitData;
       for (const filename of splitConfig.candidates) {
         try {
-          splitData = await readDatasetFile(path11.join(datasetDir, filename), filename);
+          splitData = await readDatasetFile(path12.join(datasetDir, filename), filename);
           break;
         } catch (error) {
           datasetErrors.push(
@@ -5939,7 +6044,7 @@ function loadCases(mode, limit) {
 // src/benchmarks/remnic/extraction-judge-calibration/runner.ts
 import { randomUUID as randomUUID11 } from "crypto";
 import os from "os";
-import path12 from "path";
+import path13 from "path";
 import { clearVerdictCache, judgeFactDurability, parseConfig as parseConfig2 } from "@remnic/core";
 // src/benchmarks/remnic/extraction-judge-calibration/fixture.ts
@@ -6045,8 +6150,8 @@ var extractionJudgeCalibrationDefinition = {
 async function runExtractionJudgeCalibrationBenchmark(options) {
   const cases = loadCases2(options.mode, options.limit);
   const config = parseConfig2({
-    memoryDir: path12.join(os.tmpdir(), "remnic-bench-extraction-judge"),
-    workspaceDir: path12.join(os.tmpdir(), "remnic-bench-extraction-judge-workspace"),
+    memoryDir: path13.join(os.tmpdir(), "remnic-bench-extraction-judge"),
+    workspaceDir: path13.join(os.tmpdir(), "remnic-bench-extraction-judge-workspace"),
     openaiApiKey: "bench-test-key",
     extractionJudgeEnabled: true,
     extractionJudgeBatchSize: 4,
@@ -6584,7 +6689,7 @@ function constantAggregate2(value) {
 // src/benchmarks/remnic/entity-consolidation/runner.ts
 import os2 from "os";
-import path13 from "path";
+import path14 from "path";
 import { randomUUID as randomUUID13 } from "crypto";
 import { mkdtemp as mkdtemp2, rm as rm2 } from "fs/promises";
 import { StorageManager } from "@remnic/core";
@@ -6747,7 +6852,7 @@ function loadCases4(mode, limit) {
   return limited;
 }
 async function executeCase(sample) {
-  const tmpDir = await mkdtemp2(path13.join(os2.tmpdir(), "remnic-bench-entity-consolidation-"));
+  const tmpDir = await mkdtemp2(path14.join(os2.tmpdir(), "remnic-bench-entity-consolidation-"));
   try {
     const storage = new StorageManager(tmpDir);
     await storage.ensureDirectories();
@@ -6926,7 +7031,7 @@ function parseNonNegativeInt(rawValue) {
 // src/benchmarks/remnic/page-versioning/runner.ts
 import os3 from "os";
-import path14 from "path";
+import path15 from "path";
 import { randomUUID as randomUUID14 } from "crypto";
 import { mkdtemp as mkdtemp3, mkdir as mkdir3, readFile as readFile12, rm as rm3, writeFile as writeFile2 } from "fs/promises";
 import {
@@ -7081,10 +7186,10 @@ function loadCases5(mode, limit) {
   return limited;
 }
 async function executeCase2(sample) {
-  const tmpDir = await mkdtemp3(path14.join(os3.tmpdir(), "remnic-bench-page-versioning-"));
+  const tmpDir = await mkdtemp3(path15.join(os3.tmpdir(), "remnic-bench-page-versioning-"));
   try {
-    const factsDir = path14.join(tmpDir, "facts");
-    const pagePath = path14.join(factsDir, `${sample.id}.md`);
+    const factsDir = path15.join(tmpDir, "facts");
+    const pagePath = path15.join(factsDir, `${sample.id}.md`);
     await mkdir3(factsDir, { recursive: true });
     const config = versioningConfig();
     switch (sample.scenario) {
@@ -8328,7 +8433,7 @@ async function runRetrievalDirectAnswerBenchmark(options) {
 import { randomUUID as randomUUID18 } from "crypto";
 import { mkdtemp as mkdtemp4, rm as rm4 } from "fs/promises";
 import os4 from "os";
-import path15 from "path";
+import path16 from "path";
 import {
   StorageManager as StorageManager2,
   parseConfig as parseConfig3,
@@ -8449,7 +8554,7 @@ async function runProceduralRecallBenchmark(options) {
   const e2eCases = sliceWithBudget(e2eSource, remainingBudget).picked;
   for (const sample of e2eCases) {
     const startedAt = performance.now();
-    const dir = await mkdtemp4(path15.join(os4.tmpdir(), "remnic-bench-procedural-recall-"));
+    const dir = await mkdtemp4(path16.join(os4.tmpdir(), "remnic-bench-procedural-recall-"));
     let section = null;
     try {
       const storage = new StorageManager2(dir);
@@ -8464,7 +8569,7 @@ ${body}`,
       );
       const config = parseConfig3({
         memoryDir: dir,
-        workspaceDir: path15.join(dir, "ws"),
+        workspaceDir: path16.join(dir, "ws"),
         openaiApiKey: "bench-key",
         procedural: {
           enabled: sample.proceduralEnabled !== false,
@@ -8535,7 +8640,7 @@ ${body}`,
 import { randomUUID as randomUUID19 } from "crypto";
 import { mkdtemp as mkdtemp5, writeFile as writeFile3, rm as rm5, mkdir as mkdir4 } from "fs/promises";
 import { tmpdir as tmpdir2 } from "os";
-import path16 from "path";
+import path17 from "path";
 // src/ingestion-scorer.ts
 function normalize(value) {
@@ -9100,12 +9205,12 @@ async function runIngestionEntityRecallBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp5(path16.join(tmpdir2(), "bench-email-"));
+  const fixtureDir = await mkdtemp5(path17.join(tmpdir2(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path16.join(fixtureDir, file.relativePath);
-      await mkdir4(path16.dirname(filePath), { recursive: true });
+      const filePath = path17.join(fixtureDir, file.relativePath);
+      await mkdir4(path17.dirname(filePath), { recursive: true });
       await writeFile3(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
@@ -9181,7 +9286,7 @@ async function runIngestionEntityRecallBenchmark(options) {
 import { randomUUID as randomUUID20 } from "crypto";
 import { mkdtemp as mkdtemp6, writeFile as writeFile4, rm as rm6, mkdir as mkdir5 } from "fs/promises";
 import { tmpdir as tmpdir3 } from "os";
-import path17 from "path";
+import path18 from "path";
 var ingestionSchemaCompletenessDefinition = {
   id: "ingestion-schema-completeness",
   title: "Ingestion: Schema Completeness",
@@ -9200,12 +9305,12 @@ async function runIngestionSchemaCompletenessBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp6(path17.join(tmpdir3(), "bench-email-"));
+  const fixtureDir = await mkdtemp6(path18.join(tmpdir3(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path17.join(fixtureDir, file.relativePath);
-      await mkdir5(path17.dirname(filePath), { recursive: true });
+      const filePath = path18.join(fixtureDir, file.relativePath);
+      await mkdir5(path18.dirname(filePath), { recursive: true });
       await writeFile4(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
@@ -9291,7 +9396,7 @@ async function runIngestionSchemaCompletenessBenchmark(options) {
 import { randomUUID as randomUUID21 } from "crypto";
 import { mkdtemp as mkdtemp7, writeFile as writeFile5, rm as rm7, mkdir as mkdir6 } from "fs/promises";
 import { tmpdir as tmpdir4 } from "os";
-import path18 from "path";
+import path19 from "path";
 var ingestionBacklinkF1Definition = {
   id: "ingestion-backlink-f1",
   title: "Ingestion: Backlink F1",
@@ -9310,12 +9415,12 @@ async function runIngestionBacklinkF1Benchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp7(path18.join(tmpdir4(), "bench-email-"));
+  const fixtureDir = await mkdtemp7(path19.join(tmpdir4(), "bench-email-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path18.join(fixtureDir, file.relativePath);
-      await mkdir6(path18.dirname(filePath), { recursive: true });
+      const filePath = path19.join(fixtureDir, file.relativePath);
+      await mkdir6(path19.dirname(filePath), { recursive: true });
       await writeFile5(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
@@ -9392,7 +9497,7 @@ async function runIngestionBacklinkF1Benchmark(options) {
 import { randomUUID as randomUUID22 } from "crypto";
 import { mkdtemp as mkdtemp8, writeFile as writeFile6, rm as rm8, mkdir as mkdir7 } from "fs/promises";
 import { tmpdir as tmpdir5 } from "os";
-import path19 from "path";
+import path20 from "path";
 var INGESTION_SETUP_FRICTION_LOWER_IS_BETTER = /* @__PURE__ */ new Set(["setup_friction", "commands_count", "prompts_count", "errors_count"]);
 var ingestionSetupFrictionDefinition = {
   id: "ingestion-setup-friction",
@@ -9409,12 +9514,12 @@ var ingestionSetupFrictionDefinition = {
 };
 async function runIngestionSetupFrictionBenchmark(options) {
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp8(path19.join(tmpdir5(), "bench-friction-"));
+  const fixtureDir = await mkdtemp8(path20.join(tmpdir5(), "bench-friction-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path19.join(fixtureDir, file.relativePath);
-      await mkdir7(path19.dirname(filePath), { recursive: true });
+      const filePath = path20.join(fixtureDir, file.relativePath);
+      await mkdir7(path20.dirname(filePath), { recursive: true });
       await writeFile6(filePath, file.content, "utf8");
     }
     const { result: ingestionLog, durationMs } = await timed(
@@ -9494,7 +9599,7 @@ async function runIngestionSetupFrictionBenchmark(options) {
 import { randomUUID as randomUUID23 } from "crypto";
 import { mkdtemp as mkdtemp9, writeFile as writeFile7, rm as rm9, mkdir as mkdir8 } from "fs/promises";
 import { tmpdir as tmpdir6 } from "os";
-import path20 from "path";
+import path21 from "path";
 var ingestionCitationAccuracyDefinition = {
   id: "ingestion-citation-accuracy",
   title: "Ingestion: Citation Accuracy",
@@ -9535,9 +9640,9 @@ function extractClaims(pages) {
 function resolveCitedSources(seeAlso, pageRef, sourceContentMap) {
   const resolved = [];
   for (const ref of seeAlso) {
-    const refBase = path20.basename(ref).toLowerCase();
+    const refBase = path21.basename(ref).toLowerCase();
     for (const [relativePath, content] of sourceContentMap) {
-      if (relativePath === ref || relativePath.endsWith(ref) || path20.basename(relativePath).toLowerCase() === refBase) {
+      if (relativePath === ref || relativePath.endsWith(ref) || path21.basename(relativePath).toLowerCase() === refBase) {
         resolved.push(content);
         break;
       }
@@ -9546,9 +9651,9 @@ function resolveCitedSources(seeAlso, pageRef, sourceContentMap) {
   if (resolved.length > 0) {
     return resolved.join("\n\n---\n\n");
   }
-  const pageBase = path20.basename(pageRef).toLowerCase();
+  const pageBase = path21.basename(pageRef).toLowerCase();
   for (const [relativePath, content] of sourceContentMap) {
-    if (path20.basename(relativePath).toLowerCase() === pageBase) {
+    if (path21.basename(relativePath).toLowerCase() === pageBase) {
       return content;
     }
   }
@@ -9559,12 +9664,12 @@ async function runIngestionCitationAccuracyBenchmark(options) {
     throw new Error("ingestionAdapter is required for ingestion benchmarks");
   }
   const fixture = emailFixture.generate();
-  const fixtureDir = await mkdtemp9(path20.join(tmpdir6(), "bench-citation-"));
+  const fixtureDir = await mkdtemp9(path21.join(tmpdir6(), "bench-citation-"));
   try {
     await options.ingestionAdapter.reset();
     for (const file of fixture.files) {
-      const filePath = path20.join(fixtureDir, file.relativePath);
-      await mkdir8(path20.dirname(filePath), { recursive: true });
+      const filePath = path21.join(fixtureDir, file.relativePath);
+      await mkdir8(path21.dirname(filePath), { recursive: true });
       await writeFile7(filePath, file.content, "utf8");
     }
     const benchmarkStart = performance.now();
@@ -9757,7 +9862,7 @@ var ASSISTANT_MORNING_BRIEF_SMOKE_SCENARIOS = ASSISTANT_MORNING_BRIEF_SCENARIOS.
 // src/benchmarks/remnic/_assistant-common/runner.ts
 import { randomUUID as randomUUID24 } from "crypto";
-import path22 from "path";
+import path23 from "path";
 // src/run-seeds.ts
 function buildBenchmarkRunSeeds(runCount, baseSeed) {
@@ -9845,7 +9950,7 @@ function pairedDeltaConfidenceInterval(candidateValues, baselineValues, options
 // src/judges/sealed-rubric.ts
 import { createHash as createHash2 } from "crypto";
 import { appendFileSync, mkdirSync } from "fs";
-import path21 from "path";
+import path22 from "path";
 // src/judges/sealed-prompts/assistant-rubric-v1.ts
 var ASSISTANT_RUBRIC_V1 = `# Assistant rubric v1 (sealed)
@@ -10095,7 +10200,7 @@ function createSpotCheckFileLogger(options) {
     return { log() {
     } };
   }
-  const logPath = path21.join(directory, `${runId}.jsonl`);
+  const logPath = path22.join(directory, `${runId}.jsonl`);
   let written = 0;
   let warnedOnWriteFailure = false;
   const cap = typeof sampleSize === "number" && sampleSize > 0 ? sampleSize : 5;
@@ -10184,7 +10289,7 @@ async function runAssistantBenchmark(definition, scenarios, resolved, runnerOpti
   const runId = buildRunId(definition.id);
   const spotCheckLogger = createSpotCheckFileLogger({
     runId,
-    directory: runnerOptions.spotCheckDir ?? path22.join(process.cwd(), "benchmarks", "results", "spot-checks"),
+    directory: runnerOptions.spotCheckDir ?? path23.join(process.cwd(), "benchmarks", "results", "spot-checks"),
     sampleRate: 0.35,
     sampleSize: 5
   });
@@ -10911,8 +11016,8 @@ function finalizeBenchmarkResultConfig(result, options) {
 }
 // src/benchmark.ts
-var DEFAULT_BASELINE_PATH = path23.join(process.cwd(), "benchmarks", "baseline.json");
-var DEFAULT_REPORT_PATH = path23.join(process.cwd(), "benchmarks", "report.json");
+var DEFAULT_BASELINE_PATH = path24.join(process.cwd(), "benchmarks", "baseline.json");
+var DEFAULT_REPORT_PATH = path24.join(process.cwd(), "benchmarks", "report.json");
 var BASELINE_VERSION = 1;
 var DEFAULT_TOLERANCE = 10;
 var DEFAULT_FULL_RUN_COUNT = 5;
@@ -11005,7 +11110,7 @@ function loadBaseline(baselinePath) {
   }
 }
 function saveBaseline(baselinePath, baseline) {
-  fs.mkdirSync(path23.dirname(baselinePath), { recursive: true });
+  fs.mkdirSync(path24.dirname(baselinePath), { recursive: true });
   fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}
 `);
 }
@@ -11216,7 +11321,7 @@ function generateReport(results, reportPath) {
     totalDurationMs: results.reduce((sum, result) => sum + result.totalDurationMs, 0)
   };
   if (reportPath) {
-    fs.mkdirSync(path23.dirname(reportPath), { recursive: true });
+    fs.mkdirSync(path24.dirname(reportPath), { recursive: true });
     fs.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}
 `);
   }
@@ -11342,7 +11447,7 @@ function getBenchmarkLowerIsBetter(benchmarkId) {
 import { mkdir as mkdir9, readdir as readdir4, readFile as readFile13, unlink, writeFile as writeFile8 } from "fs/promises";
 import fs2 from "fs";
 import os5 from "os";
-import path24 from "path";
+import path25 from "path";
 // src/integrity/contamination.ts
 var EMPTY_CONTAMINATION_MANIFEST = {
@@ -11405,13 +11510,13 @@ function mergeContaminationManifests(...manifests) {
 var BASELINE_NAME_PATTERN = /^[A-Za-z0-9_-]+$/;
 function defaultBenchmarkBaselineDir() {
   const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? os5.homedir();
-  return path24.join(homeDir, ".remnic", "bench", "baselines");
+  return path25.join(homeDir, ".remnic", "bench", "baselines");
 }
 function defaultBenchmarkPublishPath(target) {
   const homeDir = process.env.HOME ?? process.env.USERPROFILE ?? os5.homedir();
   switch (target) {
     case "remnic-ai":
-      return path24.join(homeDir, ".remnic", "published", "benchmarks.json");
+      return path25.join(homeDir, ".remnic", "published", "benchmarks.json");
   }
 }
 function compareResultSummaries(left, right) {
@@ -11508,7 +11613,7 @@ async function listBenchmarkResults(outputDir) {
     if (!entry.isFile() || !entry.name.endsWith(".json")) {
       continue;
     }
-    const filePath = path24.join(outputDir, entry.name);
+    const filePath = path25.join(outputDir, entry.name);
     try {
       const result = await loadBenchmarkResult(filePath);
       results.push(toSummary(result, filePath));
@@ -11522,7 +11627,7 @@ async function saveBenchmarkBaseline(baselineDir, name, result, source) {
   assertValidBaselineName(name);
   assertUsableBaselineDir(baselineDir);
   await mkdir9(baselineDir, { recursive: true });
-  const filePath = path24.join(baselineDir, `${name}.json`);
+  const filePath = path25.join(baselineDir, `${name}.json`);
   const payload = {
     name,
     savedAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -11552,7 +11657,7 @@ async function listBenchmarkBaselines(baselineDir) {
     if (!entry.isFile() || !entry.name.endsWith(".json")) {
       continue;
     }
-    const filePath = path24.join(baselineDir, entry.name);
+    const filePath = path25.join(baselineDir, entry.name);
     try {
       const baseline = await loadBenchmarkBaseline(filePath);
       baselines.push(toBaselineSummary(baseline, filePath));
@@ -11569,7 +11674,7 @@ async function resolveBenchmarkResultReference(outputDir, reference) {
     return exactIdMatch;
   }
   const basenameMatch = summaries.find(
-    (summary) => path24.basename(summary.path) === reference
+    (summary) => path25.basename(summary.path) === reference
   );
   if (basenameMatch) {
     return basenameMatch;
@@ -11585,7 +11690,7 @@ async function resolveBenchmarkResultReference(outputDir, reference) {
   return void 0;
 }
 function looksLikeFilesystemPath(reference) {
-  return path24.isAbsolute(reference) || reference.includes("/") || reference.includes(path24.sep) || reference.endsWith(".json");
+  return path25.isAbsolute(reference) || reference.includes("/") || reference.includes(path25.sep) || reference.endsWith(".json");
 }
 async function deleteBenchmarkResults(outputDir, references) {
   const summaries = await listBenchmarkResults(outputDir);
@@ -11593,9 +11698,9 @@ async function deleteBenchmarkResults(outputDir, references) {
   const missing = [];
   const seenPaths = /* @__PURE__ */ new Set();
   for (const reference of references) {
-    let summary = summaries.find((entry) => entry.id === reference) ?? summaries.find((entry) => path24.basename(entry.path) === reference);
+    let summary = summaries.find((entry) => entry.id === reference) ?? summaries.find((entry) => path25.basename(entry.path) === reference);
     if (!summary && looksLikeFilesystemPath(reference)) {
-      const canonicalRef = path24.resolve(reference);
+      const canonicalRef = path25.resolve(reference);
       if (seenPaths.has(canonicalRef)) {
         continue;
       }
@@ -11612,7 +11717,7 @@ async function deleteBenchmarkResults(outputDir, references) {
       missing.push(reference);
       continue;
     }
-    const canonicalPath = path24.resolve(summary.path);
+    const canonicalPath = path25.resolve(summary.path);
     if (seenPaths.has(canonicalPath)) {
       continue;
     }
@@ -11738,7 +11843,7 @@ async function buildBenchmarkPublishFeed(outputDir, target, options = {}) {
   };
 }
 async function writeBenchmarkPublishFeed(feed, outputPath) {
-  await mkdir9(path24.dirname(outputPath), { recursive: true });
+  await mkdir9(path25.dirname(outputPath), { recursive: true });
   await writeFile8(outputPath, `${JSON.stringify(feed, null, 2)}
 `);
   return outputPath;
@@ -12293,7 +12398,7 @@ function formatError(error) {
 // src/benchmarks/custom/runner.ts
 import { randomUUID as randomUUID25 } from "crypto";
-import path25 from "path";
+import path26 from "path";
 async function runCustomBenchmarkFile(filePath, options) {
   const spec = await loadCustomBenchmarkFile(filePath);
   const benchmark = createCustomBenchmarkDefinition(spec, filePath);
@@ -12443,7 +12548,7 @@ async function scoreTask(scoring, options, question, actual, expected) {
   }
 }
 function createCustomBenchmarkDefinition(benchmark, filePath) {
-  const id = `custom:${slugify(path25.basename(filePath, path25.extname(filePath)) || benchmark.name)}`;
+  const id = `custom:${slugify(path26.basename(filePath, path26.extname(filePath)) || benchmark.name)}`;
   return {
     id,
     title: benchmark.name,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@remnic/bench",
-  "version": "1.0.0",
+  "version": "1.0.1",
   "description": "Retrieval latency ladder benchmarks + CI regression gates for @remnic/core",
   "type": "module",
   "main": "./dist/index.js",