npm - @joshuaswarren/openclaw-engram - Versions diffs - 9.0.50 → 9.0.52 - Mend

@joshuaswarren/openclaw-engram 9.0.50 → 9.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -31,6 +31,8 @@ AI agents forget everything between conversations. Engram fixes that.
 - **Pluggable search** — Choose from six search backends: QMD (hybrid BM25+vector+reranking), LanceDB, Meilisearch, Orama, remote HTTP, or bring your own.
 - **Memory OS features** — Graph recall, temporal memory tree, lifecycle policy, compounding, shared context, memory boxes, and identity continuity can be enabled progressively as your install grows.
 - **Benchmark-first roadmap** — Engram now has an evaluation harness with live shadow recall recording and a CI benchmark delta gate, so memory improvements can be measured and regression-checked instead of argued from anecdotes.
+- **Baseline snapshot discipline** — Engram can now, when `benchmarkBaselineSnapshotsEnabled` is enabled, capture typed baseline snapshots of the latest completed benchmark runs so later PR delta reporting can compare candidates against a stable stored reference instead of an ad hoc branch state.
+- **Named baseline delta reporting** — Engram can now, when `benchmarkDeltaReporterEnabled` is enabled, compare the current eval store against a stored baseline snapshot, emit a machine-readable delta report plus markdown summary, and fail fast when a candidate regresses a benchmark that previously passed.
 - **Objective-state recall** — Engram can now store normalized file, process, and tool outcomes and, when `objectiveStateRecallEnabled` is enabled, inject the most relevant objective-state snapshots back into recall context as a separate `Objective State` section.
 - **Causal trajectory graph foundation** — Engram can now persist typed `goal -> action -> observation -> outcome -> follow-up` chains when `causalTrajectoryMemoryEnabled` is enabled and, with `actionGraphRecallEnabled`, emit deterministic action-conditioned edges into the causal graph for later trajectory-aware retrieval.
 - **Causal trajectory recall** — Engram can now, when `causalTrajectoryRecallEnabled` is enabled, inject prompt-relevant causal chains back into recall context as a separate `Causal Trajectories` section with lightweight match explainability.
@@ -170,6 +172,8 @@ openclaw engram compat --strict              # Compatibility check
 openclaw engram benchmark-status             # Benchmark/eval harness packs, runs, shadow recalls, latest summaries
 openclaw engram benchmark-validate <path>    # Validate a benchmark manifest or pack directory
 openclaw engram benchmark-import <path>      # Import a validated benchmark pack into the eval store
+openclaw engram benchmark-baseline-snapshot  # Capture a typed baseline snapshot of the latest completed benchmark runs
+openclaw engram benchmark-baseline-report    # Compare the current eval store against a stored baseline snapshot
 openclaw engram benchmark-ci-gate            # Compare base vs candidate eval stores and fail on regressions
 openclaw engram objective-state-status       # Objective-state snapshot counts and latest stored snapshot
 openclaw engram causal-trajectory-status    # Causal-trajectory record counts and latest stored chain
@@ -209,6 +213,8 @@ Key settings:
 | `memoryDir` | `~/.openclaw/workspace/memory/local` | Memory storage root |
 | `evalHarnessEnabled` | `false` | Enable the evaluation harness for benchmark packs, run summaries, and shadow recall bookkeeping |
 | `evalShadowModeEnabled` | `false` | Record live recall decisions to the eval store without changing injected output |
+| `benchmarkBaselineSnapshotsEnabled` | `false` | Enable versioned baseline snapshot artifacts for the latest completed benchmark runs |
+| `benchmarkDeltaReporterEnabled` | `false` | Enable named-baseline delta reports against the current eval store |
 | `evalStoreDir` | `{memoryDir}/state/evals` | Root directory for benchmark packs, run summaries, and shadow recall records |
 | `objectiveStateMemoryEnabled` | `false` | Enable the objective-state memory foundation for normalized world/tool state snapshots |
 | `objectiveStateSnapshotWritesEnabled` | `false` | Permit objective-state snapshot writers to persist typed state records |

package/dist/index.js CHANGED Viewed

@@ -287,6 +287,8 @@ function parseConfig(raw) {
     conversationRecallTimeoutMs: typeof cfg.conversationRecallTimeoutMs === "number" ? cfg.conversationRecallTimeoutMs : 800,
     evalHarnessEnabled: cfg.evalHarnessEnabled === true,
     evalShadowModeEnabled: cfg.evalShadowModeEnabled === true,
+    benchmarkBaselineSnapshotsEnabled: cfg.benchmarkBaselineSnapshotsEnabled === true,
+    benchmarkDeltaReporterEnabled: cfg.benchmarkDeltaReporterEnabled === true,
     evalStoreDir: typeof cfg.evalStoreDir === "string" && cfg.evalStoreDir.trim().length > 0 ? cfg.evalStoreDir.trim() : path.join(memoryDir, "state", "evals"),
     objectiveStateMemoryEnabled: cfg.objectiveStateMemoryEnabled === true,
     objectiveStateSnapshotWritesEnabled: cfg.objectiveStateSnapshotWritesEnabled === true,
@@ -11666,11 +11668,14 @@ function resolveEvalStoreDir(memoryDir, overrideDir) {
   }
   return path15.join(memoryDir, "state", "evals");
 }
-function assertSafeBenchmarkId(benchmarkId) {
-  if (benchmarkId === "." || benchmarkId === ".." || benchmarkId.includes("/") || benchmarkId.includes("\\")) {
-    throw new Error("benchmarkId must be a safe path segment");
+function assertSafePathSegment(value, field) {
+  if (value === "." || value === ".." || value.includes("/") || value.includes("\\")) {
+    throw new Error(`${field} must be a safe path segment`);
   }
-  return benchmarkId;
+  return value;
+}
+function assertSafeBenchmarkId(benchmarkId) {
+  return assertSafePathSegment(benchmarkId, "benchmarkId");
 }
 function validateEvalBenchmarkManifest(raw, options) {
   if (!isRecord(raw)) throw new Error("benchmark manifest must be an object");
@@ -11727,14 +11732,7 @@ function validateEvalRunSummary(raw) {
   if (!Number.isFinite(totalCases) || totalCases < 0) throw new Error("totalCases must be a non-negative number");
   if (!Number.isFinite(passedCases) || passedCases < 0) throw new Error("passedCases must be a non-negative number");
   if (!Number.isFinite(failedCases) || failedCases < 0) throw new Error("failedCases must be a non-negative number");
-  const metrics = isRecord(raw.metrics) ? {
-    recallPrecisionAtK: typeof raw.metrics.recallPrecisionAtK === "number" ? raw.metrics.recallPrecisionAtK : void 0,
-    actionOutcomeScore: typeof raw.metrics.actionOutcomeScore === "number" ? raw.metrics.actionOutcomeScore : void 0,
-    objectiveStateCoverage: typeof raw.metrics.objectiveStateCoverage === "number" ? raw.metrics.objectiveStateCoverage : void 0,
-    causalPathRecall: typeof raw.metrics.causalPathRecall === "number" ? raw.metrics.causalPathRecall : void 0,
-    trustViolationRate: typeof raw.metrics.trustViolationRate === "number" ? raw.metrics.trustViolationRate : void 0,
-    creationRecoveryScore: typeof raw.metrics.creationRecoveryScore === "number" ? raw.metrics.creationRecoveryScore : void 0
-  } : void 0;
+  const metrics = parseOptionalEvalRunMetrics(raw.metrics);
   return {
     schemaVersion: 1,
     runId: assertString(raw.runId, "runId"),
@@ -11750,6 +11748,55 @@ function validateEvalRunSummary(raw) {
     gitRef: typeof raw.gitRef === "string" && raw.gitRef.trim().length > 0 ? raw.gitRef.trim() : void 0
   };
 }
+function validateEvalBaselineSnapshot(raw) { // Validates a parsed baseline snapshot and returns a normalized copy; throws Error on the first invalid field.
+  if (!isRecord(raw)) throw new Error("eval baseline snapshot must be an object");
+  if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1"); // only schema version 1 is accepted
+  if (!Array.isArray(raw.benchmarks)) throw new Error("benchmarks must be an array");
+  const benchmarks = raw.benchmarks.map((item, index) => { // each entry is validated independently; error messages carry the entry index
+    if (!isRecord(item)) throw new Error(`benchmarks[${index}] must be an object`);
+    const passRate = Number(item.passRate); // NOTE(review): Number() coerces null and "" to 0 and true to 1, so those pass the range check below — confirm intended
+    if (!Number.isFinite(passRate) || passRate < 0 || passRate > 1) {
+      throw new Error(`benchmarks[${index}].passRate must be a number between 0 and 1`);
+    }
+    const metrics = parseOptionalEvalRunMetrics(item.metrics); // undefined when item.metrics is not a record
+    return {
+      benchmarkId: assertString(item.benchmarkId, `benchmarks[${index}].benchmarkId`),
+      runId: assertString(item.runId, `benchmarks[${index}].runId`),
+      completedAt: typeof item.completedAt === "string" && item.completedAt.trim().length > 0 ? item.completedAt.trim() : void 0, // blank or non-string values are dropped
+      gitRef: typeof item.gitRef === "string" && item.gitRef.trim().length > 0 ? item.gitRef.trim() : void 0, // blank or non-string values are dropped
+      passRate,
+      metrics
+    };
+  });
+  const benchmarkCount = Number(raw.benchmarkCount);
+  if (!Number.isFinite(benchmarkCount) || benchmarkCount < 0) {
+    throw new Error("benchmarkCount must be a non-negative number");
+  }
+  if (benchmarkCount !== benchmarks.length) { // the stored count must agree with the entries actually present
+    throw new Error("benchmarkCount must match benchmarks.length");
+  }
+  return { // only the fields listed here are carried into the result
+    schemaVersion: 1,
+    snapshotId: assertString(raw.snapshotId, "snapshotId"),
+    createdAt: assertString(raw.createdAt, "createdAt"),
+    sourceRootDir: assertString(raw.sourceRootDir, "sourceRootDir"),
+    benchmarkCount,
+    benchmarks,
+    notes: typeof raw.notes === "string" && raw.notes.trim().length > 0 ? raw.notes.trim() : void 0, // optional; trimmed, blank becomes undefined
+    gitRef: typeof raw.gitRef === "string" && raw.gitRef.trim().length > 0 ? raw.gitRef.trim() : void 0 // optional; trimmed, blank becomes undefined
+  };
+}
+function parseOptionalEvalRunMetrics(raw) { // Normalizes an optional metrics object: returns undefined for non-record input, otherwise an object with the six known metric keys.
+  if (!isRecord(raw)) return void 0;
+  return { // a key whose value is not typeof "number" is set to undefined; note typeof "number" also admits NaN and Infinity
+    recallPrecisionAtK: typeof raw.recallPrecisionAtK === "number" ? raw.recallPrecisionAtK : void 0,
+    actionOutcomeScore: typeof raw.actionOutcomeScore === "number" ? raw.actionOutcomeScore : void 0,
+    objectiveStateCoverage: typeof raw.objectiveStateCoverage === "number" ? raw.objectiveStateCoverage : void 0,
+    causalPathRecall: typeof raw.causalPathRecall === "number" ? raw.causalPathRecall : void 0,
+    trustViolationRate: typeof raw.trustViolationRate === "number" ? raw.trustViolationRate : void 0,
+    creationRecoveryScore: typeof raw.creationRecoveryScore === "number" ? raw.creationRecoveryScore : void 0
+  };
+}
 function validateEvalShadowRecallRecord(raw) {
   if (!isRecord(raw)) throw new Error("eval shadow recall record must be an object");
   if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1");
@@ -11862,17 +11909,62 @@ function compareMetricDeltas(baseMetrics, candidateMetrics) {
   }
   return { deltas, regressions, improvements };
 }
+function formatEvalBaselineDeltaMarkdown(report) { // Renders a baseline delta report object as a markdown string (header list, then Regressions, Improvements, Benchmark Deltas sections).
+  const lines = [
+    "# Eval Baseline Delta Report",
+    "",
+    `- Passed: ${report.passed ? "yes" : "no"}`,
+    `- Baseline snapshot: ${report.baselineSnapshotId}`,
+    `- Baseline created: ${report.baselineCreatedAt}`,
+    `- Baseline source root: ${report.baselineSourceRootDir}`,
+    `- Candidate root: ${report.candidateRootDir}`,
+    `- Benchmarks compared: ${report.comparedBenchmarks}`
+  ];
+  if (report.missingCandidateBenchmarks.length > 0) { // this line is omitted entirely when nothing is missing
+    lines.push(`- Missing candidate benchmarks: ${report.missingCandidateBenchmarks.join(", ")}`);
+  }
+  lines.push(
+    `- Invalid candidate artifacts: benchmarks=${report.invalidArtifacts.candidate.benchmarks}, runs=${report.invalidArtifacts.candidate.runs}, shadows=${report.invalidArtifacts.candidate.shadows}, baselines=${report.invalidArtifacts.candidate.baselines}`,
+    "",
+    "## Regressions"
+  );
+  if (report.regressions.length === 0) { // every section prints "- none" when it has no entries
+    lines.push("- none");
+  } else {
+    for (const regression of report.regressions) lines.push(`- ${regression}`);
+  }
+  lines.push("", "## Improvements");
+  if (report.improvements.length === 0) {
+    lines.push("- none");
+  } else {
+    for (const improvement of report.improvements) lines.push(`- ${improvement}`);
+  }
+  lines.push("", "## Benchmark Deltas");
+  if (report.deltas.length === 0) {
+    lines.push("- none");
+  } else {
+    for (const delta of report.deltas) { // only pass-rate figures are printed here; per-metric deltas are not rendered
+      lines.push(
+        `- ${delta.benchmarkId}: passRate ${delta.basePassRate} -> ${delta.candidatePassRate} (delta ${delta.passRateDelta})`
+      );
+    }
+  }
+  return lines.join("\n"); // the result has no trailing newline
+}
 async function collectEvalStoreSnapshot(options) {
   const rootDir = options.rootDir;
   const benchmarkDir = path15.join(rootDir, "benchmarks");
   const runsDir = path15.join(rootDir, "runs");
   const shadowDir = path15.join(rootDir, "shadow");
+  const baselineDir = path15.join(rootDir, "baselines");
   const benchmarkFiles = await listNamedFiles(benchmarkDir, "manifest.json");
   const runFiles = await listJsonFiles(runsDir);
   const shadowFiles = await listJsonFiles(shadowDir);
+  const baselineFiles = await listJsonFiles(baselineDir);
   const invalidBenchmarks = [];
   const invalidRuns = [];
   const invalidShadows = [];
+  const invalidBaselines = [];
   const manifests = [];
   for (const filePath of benchmarkFiles) {
     try {
@@ -11910,12 +12002,24 @@ async function collectEvalStoreSnapshot(options) {
       });
     }
   }
+  const baselines = [];
+  for (const filePath of baselineFiles) {
+    try {
+      baselines.push(validateEvalBaselineSnapshot(await readJsonFile(filePath)));
+    } catch (error) {
+      invalidBaselines.push({
+        path: filePath,
+        error: error instanceof Error ? error.message : String(error)
+      });
+    }
+  }
   runs.sort((a, b) => {
     const aTime = Date.parse(a.completedAt ?? a.startedAt);
     const bTime = Date.parse(b.completedAt ?? b.startedAt);
     return (Number.isNaN(bTime) ? 0 : bTime) - (Number.isNaN(aTime) ? 0 : aTime);
   });
   shadows.sort((a, b) => b.recordedAt.localeCompare(a.recordedAt));
+  baselines.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
   const tags = /* @__PURE__ */ new Set();
   const attackClasses = /* @__PURE__ */ new Set();
   const sourceLinks = /* @__PURE__ */ new Set();
@@ -11968,15 +12072,26 @@ async function collectEvalStoreSnapshot(options) {
         latestRecordedAt: shadows[0]?.recordedAt,
         latestSessionKey: shadows[0]?.sessionKey
       },
+      baselines: {
+        enabled: options.baselineSnapshotsEnabled === true,
+        total: baselineFiles.length,
+        invalid: invalidBaselines.length,
+        latestSnapshotId: baselines[0]?.snapshotId,
+        latestCreatedAt: baselines[0]?.createdAt,
+        latestBenchmarkCount: baselines[0]?.benchmarkCount
+      },
       latestRun: runs[0],
       latestShadow: shadows[0],
+      latestBaseline: baselines[0],
       invalidBenchmarks,
       invalidRuns,
-      invalidShadows
+      invalidShadows,
+      invalidBaselines
     },
     manifests,
     runs,
-    shadows
+    shadows,
+    baselines
   };
 }
 async function resolveBenchmarkManifestPath(sourcePath) {
@@ -12066,9 +12181,146 @@ async function getEvalHarnessStatus(options) {
     rootDir: resolveEvalStoreDir(options.memoryDir, options.evalStoreDir),
     enabled: options.enabled,
     shadowModeEnabled: options.shadowModeEnabled,
+    baselineSnapshotsEnabled: options.baselineSnapshotsEnabled,
     memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
   })).status;
 }
+async function createEvalBaselineSnapshot(options) { // Builds a baseline snapshot from the eval store's runs and writes it to <rootDir>/baselines/<snapshotId>.json; resolves to { targetPath, snapshot }.
+  if (options.baselineSnapshotsEnabled !== true) { // feature gate: anything other than strict true throws
+    throw new Error("benchmark baseline snapshots are disabled");
+  }
+  const snapshotId = assertSafePathSegment(assertString(options.snapshotId, "snapshotId"), "snapshotId"); // the id becomes a file name below, so path separators and "."/".." are rejected
+  const rootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);
+  const store = await collectEvalStoreSnapshot({ // the store is read with every feature flag forced on, independent of the caller's config
+    rootDir,
+    enabled: true,
+    shadowModeEnabled: true,
+    baselineSnapshotsEnabled: true,
+    memoryRedTeamBenchEnabled: true
+  });
+  const latestRuns = latestCompletedRunsByBenchmark(store.runs); // helper defined outside this view; used here as a Map whose values are run summaries
+  const benchmarks = [...latestRuns.values()].sort((a, b) => a.benchmarkId.localeCompare(b.benchmarkId)).map((run) => ({ // entries are ordered by benchmarkId
+    benchmarkId: run.benchmarkId,
+    runId: run.runId,
+    completedAt: run.completedAt,
+    gitRef: run.gitRef,
+    passRate: computePassRate(run),
+    metrics: run.metrics
+  }));
+  const snapshot = validateEvalBaselineSnapshot({ // run through the same validator that collectEvalStoreSnapshot applies when reading baselines back
+    schemaVersion: 1,
+    snapshotId,
+    createdAt: options.createdAt ?? (/* @__PURE__ */ new Date()).toISOString(), // defaults to the current time as an ISO string
+    sourceRootDir: rootDir,
+    benchmarkCount: benchmarks.length,
+    benchmarks,
+    notes: options.notes,
+    gitRef: options.gitRef
+  });
+  const targetPath = path15.join(rootDir, "baselines", `${snapshot.snapshotId}.json`);
+  await mkdir10(path15.dirname(targetPath), { recursive: true }); // mkdir10/writeFile11 are bundler-renamed imports — presumably node:fs/promises; confirm
+  await writeFile11(targetPath, JSON.stringify(snapshot, null, 2), "utf-8"); // NOTE(review): no existence check is visible, so an existing snapshot with the same id appears to be overwritten — confirm
+  return { targetPath, snapshot };
+}
+async function runEvalBaselineDeltaReport(options) { // Compares the current eval store against a stored baseline snapshot and resolves to a report object (including markdownReport); throws if the reporter is disabled or the snapshot is not found.
+  if (options.benchmarkDeltaReporterEnabled !== true) { // feature gate: anything other than strict true throws
+    throw new Error("benchmark delta reporter is disabled");
+  }
+  const snapshotId = assertSafePathSegment(assertString(options.snapshotId, "snapshotId"), "snapshotId");
+  const candidateRootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);
+  const candidateSnapshot = await collectEvalStoreSnapshot({ // the store is read with every feature flag forced on
+    rootDir: candidateRootDir,
+    enabled: true,
+    shadowModeEnabled: true,
+    baselineSnapshotsEnabled: true,
+    memoryRedTeamBenchEnabled: true
+  });
+  const baselineSnapshot = candidateSnapshot.baselines.find((snapshot) => snapshot.snapshotId === snapshotId); // the baseline is looked up in the same store root as the candidate runs
+  if (!baselineSnapshot) { // also reached when the snapshot file exists but failed validation, since it is then listed only in invalidBaselines
+    throw new Error(`benchmark baseline snapshot not found: ${snapshotId}`);
+  }
+  const regressions = [];
+  const improvements = [];
+  if (candidateSnapshot.status.invalidBenchmarks.length > 0) { // any invalid artifact in the store is itself recorded as a regression
+    regressions.push(`candidate store has ${candidateSnapshot.status.invalidBenchmarks.length} invalid benchmark manifest(s)`);
+  }
+  if (candidateSnapshot.status.invalidRuns.length > 0) {
+    regressions.push(`candidate store has ${candidateSnapshot.status.invalidRuns.length} invalid run summary file(s)`);
+  }
+  if (candidateSnapshot.status.invalidShadows.length > 0) {
+    regressions.push(`candidate store has ${candidateSnapshot.status.invalidShadows.length} invalid shadow record(s)`);
+  }
+  if (candidateSnapshot.status.invalidBaselines.length > 0) {
+    regressions.push(`candidate store has ${candidateSnapshot.status.invalidBaselines.length} invalid baseline snapshot file(s)`);
+  }
+  const candidateRuns = latestCompletedRunsByBenchmark(candidateSnapshot.runs); // helper defined outside this view; used here as a Map keyed by benchmarkId
+  const baselineBenchmarks = new Map(
+    baselineSnapshot.benchmarks.map((benchmark) => [benchmark.benchmarkId, benchmark])
+  );
+  const missingCandidateBenchmarks = [...baselineBenchmarks.keys()].filter((benchmarkId) => !candidateRuns.has(benchmarkId)).sort(); // benchmarks in the baseline with no candidate run
+  for (const benchmarkId of missingCandidateBenchmarks) {
+    regressions.push(`candidate is missing latest completed benchmark run for ${benchmarkId}`);
+  }
+  const deltas = [];
+  for (const benchmarkId of [...baselineBenchmarks.keys()].sort()) { // iterates baseline benchmarks only; benchmarks present only in the candidate are not reported
+    const baseBenchmark = baselineBenchmarks.get(benchmarkId);
+    const candidateRun = candidateRuns.get(benchmarkId);
+    if (!baseBenchmark || !candidateRun) continue; // missing candidates were already recorded as regressions above
+    const passRateDelta = computePassRate(candidateRun) - baseBenchmark.passRate; // candidate minus baseline
+    const delta = {
+      benchmarkId,
+      baseRunId: baseBenchmark.runId,
+      candidateRunId: candidateRun.runId,
+      basePassRate: baseBenchmark.passRate,
+      candidatePassRate: computePassRate(candidateRun),
+      passRateDelta,
+      metricDeltas: {},
+      regressions: [],
+      improvements: []
+    };
+    if (passRateDelta < 0) { // any decrease, however small, is reported as a regression (no tolerance is applied)
+      delta.regressions.push(`passRate ${baseBenchmark.passRate} -> ${delta.candidatePassRate}`);
+      regressions.push(`${benchmarkId} pass rate regressed (${baseBenchmark.passRate} -> ${delta.candidatePassRate})`);
+    } else if (passRateDelta > 0) {
+      delta.improvements.push(`passRate ${baseBenchmark.passRate} -> ${delta.candidatePassRate}`);
+      improvements.push(`${benchmarkId} pass rate improved (${baseBenchmark.passRate} -> ${delta.candidatePassRate})`);
+    }
+    const metricDelta = compareMetricDeltas(baseBenchmark.metrics, candidateRun.metrics); // returns { deltas, regressions, improvements }
+    delta.metricDeltas = metricDelta.deltas;
+    for (const regression of metricDelta.regressions) { // recorded per benchmark and, prefixed with the benchmarkId, in the report-wide list
+      delta.regressions.push(regression);
+      regressions.push(`${benchmarkId} ${regression}`);
+    }
+    for (const improvement of metricDelta.improvements) {
+      delta.improvements.push(improvement);
+      improvements.push(`${benchmarkId} ${improvement}`);
+    }
+    deltas.push(delta);
+  }
+  const report = {
+    passed: regressions.length === 0, // passes only when no regression of any kind was recorded
+    baselineSnapshotId: baselineSnapshot.snapshotId,
+    baselineCreatedAt: baselineSnapshot.createdAt,
+    baselineSourceRootDir: baselineSnapshot.sourceRootDir,
+    candidateRootDir: candidateSnapshot.status.rootDir,
+    comparedBenchmarks: deltas.length,
+    missingCandidateBenchmarks,
+    invalidArtifacts: {
+      candidate: {
+        benchmarks: candidateSnapshot.status.invalidBenchmarks.length,
+        runs: candidateSnapshot.status.invalidRuns.length,
+        shadows: candidateSnapshot.status.invalidShadows.length,
+        baselines: candidateSnapshot.status.invalidBaselines.length
+      }
+    },
+    regressions,
+    improvements,
+    deltas,
+    markdownReport: "" // placeholder; filled in below because the formatter takes the report object as input
+  };
+  report.markdownReport = formatEvalBaselineDeltaMarkdown(report);
+  return report;
+}
 function resolveRequiredEvalStoreRoot(options, label) {
   if (typeof options.evalStoreDir === "string" && options.evalStoreDir.trim().length > 0) {
     return options.evalStoreDir.trim();
@@ -14331,7 +14583,7 @@ function optionalString(value) {
   if (typeof value !== "string" || value.trim().length === 0) return void 0;
   return value.trim();
 }
-function assertSafePathSegment(value, field) {
+function assertSafePathSegment2(value, field) {
   if (value === "." || value === ".." || value.includes("/") || value.includes("\\")) {
     throw new Error(`${field} must be a safe path segment`);
   }
@@ -14386,7 +14638,7 @@ function validateCausalTrajectoryRecord(raw) {
   }
   return {
     schemaVersion: 1,
-    trajectoryId: assertSafePathSegment(assertString2(raw.trajectoryId, "trajectoryId"), "trajectoryId"),
+    trajectoryId: assertSafePathSegment2(assertString2(raw.trajectoryId, "trajectoryId"), "trajectoryId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     goal: assertString2(raw.goal, "goal"),
@@ -14547,7 +14799,7 @@ function validateObjectiveStateSnapshot(raw) {
   }
   return {
     schemaVersion: 1,
-    snapshotId: assertSafePathSegment(assertString2(raw.snapshotId, "snapshotId"), "snapshotId"),
+    snapshotId: assertSafePathSegment2(assertString2(raw.snapshotId, "snapshotId"), "snapshotId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     source,
@@ -14725,7 +14977,7 @@ function validateTrustZoneRecord(raw) {
   if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1");
   return {
     schemaVersion: 1,
-    recordId: assertSafePathSegment(assertString2(raw.recordId, "recordId"), "recordId"),
+    recordId: assertSafePathSegment2(assertString2(raw.recordId, "recordId"), "recordId"),
     zone: validateZone(raw.zone, "zone"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     kind: validateKind(raw.kind),
@@ -14863,7 +15115,7 @@ async function promoteTrustZoneRecord(options) {
   const sourceRecord = await findTrustZoneRecordById({
     memoryDir: options.memoryDir,
     trustZoneStoreDir: options.trustZoneStoreDir,
-    recordId: assertSafePathSegment(assertString2(options.sourceRecordId, "sourceRecordId"), "sourceRecordId")
+    recordId: assertSafePathSegment2(assertString2(options.sourceRecordId, "sourceRecordId"), "sourceRecordId")
   });
   if (!sourceRecord) {
     throw new Error(`source trust-zone record not found: ${options.sourceRecordId}`);
@@ -15090,7 +15342,7 @@ function validateAbstractionNode(raw) {
   if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1");
   return {
     schemaVersion: 1,
-    nodeId: assertSafePathSegment(assertString2(raw.nodeId, "nodeId"), "nodeId"),
+    nodeId: assertSafePathSegment2(assertString2(raw.nodeId, "nodeId"), "nodeId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     kind: validateKind2(raw.kind),
@@ -15161,7 +15413,7 @@ function validateNodeRefs(raw) {
   if (!nodeRefs || nodeRefs.length === 0) {
     throw new Error("nodeRefs must contain at least one node reference");
   }
-  return nodeRefs.map((nodeRef, index) => assertSafePathSegment(nodeRef, `nodeRefs[${index}]`));
+  return nodeRefs.map((nodeRef, index) => assertSafePathSegment2(nodeRef, `nodeRefs[${index}]`));
 }
 function resolveCueAnchorStoreDir(abstractionNodeStoreDir, overrideDir) {
   if (typeof overrideDir === "string" && overrideDir.trim().length > 0) {
@@ -15174,7 +15426,7 @@ function validateCueAnchor(raw) {
   if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1");
   return {
     schemaVersion: 1,
-    anchorId: assertSafePathSegment(assertString2(raw.anchorId, "anchorId"), "anchorId"),
+    anchorId: assertSafePathSegment2(assertString2(raw.anchorId, "anchorId"), "anchorId"),
     anchorType: validateAnchorType(raw.anchorType),
     anchorValue: assertString2(raw.anchorValue, "anchorValue"),
     normalizedCue: assertString2(raw.normalizedCue, "normalizedCue"),
@@ -15588,7 +15840,7 @@ function validateCommitmentLedgerEntry(raw) {
   const normalizedResolvedAt = resolvedAt ?? (state === "open" ? void 0 : normalizedStateChangedAt);
   return {
     schemaVersion: 1,
-    entryId: assertSafePathSegment(assertString2(raw.entryId, "entryId"), "entryId"),
+    entryId: assertSafePathSegment2(assertString2(raw.entryId, "entryId"), "entryId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     source,
@@ -15771,7 +16023,7 @@ function validateWorkProductLedgerEntry(raw) {
   }
   return {
     schemaVersion: 1,
-    entryId: assertSafePathSegment(assertString2(raw.entryId, "entryId"), "entryId"),
+    entryId: assertSafePathSegment2(assertString2(raw.entryId, "entryId"), "entryId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     source,
@@ -18090,7 +18342,7 @@ function validateUtilityTelemetryEvent(raw) {
   }
   return {
     schemaVersion: 1,
-    eventId: assertSafePathSegment(assertString2(raw.eventId, "eventId"), "eventId"),
+    eventId: assertSafePathSegment2(assertString2(raw.eventId, "eventId"), "eventId"),
     recordedAt: assertIsoRecordedAt(assertString2(raw.recordedAt, "recordedAt")),
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     source,
@@ -28329,7 +28581,7 @@ function validateResumeBundle(raw) {
   }
   return {
     schemaVersion: 1,
-    bundleId: assertSafePathSegment(assertString2(raw.bundleId, "bundleId"), "bundleId"),
+    bundleId: assertSafePathSegment2(assertString2(raw.bundleId, "bundleId"), "bundleId"),
     recordedAt,
     sessionKey: assertString2(raw.sessionKey, "sessionKey"),
     source,
@@ -28819,9 +29071,21 @@ async function runBenchmarkStatusCliCommand(options) {
     evalStoreDir: options.evalStoreDir,
     enabled: options.evalHarnessEnabled,
     shadowModeEnabled: options.evalShadowModeEnabled,
+    baselineSnapshotsEnabled: options.benchmarkBaselineSnapshotsEnabled,
     memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
   });
 }
+async function runBenchmarkBaselineSnapshotCliCommand(options) { // CLI adapter: forwards the options to createEvalBaselineSnapshot and returns its result.
+  return createEvalBaselineSnapshot({
+    memoryDir: options.memoryDir,
+    evalStoreDir: options.evalStoreDir,
+    baselineSnapshotsEnabled: options.benchmarkBaselineSnapshotsEnabled, // the config key is renamed to the option name the store function reads
+    snapshotId: options.snapshotId,
+    createdAt: options.createdAt,
+    notes: options.notes,
+    gitRef: options.gitRef
+  });
+}
 async function runBenchmarkValidateCliCommand(options) {
   return validateEvalBenchmarkPack(options.path, {
     memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
@@ -28842,6 +29106,14 @@ async function runBenchmarkCiGateCliCommand(options) {
     candidateEvalStoreDir: options.candidateEvalStoreDir
   });
 }
+async function runBenchmarkBaselineReportCliCommand(options) { // CLI adapter: forwards the options to runEvalBaselineDeltaReport and returns its report.
+  return runEvalBaselineDeltaReport({
+    memoryDir: options.memoryDir,
+    evalStoreDir: options.evalStoreDir,
+    benchmarkDeltaReporterEnabled: options.benchmarkDeltaReporterEnabled, // must be strictly true or the report function throws
+    snapshotId: options.snapshotId
+  });
+}
 async function runObjectiveStateStatusCliCommand(options) {
   return getObjectiveStateStoreStatus({
     memoryDir: options.memoryDir,
@@ -30178,6 +30450,7 @@ function registerCli(api, orchestrator) {
           evalStoreDir: orchestrator.config.evalStoreDir,
           evalHarnessEnabled: orchestrator.config.evalHarnessEnabled,
           evalShadowModeEnabled: orchestrator.config.evalShadowModeEnabled,
+          benchmarkBaselineSnapshotsEnabled: orchestrator.config.benchmarkBaselineSnapshotsEnabled,
           memoryRedTeamBenchEnabled: orchestrator.config.memoryRedTeamBenchEnabled
         });
         console.log(JSON.stringify(status, null, 2));
@@ -30192,6 +30465,20 @@ function registerCli(api, orchestrator) {
         console.log(JSON.stringify(summary, null, 2));
         console.log("OK");
       });
+      cmd.command("benchmark-baseline-snapshot").description("Capture a versioned baseline snapshot of the latest completed benchmark runs").requiredOption("--snapshot-id <id>", "Stable snapshot identifier").option("--created-at <iso>", "Override snapshot creation timestamp").option("--git-ref <ref>", "Override the git ref recorded in the snapshot").option("--notes <text>", "Optional operator notes for the snapshot").action(async (...args) => {
+        const options = args[0] ?? {};
+        const summary = await runBenchmarkBaselineSnapshotCliCommand({
+          memoryDir: orchestrator.config.memoryDir,
+          evalStoreDir: orchestrator.config.evalStoreDir,
+          benchmarkBaselineSnapshotsEnabled: orchestrator.config.benchmarkBaselineSnapshotsEnabled,
+          snapshotId: typeof options.snapshotId === "string" ? options.snapshotId : "",
+          createdAt: typeof options.createdAt === "string" ? options.createdAt : void 0,
+          gitRef: typeof options.gitRef === "string" ? options.gitRef : void 0,
+          notes: typeof options.notes === "string" ? options.notes : void 0
+        });
+        console.log(JSON.stringify(summary, null, 2));
+        console.log("OK");
+      });
       cmd.command("benchmark-import").description("Validate and import a benchmark manifest file or pack directory into Engram's eval store").argument("<path>", "Path to a benchmark manifest JSON file or a directory with manifest.json").option("--force", "Replace an existing imported benchmark pack with the same benchmarkId").action(async (...args) => {
         const inputPath = args[0];
         const options = args[1] ?? {};
@@ -30217,6 +30504,22 @@ function registerCli(api, orchestrator) {
         }
         console.log("OK");
       });
+      cmd.command("benchmark-baseline-report").description("Compare the current eval store against a named stored benchmark baseline snapshot").requiredOption("--snapshot-id <id>", "Stable baseline snapshot identifier").action(async (...args) => {
+        const options = args[0] ?? {};
+        const summary = await runBenchmarkBaselineReportCliCommand({
+          memoryDir: orchestrator.config.memoryDir,
+          evalStoreDir: orchestrator.config.evalStoreDir,
+          benchmarkDeltaReporterEnabled: orchestrator.config.benchmarkDeltaReporterEnabled,
+          snapshotId: typeof options.snapshotId === "string" ? options.snapshotId : ""
+        });
+        const { markdownReport, ...jsonSummary } = summary;
+        console.log(JSON.stringify(jsonSummary, null, 2));
+        console.log(markdownReport);
+        if (!summary.passed) {
+          throw new Error("benchmark baseline report detected regressions");
+        }
+        console.log("OK");
+      });
       cmd.command("objective-state-status").description("Show objective-state store status, snapshot counts, and latest stored snapshot").action(async () => {
         const status = await runObjectiveStateStatusCliCommand({
           memoryDir: orchestrator.config.memoryDir,