@joshuaswarren/openclaw-engram 9.0.33 → 9.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,6 +37,7 @@ AI agents forget everything between conversations. Engram fixes that.
37
37
  - **Trust-zone promotion path** — Engram can now, when `trustZonesEnabled` and `quarantinePromotionEnabled` are enabled, persist typed quarantine, working, and trusted records, plan explicit promotions, block direct `quarantine -> trusted` jumps, and require anchored provenance before promoting risky working records into `trusted`.
38
38
  - **Trust-zone recall** — Engram can now, when `trustZoneRecallEnabled` is enabled, inject prompt-relevant `working` and `trusted` trust-zone records into recall context as a separate `Trust Zones` section while keeping `quarantine` material out of recall by default.
39
39
  - **Poisoning-defense corroboration** — Engram can now, when `memoryPoisoningDefenseEnabled` is enabled, score trust-zone provenance deterministically and require independent non-quarantine corroboration before risky `working -> trusted` promotions succeed.
40
+ - **Red-team benchmark packs** — Engram's eval harness can now validate and count typed `memory-red-team` benchmark packs so poisoning-defense regression suites stay explicit and reviewable instead of hiding inside generic benchmark metadata.
40
41
  - **Zero-config start** — Install, add an API key, restart. Engram works out of the box with sensible defaults and progressively unlocks advanced features as you enable them.
41
42
 
42
43
  ## Quick Start
@@ -199,6 +200,7 @@ Key settings:
199
200
  | `trustZoneStoreDir` | `{memoryDir}/state/trust-zones` | Root directory for trust-zone records |
200
201
  | `trustZoneRecallEnabled` | `false` | Inject prompt-relevant working and trusted trust-zone records into recall context |
201
202
  | `memoryPoisoningDefenseEnabled` | `false` | Enable deterministic provenance trust scoring and corroboration requirements for risky trusted promotions |
203
+ | `memoryRedTeamBenchEnabled` | `false` | Enable typed memory red-team benchmark pack support and status accounting for poisoning-defense suites |
202
204
 
203
205
  Full reference: [Config Reference](docs/config-reference.md)
204
206
 
package/dist/index.js CHANGED
@@ -301,6 +301,7 @@ function parseConfig(raw) {
301
301
  trustZoneStoreDir: typeof cfg.trustZoneStoreDir === "string" && cfg.trustZoneStoreDir.trim().length > 0 ? cfg.trustZoneStoreDir.trim() : path.join(memoryDir, "state", "trust-zones"),
302
302
  trustZoneRecallEnabled: cfg.trustZoneRecallEnabled === true,
303
303
  memoryPoisoningDefenseEnabled: cfg.memoryPoisoningDefenseEnabled === true,
304
+ memoryRedTeamBenchEnabled: cfg.memoryRedTeamBenchEnabled === true,
304
305
  // Local LLM Provider (v2.1)
305
306
  localLlmEnabled: cfg.localLlmEnabled === true || cfg.localLlmEnabled === "true",
306
307
  // default: false
@@ -6333,7 +6334,7 @@ import path6 from "path";
6333
6334
  import { mkdir as mkdir2, readFile as readFile3, stat, writeFile as writeFile2 } from "fs/promises";
6334
6335
  import path5 from "path";
6335
6336
  function toSafeTimestamp(ts) {
6336
- return ts.toISOString().replace(/[:.]/g, "").replace("Z", "Z");
6337
+ return ts.toISOString().replace(/[:.]/g, "");
6337
6338
  }
6338
6339
  async function lintWorkspaceFiles(opts) {
6339
6340
  const warnings = [];
@@ -11630,10 +11631,14 @@ function assertSafeBenchmarkId(benchmarkId) {
11630
11631
  }
11631
11632
  return benchmarkId;
11632
11633
  }
11633
- function validateEvalBenchmarkManifest(raw) {
11634
+ function validateEvalBenchmarkManifest(raw, options) {
11634
11635
  if (!isRecord(raw)) throw new Error("benchmark manifest must be an object");
11635
11636
  if (raw.schemaVersion !== 1) throw new Error("schemaVersion must be 1");
11636
11637
  if (!Array.isArray(raw.cases)) throw new Error("cases must be an array");
11638
+ const benchmarkTypeRaw = typeof raw.benchmarkType === "string" && raw.benchmarkType.trim().length > 0 ? raw.benchmarkType.trim() : "standard";
11639
+ if (!["standard", "memory-red-team"].includes(benchmarkTypeRaw)) {
11640
+ throw new Error("benchmarkType must be one of standard|memory-red-team");
11641
+ }
11637
11642
  const cases = raw.cases.map((item, index) => {
11638
11643
  if (!isRecord(item)) throw new Error(`cases[${index}] must be an object`);
11639
11644
  return {
@@ -11643,13 +11648,28 @@ function validateEvalBenchmarkManifest(raw) {
11643
11648
  notes: typeof item.notes === "string" && item.notes.trim().length > 0 ? item.notes.trim() : void 0
11644
11649
  };
11645
11650
  });
11651
+ const benchmarkType = benchmarkTypeRaw;
11652
+ if (benchmarkType === "memory-red-team" && options?.memoryRedTeamBenchEnabled !== true) {
11653
+ throw new Error("memory-red-team benchmark packs require memoryRedTeamBenchEnabled");
11654
+ }
11655
+ const attackClass = typeof raw.attackClass === "string" && raw.attackClass.trim().length > 0 ? raw.attackClass.trim() : void 0;
11656
+ const targetSurface = typeof raw.targetSurface === "string" && raw.targetSurface.trim().length > 0 ? raw.targetSurface.trim() : void 0;
11657
+ if (benchmarkType === "memory-red-team" && attackClass === void 0) {
11658
+ throw new Error("attackClass must be a non-empty string");
11659
+ }
11660
+ if (benchmarkType === "memory-red-team" && targetSurface === void 0) {
11661
+ throw new Error("targetSurface must be a non-empty string");
11662
+ }
11646
11663
  return {
11647
11664
  schemaVersion: 1,
11648
11665
  benchmarkId: assertString(raw.benchmarkId, "benchmarkId"),
11666
+ benchmarkType,
11649
11667
  title: assertString(raw.title, "title"),
11650
11668
  description: typeof raw.description === "string" && raw.description.trim().length > 0 ? raw.description.trim() : void 0,
11651
11669
  tags: optionalStringArray(raw.tags, "tags"),
11652
11670
  sourceLinks: optionalStringArray(raw.sourceLinks, "sourceLinks"),
11671
+ attackClass,
11672
+ targetSurface,
11653
11673
  cases
11654
11674
  };
11655
11675
  }
@@ -11815,7 +11835,11 @@ async function collectEvalStoreSnapshot(options) {
11815
11835
  const manifests = [];
11816
11836
  for (const filePath of benchmarkFiles) {
11817
11837
  try {
11818
- manifests.push(validateEvalBenchmarkManifest(await readJsonFile(filePath)));
11838
+ manifests.push(
11839
+ validateEvalBenchmarkManifest(await readJsonFile(filePath), {
11840
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
11841
+ })
11842
+ );
11819
11843
  } catch (error) {
11820
11844
  invalidBenchmarks.push({
11821
11845
  path: filePath,
@@ -11852,10 +11876,18 @@ async function collectEvalStoreSnapshot(options) {
11852
11876
  });
11853
11877
  shadows.sort((a, b) => b.recordedAt.localeCompare(a.recordedAt));
11854
11878
  const tags = /* @__PURE__ */ new Set();
11879
+ const attackClasses = /* @__PURE__ */ new Set();
11855
11880
  const sourceLinks = /* @__PURE__ */ new Set();
11881
+ const targetSurfaces = /* @__PURE__ */ new Set();
11856
11882
  let totalCases = 0;
11883
+ let redTeam = 0;
11857
11884
  for (const manifest of manifests) {
11858
11885
  totalCases += manifest.cases.length;
11886
+ if (manifest.benchmarkType === "memory-red-team") {
11887
+ redTeam += 1;
11888
+ if (manifest.attackClass) attackClasses.add(manifest.attackClass);
11889
+ if (manifest.targetSurface) targetSurfaces.add(manifest.targetSurface);
11890
+ }
11859
11891
  for (const tag of manifest.tags ?? []) tags.add(tag);
11860
11892
  for (const link of manifest.sourceLinks ?? []) sourceLinks.add(link);
11861
11893
  }
@@ -11870,8 +11902,11 @@ async function collectEvalStoreSnapshot(options) {
11870
11902
  total: benchmarkFiles.length,
11871
11903
  valid: manifests.length,
11872
11904
  invalid: invalidBenchmarks.length,
11905
+ redTeam,
11873
11906
  totalCases,
11907
+ attackClasses: [...attackClasses].sort(),
11874
11908
  tags: [...tags].sort(),
11909
+ targetSurfaces: [...targetSurfaces].sort(),
11875
11910
  sourceLinks: [...sourceLinks].sort()
11876
11911
  },
11877
11912
  runs: {
@@ -11919,25 +11954,32 @@ async function resolveBenchmarkManifestPath(sourcePath) {
11919
11954
  }
11920
11955
  throw new Error("benchmark pack source must be a file or directory");
11921
11956
  }
11922
- async function validateEvalBenchmarkPack(sourcePath) {
11957
+ async function validateEvalBenchmarkPack(sourcePath, options) {
11923
11958
  const trimmedSourcePath = sourcePath.trim();
11924
11959
  if (trimmedSourcePath.length === 0) {
11925
11960
  throw new Error("benchmark pack path must be a non-empty string");
11926
11961
  }
11927
11962
  const { manifestPath } = await resolveBenchmarkManifestPath(trimmedSourcePath);
11928
- const manifest = validateEvalBenchmarkManifest(await readJsonFile(manifestPath));
11963
+ const manifest = validateEvalBenchmarkManifest(await readJsonFile(manifestPath), {
11964
+ memoryRedTeamBenchEnabled: options?.memoryRedTeamBenchEnabled
11965
+ });
11929
11966
  return {
11930
11967
  sourcePath: trimmedSourcePath,
11931
11968
  manifestPath,
11932
11969
  benchmarkId: assertSafeBenchmarkId(manifest.benchmarkId),
11970
+ benchmarkType: manifest.benchmarkType ?? "standard",
11933
11971
  title: manifest.title,
11972
+ attackClass: manifest.attackClass,
11973
+ targetSurface: manifest.targetSurface,
11934
11974
  totalCases: manifest.cases.length,
11935
11975
  tags: [...manifest.tags ?? []],
11936
11976
  sourceLinks: [...manifest.sourceLinks ?? []]
11937
11977
  };
11938
11978
  }
11939
11979
  async function importEvalBenchmarkPack(options) {
11940
- const summary = await validateEvalBenchmarkPack(options.sourcePath);
11980
+ const summary = await validateEvalBenchmarkPack(options.sourcePath, {
11981
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
11982
+ });
11941
11983
  const rootDir = resolveEvalStoreDir(options.memoryDir, options.evalStoreDir);
11942
11984
  const benchmarkDir = path15.join(rootDir, "benchmarks");
11943
11985
  const targetDir = path15.join(benchmarkDir, summary.benchmarkId);
@@ -11982,7 +12024,8 @@ async function getEvalHarnessStatus(options) {
11982
12024
  return (await collectEvalStoreSnapshot({
11983
12025
  rootDir: resolveEvalStoreDir(options.memoryDir, options.evalStoreDir),
11984
12026
  enabled: options.enabled,
11985
- shadowModeEnabled: options.shadowModeEnabled
12027
+ shadowModeEnabled: options.shadowModeEnabled,
12028
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
11986
12029
  })).status;
11987
12030
  }
11988
12031
  function resolveRequiredEvalStoreRoot(options, label) {
@@ -12006,12 +12049,14 @@ async function runEvalBenchmarkCiGate(options) {
12006
12049
  const baseSnapshot = await collectEvalStoreSnapshot({
12007
12050
  rootDir: baseRootDir,
12008
12051
  enabled: true,
12009
- shadowModeEnabled: true
12052
+ shadowModeEnabled: true,
12053
+ memoryRedTeamBenchEnabled: true
12010
12054
  });
12011
12055
  const candidateSnapshot = await collectEvalStoreSnapshot({
12012
12056
  rootDir: candidateRootDir,
12013
12057
  enabled: true,
12014
- shadowModeEnabled: true
12058
+ shadowModeEnabled: true,
12059
+ memoryRedTeamBenchEnabled: true
12015
12060
  });
12016
12061
  const regressions = [];
12017
12062
  const improvements = [];
@@ -26852,18 +26897,22 @@ async function runBenchmarkStatusCliCommand(options) {
26852
26897
  memoryDir: options.memoryDir,
26853
26898
  evalStoreDir: options.evalStoreDir,
26854
26899
  enabled: options.evalHarnessEnabled,
26855
- shadowModeEnabled: options.evalShadowModeEnabled
26900
+ shadowModeEnabled: options.evalShadowModeEnabled,
26901
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
26856
26902
  });
26857
26903
  }
26858
26904
  async function runBenchmarkValidateCliCommand(options) {
26859
- return validateEvalBenchmarkPack(options.path);
26905
+ return validateEvalBenchmarkPack(options.path, {
26906
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
26907
+ });
26860
26908
  }
26861
26909
  async function runBenchmarkImportCliCommand(options) {
26862
26910
  return importEvalBenchmarkPack({
26863
26911
  sourcePath: options.path,
26864
26912
  memoryDir: options.memoryDir,
26865
26913
  evalStoreDir: options.evalStoreDir,
26866
- force: options.force === true
26914
+ force: options.force === true,
26915
+ memoryRedTeamBenchEnabled: options.memoryRedTeamBenchEnabled
26867
26916
  });
26868
26917
  }
26869
26918
  async function runBenchmarkCiGateCliCommand(options) {
@@ -28017,7 +28066,8 @@ function registerCli(api, orchestrator) {
28017
28066
  memoryDir: orchestrator.config.memoryDir,
28018
28067
  evalStoreDir: orchestrator.config.evalStoreDir,
28019
28068
  evalHarnessEnabled: orchestrator.config.evalHarnessEnabled,
28020
- evalShadowModeEnabled: orchestrator.config.evalShadowModeEnabled
28069
+ evalShadowModeEnabled: orchestrator.config.evalShadowModeEnabled,
28070
+ memoryRedTeamBenchEnabled: orchestrator.config.memoryRedTeamBenchEnabled
28021
28071
  });
28022
28072
  console.log(JSON.stringify(status, null, 2));
28023
28073
  console.log("OK");
@@ -28025,7 +28075,8 @@ function registerCli(api, orchestrator) {
28025
28075
  cmd.command("benchmark-validate").description("Validate a benchmark manifest file or pack directory without importing it").argument("<path>", "Path to a benchmark manifest JSON file or a directory with manifest.json").action(async (...args) => {
28026
28076
  const inputPath = args[0];
28027
28077
  const summary = await runBenchmarkValidateCliCommand({
28028
- path: typeof inputPath === "string" ? inputPath : ""
28078
+ path: typeof inputPath === "string" ? inputPath : "",
28079
+ memoryRedTeamBenchEnabled: orchestrator.config.memoryRedTeamBenchEnabled
28029
28080
  });
28030
28081
  console.log(JSON.stringify(summary, null, 2));
28031
28082
  console.log("OK");
@@ -28037,7 +28088,8 @@ function registerCli(api, orchestrator) {
28037
28088
  path: typeof inputPath === "string" ? inputPath : "",
28038
28089
  memoryDir: orchestrator.config.memoryDir,
28039
28090
  evalStoreDir: orchestrator.config.evalStoreDir,
28040
- force: options.force === true
28091
+ force: options.force === true,
28092
+ memoryRedTeamBenchEnabled: orchestrator.config.memoryRedTeamBenchEnabled
28041
28093
  });
28042
28094
  console.log(JSON.stringify(summary, null, 2));
28043
28095
  console.log("OK");