audrey 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +30 -6
  3. package/benchmarks/adapter-self-test.mjs +6 -2
  4. package/benchmarks/adapters/example-allow.mjs +5 -2
  5. package/benchmarks/adapters/mem0-platform.mjs +19 -12
  6. package/benchmarks/adapters/zep-cloud.mjs +51 -27
  7. package/benchmarks/baselines.js +11 -6
  8. package/benchmarks/build-leaderboard.mjs +36 -23
  9. package/benchmarks/cases.js +24 -12
  10. package/benchmarks/create-conformance-card.mjs +12 -3
  11. package/benchmarks/create-submission-bundle.mjs +22 -8
  12. package/benchmarks/dry-run-external-adapters.mjs +24 -12
  13. package/benchmarks/guardbench.js +354 -124
  14. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
  15. package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  16. package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  17. package/benchmarks/output/guardbench-conformance-card.json +12 -12
  18. package/benchmarks/output/guardbench-raw.json +243 -144
  19. package/benchmarks/output/guardbench-summary.json +354 -230
  20. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  21. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  22. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
  23. package/benchmarks/output/submission-bundle/guardbench-raw.json +243 -144
  24. package/benchmarks/output/submission-bundle/guardbench-summary.json +354 -230
  25. package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
  26. package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
  27. package/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
  28. package/benchmarks/output/submission-bundle/validation-report.json +1 -1
  29. package/benchmarks/output/summary.json +58 -58
  30. package/benchmarks/perf-snapshot.js +12 -9
  31. package/benchmarks/perf.bench.js +14 -6
  32. package/benchmarks/public-paths.mjs +11 -5
  33. package/benchmarks/reference-results.js +10 -5
  34. package/benchmarks/report.js +48 -27
  35. package/benchmarks/run-external-guardbench.mjs +47 -25
  36. package/benchmarks/run.js +112 -59
  37. package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
  38. package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
  39. package/benchmarks/validate-adapter-module.mjs +13 -10
  40. package/benchmarks/validate-adapter-registry.mjs +16 -5
  41. package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
  42. package/benchmarks/verify-external-evidence.mjs +86 -31
  43. package/benchmarks/verify-publication-artifacts.mjs +34 -11
  44. package/benchmarks/verify-submission-bundle.mjs +9 -4
  45. package/dist/mcp-server/config.d.ts +1 -1
  46. package/dist/mcp-server/config.d.ts.map +1 -1
  47. package/dist/mcp-server/config.js +5 -3
  48. package/dist/mcp-server/config.js.map +1 -1
  49. package/dist/mcp-server/index.d.ts +4 -3
  50. package/dist/mcp-server/index.d.ts.map +1 -1
  51. package/dist/mcp-server/index.js +479 -172
  52. package/dist/mcp-server/index.js.map +1 -1
  53. package/dist/src/action-key.d.ts.map +1 -1
  54. package/dist/src/action-key.js +6 -2
  55. package/dist/src/action-key.js.map +1 -1
  56. package/dist/src/adaptive.d.ts.map +1 -1
  57. package/dist/src/adaptive.js +4 -2
  58. package/dist/src/adaptive.js.map +1 -1
  59. package/dist/src/affect.d.ts.map +1 -1
  60. package/dist/src/affect.js +8 -5
  61. package/dist/src/affect.js.map +1 -1
  62. package/dist/src/audrey.d.ts +11 -1
  63. package/dist/src/audrey.d.ts.map +1 -1
  64. package/dist/src/audrey.js +110 -53
  65. package/dist/src/audrey.js.map +1 -1
  66. package/dist/src/capsule.d.ts.map +1 -1
  67. package/dist/src/capsule.js +37 -15
  68. package/dist/src/capsule.js.map +1 -1
  69. package/dist/src/causal.d.ts +1 -1
  70. package/dist/src/causal.d.ts.map +1 -1
  71. package/dist/src/causal.js +4 -2
  72. package/dist/src/causal.js.map +1 -1
  73. package/dist/src/confidence.d.ts.map +1 -1
  74. package/dist/src/confidence.js +5 -5
  75. package/dist/src/confidence.js.map +1 -1
  76. package/dist/src/consolidate.d.ts.map +1 -1
  77. package/dist/src/consolidate.js +17 -9
  78. package/dist/src/consolidate.js.map +1 -1
  79. package/dist/src/context.js +1 -1
  80. package/dist/src/context.js.map +1 -1
  81. package/dist/src/controller.d.ts +17 -1
  82. package/dist/src/controller.d.ts.map +1 -1
  83. package/dist/src/controller.js +73 -23
  84. package/dist/src/controller.js.map +1 -1
  85. package/dist/src/db.d.ts.map +1 -1
  86. package/dist/src/db.js +78 -27
  87. package/dist/src/db.js.map +1 -1
  88. package/dist/src/decay.d.ts +1 -1
  89. package/dist/src/decay.d.ts.map +1 -1
  90. package/dist/src/decay.js +1 -1
  91. package/dist/src/decay.js.map +1 -1
  92. package/dist/src/embedding.d.ts +12 -4
  93. package/dist/src/embedding.d.ts.map +1 -1
  94. package/dist/src/embedding.js +18 -16
  95. package/dist/src/embedding.js.map +1 -1
  96. package/dist/src/encode.d.ts.map +1 -1
  97. package/dist/src/encode.js +5 -4
  98. package/dist/src/encode.js.map +1 -1
  99. package/dist/src/events.d.ts +3 -2
  100. package/dist/src/events.d.ts.map +1 -1
  101. package/dist/src/events.js +7 -3
  102. package/dist/src/events.js.map +1 -1
  103. package/dist/src/export.d.ts.map +1 -1
  104. package/dist/src/export.js +21 -7
  105. package/dist/src/export.js.map +1 -1
  106. package/dist/src/feedback.d.ts.map +1 -1
  107. package/dist/src/feedback.js +1 -1
  108. package/dist/src/feedback.js.map +1 -1
  109. package/dist/src/forget.d.ts.map +1 -1
  110. package/dist/src/forget.js +12 -6
  111. package/dist/src/forget.js.map +1 -1
  112. package/dist/src/fts.d.ts.map +1 -1
  113. package/dist/src/fts.js +20 -8
  114. package/dist/src/fts.js.map +1 -1
  115. package/dist/src/hybrid-recall.d.ts.map +1 -1
  116. package/dist/src/hybrid-recall.js +12 -6
  117. package/dist/src/hybrid-recall.js.map +1 -1
  118. package/dist/src/impact.d.ts.map +1 -1
  119. package/dist/src/impact.js +26 -10
  120. package/dist/src/impact.js.map +1 -1
  121. package/dist/src/import.d.ts.map +1 -1
  122. package/dist/src/import.js +11 -6
  123. package/dist/src/import.js.map +1 -1
  124. package/dist/src/index.d.ts +5 -4
  125. package/dist/src/index.d.ts.map +1 -1
  126. package/dist/src/index.js +4 -4
  127. package/dist/src/index.js.map +1 -1
  128. package/dist/src/interference.d.ts.map +1 -1
  129. package/dist/src/interference.js +10 -5
  130. package/dist/src/interference.js.map +1 -1
  131. package/dist/src/introspect.d.ts.map +1 -1
  132. package/dist/src/introspect.js +12 -6
  133. package/dist/src/introspect.js.map +1 -1
  134. package/dist/src/llm.d.ts +2 -2
  135. package/dist/src/llm.d.ts.map +1 -1
  136. package/dist/src/llm.js +6 -6
  137. package/dist/src/llm.js.map +1 -1
  138. package/dist/src/migrate.d.ts.map +1 -1
  139. package/dist/src/migrate.js +10 -4
  140. package/dist/src/migrate.js.map +1 -1
  141. package/dist/src/preflight.d.ts.map +1 -1
  142. package/dist/src/preflight.js +6 -8
  143. package/dist/src/preflight.js.map +1 -1
  144. package/dist/src/profile.d.ts.map +1 -1
  145. package/dist/src/profile.js.map +1 -1
  146. package/dist/src/promote.d.ts.map +1 -1
  147. package/dist/src/promote.js +16 -7
  148. package/dist/src/promote.js.map +1 -1
  149. package/dist/src/prompts.d.ts.map +1 -1
  150. package/dist/src/prompts.js +1 -2
  151. package/dist/src/prompts.js.map +1 -1
  152. package/dist/src/recall.d.ts.map +1 -1
  153. package/dist/src/recall.js +85 -18
  154. package/dist/src/recall.js.map +1 -1
  155. package/dist/src/redact.d.ts.map +1 -1
  156. package/dist/src/redact.js +9 -4
  157. package/dist/src/redact.js.map +1 -1
  158. package/dist/src/reflexes.d.ts.map +1 -1
  159. package/dist/src/reflexes.js +1 -7
  160. package/dist/src/reflexes.js.map +1 -1
  161. package/dist/src/rollback.d.ts.map +1 -1
  162. package/dist/src/rollback.js +4 -2
  163. package/dist/src/rollback.js.map +1 -1
  164. package/dist/src/routes.d.ts.map +1 -1
  165. package/dist/src/routes.js +37 -14
  166. package/dist/src/routes.js.map +1 -1
  167. package/dist/src/rules-compiler.d.ts.map +1 -1
  168. package/dist/src/rules-compiler.js +24 -2
  169. package/dist/src/rules-compiler.js.map +1 -1
  170. package/dist/src/server.js +2 -2
  171. package/dist/src/server.js.map +1 -1
  172. package/dist/src/tool-trace.d.ts +2 -2
  173. package/dist/src/tool-trace.d.ts.map +1 -1
  174. package/dist/src/tool-trace.js +12 -4
  175. package/dist/src/tool-trace.js.map +1 -1
  176. package/dist/src/types.d.ts.map +1 -1
  177. package/dist/src/ulid.js +1 -1
  178. package/dist/src/ulid.js.map +1 -1
  179. package/dist/src/utils.d.ts.map +1 -1
  180. package/dist/src/utils.js.map +1 -1
  181. package/dist/src/validate.d.ts.map +1 -1
  182. package/dist/src/validate.js +20 -10
  183. package/dist/src/validate.js.map +1 -1
  184. package/docs/paper/07-evaluation.md +5 -5
  185. package/docs/paper/audrey-paper-v1.md +6 -6
  186. package/docs/paper/evidence-ledger.md +1 -1
  187. package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  188. package/docs/paper/output/arxiv/main.tex +6 -6
  189. package/docs/paper/output/arxiv-compile-report.json +3 -3
  190. package/docs/paper/output/submission-bundle/README.md +30 -6
  191. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
  192. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  193. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  194. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
  195. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +243 -144
  196. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +354 -230
  197. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  198. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  199. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
  200. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
  201. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +52 -52
  202. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
  203. package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
  204. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
  205. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
  206. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
  207. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  208. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
  209. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
  210. package/docs/paper/output/submission-bundle/package.json +18 -5
  211. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +40 -40
  212. package/examples/fintech-ops-demo.js +12 -5
  213. package/examples/healthcare-ops-demo.js +8 -4
  214. package/examples/ollama-memory-agent.js +41 -13
  215. package/examples/stripe-demo.js +12 -5
  216. package/package.json +18 -5
  217. package/scripts/audit-release-completion.mjs +179 -101
  218. package/scripts/create-arxiv-source.mjs +20 -14
  219. package/scripts/create-paper-submission-bundle.mjs +6 -2
  220. package/scripts/finalize-release.mjs +111 -36
  221. package/scripts/prepare-release-cut.mjs +14 -6
  222. package/scripts/publish-release-bundle.mjs +62 -23
  223. package/scripts/publish-release-github-api.mjs +89 -24
  224. package/scripts/smoke-cli.js +26 -6
  225. package/scripts/sync-paper-artifacts.mjs +5 -1
  226. package/scripts/verify-arxiv-compile.mjs +52 -16
  227. package/scripts/verify-arxiv-source.mjs +45 -15
  228. package/scripts/verify-browser-launch-plan.mjs +28 -11
  229. package/scripts/verify-browser-launch-results.mjs +32 -14
  230. package/scripts/verify-paper-artifacts.mjs +539 -79
  231. package/scripts/verify-paper-claims.mjs +48 -20
  232. package/scripts/verify-paper-submission-bundle.mjs +22 -11
  233. package/scripts/verify-publication-pack.mjs +23 -9
  234. package/scripts/verify-release-readiness.mjs +250 -71
@@ -157,8 +157,8 @@ function parseArgs(argv = process.argv.slice(2)) {
157
157
  if (token === '--sizes' && argv[i + 1]) {
158
158
  args.sizes = argv[++i]
159
159
  .split(',')
160
- .map((s) => Number.parseInt(s.trim(), 10))
161
- .filter((n) => Number.isFinite(n) && n > 0);
160
+ .map(s => Number.parseInt(s.trim(), 10))
161
+ .filter(n => Number.isFinite(n) && n > 0);
162
162
  } else if (token === '--recall-runs' && argv[i + 1]) {
163
163
  args.recallRuns = Number.parseInt(argv[++i], 10);
164
164
  } else if (token === '--out' && argv[i + 1]) {
@@ -180,7 +180,7 @@ async function runOneSize({ size, recallRuns }) {
180
180
  });
181
181
 
182
182
  const queueProcessingTimes = [];
183
- audrey.on('post-encode-complete', (event) => {
183
+ audrey.on('post-encode-complete', event => {
184
184
  queueProcessingTimes.push(event.processing_ms);
185
185
  });
186
186
 
@@ -223,7 +223,10 @@ async function runOneSize({ size, recallRuns }) {
223
223
  }
224
224
  }
225
225
 
226
- export async function runPerfSnapshot({ sizes = DEFAULT_SIZES, recallRuns = DEFAULT_RECALL_RUNS } = {}) {
226
+ export async function runPerfSnapshot({
227
+ sizes = DEFAULT_SIZES,
228
+ recallRuns = DEFAULT_RECALL_RUNS,
229
+ } = {}) {
227
230
  const startedAt = Date.now();
228
231
  const sized = [];
229
232
  for (const size of sizes) {
@@ -265,11 +268,11 @@ export function formatMarkdownTable(snapshot) {
265
268
  lines.push(
266
269
  `Node ${snapshot.machine.node} · ${snapshot.machine.cpuCount}x ${snapshot.machine.cpuModel} · ${snapshot.machine.memoryGb} GB RAM`,
267
270
  );
271
+ lines.push(`Generated ${snapshot.generatedAt}${snapshot.gitSha ? ` (${snapshot.gitSha})` : ''}`);
272
+ lines.push('');
268
273
  lines.push(
269
- `Generated ${snapshot.generatedAt}${snapshot.gitSha ? ` (${snapshot.gitSha})` : ''}`,
274
+ '| Corpus size | Encode p50 (ms) | Encode p95 (ms) | Recall p50 (ms) | Recall p95 (ms) | Recall p99 (ms) |',
270
275
  );
271
- lines.push('');
272
- lines.push('| Corpus size | Encode p50 (ms) | Encode p95 (ms) | Recall p50 (ms) | Recall p95 (ms) | Recall p99 (ms) |');
273
276
  lines.push('|---|---|---|---|---|---|');
274
277
  for (const row of snapshot.sizes) {
275
278
  lines.push(
@@ -287,7 +290,7 @@ export function formatMarkdownTable(snapshot) {
287
290
  if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
288
291
  const args = parseArgs();
289
292
  runPerfSnapshot({ sizes: args.sizes, recallRuns: args.recallRuns })
290
- .then((snapshot) => {
293
+ .then(snapshot => {
291
294
  if (args.out) {
292
295
  writeFileSync(args.out, JSON.stringify(snapshot, null, 2) + '\n');
293
296
  }
@@ -297,7 +300,7 @@ if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href)
297
300
  process.stdout.write(formatMarkdownTable(snapshot) + '\n');
298
301
  }
299
302
  })
300
- .catch((err) => {
303
+ .catch(err => {
301
304
  console.error('[audrey] perf snapshot failed:', err);
302
305
  process.exit(1);
303
306
  });
@@ -136,16 +136,24 @@ export async function runPerfBenchmark({
136
136
  };
137
137
 
138
138
  if (queueProcessingTimes.length !== runs) {
139
- throw new Error(`expected ${runs} post-encode queue events, got ${queueProcessingTimes.length}`);
139
+ throw new Error(
140
+ `expected ${runs} post-encode queue events, got ${queueProcessingTimes.length}`,
141
+ );
140
142
  }
141
143
 
142
144
  assertBudget('encode response p95', result.encode_response_ms.p95, budgets.encodeResponseP95Ms);
143
145
  assertBudget('hybrid recall p95', result.hybrid_recall_ms.p95, budgets.hybridRecallP95Ms);
144
- assertBudget('queue processing p50', result.queue_processing_ms.p50, budgets.queueProcessingP50Ms);
145
-
146
- out(`Audrey perf gate passed: encode p95=${result.encode_response_ms.p95}ms, `
147
- + `hybrid recall p95=${result.hybrid_recall_ms.p95}ms, `
148
- + `queue p50=${result.queue_processing_ms.p50}ms`);
146
+ assertBudget(
147
+ 'queue processing p50',
148
+ result.queue_processing_ms.p50,
149
+ budgets.queueProcessingP50Ms,
150
+ );
151
+
152
+ out(
153
+ `Audrey perf gate passed: encode p95=${result.encode_response_ms.p95}ms, ` +
154
+ `hybrid recall p95=${result.hybrid_recall_ms.p95}ms, ` +
155
+ `queue p50=${result.queue_processing_ms.p50}ms`,
156
+ );
149
157
  return result;
150
158
  } finally {
151
159
  audrey.close();
@@ -34,15 +34,19 @@ export function publicCommand(command = []) {
34
34
  export function publicArtifactValue(value) {
35
35
  if (Array.isArray(value)) return value.map(item => publicArtifactValue(item));
36
36
  if (value && typeof value === 'object') {
37
- return Object.fromEntries(Object.entries(value).map(([key, item]) => [key, publicArtifactValue(item)]));
37
+ return Object.fromEntries(
38
+ Object.entries(value).map(([key, item]) => [key, publicArtifactValue(item)]),
39
+ );
38
40
  }
39
41
  return publicPath(value);
40
42
  }
41
43
 
42
44
  export function containsLocalPath(text) {
43
- return WINDOWS_DRIVE_PATTERN.test(text)
44
- || EXTENDED_PATH_PATTERN.test(text)
45
- || FILE_URL_PATTERN.test(text);
45
+ return (
46
+ WINDOWS_DRIVE_PATTERN.test(text) ||
47
+ EXTENDED_PATH_PATTERN.test(text) ||
48
+ FILE_URL_PATTERN.test(text)
49
+ );
46
50
  }
47
51
 
48
52
  export function findLocalPathLeaks(value, path = '$') {
@@ -53,7 +57,9 @@ export function findLocalPathLeaks(value, path = '$') {
53
57
  return value.flatMap((item, index) => findLocalPathLeaks(item, `${path}[${index}]`));
54
58
  }
55
59
  if (value && typeof value === 'object') {
56
- return Object.entries(value).flatMap(([key, item]) => findLocalPathLeaks(item, `${path}.${key}`));
60
+ return Object.entries(value).flatMap(([key, item]) =>
61
+ findLocalPathLeaks(item, `${path}.${key}`),
62
+ );
57
63
  }
58
64
  return [];
59
65
  }
@@ -44,27 +44,32 @@ export const PUBLISHED_LEADERBOARD = [
44
44
  export const MEMORY_TRENDS = [
45
45
  {
46
46
  title: 'Memory is moving from flat retrieval to typed systems',
47
- summary: 'Recent work treats episodic, semantic, procedural, and graph memory as separate but cooperating layers.',
47
+ summary:
48
+ 'Recent work treats episodic, semantic, procedural, and graph memory as separate but cooperating layers.',
48
49
  source: 'https://arxiv.org/abs/2507.03724',
49
50
  },
50
51
  {
51
52
  title: 'Benchmarks now emphasize multi-session realism',
52
- summary: 'LongMemEval and LoCoMo push memory systems toward temporal updates, abstraction, and cross-session reasoning instead of single-turn fact recall.',
53
+ summary:
54
+ 'LongMemEval and LoCoMo push memory systems toward temporal updates, abstraction, and cross-session reasoning instead of single-turn fact recall.',
53
55
  source: 'https://arxiv.org/abs/2410.10813',
54
56
  },
55
57
  {
56
58
  title: 'Context engineering is now competing with retrieval-first designs',
57
- summary: 'Letta argues filesystem and memory-block approaches can outperform simpler retrieval-only memory on realistic long-horizon tasks.',
59
+ summary:
60
+ 'Letta argues filesystem and memory-block approaches can outperform simpler retrieval-only memory on realistic long-horizon tasks.',
58
61
  source: 'https://www.letta.com/blog/memory-blocks',
59
62
  },
60
63
  {
61
64
  title: 'Production teams care about latency and token footprint, not just recall quality',
62
- summary: 'Mem0 frames memory as a cost and latency optimization surface in addition to a personalization surface.',
65
+ summary:
66
+ 'Mem0 frames memory as a cost and latency optimization surface in addition to a personalization surface.',
63
67
  source: 'https://arxiv.org/abs/2504.19413',
64
68
  },
65
69
  {
66
70
  title: 'Temporal and multimodal memory are becoming table stakes',
67
- summary: 'MIRIX and Graphiti both model time and state change explicitly instead of assuming memories stay forever true.',
71
+ summary:
72
+ 'MIRIX and Graphiti both model time and state change explicitly instead of assuming memories stay forever true.',
68
73
  source: 'https://arxiv.org/abs/2507.07957',
69
74
  },
70
75
  ];
@@ -38,25 +38,29 @@ function renderBarChart({ title, rows, valueSuffix = '%', maxValue = 100 }) {
38
38
  const barWidth = Math.max(32, Math.floor(plotWidth / Math.max(rows.length, 1)) - 18);
39
39
  const gap = rows.length > 1 ? (plotWidth - barWidth * rows.length) / (rows.length - 1) : 0;
40
40
 
41
- const bars = rows.map((row, index) => {
42
- const value = Math.max(0, Math.min(maxValue, row.value));
43
- const barHeight = (value / maxValue) * plotHeight;
44
- const x = margin.left + index * (barWidth + gap);
45
- const y = margin.top + plotHeight - barHeight;
46
- return `
41
+ const bars = rows
42
+ .map((row, index) => {
43
+ const value = Math.max(0, Math.min(maxValue, row.value));
44
+ const barHeight = (value / maxValue) * plotHeight;
45
+ const x = margin.left + index * (barWidth + gap);
46
+ const y = margin.top + plotHeight - barHeight;
47
+ return `
47
48
  <rect x="${x}" y="${y}" width="${barWidth}" height="${barHeight}" rx="8" fill="${chartBarColor(row.label)}" />
48
49
  <text x="${x + barWidth / 2}" y="${y - 10}" text-anchor="middle" font-size="15" fill="${PALETTE.accent}">${value.toFixed(1)}${valueSuffix}</text>
49
50
  <text x="${x + barWidth / 2}" y="${height - 42}" text-anchor="middle" font-size="14" fill="${PALETTE.muted}">${escapeHtml(row.label)}</text>
50
51
  `;
51
- }).join('\n');
52
+ })
53
+ .join('\n');
52
54
 
53
- const grid = [0, 25, 50, 75, 100].map(tick => {
54
- const y = margin.top + plotHeight - (tick / maxValue) * plotHeight;
55
- return `
55
+ const grid = [0, 25, 50, 75, 100]
56
+ .map(tick => {
57
+ const y = margin.top + plotHeight - (tick / maxValue) * plotHeight;
58
+ return `
56
59
  <line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" stroke="${PALETTE.border}" stroke-dasharray="4 4" />
57
60
  <text x="${margin.left - 10}" y="${y + 5}" text-anchor="end" font-size="13" fill="${PALETTE.muted}">${tick}${valueSuffix}</text>
58
61
  `;
59
- }).join('\n');
62
+ })
63
+ .join('\n');
60
64
 
61
65
  return `<?xml version="1.0" encoding="UTF-8"?>
62
66
  <svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" role="img" aria-label="${escapeHtml(title)}">
@@ -68,39 +72,53 @@ function renderBarChart({ title, rows, valueSuffix = '%', maxValue = 100 }) {
68
72
  }
69
73
 
70
74
  function renderTrendList(trends) {
71
- return trends.map(trend => `
75
+ return trends
76
+ .map(
77
+ trend => `
72
78
  <li>
73
79
  <strong>${escapeHtml(trend.title)}</strong><br />
74
80
  ${escapeHtml(trend.summary)}<br />
75
81
  <a href="${trend.source}">${escapeHtml(trend.source)}</a>
76
82
  </li>
77
- `).join('\n');
83
+ `,
84
+ )
85
+ .join('\n');
78
86
  }
79
87
 
80
88
  function renderCaseRows(localCases) {
81
- return localCases.map(caseResult => `
89
+ return localCases
90
+ .map(
91
+ caseResult => `
82
92
  <tr>
83
93
  <td>${escapeHtml(caseResult.title)}</td>
84
94
  <td>${escapeHtml(caseResult.suite)}</td>
85
95
  <td>${escapeHtml(caseResult.family)}</td>
86
- ${caseResult.results.map(result => {
87
- const bg = result.passed ? '#ecfdf5' : result.score >= 0.5 ? '#fff7ed' : '#fef2f2';
88
- const fg = result.passed ? '#065f46' : result.score >= 0.5 ? '#9a3412' : '#991b1b';
89
- return `<td style="background:${bg};color:${fg}">${result.score.toFixed(2)}<br /><span style="font-size:12px">${escapeHtml(result.summary)}</span></td>`;
90
- }).join('')}
96
+ ${caseResult.results
97
+ .map(result => {
98
+ const bg = result.passed ? '#ecfdf5' : result.score >= 0.5 ? '#fff7ed' : '#fef2f2';
99
+ const fg = result.passed ? '#065f46' : result.score >= 0.5 ? '#9a3412' : '#991b1b';
100
+ return `<td style="background:${bg};color:${fg}">${result.score.toFixed(2)}<br /><span style="font-size:12px">${escapeHtml(result.summary)}</span></td>`;
101
+ })
102
+ .join('')}
91
103
  </tr>
92
- `).join('\n');
104
+ `,
105
+ )
106
+ .join('\n');
93
107
  }
94
108
 
95
109
  function renderSuiteSections(suiteCharts) {
96
110
  if (suiteCharts.length === 0) return '';
97
- return suiteCharts.map(chart => `
111
+ return suiteCharts
112
+ .map(
113
+ chart => `
98
114
  <section class="callout">
99
115
  <h2>${escapeHtml(chart.title)}</h2>
100
116
  <p>${escapeHtml(chart.description)}</p>
101
117
  <img src="./${escapeHtml(chart.fileName)}" alt="${escapeHtml(chart.title)} chart" />
102
118
  </section>
103
- `).join('\n');
119
+ `,
120
+ )
121
+ .join('\n');
104
122
  }
105
123
 
106
124
  export function writeBenchmarkArtifacts({
@@ -114,9 +132,10 @@ export function writeBenchmarkArtifacts({
114
132
  }) {
115
133
  mkdirSync(outputDir, { recursive: true });
116
134
 
117
- const localChartTitle = summary.local?.overall_scope === 'comparable_suites'
118
- ? 'Audrey vs Comparable Local Memory Baselines'
119
- : 'Selected Audrey Regression Suite';
135
+ const localChartTitle =
136
+ summary.local?.overall_scope === 'comparable_suites'
137
+ ? 'Audrey vs Comparable Local Memory Baselines'
138
+ : 'Selected Audrey Regression Suite';
120
139
  const localChart = renderBarChart({
121
140
  title: localChartTitle,
122
141
  rows: localOverall.map(row => ({ label: row.system, value: row.scorePercent })),
@@ -162,8 +181,10 @@ export function writeBenchmarkArtifacts({
162
181
  operationsReadmeChart,
163
182
  renderBarChart({
164
183
  title: 'Audrey Memory Operations Benchmark',
165
- rows: (localSuites.find(suite => suite.id === 'operations')?.overall || [])
166
- .map(row => ({ label: row.system, value: row.scorePercent })),
184
+ rows: (localSuites.find(suite => suite.id === 'operations')?.overall || []).map(row => ({
185
+ label: row.system,
186
+ value: row.scorePercent,
187
+ })),
167
188
  }),
168
189
  'utf8',
169
190
  );
@@ -3,31 +3,46 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3
3
  import { basename, dirname, resolve } from 'node:path';
4
4
  import { fileURLToPath } from 'node:url';
5
5
  import { writeGuardBenchConformanceCard } from './create-conformance-card.mjs';
6
- import { computeGuardBenchArtifactHashes, validateGuardBenchArtifacts } from './validate-guardbench-artifacts.mjs';
6
+ import {
7
+ computeGuardBenchArtifactHashes,
8
+ validateGuardBenchArtifacts,
9
+ } from './validate-guardbench-artifacts.mjs';
7
10
  import { publicArtifactValue } from './public-paths.mjs';
8
11
 
9
12
  const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..');
10
13
  const KNOWN_ADAPTERS = new Map([
11
- ['mem0', {
12
- name: 'mem0-platform',
13
- path: 'benchmarks/adapters/mem0-platform.mjs',
14
- requiredEnv: ['MEM0_API_KEY'],
15
- }],
16
- ['mem0-platform', {
17
- name: 'mem0-platform',
18
- path: 'benchmarks/adapters/mem0-platform.mjs',
19
- requiredEnv: ['MEM0_API_KEY'],
20
- }],
21
- ['zep', {
22
- name: 'zep-cloud',
23
- path: 'benchmarks/adapters/zep-cloud.mjs',
24
- requiredEnv: ['ZEP_API_KEY'],
25
- }],
26
- ['zep-cloud', {
27
- name: 'zep-cloud',
28
- path: 'benchmarks/adapters/zep-cloud.mjs',
29
- requiredEnv: ['ZEP_API_KEY'],
30
- }],
14
+ [
15
+ 'mem0',
16
+ {
17
+ name: 'mem0-platform',
18
+ path: 'benchmarks/adapters/mem0-platform.mjs',
19
+ requiredEnv: ['MEM0_API_KEY'],
20
+ },
21
+ ],
22
+ [
23
+ 'mem0-platform',
24
+ {
25
+ name: 'mem0-platform',
26
+ path: 'benchmarks/adapters/mem0-platform.mjs',
27
+ requiredEnv: ['MEM0_API_KEY'],
28
+ },
29
+ ],
30
+ [
31
+ 'zep',
32
+ {
33
+ name: 'zep-cloud',
34
+ path: 'benchmarks/adapters/zep-cloud.mjs',
35
+ requiredEnv: ['ZEP_API_KEY'],
36
+ },
37
+ ],
38
+ [
39
+ 'zep-cloud',
40
+ {
41
+ name: 'zep-cloud',
42
+ path: 'benchmarks/adapters/zep-cloud.mjs',
43
+ requiredEnv: ['ZEP_API_KEY'],
44
+ },
45
+ ],
31
46
  ]);
32
47
 
33
48
  export function parseExternalArgs(argv = process.argv.slice(2)) {
@@ -127,13 +142,19 @@ export function evaluateAdapterConformance(summary, adapterName) {
127
142
  .filter(row => row.system === resolvedAdapterName);
128
143
 
129
144
  if (adapterRows.length !== expectedScenarios) {
130
- failures.push(`Adapter ${resolvedAdapterName} returned ${adapterRows.length}/${expectedScenarios} scenario rows`);
145
+ failures.push(
146
+ `Adapter ${resolvedAdapterName} returned ${adapterRows.length}/${expectedScenarios} scenario rows`,
147
+ );
131
148
  }
132
149
  if (systemSummary && systemSummary.scenarios !== expectedScenarios) {
133
- failures.push(`Adapter ${resolvedAdapterName} system summary has ${systemSummary.scenarios}/${expectedScenarios} scenarios`);
150
+ failures.push(
151
+ `Adapter ${resolvedAdapterName} system summary has ${systemSummary.scenarios}/${expectedScenarios} scenarios`,
152
+ );
134
153
  }
135
154
  if (systemSummary && systemSummary.redactionLeaks !== 0) {
136
- failures.push(`Adapter ${resolvedAdapterName} leaked ${systemSummary.redactionLeaks} seeded secret(s) in decision output`);
155
+ failures.push(
156
+ `Adapter ${resolvedAdapterName} leaked ${systemSummary.redactionLeaks} seeded secret(s) in decision output`,
157
+ );
137
158
  }
138
159
  if (adapterRows.some(row => row.external !== true)) {
139
160
  failures.push(`Adapter ${resolvedAdapterName} rows are not marked external`);
@@ -270,7 +291,8 @@ async function main() {
270
291
  const card = child.status === 0 ? writeGuardBenchConformanceCard({ dir: run.outDir }) : null;
271
292
  console.log(`External GuardBench metadata: ${metadataPath}`);
272
293
  if (card) console.log(`External GuardBench conformance card: ${card.path}`);
273
- process.exitCode = child.status === 0 && validation.ok && adapterConformance.ok ? 0 : (child.status ?? 1);
294
+ process.exitCode =
295
+ child.status === 0 && validation.ok && adapterConformance.ok ? 0 : (child.status ?? 1);
274
296
  }
275
297
 
276
298
  if (process.argv[1] && process.argv[1].endsWith('run-external-guardbench.mjs')) {