audrey 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,514 @@
1
+ import { mkdirSync, mkdtempSync, rmSync } from 'node:fs';
2
+ import { join, resolve } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import { Audrey } from '../src/audrey.js';
5
+ import { LOCAL_BENCHMARK_SUITES, FAMILY_ORDER } from './cases.js';
6
+ import { runBaselineScenario } from './baselines.js';
7
+ import { MEMORY_TRENDS, PUBLISHED_LEADERBOARD } from './reference-results.js';
8
+ import { writeBenchmarkArtifacts } from './report.js';
9
+
10
// Every locally defined suite id, in declaration order; used to validate and expand suite selections.
const ALL_SUITE_IDS = LOCAL_BENCHMARK_SUITES.map(({ id }) => id);
// Suite id -> human-readable title, used when printing the CLI summary.
const SUITE_LABELS = new Map(LOCAL_BENCHMARK_SUITES.map(({ id, title }) => [id, title]));
12
+
13
/**
 * Parse benchmark CLI flags into a plain options object.
 *
 * Unknown tokens are ignored, and a value-taking flag that is missing its value
 * (or is followed by an empty string) is ignored without consuming a token.
 * `--provider local|openai|gemini` switches to that provider's default embedding
 * dimensions unless `--dimensions` was given explicitly, regardless of flag order.
 *
 * @param {string[]} [argv] - Raw CLI tokens; defaults to `process.argv.slice(2)`.
 * @returns {{provider: string, dimensions: number, outDir: string, jsonOnly: boolean,
 *   check: boolean, minAudreyScore: number, minAudreyPassRate: number,
 *   minMarginOverBaseline: number, readmeAssetsDir: (string|null), suite: string}}
 * @throws {Error} When a numeric flag receives a value that does not parse to a usable number.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const providerDimensions = new Map([
    ['local', 384],
    ['openai', 1536],
    ['gemini', 3072],
  ]);

  const args = {
    provider: 'mock',
    dimensions: 64,
    outDir: resolve('benchmarks/output'),
    jsonOnly: false,
    check: false,
    minAudreyScore: 80,
    minAudreyPassRate: 75,
    minMarginOverBaseline: 15,
    readmeAssetsDir: null,
    suite: 'all',
  };

  // Set once --dimensions is seen so a later --provider cannot overwrite the explicit value.
  let dimensionsPinned = false;

  // A NaN threshold would make every later `<` comparison false and silently disable
  // the regression gate, so unparsable numbers are rejected here.
  const toNumber = (flag, raw, parse) => {
    const parsed = parse(raw);
    if (!Number.isFinite(parsed)) {
      throw new Error(`Invalid value for ${flag}: expected a number, received "${raw}".`);
    }
    return parsed;
  };

  // A Map (not an object literal) so tokens such as "constructor" never resolve to a handler.
  const valueHandlers = new Map([
    ['--provider', raw => {
      args.provider = raw;
      if (!dimensionsPinned && providerDimensions.has(raw)) {
        args.dimensions = providerDimensions.get(raw);
      }
    }],
    ['--dimensions', raw => {
      const parsed = toNumber('--dimensions', raw, value => Number.parseInt(value, 10));
      if (parsed <= 0) {
        throw new Error(`Invalid value for --dimensions: expected a positive integer, received "${raw}".`);
      }
      args.dimensions = parsed;
      dimensionsPinned = true;
    }],
    ['--out-dir', raw => {
      args.outDir = resolve(raw);
    }],
    ['--min-audrey-score', raw => {
      args.minAudreyScore = toNumber('--min-audrey-score', raw, Number.parseFloat);
    }],
    ['--min-audrey-pass-rate', raw => {
      args.minAudreyPassRate = toNumber('--min-audrey-pass-rate', raw, Number.parseFloat);
    }],
    ['--min-margin-over-baseline', raw => {
      args.minMarginOverBaseline = toNumber('--min-margin-over-baseline', raw, Number.parseFloat);
    }],
    ['--readme-assets-dir', raw => {
      args.readmeAssetsDir = resolve(raw);
    }],
    ['--suite', raw => {
      args.suite = raw;
    }],
  ]);

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    const handler = valueHandlers.get(token);
    if (handler) {
      if (argv[i + 1]) {
        handler(argv[++i]);
      }
    } else if (token === '--json') {
      args.jsonOnly = true;
    } else if (token === '--check') {
      args.check = true;
    }
  }

  return args;
}
57
+
58
/**
 * Lower-case a value for case-insensitive substring matching.
 * Any falsy input (undefined, null, '', 0, false, NaN) becomes the empty string.
 *
 * @param {*} text - Value to normalize.
 * @returns {string} The lower-cased string form of `text`, or '' for falsy input.
 */
function normalize(text) {
  const raw = text ? text : '';
  return String(raw).toLowerCase();
}
61
+
62
/**
 * Turn a comma-separated suite selection into a de-duplicated list of suite ids.
 *
 * Tokens are trimmed and lower-cased before validation. The keyword `all` is
 * honoured in any letter case and anywhere in the list, so `All` or
 * `all,retrieval` expand to every suite instead of being rejected by an error
 * message that itself advertises `all` as valid.
 *
 * @param {string} [value='all'] - Selection such as `all` or `retrieval,operations`.
 * @returns {string[]} Selected suite ids; a copy of ALL_SUITE_IDS when `all` is requested.
 * @throws {Error} When any token is neither `all` nor a known suite id.
 */
function normalizeSuiteSelection(value = 'all') {
  const selected = String(value)
    .split(',')
    .map(token => token.trim().toLowerCase())
    .filter(Boolean);

  if (selected.includes('all')) {
    return [...ALL_SUITE_IDS];
  }

  const invalid = selected.filter(token => !ALL_SUITE_IDS.includes(token));
  if (invalid.length > 0) {
    throw new Error(`Unknown benchmark suite(s): ${invalid.join(', ')}. Valid: all, ${ALL_SUITE_IDS.join(', ')}`);
  }
  return [...new Set(selected)];
}
75
+
76
/**
 * Resolve suite ids to their suite definitions, preserving the order of
 * LOCAL_BENCHMARK_SUITES rather than the order of `suiteIds`.
 *
 * @param {string[]} suiteIds - Ids of the suites to run.
 * @returns {object[]} The matching suite definitions (never empty).
 * @throws {Error} When no suite definition matches.
 */
function selectedSuitesOrThrow(suiteIds) {
  const matches = [];
  for (const candidate of LOCAL_BENCHMARK_SUITES) {
    if (suiteIds.includes(candidate.id)) {
      matches.push(candidate);
    }
  }

  if (matches.length > 0) {
    return matches;
  }
  throw new Error('No benchmark suites selected.');
}
83
+
84
/**
 * Build a short one-line preview of the top retrieval results.
 *
 * Missing or non-string `content` is rendered via `String(content ?? '')`
 * instead of throwing, so a malformed result cannot abort the whole run.
 *
 * @param {Array<{content?: *}>} results - Ranked retrieval results.
 * @param {number} [maxItems=2] - How many leading results to include.
 * @param {number} [maxChars=72] - Maximum characters kept per result.
 * @returns {string} Previews joined by ' | ', or 'no retrieval' when there are no results.
 */
function summarizeResults(results, maxItems = 2, maxChars = 72) {
  if (!results || results.length === 0) return 'no retrieval';
  return results
    .slice(0, maxItems)
    .map(result => String(result?.content ?? '').slice(0, maxChars))
    .join(' | ');
}
91
+
92
/**
 * Score one system's retrieval results against a benchmark case.
 *
 * Matching is a case-insensitive substring test of each result's content
 * against `expectAny` (wanted) and `forbid` (blocked) terms.
 *
 * Abstention cases (`expectNone`): 1 for an empty result list, 0.5 for
 * non-empty results without blocked content, 0 when blocked content appears.
 * Other cases: 1 for a match with no blocked content, 0.5 when blocked content
 * only appears after the first match, 0 otherwise.
 *
 * @param {object} benchmarkCase - Case with optional `expectAny`, `forbid`, `expectNone`.
 * @param {Array<{content: *}>} results - Ranked retrieval results.
 * @returns {{passed: boolean, score: number, summary: string}} `passed` is true only for score 1.
 */
function evaluateCase(benchmarkCase, results) {
  const haystacks = results.map(({ content }) => normalize(content));
  const wanted = (benchmarkCase.expectAny || []).map(normalize);
  const blocked = (benchmarkCase.forbid || []).map(normalize);

  const containsAny = (content, terms) => terms.some(term => content.includes(term));

  const firstMatchIndex = wanted.length === 0
    ? -1
    : haystacks.findIndex(content => containsAny(content, wanted));
  const firstForbiddenIndex = haystacks.findIndex(content => containsAny(content, blocked));
  const hit = firstMatchIndex !== -1;
  const leaked = firstForbiddenIndex !== -1;

  if (benchmarkCase.expectNone) {
    if (leaked) {
      return { passed: false, score: 0, summary: 'leaked restricted content' };
    }
    if (results.length === 0) {
      return { passed: true, score: 1, summary: 'correct abstention' };
    }
    return { passed: false, score: 0.5, summary: 'no leak, but retrieved tangential context' };
  }

  if (!hit) {
    return { passed: false, score: 0, summary: 'missed target evidence' };
  }
  if (!leaked) {
    return { passed: true, score: 1, summary: 'retrieved expected evidence' };
  }
  if (firstForbiddenIndex > firstMatchIndex) {
    return {
      passed: false,
      score: 0.5,
      summary: 'retrieved expected evidence, but conflicting evidence still appeared later',
    };
  }
  return { passed: false, score: 0, summary: 'blocked content outranked the correct answer' };
}
131
+
132
/**
 * Load a retrieval case's memories into `brain`, one at a time and in order.
 *
 * An entry with an integer `supersedesIndex` supersedes the id returned for
 * the entry at that index. An entry with `createdAt` has its `episodes` row
 * updated to that value right after encoding. When the case defines
 * `consolidate`, a consolidation pass runs after all entries are encoded.
 *
 * @param {object} brain - Memory instance exposing `encode`, `db`, `waitForIdle`, `consolidate`.
 * @param {object} benchmarkCase - Case with a `memory` array and optional `consolidate` settings.
 * @returns {Promise<void>}
 */
async function seedRetrievalCase(brain, benchmarkCase) {
  const encodedIds = [];

  for (const entry of benchmarkCase.memory) {
    const payload = {
      content: entry.content,
      source: entry.source,
      tags: entry.tags,
      context: entry.context,
      affect: entry.affect,
      private: entry.private,
      salience: entry.salience,
      supersedes: Number.isInteger(entry.supersedesIndex)
        ? encodedIds[entry.supersedesIndex]
        : undefined,
    };
    const encodedId = await brain.encode(payload);

    if (entry.createdAt) {
      const backdate = brain.db.prepare('UPDATE episodes SET created_at = ? WHERE id = ?');
      backdate.run(entry.createdAt, encodedId);
    }

    encodedIds.push(encodedId);
  }

  if (!benchmarkCase.consolidate) {
    return;
  }

  await brain.waitForIdle();
  const plan = benchmarkCase.consolidate;
  await brain.consolidate({
    minClusterSize: plan.minClusterSize,
    similarityThreshold: plan.similarityThreshold,
    extractPrinciple: () => benchmarkCase.consolidate.principle,
  });
}
164
+
165
/**
 * Apply a single lifecycle step of an operations benchmark case to `brain`.
 *
 * Supported `step.type` values:
 * - `encode`: encodes `step.memory`, superseding the id stored under
 *   `step.supersedesRef` (if given), and stores the new id under `step.saveAs` (if given).
 * - `forgetByQuery`: waits for idle, then forgets by `step.query` with `step.options`.
 * - `consolidate`: waits for idle, then consolidates with the step's settings.
 *
 * @param {object} brain - Memory instance the step is applied to.
 * @param {object} step - Step definition.
 * @param {Map<string, *>} refs - Named ids saved by earlier `encode` steps; mutated by `saveAs`.
 * @returns {Promise<void>}
 * @throws {Error} When `step.type` is not one of the supported kinds.
 */
async function executeAudreyStep(brain, step, refs) {
  switch (step.type) {
    case 'encode': {
      const supersededId = step.supersedesRef ? refs.get(step.supersedesRef) : undefined;
      const payload = { ...step.memory, supersedes: supersededId };
      const encodedId = await brain.encode(payload);
      if (step.saveAs) {
        refs.set(step.saveAs, encodedId);
      }
      return;
    }

    case 'forgetByQuery': {
      await brain.waitForIdle();
      await brain.forgetByQuery(step.query, step.options || {});
      return;
    }

    case 'consolidate': {
      await brain.waitForIdle();
      const settings = {
        minClusterSize: step.minClusterSize,
        similarityThreshold: step.similarityThreshold,
        extractPrinciple: () => step.principle,
      };
      await brain.consolidate(settings);
      return;
    }

    default:
      throw new Error(`Unsupported Audrey benchmark step: ${step.type}`);
  }
}
196
+
197
/**
 * Run every step of an operations case against `brain`, strictly in order.
 * All steps share one ref map, so an id saved by one step can be referenced
 * by a later one. A case without `steps` is a no-op.
 *
 * @param {object} brain - Memory instance the steps are applied to.
 * @param {object} benchmarkCase - Case whose optional `steps` list is executed.
 * @returns {Promise<void>}
 */
async function seedOperationsCase(brain, benchmarkCase) {
  const savedRefs = new Map();
  const plan = benchmarkCase.steps || [];
  for (const operation of plan) {
    await executeAudreyStep(brain, operation, savedRefs);
  }
}
203
+
204
/**
 * Run one benchmark case against a fresh Audrey instance backed by a
 * throwaway data directory under `benchmarks/.tmp`.
 *
 * The case is seeded (operation steps for `kind === 'operations'`, plain
 * memories otherwise), then `benchmarkCase.query` is recalled with
 * `limit: 5` and `minConfidence: 0.05`, which `benchmarkCase.options` may override.
 *
 * Cleanup is layered so the temp directory is removed even when constructing
 * Audrey fails or when `brain.close()` throws.
 *
 * @param {object} benchmarkCase - Case definition (`id`, `kind`, `query`, optional `options`).
 * @param {object} providerConfig - Embedding configuration passed to Audrey as `embedding`.
 * @returns {Promise<Array>} The recall results for the case query.
 */
async function runAudreyCase(benchmarkCase, providerConfig) {
  const tempRoot = resolve('benchmarks/.tmp');
  mkdirSync(tempRoot, { recursive: true });
  const tempDir = mkdtempSync(join(tempRoot, 'audrey-bench-'));

  try {
    const brain = new Audrey({
      dataDir: tempDir,
      agent: `benchmark-${benchmarkCase.id}`,
      embedding: providerConfig,
    });

    try {
      // Some embedding providers expose an async readiness hook; wait for it when present.
      if (typeof brain.embeddingProvider.ready === 'function') {
        await brain.embeddingProvider.ready();
      }

      if (benchmarkCase.kind === 'operations') {
        await seedOperationsCase(brain, benchmarkCase);
      } else {
        await seedRetrievalCase(brain, benchmarkCase);
      }

      await brain.waitForIdle();
      return await brain.recall(benchmarkCase.query, {
        limit: 5,
        minConfidence: 0.05,
        ...benchmarkCase.options,
      });
    } finally {
      // Close before deleting the directory the instance was pointed at.
      brain.close();
    }
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}
236
+
237
/**
 * Run one benchmark case against a named baseline system, asking for at most
 * five results (the same limit used for the Audrey run).
 *
 * @param {string} system - Baseline name, e.g. 'Vector Only'.
 * @param {object} benchmarkCase - Case definition.
 * @param {object} providerConfig - Embedding configuration.
 * @returns {Promise<Array>} Whatever `runBaselineScenario` resolves to.
 */
async function runBaselineCase(system, benchmarkCase, providerConfig) {
  const resultLimit = 5;
  const items = await runBaselineScenario(system, benchmarkCase, providerConfig, resultLimit);
  return items;
}
240
+
241
/**
 * Run a benchmark case through Audrey and the three baselines, one after
 * another, and score each system's results.
 *
 * `durationMs` is wall-clock time from just before the run until just after
 * scoring, so it includes the evaluation step.
 *
 * @param {object} benchmarkCase - Case definition.
 * @param {object} providerConfig - Embedding configuration shared by all systems.
 * @returns {Promise<Array<{system: string, durationMs: number, passed: boolean, score: number,
 *   summary: string, topResults: Array, retrievalSummary: string}>>} One row per system,
 *   in the order Audrey, Vector Only, Keyword + Recency, Recent Window.
 */
async function runSystemsForCase(benchmarkCase, providerConfig) {
  const baselineNames = ['Vector Only', 'Keyword + Recency', 'Recent Window'];
  const contenders = [
    ['Audrey', () => runAudreyCase(benchmarkCase, providerConfig)],
    ...baselineNames.map(name => [name, () => runBaselineCase(name, benchmarkCase, providerConfig)]),
  ];

  const outcomes = [];
  for (const [label, execute] of contenders) {
    const startedAt = Date.now();
    const items = await execute();
    const { passed, score, summary } = evaluateCase(benchmarkCase, items);
    const durationMs = Date.now() - startedAt;

    outcomes.push({
      system: label,
      durationMs,
      passed,
      score,
      summary,
      topResults: items.slice(0, 3).map(({ content }) => content),
      retrievalSummary: summarizeResults(items),
    });
  }
  return outcomes;
}
267
+
268
/**
 * Aggregate per-case results into one row per system.
 *
 * @param {Array<{results: Array<{system: string, score: number, passed: boolean,
 *   durationMs: number}>}>} caseResults - Case results, each holding one entry per system.
 * @returns {Array<{system: string, scorePercent: number, passRate: number,
 *   avgDurationMs: number}>} Rows sorted by `scorePercent` descending; ties keep
 *   the order in which systems were first seen.
 */
function summarizeLocalResults(caseResults) {
  const tallies = new Map();

  const rows = caseResults.flatMap(caseResult => caseResult.results);
  for (const { system, score, passed, durationMs } of rows) {
    let tally = tallies.get(system);
    if (tally === undefined) {
      tally = { totalScore: 0, passCount: 0, totalCases: 0, durationMs: 0 };
      tallies.set(system, tally);
    }
    tally.totalScore += score;
    if (passed) {
      tally.passCount += 1;
    }
    tally.totalCases += 1;
    tally.durationMs += durationMs;
  }

  const table = [];
  for (const [system, tally] of tallies) {
    const caseCount = tally.totalCases;
    const perCase = total => (caseCount === 0 ? 0 : total / caseCount);
    table.push({
      system,
      scorePercent: perCase(tally.totalScore) * 100,
      passRate: perCase(tally.passCount) * 100,
      avgDurationMs: perCase(tally.durationMs),
    });
  }

  table.sort((left, right) => right.scorePercent - left.scorePercent);
  return table;
}
298
+
299
/**
 * Summarize scores per case family and system.
 *
 * Each system's value is the mean score over all cases of that family. With
 * a single case per family this equals that case's score; with several cases
 * it no longer reports only whichever case happened to run last.
 *
 * Families listed in FAMILY_ORDER come first, in that order; any other family
 * follows in first-seen order. Families without any result are omitted.
 *
 * @param {Array<{family: string, results: Array<{system: string, score: number}>}>} caseResults
 * @returns {Array<{family: string, systems: Object<string, number>}>}
 */
function summarizeByFamily(caseResults) {
  // family -> (system -> running total and case count)
  const tallies = new Map();
  for (const family of FAMILY_ORDER) {
    tallies.set(family, new Map());
  }

  for (const caseResult of caseResults) {
    let perSystem = tallies.get(caseResult.family);
    if (!perSystem) {
      perSystem = new Map();
      tallies.set(caseResult.family, perSystem);
    }
    for (const result of caseResult.results) {
      const tally = perSystem.get(result.system) ?? { total: 0, count: 0 };
      tally.total += result.score;
      tally.count += 1;
      perSystem.set(result.system, tally);
    }
  }

  const summary = [];
  for (const [family, perSystem] of tallies) {
    if (perSystem.size === 0) continue;
    const systems = {};
    for (const [system, { total, count }] of perSystem) {
      systems[system] = total / count;
    }
    summary.push({ family, systems });
  }
  return summary;
}
315
+
316
/**
 * Build one summary per suite from the cases whose `suite` equals the suite id.
 *
 * @param {Array<object>} caseResults - Case results across all suites that were run.
 * @param {Array<{id: string, title: string, description: string}>} suites - Suites to summarize.
 * @returns {Array<{id: string, title: string, description: string, overall: Array,
 *   byFamily: Array, cases: Array}>} Summaries in the same order as `suites`.
 */
function summarizeSuites(caseResults, suites) {
  const summaries = [];
  for (const { id, title, description } of suites) {
    const cases = caseResults.filter(entry => entry.suite === id);
    summaries.push({
      id,
      title,
      description,
      overall: summarizeLocalResults(cases),
      byFamily: summarizeByFamily(cases),
      cases,
    });
  }
  return summaries;
}
329
+
330
/**
 * Render the shell command recorded in the report for this run. The `--suite`
 * flag is left out when the selection has as many ids as ALL_SUITE_IDS.
 *
 * @param {{provider: string, dimensions: number}} providerConfig - Embedding configuration used.
 * @param {string[]} suiteIds - Ids of the suites that were run.
 * @returns {string} The command line as a single string.
 */
function commandForSummary(providerConfig, suiteIds) {
  const { provider, dimensions } = providerConfig;
  const parts = [`node benchmarks/run.js --provider ${provider} --dimensions ${dimensions}`];
  if (suiteIds.length !== ALL_SUITE_IDS.length) {
    parts.push(`--suite ${suiteIds.join(',')}`);
  }
  return parts.join(' ');
}
334
+
335
/**
 * Enforce the benchmark regression gate on a run summary.
 *
 * Audrey must reach the minimum score and pass rate, and must lead the
 * best-scoring other system by the required margin (the margin check is
 * skipped when there is no other system).
 *
 * Every check is written as `!(value >= threshold)` so a NaN or missing metric
 * fails the gate instead of slipping through, and thresholds that are not
 * finite numbers are rejected up front for the same reason.
 *
 * @param {{local: {overall: Array<{system: string, scorePercent: number, passRate: number}>}}} summary
 * @param {object} [options]
 * @param {number} [options.minAudreyScore=80] - Minimum Audrey score, in percent.
 * @param {number} [options.minAudreyPassRate=75] - Minimum Audrey pass rate, in percent.
 * @param {number} [options.minMarginOverBaseline=15] - Minimum lead over the strongest baseline, in points.
 * @returns {{audrey: object, strongestBaseline: (object|undefined),
 *   marginOverBaseline: (number|null), thresholds: object}}
 * @throws {TypeError} When a threshold is not a finite number.
 * @throws {Error} When Audrey's row is missing or any gate check fails.
 */
export function assertBenchmarkGuardrails(summary, options = {}) {
  const settings = {
    minAudreyScore: options.minAudreyScore ?? 80,
    minAudreyPassRate: options.minAudreyPassRate ?? 75,
    minMarginOverBaseline: options.minMarginOverBaseline ?? 15,
  };
  for (const [name, value] of Object.entries(settings)) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
      throw new TypeError(`Benchmark guardrail "${name}" must be a finite number, received ${String(value)}.`);
    }
  }

  const audrey = summary.local.overall.find(row => row.system === 'Audrey');
  if (!audrey) {
    throw new Error('Audrey results were missing from the local benchmark summary.');
  }

  // filter() returns a fresh array, so sorting it does not reorder the summary.
  const strongestBaseline = summary.local.overall
    .filter(row => row.system !== 'Audrey')
    .sort((a, b) => b.scorePercent - a.scorePercent)[0];
  const marginOverBaseline = strongestBaseline
    ? audrey.scorePercent - strongestBaseline.scorePercent
    : null;

  // Metrics may be NaN or missing on a malformed summary; never call toFixed on a non-number.
  const formatMetric = value => (typeof value === 'number' ? value.toFixed(1) : String(value));
  const failures = [];

  if (!(audrey.scorePercent >= settings.minAudreyScore)) {
    failures.push(
      `Audrey score ${formatMetric(audrey.scorePercent)}% fell below ${settings.minAudreyScore.toFixed(1)}%.`
    );
  }

  if (!(audrey.passRate >= settings.minAudreyPassRate)) {
    failures.push(
      `Audrey pass rate ${formatMetric(audrey.passRate)}% fell below ${settings.minAudreyPassRate.toFixed(1)}%.`
    );
  }

  if (strongestBaseline && !(marginOverBaseline >= settings.minMarginOverBaseline)) {
    failures.push(
      `Audrey beat ${strongestBaseline.system} by ${formatMetric(marginOverBaseline)} points, below the required `
        + `${settings.minMarginOverBaseline.toFixed(1)}-point margin.`
    );
  }

  if (failures.length) {
    throw new Error(`Benchmark regression gate failed:\n- ${failures.join('\n- ')}`);
  }

  return {
    audrey,
    strongestBaseline,
    marginOverBaseline,
    thresholds: settings,
  };
}
384
+
385
/**
 * Run the selected local benchmark suites and assemble the full report summary.
 *
 * Cases run sequentially, each against every system. When `options.dimensions`
 * is not given, the default follows the provider (mock 64, local 384,
 * openai 1536, gemini 3072), the same values the CLI parser applies, so
 * programmatic callers get matching behavior. Unknown providers fall back to 64.
 *
 * @param {object} [options]
 * @param {string} [options.provider='mock'] - Embedding provider name.
 * @param {number} [options.dimensions] - Embedding dimensions; defaults per provider.
 * @param {string} [options.suite='all'] - Comma-separated suite ids, or `all`.
 * @returns {Promise<object>} Summary with `generatedAt`, `command`, `config`,
 *   `methodology`, `local`, `external` and `trends`.
 * @throws {Error} When the suite selection is invalid or selects nothing.
 */
export async function runBenchmarkSuite(options = {}) {
  const defaultDimensions = new Map([
    ['mock', 64],
    ['local', 384],
    ['openai', 1536],
    ['gemini', 3072],
  ]);
  const provider = options.provider || 'mock';
  const providerConfig = {
    provider,
    dimensions: options.dimensions || defaultDimensions.get(provider) || 64,
  };
  const suiteIds = normalizeSuiteSelection(options.suite || 'all');
  const selectedSuites = selectedSuitesOrThrow(suiteIds);

  const caseResults = [];
  for (const suite of selectedSuites) {
    for (const benchmarkCase of suite.cases) {
      const results = await runSystemsForCase(benchmarkCase, providerConfig);
      caseResults.push({
        id: benchmarkCase.id,
        // Fall back to the owning suite so a case without its own `suite` field
        // is still counted in that suite's summary.
        suite: benchmarkCase.suite ?? suite.id,
        title: benchmarkCase.title,
        family: benchmarkCase.family,
        description: benchmarkCase.description,
        query: benchmarkCase.query,
        results,
      });
    }
  }

  const localOverall = summarizeLocalResults(caseResults);
  const localByFamily = summarizeByFamily(caseResults);
  const localSuites = summarizeSuites(caseResults, selectedSuites);

  return {
    generatedAt: new Date().toISOString(),
    command: commandForSummary(providerConfig, suiteIds),
    config: {
      ...providerConfig,
      suites: suiteIds,
    },
    methodology: {
      localBenchmark: 'LongMemEval-inspired retrieval benchmark plus operation-level lifecycle benchmark',
      retrievalBenchmark: 'Information extraction, updates, reasoning, procedural learning, privacy, abstention, and conflict handling',
      operationsBenchmark: 'Update, overwrite, delete, merge, and abstention behavior after lifecycle operations',
      externalLeaderboard: 'Published LoCoMo scores from official papers and project blogs',
    },
    local: {
      overall: localOverall,
      byFamily: localByFamily,
      suites: localSuites,
      cases: caseResults,
    },
    external: {
      benchmark: 'LoCoMo',
      // Copy before sorting so the imported leaderboard array is not reordered.
      leaderboard: [...PUBLISHED_LEADERBOARD].sort((a, b) => b.score - a.score),
    },
    trends: MEMORY_TRENDS,
  };
}
439
+
440
/**
 * CLI entry point: parse flags, run the benchmark, write report artifacts,
 * optionally enforce the regression gate, and print the outcome.
 *
 * Artifacts are written before the gate is evaluated, so a failing `--check`
 * still leaves the reports on disk. With `--json` the whole outcome is printed
 * as pretty JSON; otherwise a plain-text summary is printed.
 *
 * @param {object} [io]
 * @param {string[]} [io.argv] - CLI tokens; defaults to `process.argv.slice(2)`.
 * @param {function(string): void} [io.out] - Output sink; defaults to `console.log`.
 * @returns {Promise<{summary: object, artifacts: object, gate: (object|null)}>}
 *   `gate` is null unless `--check` was given.
 */
export async function runBenchmarkCli({ argv = process.argv.slice(2), out = console.log } = {}) {
  const args = parseArgs(argv);
  const summary = await runBenchmarkSuite(args);
  const { local, external, trends, config } = summary;

  const artifacts = writeBenchmarkArtifacts({
    outputDir: args.outDir,
    summary,
    localOverall: local.overall,
    localSuites: local.suites,
    externalOverall: external.leaderboard,
    trends,
    readmeAssetsDir: args.readmeAssetsDir,
  });

  let gate = null;
  if (args.check) {
    const { minAudreyScore, minAudreyPassRate, minMarginOverBaseline } = args;
    gate = assertBenchmarkGuardrails(summary, {
      minAudreyScore,
      minAudreyPassRate,
      minMarginOverBaseline,
    });
  }

  const outcome = { summary, artifacts, gate };
  if (args.jsonOnly) {
    out(JSON.stringify(outcome, null, 2));
    return outcome;
  }

  const suiteTitles = config.suites
    .map(suiteId => SUITE_LABELS.get(suiteId) || suiteId)
    .join(', ');
  const systemLines = local.overall.map(
    row => `${row.system}: ${row.scorePercent.toFixed(1)}% score, ${row.passRate.toFixed(1)}% pass rate, `
      + `${row.avgDurationMs.toFixed(1)} ms avg/case`
  );
  const suiteLines = local.suites.map(suite => {
    const audreyRow = suite.overall.find(row => row.system === 'Audrey');
    const percent = audreyRow == null ? '0.0' : audreyRow.scorePercent.toFixed(1);
    return `${suite.title}: Audrey ${percent}%`;
  });

  const report = [
    'Audrey benchmark complete.',
    '',
    `Suites: ${suiteTitles}`,
    ...systemLines,
    '',
    ...suiteLines,
    '',
    `JSON report: ${artifacts.json}`,
    `HTML report: ${artifacts.html}`,
    `Local chart: ${artifacts.localChart}`,
  ];

  for (const { title, path } of artifacts.suiteCharts) {
    report.push(`${title}: ${path}`);
  }
  report.push(`Published chart: ${artifacts.externalChart}`);

  const readme = artifacts.readmeAssets;
  if (readme) {
    report.push(`README local chart: ${readme.localChart}`);
    if (readme.operationsChart) {
      report.push(`README operations chart: ${readme.operationsChart}`);
    }
    report.push(`README published chart: ${readme.externalChart}`);
  }

  if (gate) {
    const baselineLabel = gate.strongestBaseline
      ? `${gate.strongestBaseline.system} by ${gate.marginOverBaseline.toFixed(1)} points`
      : 'all local baselines';
    report.push(
      '',
      `Regression gate passed: Audrey stayed above ${gate.thresholds.minAudreyScore.toFixed(1)}% and ahead of ${baselineLabel}.`
    );
  }

  out(report.join('\n'));
  return outcome;
}
508
+
509
// Run the CLI only when this module is the script Node was started with, not
// when it is imported. A failure is logged and reported through the exit code.
const entryScript = process.argv[1];
const invokedDirectly = Boolean(entryScript)
  && fileURLToPath(import.meta.url) === resolve(entryScript);

if (invokedDirectly) {
  runBenchmarkCli().catch(error => {
    console.error('[audrey] benchmark failed:', error);
    process.exitCode = 1;
  });
}
@@ -0,0 +1,45 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <svg xmlns="http://www.w3.org/2000/svg" width="960" height="420" viewBox="0 0 960 420" role="img" aria-label="Audrey vs Local Memory Baselines">
3
+ <rect width="100%" height="100%" fill="white" />
4
+ <text x="64" y="34" font-size="24" font-weight="700" fill="#111827">Audrey vs Local Memory Baselines</text>
5
+
6
+ <line x1="64" y1="332" x2="928" y2="332" stroke="#cbd5e1" stroke-dasharray="4 4" />
7
+ <text x="54" y="337" text-anchor="end" font-size="13" fill="#6b7280">0%</text>
8
+
9
+
10
+ <line x1="64" y1="263" x2="928" y2="263" stroke="#cbd5e1" stroke-dasharray="4 4" />
11
+ <text x="54" y="268" text-anchor="end" font-size="13" fill="#6b7280">25%</text>
12
+
13
+
14
+ <line x1="64" y1="194" x2="928" y2="194" stroke="#cbd5e1" stroke-dasharray="4 4" />
15
+ <text x="54" y="199" text-anchor="end" font-size="13" fill="#6b7280">50%</text>
16
+
17
+
18
+ <line x1="64" y1="125" x2="928" y2="125" stroke="#cbd5e1" stroke-dasharray="4 4" />
19
+ <text x="54" y="130" text-anchor="end" font-size="13" fill="#6b7280">75%</text>
20
+
21
+
22
+ <line x1="64" y1="56" x2="928" y2="56" stroke="#cbd5e1" stroke-dasharray="4 4" />
23
+ <text x="54" y="61" text-anchor="end" font-size="13" fill="#6b7280">100%</text>
24
+
25
+
26
+ <rect x="64" y="56" width="198" height="276" rx="8" fill="#0f766e" />
27
+ <text x="163" y="46" text-anchor="middle" font-size="15" fill="#111827">100.0%</text>
28
+ <text x="163" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Audrey</text>
29
+
30
+
31
+ <rect x="286" y="217" width="198" height="115.00000000000001" rx="8" fill="#0369a1" />
32
+ <text x="385" y="207" text-anchor="middle" font-size="15" fill="#111827">41.7%</text>
33
+ <text x="385" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Vector Only</text>
34
+
35
+
36
+ <rect x="508" y="217" width="198" height="115.00000000000001" rx="8" fill="#6d28d9" />
37
+ <text x="607" y="207" text-anchor="middle" font-size="15" fill="#111827">41.7%</text>
38
+ <text x="607" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Keyword + Recency</text>
39
+
40
+
41
+ <rect x="730" y="228.5" width="198" height="103.5" rx="8" fill="#b45309" />
42
+ <text x="829" y="218.5" text-anchor="middle" font-size="15" fill="#111827">37.5%</text>
43
+ <text x="829" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Recent Window</text>
44
+
45
+ </svg>
@@ -0,0 +1,45 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <svg xmlns="http://www.w3.org/2000/svg" width="960" height="420" viewBox="0 0 960 420" role="img" aria-label="Audrey Memory Operations Benchmark">
3
+ <rect width="100%" height="100%" fill="white" />
4
+ <text x="64" y="34" font-size="24" font-weight="700" fill="#111827">Audrey Memory Operations Benchmark</text>
5
+
6
+ <line x1="64" y1="332" x2="928" y2="332" stroke="#cbd5e1" stroke-dasharray="4 4" />
7
+ <text x="54" y="337" text-anchor="end" font-size="13" fill="#6b7280">0%</text>
8
+
9
+
10
+ <line x1="64" y1="263" x2="928" y2="263" stroke="#cbd5e1" stroke-dasharray="4 4" />
11
+ <text x="54" y="268" text-anchor="end" font-size="13" fill="#6b7280">25%</text>
12
+
13
+
14
+ <line x1="64" y1="194" x2="928" y2="194" stroke="#cbd5e1" stroke-dasharray="4 4" />
15
+ <text x="54" y="199" text-anchor="end" font-size="13" fill="#6b7280">50%</text>
16
+
17
+
18
+ <line x1="64" y1="125" x2="928" y2="125" stroke="#cbd5e1" stroke-dasharray="4 4" />
19
+ <text x="54" y="130" text-anchor="end" font-size="13" fill="#6b7280">75%</text>
20
+
21
+
22
+ <line x1="64" y1="56" x2="928" y2="56" stroke="#cbd5e1" stroke-dasharray="4 4" />
23
+ <text x="54" y="61" text-anchor="end" font-size="13" fill="#6b7280">100%</text>
24
+
25
+
26
+ <rect x="64" y="56" width="198" height="276" rx="8" fill="#0f766e" />
27
+ <text x="163" y="46" text-anchor="middle" font-size="15" fill="#111827">100.0%</text>
28
+ <text x="163" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Audrey</text>
29
+
30
+
31
+ <rect x="286" y="263" width="198" height="69" rx="8" fill="#6d28d9" />
32
+ <text x="385" y="253" text-anchor="middle" font-size="15" fill="#111827">25.0%</text>
33
+ <text x="385" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Keyword + Recency</text>
34
+
35
+
36
+ <rect x="508" y="297.5" width="198" height="34.5" rx="8" fill="#0369a1" />
37
+ <text x="607" y="287.5" text-anchor="middle" font-size="15" fill="#111827">12.5%</text>
38
+ <text x="607" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Vector Only</text>
39
+
40
+
41
+ <rect x="730" y="297.5" width="198" height="34.5" rx="8" fill="#b45309" />
42
+ <text x="829" y="287.5" text-anchor="middle" font-size="15" fill="#111827">12.5%</text>
43
+ <text x="829" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Recent Window</text>
44
+
45
+ </svg>
@@ -0,0 +1,50 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <svg xmlns="http://www.w3.org/2000/svg" width="960" height="420" viewBox="0 0 960 420" role="img" aria-label="Published LLM Memory Standards (LoCoMo)">
3
+ <rect width="100%" height="100%" fill="white" />
4
+ <text x="64" y="34" font-size="24" font-weight="700" fill="#111827">Published LLM Memory Standards (LoCoMo)</text>
5
+
6
+ <line x1="64" y1="332" x2="928" y2="332" stroke="#cbd5e1" stroke-dasharray="4 4" />
7
+ <text x="54" y="337" text-anchor="end" font-size="13" fill="#6b7280">0%</text>
8
+
9
+
10
+ <line x1="64" y1="263" x2="928" y2="263" stroke="#cbd5e1" stroke-dasharray="4 4" />
11
+ <text x="54" y="268" text-anchor="end" font-size="13" fill="#6b7280">25%</text>
12
+
13
+
14
+ <line x1="64" y1="194" x2="928" y2="194" stroke="#cbd5e1" stroke-dasharray="4 4" />
15
+ <text x="54" y="199" text-anchor="end" font-size="13" fill="#6b7280">50%</text>
16
+
17
+
18
+ <line x1="64" y1="125" x2="928" y2="125" stroke="#cbd5e1" stroke-dasharray="4 4" />
19
+ <text x="54" y="130" text-anchor="end" font-size="13" fill="#6b7280">75%</text>
20
+
21
+
22
+ <line x1="64" y1="56" x2="928" y2="56" stroke="#cbd5e1" stroke-dasharray="4 4" />
23
+ <text x="54" y="61" text-anchor="end" font-size="13" fill="#6b7280">100%</text>
24
+
25
+
26
+ <rect x="64" y="96.29599999999996" width="154" height="235.70400000000004" rx="8" fill="#1d4ed8" />
27
+ <text x="141" y="86.29599999999996" text-anchor="middle" font-size="15" fill="#111827">85.4%</text>
28
+ <text x="141" y="378" text-anchor="middle" font-size="14" fill="#6b7280">MIRIX</text>
29
+
30
+
31
+ <rect x="241.5" y="127.75999999999999" width="154" height="204.24" rx="8" fill="#1d4ed8" />
32
+ <text x="318.5" y="117.75999999999999" text-anchor="middle" font-size="15" fill="#111827">74.0%</text>
33
+ <text x="318.5" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Letta Filesystem</text>
34
+
35
+
36
+ <rect x="419" y="142.94" width="154" height="189.06" rx="8" fill="#1d4ed8" />
37
+ <text x="496" y="132.94" text-anchor="middle" font-size="15" fill="#111827">68.5%</text>
38
+ <text x="496" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Mem0 Graph Memory</text>
39
+
40
+
41
+ <rect x="596.5" y="147.356" width="154" height="184.644" rx="8" fill="#1d4ed8" />
42
+ <text x="673.5" y="137.356" text-anchor="middle" font-size="15" fill="#111827">66.9%</text>
43
+ <text x="673.5" y="378" text-anchor="middle" font-size="14" fill="#6b7280">Mem0</text>
44
+
45
+
46
+ <rect x="774" y="185.99599999999998" width="154" height="146.00400000000002" rx="8" fill="#1d4ed8" />
47
+ <text x="851" y="175.99599999999998" text-anchor="middle" font-size="15" fill="#111827">52.9%</text>
48
+ <text x="851" y="378" text-anchor="middle" font-size="14" fill="#6b7280">OpenAI Memory</text>
49
+
50
+ </svg>