audrey 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +310 -643
- package/benchmarks/baselines.js +169 -0
- package/benchmarks/cases.js +421 -0
- package/benchmarks/reference-results.js +70 -0
- package/benchmarks/report.js +255 -0
- package/benchmarks/run.js +514 -0
- package/docs/assets/benchmarks/local-benchmark.svg +45 -0
- package/docs/assets/benchmarks/operations-benchmark.svg +45 -0
- package/docs/assets/benchmarks/published-memory-standards.svg +50 -0
- package/docs/benchmarking.md +151 -0
- package/docs/production-readiness.md +96 -0
- package/examples/fintech-ops-demo.js +67 -0
- package/examples/healthcare-ops-demo.js +67 -0
- package/examples/stripe-demo.js +105 -0
- package/mcp-server/config.js +81 -24
- package/mcp-server/index.js +611 -75
- package/mcp-server/serve.js +482 -0
- package/package.json +24 -5
- package/src/audrey.js +51 -13
- package/src/consolidate.js +70 -54
- package/src/db.js +22 -1
- package/src/embedding.js +16 -12
- package/src/encode.js +8 -2
- package/src/fts.js +134 -0
- package/src/import.js +28 -0
- package/src/llm.js +6 -3
- package/src/migrate.js +2 -2
- package/src/recall.js +253 -32
- package/src/utils.js +25 -0
- package/types/index.d.ts +434 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
|
|
4
|
+
/**
 * Colour tokens shared by the SVG charts and the HTML report stylesheet.
 *
 * Frozen so that no renderer can accidentally mutate the shared table.
 */
const PALETTE = Object.freeze({
  // Bar fills, chosen per system label by chartBarColor().
  audrey: '#0f766e',
  vector: '#0369a1',
  keyword: '#6d28d9',
  recent: '#b45309',
  external: '#1d4ed8',
  // Text colours and page chrome.
  accent: '#111827',
  muted: '#6b7280',
  surface: '#f8fafc',
  border: '#cbd5e1',
});
|
|
15
|
+
|
|
16
|
+
/**
 * Escape a value for safe interpolation into HTML/SVG text content or a
 * double-quoted attribute value.
 *
 * @param {*} text - Value to escape; coerced with `String()` first.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml(text) {
  return String(text)
    // `&` must be handled first so the entities produced below are not re-escaped.
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;');
}
|
|
23
|
+
|
|
24
|
+
/**
 * Choose the bar fill colour for a benchmark system label.
 *
 * The exact label `Audrey` gets its own colour; otherwise the first of
 * `Vector`, `Keyword`, `Recent` found as a substring decides, and anything
 * else falls back to the external colour.
 *
 * @param {string} label - System label shown under the bar.
 * @returns {string} A colour taken from PALETTE.
 */
function chartBarColor(label) {
  if (label === 'Audrey') {
    return PALETTE.audrey;
  }

  // Checked in order; the first substring hit wins.
  const substringColors = [
    ['Vector', PALETTE.vector],
    ['Keyword', PALETTE.keyword],
    ['Recent', PALETTE.recent],
  ];
  const hit = substringColors.find(([needle]) => label.includes(needle));
  return hit ? hit[1] : PALETTE.external;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Render a vertical bar chart as a standalone SVG document string.
 *
 * @param {object} options
 * @param {string} options.title - Chart heading; also used as the SVG `aria-label`.
 * @param {Array<{label: string, value: number}>} options.rows - One bar per row, drawn left to right.
 * @param {string} [options.valueSuffix='%'] - Text appended to bar values and axis tick labels.
 * @param {number} [options.maxValue=100] - Value mapped to the top of the plot; bar values are
 *   clamped to `[0, maxValue]`. A non-positive or non-finite value falls back to 100.
 * @returns {string} SVG markup, including the XML declaration.
 */
function renderBarChart({ title, rows, valueSuffix = '%', maxValue = 100 }) {
  const width = 960;
  const height = 420;
  const margin = { top: 56, right: 32, bottom: 88, left: 64 };
  const plotWidth = width - margin.left - margin.right;
  const plotHeight = height - margin.top - margin.bottom;

  // A zero, negative or non-finite scale would divide by zero below.
  const scaleMax = Number.isFinite(maxValue) && maxValue > 0 ? maxValue : 100;
  const suffix = escapeHtml(valueSuffix);

  // Prefer bars at least 32px wide, but never wider than their slot so that
  // crowded charts cannot produce a negative gap and overlapping bars.
  const slotWidth = plotWidth / Math.max(rows.length, 1);
  const barWidth = Math.min(Math.max(32, Math.floor(slotWidth) - 18), slotWidth);
  const gap = rows.length > 1 ? (plotWidth - barWidth * rows.length) / (rows.length - 1) : 0;

  const bars = rows.map((row, index) => {
    // Missing or non-numeric scores are drawn as zero-height bars instead of emitting NaN.
    const numeric = Number(row.value);
    const value = Math.max(0, Math.min(scaleMax, Number.isFinite(numeric) ? numeric : 0));
    const barHeight = (value / scaleMax) * plotHeight;
    const x = margin.left + index * (barWidth + gap);
    const y = margin.top + plotHeight - barHeight;
    return `
  <rect x="${x}" y="${y}" width="${barWidth}" height="${barHeight}" rx="8" fill="${chartBarColor(row.label)}" />
  <text x="${x + barWidth / 2}" y="${y - 10}" text-anchor="middle" font-size="15" fill="${PALETTE.accent}">${value.toFixed(1)}${suffix}</text>
  <text x="${x + barWidth / 2}" y="${height - 42}" text-anchor="middle" font-size="14" fill="${PALETTE.muted}">${escapeHtml(row.label)}</text>
`;
  }).join('\n');

  // Grid lines sit at quarter steps of the scale so they follow maxValue
  // (0/25/50/75/100 for the default scale).
  const grid = [0, 0.25, 0.5, 0.75, 1].map(fraction => {
    const y = margin.top + plotHeight - fraction * plotHeight;
    const tick = Number((fraction * scaleMax).toFixed(2));
    return `
  <line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" stroke="${PALETTE.border}" stroke-dasharray="4 4" />
  <text x="${margin.left - 10}" y="${y + 5}" text-anchor="end" font-size="13" fill="${PALETTE.muted}">${tick}${suffix}</text>
`;
  }).join('\n');

  return `<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" role="img" aria-label="${escapeHtml(title)}">
  <rect width="100%" height="100%" fill="white" />
  <text x="${margin.left}" y="34" font-size="24" font-weight="700" fill="${PALETTE.accent}">${escapeHtml(title)}</text>
  ${grid}
  ${bars}
</svg>`;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Render the memory-trend entries as `<li>` items for the report.
 *
 * @param {Array<{title: string, summary: string, source: string}>} trends -
 *   Each entry's `source` is used both as the link target and the link text.
 * @returns {string} Newline-joined `<li>` markup; an empty string for an empty list.
 */
function renderTrendList(trends) {
  return trends.map(trend => {
    // Escape once and reuse: the URL lands in a double-quoted attribute as
    // well as in text, so `&` and `"` must not reach the markup raw.
    const source = escapeHtml(trend.source);
    return `
      <li>
        <strong>${escapeHtml(trend.title)}</strong><br />
        ${escapeHtml(trend.summary)}<br />
        <a href="${source}">${source}</a>
      </li>
    `;
  }).join('\n');
}
|
|
79
|
+
|
|
80
|
+
/**
 * Render one `<tr>` per benchmark case for the case-matrix table.
 *
 * Each row starts with the case title, suite and family, followed by one
 * score cell per entry in `caseResult.results`. Cells are tinted green when
 * the result passed, orange when it failed with a score of at least 0.5,
 * and red otherwise.
 *
 * @param {Array<object>} localCases - Case results with `title`, `suite`,
 *   `family` and a `results` array of `{ passed, score, summary }`.
 * @returns {string} Newline-joined `<tr>` markup.
 */
function renderCaseRows(localCases) {
  const scoreCell = result => {
    // Start from the failing (red) tone and upgrade when the result earns it.
    let bg = '#fef2f2';
    let fg = '#991b1b';
    if (result.passed) {
      bg = '#ecfdf5';
      fg = '#065f46';
    } else if (result.score >= 0.5) {
      bg = '#fff7ed';
      fg = '#9a3412';
    }
    return `<td style="background:${bg};color:${fg}">${result.score.toFixed(2)}<br /><span style="font-size:12px">${escapeHtml(result.summary)}</span></td>`;
  };

  const tableRows = [];
  for (const caseResult of localCases) {
    tableRows.push(`
      <tr>
        <td>${escapeHtml(caseResult.title)}</td>
        <td>${escapeHtml(caseResult.suite)}</td>
        <td>${escapeHtml(caseResult.family)}</td>
        ${caseResult.results.map(result => scoreCell(result)).join('')}
      </tr>
    `);
  }
  return tableRows.join('\n');
}
|
|
94
|
+
|
|
95
|
+
/**
 * Render one report section per suite chart: heading, description and the
 * chart image referenced relative to the report file.
 *
 * @param {Array<{title: string, description: string, fileName: string}>} suiteCharts
 * @returns {string} Newline-joined `<section>` markup; an empty string when there are no charts.
 */
function renderSuiteSections(suiteCharts) {
  if (suiteCharts.length === 0) {
    return '';
  }

  const sections = [];
  for (const chart of suiteCharts) {
    const heading = escapeHtml(chart.title);
    const description = escapeHtml(chart.description);
    const imageFile = escapeHtml(chart.fileName);
    sections.push(`
        <section class="callout">
          <h2>${heading}</h2>
          <p>${description}</p>
          <img src="./${imageFile}" alt="${heading} chart" />
        </section>
      `);
  }
  return sections.join('\n');
}
|
|
105
|
+
|
|
106
|
+
/** Map benchmark result rows to the `{ label, value }` rows renderBarChart takes. */
function toChartRows(rows, valueKey) {
  return rows.map(row => ({ label: row.system, value: row[valueKey] }));
}

/** Write `contents` to `filePath` as UTF-8 and hand the path back for reuse. */
function writeTextFile(filePath, contents) {
  writeFileSync(filePath, contents, 'utf8');
  return filePath;
}

/** Write the README copies of the headline charts and return their paths. */
function writeReadmeAssets({ readmeAssetsDir, localChart, externalChart, operationsSuite }) {
  mkdirSync(readmeAssetsDir, { recursive: true });
  const localPath = writeTextFile(join(readmeAssetsDir, 'local-benchmark.svg'), localChart);
  const externalPath = writeTextFile(join(readmeAssetsDir, 'published-memory-standards.svg'), externalChart);

  // The operations chart is only exported when an `operations` suite was run.
  let operationsPath = null;
  if (operationsSuite) {
    operationsPath = writeTextFile(
      join(readmeAssetsDir, 'operations-benchmark.svg'),
      renderBarChart({
        title: 'Audrey Memory Operations Benchmark',
        rows: toChartRows(operationsSuite.overall, 'scorePercent'),
      }),
    );
  }

  return {
    localChart: localPath,
    operationsChart: operationsPath,
    externalChart: externalPath,
  };
}

/** Build the standalone HTML report that references the charts by relative path. */
function renderReportHtml({ summary, suiteCharts, trends }) {
  const systemHeaders = summary.local.overall
    .map(row => `<th>${escapeHtml(row.system)}</th>`)
    .join('');

  return `<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>Audrey Memory Benchmark</title>
  <style>
    body { font-family: "Segoe UI", Arial, sans-serif; margin: 32px; color: ${PALETTE.accent}; background: ${PALETTE.surface}; }
    main { max-width: 1120px; margin: 0 auto; }
    h1, h2 { margin-bottom: 12px; }
    p, li { line-height: 1.5; }
    .callout { background: white; border: 1px solid ${PALETTE.border}; border-radius: 16px; padding: 20px; margin-bottom: 24px; }
    .grid { display: grid; gap: 24px; grid-template-columns: 1fr; }
    img { width: 100%; border: 1px solid ${PALETTE.border}; border-radius: 16px; background: white; }
    table { width: 100%; border-collapse: collapse; background: white; border-radius: 16px; overflow: hidden; }
    th, td { border: 1px solid ${PALETTE.border}; padding: 12px; vertical-align: top; text-align: left; }
    th { background: #e2e8f0; }
    code { background: #e2e8f0; padding: 2px 6px; border-radius: 6px; }
  </style>
</head>
<body>
  <main>
    <h1>Audrey Memory Benchmark</h1>
    <div class="callout">
      <p><strong>Method:</strong> Audrey is scored on a LongMemEval-inspired retrieval benchmark plus an operation-level lifecycle benchmark. The report still separates local Audrey-versus-baseline results from published external LoCoMo numbers so the comparison stays honest.</p>
      <p><strong>Run:</strong> <code>${escapeHtml(summary.command)}</code></p>
      <p><strong>Generated:</strong> ${escapeHtml(summary.generatedAt)}</p>
    </div>

    <div class="grid">
      <section class="callout">
        <h2>Combined Local Benchmark</h2>
        <img src="./local-overall.svg" alt="Combined local benchmark bar chart" />
      </section>

      ${renderSuiteSections(suiteCharts)}

      <section class="callout">
        <h2>Published Leaderboard</h2>
        <img src="./published-locomo.svg" alt="Published LoCoMo leaderboard bar chart" />
      </section>
    </div>

    <section class="callout">
      <h2>Case Matrix</h2>
      <table>
        <thead>
          <tr>
            <th>Case</th>
            <th>Suite</th>
            <th>Family</th>
            ${systemHeaders}
          </tr>
        </thead>
        <tbody>
          ${renderCaseRows(summary.local.cases)}
        </tbody>
      </table>
    </section>

    <section class="callout">
      <h2>March 23, 2026 Memory Trends</h2>
      <ul>
        ${renderTrendList(trends)}
      </ul>
    </section>
  </main>
</body>
</html>`;
}

/**
 * Write every benchmark artifact: the JSON summary, the SVG charts, the HTML
 * report and, optionally, copies of the headline charts for the README.
 *
 * @param {object} options
 * @param {string} options.outputDir - Directory for the report files; created if missing.
 * @param {object} options.summary - Serialised to `summary.json`. The report also reads
 *   `summary.command`, `summary.generatedAt`, `summary.local.overall` and `summary.local.cases`.
 * @param {Array<{system: string, scorePercent: number}>} options.localOverall - Rows for the combined local chart.
 * @param {Array<object>} [options.localSuites=[]] - Suites with `id`, `title`, `description` and an
 *   `overall` row list; each one gets its own `<id>-overall.svg`.
 * @param {Array<{system: string, score: number}>} options.externalOverall - Rows for the published LoCoMo chart.
 * @param {Array<object>} [options.trends=[]] - Entries rendered by renderTrendList.
 * @param {string} [options.readmeAssetsDir] - When set, the headline charts are also written here.
 * @returns {{json: string, html: string, localChart: string, suiteCharts: Array<object>,
 *   externalChart: string, readmeAssets: (object|null)}} Paths of everything written;
 *   `readmeAssets` is `null` when `readmeAssetsDir` was not given.
 */
export function writeBenchmarkArtifacts({
  outputDir,
  summary,
  localOverall,
  localSuites = [],
  externalOverall,
  trends = [],
  readmeAssetsDir,
}) {
  mkdirSync(outputDir, { recursive: true });

  // Resolve each artifact path once so the writes and the returned manifest cannot drift apart.
  const jsonPath = join(outputDir, 'summary.json');
  const htmlPath = join(outputDir, 'report.html');
  const localChartPath = join(outputDir, 'local-overall.svg');
  const externalChartPath = join(outputDir, 'published-locomo.svg');

  const localChart = renderBarChart({
    title: 'Audrey vs Local Memory Baselines',
    rows: toChartRows(localOverall, 'scorePercent'),
  });
  const externalChart = renderBarChart({
    title: 'Published LLM Memory Standards (LoCoMo)',
    rows: toChartRows(externalOverall, 'score'),
  });

  writeTextFile(localChartPath, localChart);
  writeTextFile(externalChartPath, externalChart);
  writeTextFile(jsonPath, JSON.stringify(summary, null, 2));

  const suiteCharts = localSuites.map(suite => {
    const fileName = `${suite.id}-overall.svg`;
    const title = `${suite.title} Benchmark`;
    const path = writeTextFile(
      join(outputDir, fileName),
      renderBarChart({ title, rows: toChartRows(suite.overall, 'scorePercent') }),
    );
    return {
      id: suite.id,
      title,
      description: suite.description,
      fileName,
      path,
    };
  });

  const readmeAssets = readmeAssetsDir
    ? writeReadmeAssets({
      readmeAssetsDir,
      localChart,
      externalChart,
      operationsSuite: localSuites.find(suite => suite.id === 'operations'),
    })
    : null;

  writeTextFile(htmlPath, renderReportHtml({ summary, suiteCharts, trends }));

  return {
    json: jsonPath,
    html: htmlPath,
    localChart: localChartPath,
    suiteCharts,
    externalChart: externalChartPath,
    readmeAssets,
  };
}
|