@shadowforge0/aquifer-memory 1.5.8 → 1.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -3
- package/consumers/cli.js +34 -0
- package/consumers/mcp.js +38 -5
- package/consumers/miranda/context-inject.js +1 -0
- package/consumers/openclaw-plugin.js +43 -3
- package/consumers/shared/config.js +20 -0
- package/consumers/shared/factory.js +1 -0
- package/consumers/shared/recall-format.js +26 -0
- package/core/aquifer.js +12 -0
- package/core/insights.js +210 -58
- package/core/mcp-manifest.js +18 -1
- package/core/storage.js +71 -0
- package/package.json +10 -2
- package/scripts/backfill-canonical-key.js +250 -0
- package/scripts/queries.json +0 -45
- package/scripts/retro-recall-bench.js +0 -409
- package/scripts/sample-bench-queries.sql +0 -75
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Backfill canonical_key_v2 for legacy insights rows.
|
|
6
|
+
*
|
|
7
|
+
* Pre-1.5.3 rows (those predating the Phase 2 C1 canonical-identity
|
|
8
|
+
* layer) carry `canonical_key_v2 IS NULL`, so they never match the
|
|
9
|
+
* canonical lookup inside commitInsight and never participate in the
|
|
10
|
+
* revision/supersede path. This script fills the key deterministically
|
|
11
|
+
* from `title` using the same normalization and hashing functions the
|
|
12
|
+
* writer uses, so backfilled rows behave identically to a freshly
|
|
13
|
+
* written row whose LLM extractor happened to emit a canonicalClaim
|
|
14
|
+
* equal to its title.
|
|
15
|
+
*
|
|
16
|
+
* Why JS not SQL: pgcrypto is NOT a default-installed extension in
|
|
17
|
+
* our production PG (verified 2026-04-20). Even with pgcrypto, matching
|
|
18
|
+
* JS's Unicode NFKC normalization in pure SQL is fragile. Single source
|
|
19
|
+
* of truth lives in core/insights.js; this script reuses it.
|
|
20
|
+
*
|
|
21
|
+
* Idempotent: every UPDATE is guarded by WHERE canonical_key_v2 IS NULL,
|
|
22
|
+
* so reruns and concurrent live writers converge cleanly.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
const { Pool } = require('pg');
|
|
26
|
+
const { defaultCanonicalKey } = require('../core/insights');
|
|
27
|
+
|
|
28
|
+
const BACKFILL_METADATA_PATCH = { canonicalBackfill: 'title_deterministic' };
|
|
29
|
+
|
|
30
|
+
/**
 * Print the CLI usage text and terminate the process.
 *
 * The text goes to stdout when `code` is 0 (help was requested) and to
 * stderr for any other code (usage error).
 *
 * @param {number} [code=0] Process exit code.
 */
function printUsageAndExit(code = 0) {
  const usageLines = [
    'Usage: node scripts/backfill-canonical-key.js --schema <name> [options]',
    '',
    'Required:',
    ' --schema <name> Target schema (e.g. miranda, jenny)',
    ' --agent <id> Limit to one agent (or use --all-agents)',
    '',
    'Optional:',
    ' --all-agents Backfill across all agents in the tenant',
    ' (mutually exclusive with --agent)',
    ' --tenant-id <id> Default: $AQUIFER_TENANT_ID or "default"',
    ' --batch-size <N> Rows per batch (1..1000, default 50)',
    ' --dry-run Print would-updates, do not execute',
    ' -h, --help Show this help',
    '',
    'Env:',
    ' DATABASE_URL Postgres connection string (required)',
    ' AQUIFER_TENANT_ID Fallback tenant id',
    '',
  ];
  const usageText = usageLines.join('\n');

  if (code === 0) {
    console.log(usageText);
  } else {
    console.error(usageText);
  }
  process.exit(code);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Parse the backfill CLI arguments.
 *
 * Malformed input terminates the process with exit code 2:
 *   - a value-taking flag with no value after it, or followed directly by
 *     another recognised flag (e.g. `--agent --all-agents`), which would
 *     otherwise be silently swallowed as the value;
 *   - a `--batch-size` that is not a plain positive decimal integer
 *     (`10abc` is rejected rather than read as 10);
 *   - an unknown argument (usage is printed as well).
 *
 * @param {string[]} argv Arguments after the script name.
 * @returns {{schema: ?string, agent: ?string, allAgents: boolean,
 *            tenantId: string, batchSize: number, dryRun: boolean,
 *            help: boolean}} Parsed options; `batchSize` is capped at 1000.
 */
function parseArgs(argv) {
  const KNOWN_FLAGS = new Set([
    '--schema', '--agent', '--all-agents', '--tenant-id',
    '--batch-size', '--dry-run', '-h', '--help',
  ]);
  const args = {
    schema: null,
    agent: null,
    allAgents: false,
    tenantId: process.env.AQUIFER_TENANT_ID || 'default',
    batchSize: 50,
    dryRun: false,
    help: false,
  };

  const fail = (message) => {
    console.error(message);
    process.exit(2);
  };

  // Returns the token following `flag`, refusing a missing token or one that
  // is itself a flag — both mean the operator forgot the value.
  const valueFor = (flag, index) => {
    const value = argv[index + 1];
    if (value === undefined || KNOWN_FLAGS.has(value)) {
      fail(`${flag} requires a value`);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    switch (flag) {
      case '--schema':
        args.schema = valueFor(flag, i);
        i++;
        break;
      case '--agent':
        args.agent = valueFor(flag, i);
        i++;
        break;
      case '--all-agents':
        args.allAgents = true;
        break;
      case '--tenant-id':
        args.tenantId = valueFor(flag, i);
        i++;
        break;
      case '--batch-size': {
        const raw = valueFor(flag, i);
        // Digits only: parseInt alone would accept trailing junk.
        const n = /^\d+$/.test(String(raw)) ? Number.parseInt(raw, 10) : NaN;
        if (!Number.isFinite(n) || n < 1) {
          fail(`--batch-size must be an integer >= 1, got: ${raw}`);
        }
        args.batchSize = Math.min(n, 1000);
        i++;
        break;
      }
      case '--dry-run':
        args.dryRun = true;
        break;
      case '-h':
      case '--help':
        args.help = true;
        break;
      default:
        console.error(`Unknown argument: ${flag}`);
        printUsageAndExit(2);
    }
  }
  return args;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Check the parsed arguments and the environment before any work starts.
 *
 * A help request prints usage with exit code 0. Otherwise each failed
 * requirement is reported on stderr and followed by usage with exit code 2.
 * Requirements are checked in this order: schema given, agent selection
 * given, agent selection not contradictory, DATABASE_URL set.
 *
 * @param {object} args Result of parseArgs().
 */
function validate(args) {
  if (args.help) printUsageAndExit(0);

  const noAgentSelection = !args.agent && !args.allAgents;
  const bothAgentSelections = Boolean(args.agent && args.allAgents);
  const requirements = [
    { violated: !args.schema, message: 'Missing required --schema' },
    { violated: noAgentSelection, message: 'Must specify --agent <id> or --all-agents' },
    { violated: bothAgentSelections, message: '--agent and --all-agents are mutually exclusive' },
    { violated: !process.env.DATABASE_URL, message: 'DATABASE_URL is required' },
  ];

  for (const { violated, message } of requirements) {
    if (violated) {
      console.error(message);
      printUsageAndExit(2);
    }
  }
}
|
|
108
|
+
|
|
109
|
+
// Safe schema identifier quoting — same pattern as
|
|
110
|
+
// scripts/extract-insights-from-recent-sessions.js:218-219.
|
|
111
|
+
// Quote a value as a SQL identifier: stringify it, double every embedded
// double quote, and wrap the result in double quotes.
const qi = (s) => {
  const escaped = String(s).split('"').join('""');
  return `"${escaped}"`;
};
|
|
112
|
+
|
|
113
|
+
/**
 * Shorten a string for single-line log output.
 *
 * Length is measured in Unicode code points, so the cut never lands in the
 * middle of a surrogate pair (which would put a lone surrogate in the log).
 * For text without astral characters the result is the same as a plain
 * `slice(0, n)` truncation.
 *
 * @param {*} s Value to shorten; anything that is not a string yields ''.
 * @param {number} [n=60] Maximum number of code points to keep.
 * @returns {string} `s` unchanged when short enough, otherwise the first
 *   `n` code points followed by '…'.
 */
function truncateForLog(s, n = 60) {
  if (typeof s !== 'string') return '';
  // Code points never outnumber UTF-16 units, so this skips the array copy
  // for every string that is already short enough.
  if (s.length <= n) return s;
  const codePoints = Array.from(s);
  if (codePoints.length <= n) return s;
  return `${codePoints.slice(0, n).join('')}…`;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Script entry point: fill `canonical_key_v2` for active insight rows that
 * still have it NULL, in id-ordered batches.
 *
 * Flow:
 *   1. Parse and validate CLI arguments (both may terminate the process).
 *   2. Page through `<schema>.insights` with an `id > lastId` cursor so the
 *      loop always advances, including in dry-run mode and for rows that
 *      are skipped.
 *   3. For each row with a non-blank title, derive the key with
 *      defaultCanonicalKey() and write it with an UPDATE that re-checks
 *      `canonical_key_v2 IS NULL`, so a row already filled by a concurrent
 *      writer is counted as `already_set` instead of being overwritten.
 *
 * The pool is always closed before returning or exiting. Any error is
 * logged as `[backfill] fatal:` and the process exits with code 1.
 *
 * NOTE(review): if the table enforces uniqueness on canonical_key_v2, two
 * legacy rows with the same tenant/agent/type/title would make the second
 * UPDATE fail and abort the run — confirm against the schema.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  validate(args);

  const pool = new Pool({ connectionString: process.env.DATABASE_URL });

  const schemaIdent = qi(args.schema);
  const agentLabel = args.allAgents ? '(all)' : args.agent;
  console.log(
    `[backfill] tenant=${args.tenantId} schema=${args.schema} `
    + `agent=${agentLabel} batch_size=${args.batchSize} dry_run=${args.dryRun}`
  );
  if (args.dryRun) {
    console.log('[backfill] DRY RUN — no updates will be executed.');
  }

  const whereClauses = [
    'canonical_key_v2 IS NULL',
    `status = 'active'`,
    'tenant_id = $1',
  ];
  const whereParams = [args.tenantId];
  if (!args.allAgents) {
    whereClauses.push(`agent_id = $${whereParams.length + 1}`);
    whereParams.push(args.agent);
  }

  // Both statements are loop-invariant, so build them once.
  const selectSql =
    `SELECT id, tenant_id, agent_id, insight_type, title
       FROM ${schemaIdent}.insights
      WHERE ${whereClauses.join(' AND ')}
        AND id > $${whereParams.length + 1}
      ORDER BY id ASC
      LIMIT $${whereParams.length + 2}`;
  // COALESCE: `NULL || jsonb` is NULL, which would silently drop the
  // provenance patch for rows whose metadata is NULL.
  const updSql =
    `UPDATE ${schemaIdent}.insights
        SET canonical_key_v2 = $1,
            metadata = COALESCE(metadata, '{}'::jsonb) || $2::jsonb,
            updated_at = now()
      WHERE id = $3 AND canonical_key_v2 IS NULL`;
  const metadataPatch = JSON.stringify(BACKFILL_METADATA_PATCH);

  let totalBackfilled = 0;
  let totalSkipped = 0;
  let totalAlreadySet = 0;
  let batchNum = 0;
  // Id watermark: prevents an infinite loop when a batch yields no state
  // transitions (dry-run always, or when every row has an empty title, or
  // when races cause 0 UPDATEs). It is kept exactly as the driver returned
  // it: a bigint id arrives as a string, and converting it to a JS number
  // could lose precision or produce NaN.
  let lastId = 0;
  let failed = false;

  try {
    while (true) {
      batchNum += 1;

      const res = await pool.query(selectSql, [...whereParams, lastId, args.batchSize]);
      if (res.rowCount === 0) break;
      lastId = res.rows[res.rows.length - 1].id;

      let batchBackfilled = 0;
      let batchSkipped = 0;
      let batchAlreadySet = 0;

      for (const row of res.rows) {
        const title = typeof row.title === 'string' ? row.title.trim() : '';
        if (!title) {
          console.warn(
            `[backfill] skip id=${row.id} empty or whitespace title`
          );
          batchSkipped += 1;
          continue;
        }

        // The title stands in for the canonical claim; no entities are known
        // for legacy rows.
        const canonicalKey = defaultCanonicalKey({
          tenantId: row.tenant_id,
          agentId: row.agent_id,
          type: row.insight_type,
          canonicalClaim: title,
          entities: [],
        });

        if (args.dryRun) {
          console.log(
            `[backfill] would_update id=${row.id} agent=${row.agent_id} `
            + `type=${row.insight_type} title="${truncateForLog(title)}"`
          );
          batchBackfilled += 1;
          continue;
        }

        const upd = await pool.query(updSql, [canonicalKey, metadataPatch, row.id]);
        if (upd.rowCount === 0) {
          // Key was set between our SELECT and UPDATE.
          batchAlreadySet += 1;
        } else {
          batchBackfilled += 1;
        }
      }

      totalBackfilled += batchBackfilled;
      totalSkipped += batchSkipped;
      totalAlreadySet += batchAlreadySet;

      console.log(
        `[backfill] batch ${batchNum}: selected=${res.rowCount} `
        + `${args.dryRun ? 'would_backfill' : 'backfilled'}=${batchBackfilled} `
        + `skipped=${batchSkipped} already_set=${batchAlreadySet}`
      );
    }

    const verb = args.dryRun ? 'would_backfill' : 'backfilled';
    console.log(
      `[backfill] DONE${args.dryRun ? ' dry_run' : ''} total: `
      + `${verb}=${totalBackfilled} skipped=${totalSkipped} `
      + `already_set=${totalAlreadySet}`
    );
  } catch (e) {
    console.error('[backfill] fatal:', e.stack || e.message);
    failed = true;
  } finally {
    // Best-effort close on every path; a failure to close must not mask the
    // outcome of the run itself.
    await pool.end().catch(() => {});
  }

  if (failed) process.exit(1);
}
|
|
249
|
+
|
|
250
|
+
// main() handles errors raised inside its batch loop itself; this handler
// covers anything thrown before that point, so such a failure is reported
// with the same prefix and exit code instead of as an unhandled rejection.
main().catch((err) => {
  console.error('[backfill] fatal:', (err && (err.stack || err.message)) || err);
  process.exit(1);
});
|
package/scripts/queries.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"version": 1,
|
|
3
|
-
"queries": [
|
|
4
|
-
{
|
|
5
|
-
"id": "q-001",
|
|
6
|
-
"lang": "en",
|
|
7
|
-
"text": "How do I set up Aquifer memory storage with PostgreSQL?"
|
|
8
|
-
},
|
|
9
|
-
{
|
|
10
|
-
"id": "q-002",
|
|
11
|
-
"lang": "en",
|
|
12
|
-
"text": "What is the difference between memory_search and session_recall in Aquifer?"
|
|
13
|
-
},
|
|
14
|
-
{
|
|
15
|
-
"id": "q-003",
|
|
16
|
-
"lang": "zh",
|
|
17
|
-
"text": "Aquifer 的 session recall 是怎麼做 hybrid 檢索的?"
|
|
18
|
-
},
|
|
19
|
-
{
|
|
20
|
-
"id": "q-004",
|
|
21
|
-
"lang": "zh",
|
|
22
|
-
"text": "為什麼 zhcfg 會依賴 jieba 或 zhparser?"
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
"id": "q-005",
|
|
26
|
-
"lang": "mixed",
|
|
27
|
-
"text": "How to debug fts-zhcfg pipeline 在 jieba migration 後失敗的問題?"
|
|
28
|
-
},
|
|
29
|
-
{
|
|
30
|
-
"id": "q-006",
|
|
31
|
-
"lang": "mixed",
|
|
32
|
-
"text": "memory_search 找不到結果時,應該先看哪個 log 或 table?"
|
|
33
|
-
},
|
|
34
|
-
{
|
|
35
|
-
"id": "q-007",
|
|
36
|
-
"lang": "en",
|
|
37
|
-
"text": "How does hybrid-rerank differ from hybrid mode in retro recall bench?"
|
|
38
|
-
},
|
|
39
|
-
{
|
|
40
|
-
"id": "q-008",
|
|
41
|
-
"lang": "zh",
|
|
42
|
-
"text": "Aquifer 初始化後要如何驗證 embeddings pipeline 有正常工作?"
|
|
43
|
-
}
|
|
44
|
-
]
|
|
45
|
-
}
|
|
@@ -1,409 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Retro recall bench — runs the same query set across 6 pipelines and
|
|
6
|
-
* reports nDCG@5 / MRR / latency / empty-rate. Designed for the post-1.3.0
|
|
7
|
-
* Phase 0 audit; see ~/.claude/develop-runs/20260419-142432-aquifer-memory-routes/spec.md.
|
|
8
|
-
*
|
|
9
|
-
* Pipelines:
|
|
10
|
-
* fts-simple storage.searchSessions(ftsConfig='simple')
|
|
11
|
-
* fts-zhcfg storage.searchSessions(ftsConfig='zhcfg') [skip if zhcfg missing]
|
|
12
|
-
* summary-vector storage.searchSummaryEmbeddings
|
|
13
|
-
* turn-only storage.searchTurnEmbeddings
|
|
14
|
-
* hybrid aquifer.recall(mode='hybrid', rerank disabled)
|
|
15
|
-
* hybrid-rerank aquifer.recall(mode='hybrid', rerank forced)
|
|
16
|
-
*
|
|
17
|
-
* Usage:
|
|
18
|
-
* node scripts/retro-recall-bench.js \
|
|
19
|
-
* --query-set queries.json \
|
|
20
|
-
* [--judgements judgements.json] \
|
|
21
|
-
* [--output report.json] \
|
|
22
|
-
* [--markdown summary.md] \
|
|
23
|
-
* [--pipelines fts-simple,fts-zhcfg,summary-vector,turn-only,hybrid,hybrid-rerank] \
|
|
24
|
-
* [--limit 5] [--warmup 1] [--schema miranda] [--tenant-id default]
|
|
25
|
-
*
|
|
26
|
-
* env:
|
|
27
|
-
* DATABASE_URL required
|
|
28
|
-
* AQUIFER_SCHEMA default 'miranda'
|
|
29
|
-
* EMBED_PROVIDER + key required for vector pipelines
|
|
30
|
-
* AQUIFER_LLM_PROVIDER unused here (no enrich)
|
|
31
|
-
* OPENROUTER_API_KEY required for hybrid-rerank pipeline
|
|
32
|
-
*/
|
|
33
|
-
|
|
34
|
-
const fs = require('fs');
|
|
35
|
-
const { Pool } = require('pg');
|
|
36
|
-
const aquiferIndex = require('..');
|
|
37
|
-
const storage = require('../core/storage');
|
|
38
|
-
const { createEmbedder } = require('..');
|
|
39
|
-
|
|
40
|
-
const ALL_PIPELINES = [
|
|
41
|
-
'fts-simple',
|
|
42
|
-
'fts-zhcfg',
|
|
43
|
-
'summary-vector',
|
|
44
|
-
'turn-only',
|
|
45
|
-
'hybrid',
|
|
46
|
-
'hybrid-rerank',
|
|
47
|
-
];
|
|
48
|
-
|
|
49
|
-
/**
 * Parse the bench CLI arguments.
 *
 * @param {string[]} argv Arguments after the script name.
 * @returns {object} Options: querySet, judgements, output, markdown,
 *   pipelines (defaults to ALL_PIPELINES), limit, warmup, schema, tenantId,
 *   rerankTopK and help.
 * @throws {Error} When a value-taking flag is the last argument, or when
 *   --limit / --rerank-topk (>= 1) or --warmup (>= 0) is not a plain decimal
 *   integer. Previously these cases stored `undefined` or NaN, or crashed
 *   on `undefined.split` for --pipelines.
 */
function parseArgs(argv) {
  const args = {
    querySet: null,
    judgements: null,
    output: null,
    markdown: null,
    pipelines: ALL_PIPELINES,
    limit: 5,
    warmup: 1,
    schema: process.env.AQUIFER_SCHEMA || 'miranda',
    tenantId: process.env.AQUIFER_TENANT_ID || 'default',
    rerankTopK: 20,
    help: false,
  };

  const readValue = (flag, index) => {
    const value = argv[index + 1];
    if (value === undefined) {
      throw new Error(`${flag} requires a value`);
    }
    return value;
  };

  const readInt = (flag, index, min) => {
    const raw = readValue(flag, index);
    const n = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : NaN;
    if (!Number.isFinite(n) || n < min) {
      throw new Error(`${flag} must be an integer >= ${min}, got: ${raw}`);
    }
    return n;
  };

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    switch (flag) {
      case '--query-set': args.querySet = readValue(flag, i); i++; break;
      case '--judgements': args.judgements = readValue(flag, i); i++; break;
      case '--output': args.output = readValue(flag, i); i++; break;
      case '--markdown': args.markdown = readValue(flag, i); i++; break;
      case '--pipelines':
        args.pipelines = readValue(flag, i).split(',').map((s) => s.trim()).filter(Boolean);
        i++;
        break;
      case '--limit': args.limit = readInt(flag, i, 1); i++; break;
      case '--warmup': args.warmup = readInt(flag, i, 0); i++; break;
      case '--schema': args.schema = readValue(flag, i); i++; break;
      case '--tenant-id': args.tenantId = readValue(flag, i); i++; break;
      case '--rerank-topk': args.rerankTopK = readInt(flag, i, 1); i++; break;
      case '-h':
      case '--help':
        args.help = true;
        break;
      default:
        // Still non-fatal, as before, but no longer silent: a typo such as
        // `--limt 10` would otherwise run the bench with the default limit.
        console.warn(`[bench] ignoring unknown argument: ${flag}`);
    }
  }
  return args;
}
|
|
79
|
-
|
|
80
|
-
/**
 * Print this script's header comment as the help text.
 *
 * The end of the header is located by its closing comment line instead of
 * a fixed line count: the previous fixed count of 36 ran past the comment
 * and printed the first `require` statements too. 36 is kept only as a
 * fallback for the case where no closing line is found.
 */
function printHelp() {
  const lines = fs.readFileSync(__filename, 'utf8').split('\n');
  const closing = lines.findIndex((line) => line.trim() === '*/');
  const end = closing < 0 ? 36 : closing + 1;
  console.log(lines.slice(0, end).join('\n'));
}
|
|
83
|
-
|
|
84
|
-
/**
 * Ask pg_ts_config which of the text-search configurations 'simple' and
 * 'zhcfg' exist.
 *
 * @param {{query: Function}} pool Object whose `query(sql)` returns a
 *   promise of a result with a `rows` array.
 * @returns {Promise<Set<string>>} Names of the configurations found.
 */
function detectFtsConfigsAvailable(pool) {
  const sql = `SELECT cfgname FROM pg_ts_config WHERE cfgname IN ('simple','zhcfg')`;
  const toNameSet = (result) => {
    const names = result.rows.map((entry) => entry.cfgname);
    return new Set(names);
  };
  return pool.query(sql).then(toNameSet);
}
|
|
88
|
-
|
|
89
|
-
/**
 * Run `fn`, timing it with the high-resolution clock, and capture its
 * outcome instead of letting a failure propagate.
 *
 * @param {Function} fn Callable whose return value is awaited.
 * @returns {Promise<{latencyMs: number, result: *, error: *}>} Elapsed
 *   milliseconds, the awaited value (undefined if `fn` failed) and the
 *   thrown value (null if `fn` succeeded).
 */
async function withLatency(fn) {
  const outcome = { result: undefined, error: null };
  const startedAt = process.hrtime.bigint();
  try {
    outcome.result = await fn();
  } catch (caught) {
    outcome.error = caught;
  }
  const finishedAt = process.hrtime.bigint();
  const elapsedNs = finishedAt - startedAt;
  return {
    latencyMs: Number(elapsedNs) / 1e6,
    result: outcome.result,
    error: outcome.error,
  };
}
|
|
96
|
-
|
|
97
|
-
/**
 * Convert raw storage rows into the bench's common hit shape.
 *
 * Both snake_case and camelCase field names are accepted; the first truthy
 * candidate wins.
 *
 * @param {object[]} rows Result rows, best match first.
 * @param {string} scoreKey Name of the row property to report as `score`.
 * @returns {object[]} One hit per row, with a 1-based `rank`.
 */
function normalizeHits(rows, scoreKey) {
  const toHit = (row, position) => {
    const sessionId = row.session_id ? row.session_id : row.sessionId;
    const sessionRowId = row.id || row.session_row_id || row.sessionRowId || null;
    const agentId = row.agent_id || row.agentId || null;
    return {
      rank: position + 1,
      sessionId,
      sessionRowId,
      agentId,
      score: row[scoreKey] ?? null,
      summaryDistance: row.distance ?? null,
      turnDistance: row.turn_distance ?? null,
    };
  };
  return rows.map(toHit);
}
|
|
108
|
-
|
|
109
|
-
/**
 * Convert recall results into the bench's common hit shape.
 *
 * `summaryDistance` carries the row's `_debug.hybridScore` when present,
 * and the rerank fields are read from `_debug` as well.
 *
 * @param {object[]} rows Recall results, best match first.
 * @returns {object[]} One hit per row, with a 1-based `rank`.
 */
function normalizeAquiferHits(rows) {
  return rows.map((row, position) => {
    const debug = row._debug ?? {};
    return {
      rank: position + 1,
      sessionId: row.sessionId,
      sessionRowId: null,
      agentId: row.agentId,
      score: row.score ?? null,
      summaryDistance: debug.hybridScore ?? null,
      turnDistance: null,
      rerankApplied: debug.rerankApplied ?? false,
      rerankReason: debug.rerankReason ?? null,
    };
  });
}
|
|
122
|
-
|
|
123
|
-
/**
 * Run one named pipeline for one query.
 *
 * @param {string} name Pipeline name: 'fts-simple', 'fts-zhcfg',
 *   'summary-vector', 'turn-only', 'hybrid' or 'hybrid-rerank'.
 * @param {object} ctx Shared run context: pool, schema, tenantId, limit,
 *   queryVec, aquifer and ftsAvailable (a Set of text-search config names).
 * @param {object} query Query entry; `text` and `agentId` are read.
 * @returns {Promise<object>} `{ skipped: true, reason }` when the pipeline
 *   cannot run (text-search config missing, no query vector, unknown
 *   name); otherwise the withLatency() outcome whose result holds the
 *   normalized hits.
 */
async function runPipeline(name, ctx, query) {
  const { pool, schema, tenantId, limit, queryVec, aquifer, ftsAvailable } = ctx;
  const skip = (reason) => ({ skipped: true, reason });

  // Full-text search through storage.searchSessions with the given config.
  const runFts = (ftsConfig) => withLatency(async () => {
    const rows = await storage.searchSessions(pool, query.text, {
      schema, tenantId, agentId: query.agentId, limit, ftsConfig,
    });
    return normalizeHits(rows, 'fts_rank');
  });

  // Vector search through the named storage method.
  const runVector = (method, scoreKey) => withLatency(async () => {
    const { rows } = await storage[method](pool, {
      schema, tenantId, queryVec, agentId: query.agentId, limit,
    });
    return normalizeHits(rows, scoreKey);
  });

  // Hybrid recall through aquifer, with reranking switched on or off.
  const runRecall = (rerank) => withLatency(async () => {
    const rows = await aquifer.recall(query.text, {
      agentId: query.agentId, limit, mode: 'hybrid', rerank,
    });
    return normalizeAquiferHits(rows);
  });

  if (name === 'fts-simple') {
    return ftsAvailable.has('simple') ? runFts('simple') : skip('simple tsconfig missing');
  }
  if (name === 'fts-zhcfg') {
    return ftsAvailable.has('zhcfg')
      ? runFts('zhcfg')
      : skip('zhcfg tsconfig missing — install zhparser');
  }
  if (name === 'summary-vector') {
    return queryVec ? runVector('searchSummaryEmbeddings', 'distance') : skip('no embed provider');
  }
  if (name === 'turn-only') {
    return queryVec ? runVector('searchTurnEmbeddings', 'turn_distance') : skip('no embed provider');
  }
  if (name === 'hybrid') return runRecall(false);
  if (name === 'hybrid-rerank') return runRecall(true);
  return skip(`unknown pipeline ${name}`);
}
|
|
182
|
-
|
|
183
|
-
/**
 * Discounted cumulative gain of a ranked list of relevance grades.
 *
 * Each grade contributes (2^grade - 1) / log2(position + 1), with
 * positions counted from 1.
 *
 * @param {number[]} rels Relevance grades in rank order.
 * @returns {number} The summed gain; 0 for an empty list.
 */
function dcg(rels) {
  let total = 0;
  rels.forEach((grade, index) => {
    const gain = Math.pow(2, grade) - 1;
    const discount = Math.log2(index + 2);
    total += gain / discount;
  });
  return total;
}
|
|
186
|
-
|
|
187
|
-
/**
 * Normalized DCG over the first `k` entries of a graded result list.
 *
 * The ideal ordering is built from the grades of the list passed in,
 * sorted descending; grades of items that are not in the list play no
 * part.
 *
 * @param {number[]} judgedHits Relevance grades in rank order.
 * @param {number} k Cut-off rank.
 * @returns {?number} DCG@k divided by the ideal DCG@k, or null when the
 *   ideal DCG is 0 (nothing relevant in the list).
 */
function nDcgAtK(judgedHits, k) {
  const bestPossible = judgedHits.slice().sort((x, y) => y - x).slice(0, k);
  const idealGain = dcg(bestPossible);
  if (idealGain === 0) {
    return null;
  }
  const actualGain = dcg(judgedHits.slice(0, k));
  return actualGain / idealGain;
}
|
|
194
|
-
|
|
195
|
-
/**
 * Reciprocal of the 1-based rank of the first entry with a grade above 0.
 *
 * @param {number[]} judgedHits Relevance grades in rank order.
 * @returns {number} 1 / rank of the first relevant entry, or 0 when none
 *   is relevant.
 */
function reciprocalRank(judgedHits) {
  for (let position = 0; position < judgedHits.length; position++) {
    if (judgedHits[position] > 0) {
      return 1 / (position + 1);
    }
  }
  return 0;
}
|
|
199
|
-
|
|
200
|
-
/**
 * Aggregate per-pipeline quality and latency metrics from individual runs.
 *
 * Runs flagged `skipped` or carrying an `error` are left out. Hits without
 * a matching judgement are graded 0. Queries for which nDcgAtK() returns
 * null are left out of the nDCG mean, while MRR is averaged over every
 * counted run.
 *
 * @param {object[]} runs Run records (queryId, pipeline, latencyMs, hits).
 * @param {object[]} judgements Entries of { queryId, sessionId, relevance }.
 * @param {number} k Cut-off passed to nDcgAtK().
 * @returns {object[]} One summary per pipeline, in first-seen order.
 */
function computeMetrics(runs, judgements, k) {
  const gradeOf = new Map(
    judgements.map((j) => [`${j.queryId}::${j.sessionId}`, j.relevance])
  );

  // Mean of a list, or null when there is nothing to average.
  const average = (values) => {
    if (values.length === 0) return null;
    return values.reduce((sum, value) => sum + value, 0) / values.length;
  };

  const byPipeline = {};
  runs
    .filter((run) => !(run.skipped || run.error))
    .forEach((run) => {
      if (!byPipeline[run.pipeline]) {
        byPipeline[run.pipeline] = { judged: [], latency: [], empty: 0, total: 0, judgeable: 0 };
      }
      const bucket = byPipeline[run.pipeline];
      const hits = run.hits || [];
      const grades = hits.map((hit) => gradeOf.get(`${run.queryId}::${hit.sessionId}`) ?? 0);

      bucket.total += 1;
      bucket.latency.push(run.latencyMs);
      if (hits.length === 0) bucket.empty += 1;
      if (grades.some((grade) => grade > 0)) bucket.judgeable += 1;
      bucket.judged.push(grades);
    });

  return Object.entries(byPipeline).map(([pipeline, bucket]) => {
    const ndcgValues = bucket.judged
      .map((grades) => nDcgAtK(grades, k))
      .filter((value) => value !== null);
    const rrValues = bucket.judged.map((grades) => reciprocalRank(grades));
    const sortedLatency = [...bucket.latency].sort((x, y) => x - y);

    // Value at floor(fraction * n), clamped to the last element.
    const percentile = (fraction) => {
      if (sortedLatency.length === 0) return null;
      const index = Math.min(
        sortedLatency.length - 1,
        Math.floor(fraction * sortedLatency.length)
      );
      return sortedLatency[index];
    };

    return {
      pipeline,
      count: bucket.total,
      nDCG5: average(ndcgValues),
      MRR: average(rrValues),
      latencyMs: {
        mean: average(sortedLatency),
        p50: percentile(0.5),
        p95: percentile(0.95),
      },
      emptyResultRate: bucket.total ? bucket.empty / bucket.total : 0,
      judgeableRate: bucket.total ? bucket.judgeable / bucket.total : 0,
    };
  });
}
|
|
238
|
-
|
|
239
|
-
/**
 * Render the bench report as a markdown summary.
 *
 * The nDCG column headers carry the actual cut-off `k`. They used to be
 * hard-coded as `nDCG@5`, which mislabeled the column whenever the bench
 * ran with a limit other than 5. Output for k = 5 is unchanged.
 *
 * @param {object} report Report object; meta, queries, metrics
 *   (overall / zhMixed / en) and skipped are read.
 * @param {number} k Cut-off the metrics were computed at.
 * @returns {string} Markdown text ending with a newline.
 */
function renderMarkdown(report, k) {
  const ndcgLabel = `nDCG@${k}`;
  const lines = [];

  lines.push(`# Aquifer Retro Recall Bench`);
  lines.push('');
  lines.push(`- Generated: ${report.meta.generatedAt}`);
  lines.push(`- Schema: \`${report.meta.schema}\` / Tenant: \`${report.meta.tenantId}\``);
  lines.push(`- Queries: ${report.queries.length} (warmup ${report.meta.warmup} excluded from metrics)`);
  lines.push(`- Pipelines: ${report.meta.pipelines.join(', ')}`);
  lines.push('');
  lines.push(`## Overall (top ${k})`);
  lines.push(`| Pipeline | ${ndcgLabel} | MRR | Mean ms | p50 | p95 | Empty% | Judgeable% | N |`);
  lines.push('|---|---|---|---|---|---|---|---|---|');
  for (const m of report.metrics.overall) {
    lines.push(`| ${m.pipeline} | ${fmt(m.nDCG5)} | ${fmt(m.MRR)} | ${fmtMs(m.latencyMs.mean)} | ${fmtMs(m.latencyMs.p50)} | ${fmtMs(m.latencyMs.p95)} | ${pct(m.emptyResultRate)} | ${pct(m.judgeableRate)} | ${m.count} |`);
  }

  // The language subsets share one compact table layout.
  const pushSubset = (title, metrics) => {
    if (!metrics) return;
    lines.push('');
    lines.push(`## ${title} (top ${k})`);
    lines.push(`| Pipeline | ${ndcgLabel} | MRR | Empty% | N |`);
    lines.push('|---|---|---|---|---|');
    for (const m of metrics) {
      lines.push(`| ${m.pipeline} | ${fmt(m.nDCG5)} | ${fmt(m.MRR)} | ${pct(m.emptyResultRate)} | ${m.count} |`);
    }
  };
  pushSubset('ZH+Mixed subset', report.metrics.zhMixed);
  pushSubset('EN subset', report.metrics.en);

  if (report.skipped.length > 0) {
    lines.push('');
    lines.push('## Skipped pipelines');
    for (const s of report.skipped) lines.push(`- \`${s.pipeline}\`: ${s.reason}`);
  }
  return lines.join('\n') + '\n';
}
|
|
279
|
-
|
|
280
|
-
// Cell formatters for the markdown tables. Each one renders a missing value
// (null or undefined) as an em dash.

/** Format a score with three decimals. */
function fmt(v) {
  if (v == null) return '—';
  return v.toFixed(3);
}

/** Format a millisecond value with one decimal. */
function fmtMs(v) {
  if (v == null) return '—';
  return v.toFixed(1);
}

/** Format a 0..1 fraction as a whole-number percentage. */
function pct(v) {
  if (v == null) return '—';
  const percent = (v * 100).toFixed(0);
  return `${percent}%`;
}
|
|
283
|
-
|
|
284
|
-
/**
 * Bench entry point: load the query set, run every selected pipeline for
 * every query, then write the JSON report and its markdown summary.
 *
 * Exits with code 0 after printing help, and with code 2 when the query
 * set is missing or empty or no database URL is configured. Any other
 * failure propagates to the caller.
 *
 * Changes from the previous version:
 *   - The derived markdown path can no longer equal the JSON path. When
 *     --output does not end in `.json`, `.md` is appended; the suffix
 *     replacement used to be a no-op, so the markdown overwrote the report.
 *   - The aquifer instance and the pool are closed in a `finally`, so they
 *     are released when a query, embed or pipeline call throws as well.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help || !args.querySet) { printHelp(); process.exit(args.help ? 0 : 2); }

  const querySet = JSON.parse(fs.readFileSync(args.querySet, 'utf8'));
  const queries = querySet.queries || [];
  if (queries.length === 0) { console.error('Empty query set'); process.exit(2); }

  const judgements = args.judgements
    ? (JSON.parse(fs.readFileSync(args.judgements, 'utf8')).judgements || [])
    : [];

  const dbUrl = process.env.DATABASE_URL || process.env.AQUIFER_DB_URL;
  if (!dbUrl) { console.error('DATABASE_URL is required'); process.exit(2); }

  const pool = new Pool({ connectionString: dbUrl });
  const runs = [];
  const skipped = new Set();
  let ftsAvailable;
  let aquifer = null;

  try {
    ftsAvailable = await detectFtsConfigsAvailable(pool);

    const embedFn = (() => {
      try {
        const e = createEmbedder({}); // autodetect via EMBED_PROVIDER
        return (texts) => e.embedBatch(texts);
      } catch (err) {
        console.warn(`[bench] embed unavailable: ${err.message} — vector pipelines will skip`);
        return null;
      }
    })();

    aquifer = aquiferIndex.createAquifer({
      db: pool,
      schema: args.schema,
      tenantId: args.tenantId,
      embed: embedFn ? { fn: embedFn } : undefined,
      rerank: process.env.OPENROUTER_API_KEY
        ? { provider: 'openrouter', openrouterApiKey: process.env.OPENROUTER_API_KEY, topK: args.rerankTopK, autoTrigger: { enabled: false } }
        : null,
    });

    const ctx = {
      pool, schema: args.schema, tenantId: args.tenantId,
      limit: args.limit, queryVec: null, aquifer, ftsAvailable,
    };

    // Warmup: run recall (no explicit mode) on the first query to prime the
    // pool. Failures are deliberately ignored; results are not recorded.
    for (let w = 0; w < args.warmup; w++) {
      try { await aquifer.recall(queries[0].text, { limit: args.limit }); } catch { /* ignore */ }
    }

    for (const q of queries) {
      let queryVec = null;
      if (embedFn) {
        try { queryVec = (await embedFn([q.text]))[0]; }
        catch (err) { console.warn(`[bench] embed failed for "${q.id}": ${err.message}`); }
      }
      ctx.queryVec = queryVec;

      for (const pipeline of args.pipelines) {
        const r = await runPipeline(pipeline, ctx, q);
        if (r.skipped) {
          // Stored as JSON text so the Set de-duplicates identical
          // pipeline/reason pairs across queries.
          skipped.add(JSON.stringify({ pipeline, reason: r.reason }));
          continue;
        }
        runs.push({
          queryId: q.id,
          pipeline,
          latencyMs: r.latencyMs,
          empty: !r.result || r.result.length === 0,
          error: r.error ? { code: r.error.code || 'ERR', message: r.error.message } : null,
          hits: r.result || [],
        });
      }
    }
  } finally {
    // Best-effort shutdown; a close failure must not mask the real outcome.
    try { await aquifer?.close?.(); } catch { /* ignore */ }
    await pool.end().catch(() => {});
  }

  const zhMixedQueries = new Set(queries.filter(q => q.lang === 'zh' || q.lang === 'mixed').map(q => q.id));
  const enQueries = new Set(queries.filter(q => q.lang === 'en').map(q => q.id));
  const subset = (set) => runs.filter(r => set.has(r.queryId));

  const report = {
    meta: {
      generatedAt: new Date().toISOString(),
      schema: args.schema,
      tenantId: args.tenantId,
      limit: args.limit,
      warmup: args.warmup,
      pipelines: args.pipelines,
      ftsConfigsAvailable: [...ftsAvailable],
    },
    queries,
    judgements,
    runs,
    metrics: {
      overall: computeMetrics(runs, judgements, args.limit),
      zhMixed: zhMixedQueries.size > 0 ? computeMetrics(subset(zhMixedQueries), judgements, args.limit) : null,
      en: enQueries.size > 0 ? computeMetrics(subset(enQueries), judgements, args.limit) : null,
    },
    skipped: [...skipped].map(s => JSON.parse(s)),
  };

  const outPath = args.output || `bench-report-${Date.now()}.json`;
  fs.writeFileSync(outPath, JSON.stringify(report, null, 2));
  console.log(`Wrote ${outPath}`);

  const md = renderMarkdown(report, args.limit);
  // Never let the derived markdown path collide with the JSON path.
  const derivedMdPath = /\.json$/.test(outPath)
    ? outPath.replace(/\.json$/, '.md')
    : `${outPath}.md`;
  const mdPath = args.markdown || derivedMdPath;
  fs.writeFileSync(mdPath, md);
  console.log(`Wrote ${mdPath}`);

  if (judgements.length === 0) {
    console.log('\nNo judgements provided — metrics are coverage/latency only.');
    console.log('Edit the JSON output to add judgements like:');
    console.log(' "judgements": [{"queryId":"q-001","sessionId":"sess_abc","relevance":3}, ...]');
    console.log('then re-run with --judgements <path>.');
  }
}
|
|
405
|
-
|
|
406
|
-
main().catch(err => {
|
|
407
|
-
console.error('[bench] fatal:', err.stack || err.message);
|
|
408
|
-
process.exit(1);
|
|
409
|
-
});
|