@shadowforge0/aquifer-memory 1.0.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -29
- package/consumers/claude-code.js +117 -0
- package/consumers/cli.js +28 -1
- package/consumers/default/daily-entries.js +196 -0
- package/consumers/default/index.js +282 -0
- package/consumers/default/prompts/summary.js +153 -0
- package/consumers/mcp.js +3 -23
- package/consumers/miranda/context-inject.js +119 -0
- package/consumers/miranda/daily-entries.js +224 -0
- package/consumers/miranda/index.js +353 -0
- package/consumers/miranda/instance.js +55 -0
- package/consumers/miranda/llm.js +99 -0
- package/consumers/miranda/profile.json +145 -0
- package/consumers/miranda/prompts/summary.js +303 -0
- package/consumers/miranda/recall-format.js +74 -0
- package/consumers/miranda/render-daily-md.js +186 -0
- package/consumers/miranda/workspace-files.js +91 -0
- package/consumers/openclaw-ext/index.js +38 -0
- package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
- package/consumers/openclaw-ext/package.json +10 -0
- package/consumers/openclaw-plugin.js +66 -74
- package/consumers/opencode.js +21 -24
- package/consumers/shared/autodetect.js +64 -0
- package/consumers/shared/entity-parser.js +119 -0
- package/consumers/shared/ingest.js +148 -0
- package/consumers/shared/llm-autodetect.js +137 -0
- package/consumers/shared/normalize.js +129 -0
- package/consumers/shared/recall-format.js +110 -0
- package/core/aquifer.js +209 -71
- package/core/artifacts.js +174 -0
- package/core/bundles.js +400 -0
- package/core/consolidation.js +340 -0
- package/core/decisions.js +164 -0
- package/core/entity.js +1 -3
- package/core/errors.js +97 -0
- package/core/handoff.js +153 -0
- package/core/mcp-manifest.js +131 -0
- package/core/narratives.js +212 -0
- package/core/profiles.js +171 -0
- package/core/state.js +163 -0
- package/core/storage.js +86 -28
- package/core/timeline.js +152 -0
- package/docs/postprocess-contract.md +132 -0
- package/index.js +23 -1
- package/package.json +23 -2
- package/pipeline/_http.js +1 -1
- package/pipeline/consolidation/apply.js +176 -0
- package/pipeline/consolidation/index.js +21 -0
- package/pipeline/extract-entities.js +2 -2
- package/pipeline/rerank.js +1 -1
- package/pipeline/summarize.js +4 -1
- package/schema/001-base.sql +61 -24
- package/schema/002-entities.sql +17 -3
- package/schema/004-completion.sql +375 -0
- package/schema/004-facts.sql +67 -0
- package/scripts/diagnose-fts-zh.js +168 -134
- package/scripts/diagnose-vector.js +188 -0
- package/scripts/install-openclaw.sh +59 -0
- package/scripts/smoke.mjs +2 -2
|
@@ -1,161 +1,195 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* FTS
|
|
4
|
+
* Aquifer FTS 中文診斷
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
6
|
+
* 測 aquifer 實際搜尋主路徑(trigram ILIKE on search_text + similarity ranking)
|
|
7
|
+
* vs fallback 路徑(tsvector @@ plainto_tsquery('simple', q))對中文 query 的表現。
|
|
8
|
+
*
|
|
9
|
+
* env:
|
|
10
|
+
* DATABASE_URL — required
|
|
11
|
+
* AQUIFER_SCHEMA — default 'public'
|
|
12
|
+
* DIAGNOSE_QUERIES — comma-separated, overrides built-in set
|
|
10
13
|
*/
|
|
11
14
|
|
|
12
15
|
const { Pool } = require('pg');
|
|
13
16
|
|
|
14
|
-
const DB_URL = process.env.DATABASE_URL
|
|
15
|
-
|
|
17
|
+
const DB_URL = process.env.DATABASE_URL;
|
|
18
|
+
if (!DB_URL) {
|
|
19
|
+
console.error('DATABASE_URL is required');
|
|
20
|
+
process.exit(2);
|
|
21
|
+
}
|
|
22
|
+
const SCHEMA = process.env.AQUIFER_SCHEMA || 'public';
|
|
23
|
+
|
|
24
|
+
const DEFAULT_QUERIES = [
|
|
25
|
+
// latin
|
|
26
|
+
'afterburn', 'bootstrap', 'session', 'recall', 'entity', 'OpenCode', 'Jenny', 'Aquifer',
|
|
27
|
+
// CJK short tokens — 最容易暴露 tokenizer 問題
|
|
28
|
+
'記憶', '時區', '去重', '架構', '修復',
|
|
29
|
+
// CJK phrase
|
|
30
|
+
'消化模式', 'daily entries',
|
|
31
|
+
];
|
|
32
|
+
const QUERIES = process.env.DIAGNOSE_QUERIES
|
|
33
|
+
? process.env.DIAGNOSE_QUERIES.split(',').map(s => s.trim()).filter(Boolean)
|
|
34
|
+
: DEFAULT_QUERIES;
|
|
16
35
|
|
|
17
36
|
const pool = new Pool({ connectionString: DB_URL });
|
|
37
|
+
// Quote a SQL identifier: double every embedded double quote, then wrap the
// result in double quotes so it can be interpolated into a query string.
const qi = (ident) => {
  const escaped = ident.replace(/"/g, '""');
  return `"${escaped}"`;
};
|
|
18
38
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
// 1. 看 token 分佈
|
|
25
|
-
console.log('--- 1. Token 分析 ---');
|
|
26
|
-
const tokenSample = await pool.query(`
|
|
27
|
-
SELECT ss.session_id,
|
|
28
|
-
array_length(tsvector_to_array(ss.search_tsv), 1) as token_count,
|
|
29
|
-
left(ss.summary_text, 80) as preview
|
|
30
|
-
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
31
|
-
WHERE ss.search_tsv IS NOT NULL
|
|
32
|
-
ORDER BY ss.updated_at DESC
|
|
33
|
-
LIMIT 10
|
|
34
|
-
`);
|
|
35
|
-
|
|
36
|
-
let totalTokens = 0;
|
|
37
|
-
let sessionCount = 0;
|
|
38
|
-
for (const r of tokenSample.rows) {
|
|
39
|
-
totalTokens += r.token_count || 0;
|
|
40
|
-
sessionCount++;
|
|
41
|
-
console.log(` ${r.session_id?.slice(0, 8)} | ${r.token_count || 0} tokens | ${r.preview}`);
|
|
42
|
-
}
|
|
43
|
-
console.log(` avg: ${sessionCount ? Math.round(totalTokens / sessionCount) : 0} tokens/session\n`);
|
|
39
|
+
/**
 * Format the ratio n/d as a whole-number percentage string.
 *
 * Both operands are coerced with Number() first: row counts read straight
 * from a node-postgres result arrive as strings (COUNT(*) is a bigint), and a
 * strict `d === 0` check against the string '0' would never match, producing
 * "NaN%" for an empty table.
 *
 * @param {number|string} n - numerator (numeric or numeric string)
 * @param {number|string} d - denominator (numeric or numeric string)
 * @returns {string} '—' when both are zero, '∞%' when only the denominator is
 *   zero, otherwise the rounded percentage followed by '%'.
 */
function pct(n, d) {
  const num = Number(n);
  const den = Number(d);
  if (den === 0) return num === 0 ? '—' : '∞%';
  return `${Math.round((num / den) * 100)}%`;
}
|
|
44
43
|
|
|
45
|
-
|
|
46
|
-
console.log(
|
|
44
|
+
async function main() {
|
|
45
|
+
console.log(`=== Aquifer FTS 中文診斷 (schema=${SCHEMA}) ===\n`);
|
|
46
|
+
|
|
47
|
+
// -------------------------------------------------------------------------
|
|
48
|
+
// 0. 覆蓋率:search_text NULL 率 → 看 fallback 觸發比例
|
|
49
|
+
// -------------------------------------------------------------------------
|
|
50
|
+
const cov = await pool.query(`
|
|
51
|
+
SELECT
|
|
52
|
+
COUNT(*) AS total,
|
|
53
|
+
COUNT(*) FILTER (WHERE search_text IS NOT NULL) AS with_text,
|
|
54
|
+
COUNT(*) FILTER (WHERE search_tsv IS NOT NULL) AS with_tsv,
|
|
55
|
+
COUNT(*) FILTER (WHERE search_text IS NULL
|
|
56
|
+
AND search_tsv IS NOT NULL) AS tsv_only
|
|
57
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
58
|
+
`);
|
|
59
|
+
const c = cov.rows[0];
|
|
60
|
+
console.log('--- 0. 搜尋欄位覆蓋率 ---');
|
|
61
|
+
console.log(` total rows : ${c.total}`);
|
|
62
|
+
console.log(` has search_text : ${c.with_text} (${pct(c.with_text, c.total)})`);
|
|
63
|
+
console.log(` has search_tsv : ${c.with_tsv} (${pct(c.with_tsv, c.total)})`);
|
|
64
|
+
console.log(` tsv-only (NULL search_text, falls back to FTS): ${c.tsv_only} (${pct(c.tsv_only, c.total)})\n`);
|
|
65
|
+
|
|
66
|
+
// -------------------------------------------------------------------------
|
|
67
|
+
// 1. Token 範例(tsvector lexeme 粒度觀察)
|
|
68
|
+
// -------------------------------------------------------------------------
|
|
69
|
+
console.log('--- 1. tsvector lexeme 粒度範例(最近 1 筆)---');
|
|
47
70
|
const tokenDetail = await pool.query(`
|
|
48
|
-
SELECT
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
71
|
+
SELECT session_id,
|
|
72
|
+
array_length(tsvector_to_array(search_tsv), 1) AS token_count,
|
|
73
|
+
array_to_string(tsvector_to_array(search_tsv), ' | ') AS tokens
|
|
74
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
75
|
+
WHERE search_tsv IS NOT NULL
|
|
76
|
+
ORDER BY updated_at DESC
|
|
53
77
|
LIMIT 1
|
|
54
78
|
`);
|
|
55
79
|
if (tokenDetail.rows[0]) {
|
|
56
|
-
|
|
57
|
-
const
|
|
58
|
-
// 分類 token
|
|
59
|
-
const all = tokens.split(' | ');
|
|
80
|
+
const r = tokenDetail.rows[0];
|
|
81
|
+
const all = (r.tokens || '').split(' | ').filter(Boolean);
|
|
60
82
|
const cjk = all.filter(t => /[\u4e00-\u9fff]/.test(t));
|
|
61
83
|
const latin = all.filter(t => /^[a-z0-9]/.test(t));
|
|
62
|
-
|
|
63
|
-
console.log(`
|
|
64
|
-
console.log(` CJK
|
|
65
|
-
console.log(`
|
|
84
|
+
console.log(` session: ${String(r.session_id).slice(0, 8)} | total tokens: ${r.token_count || 0}`);
|
|
85
|
+
console.log(` latin: ${latin.length} | cjk-containing: ${cjk.length}`);
|
|
86
|
+
console.log(` CJK lexemes (前 15): ${cjk.slice(0, 15).join(' | ')}`);
|
|
87
|
+
console.log(` → CJK lexeme 若是 phrase 級(整句無空白),簡 tokenizer 對中文短 query 會 miss\n`);
|
|
88
|
+
} else {
|
|
89
|
+
console.log(' (no rows)\n');
|
|
66
90
|
}
|
|
67
91
|
|
|
68
|
-
//
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
const ftsResult = await pool.query(`
|
|
97
|
-
SELECT COUNT(*) as cnt
|
|
98
|
-
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
99
|
-
WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
|
|
100
|
-
`, [q]);
|
|
101
|
-
const ftsHits = parseInt(ftsResult.rows[0].cnt);
|
|
102
|
-
|
|
103
|
-
// 同時看 summary_text ILIKE 能找到幾筆(ground truth)
|
|
104
|
-
const ilikeResult = await pool.query(`
|
|
105
|
-
SELECT COUNT(*) as cnt
|
|
106
|
-
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
107
|
-
WHERE ss.summary_text ILIKE $1
|
|
108
|
-
OR ss.structured_summary::text ILIKE $1
|
|
109
|
-
`, [`%${q}%`]);
|
|
110
|
-
const ilikeHits = parseInt(ilikeResult.rows[0].cnt);
|
|
111
|
-
|
|
112
|
-
const ftsRecall = ilikeHits > 0 ? Math.round(ftsHits / ilikeHits * 100) : (ftsHits === 0 ? 100 : 0);
|
|
113
|
-
const status = ftsHits === ilikeHits ? '✓' : (ftsHits < ilikeHits ? '✗ MISS' : '?');
|
|
114
|
-
console.log(` "${q}" | FTS: ${ftsHits} | ILIKE: ${ilikeHits} | recall: ${ftsRecall}% | ${status}`);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// 4. FTS 對 RRF 的貢獻度
|
|
118
|
-
console.log('\n--- 4. FTS 在 hybrid search 中的貢獻度 ---');
|
|
119
|
-
// 跑幾個查詢,看 FTS 跟 vector 的 session 重疊率
|
|
120
|
-
const overlapQueries = ['afterburn', 'bootstrap', '記憶', 'recall', 'entity'];
|
|
121
|
-
for (const q of overlapQueries) {
|
|
122
|
-
const ftsResult = await pool.query(`
|
|
123
|
-
SELECT ss.session_id
|
|
124
|
-
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
125
|
-
JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
|
|
126
|
-
WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
|
|
127
|
-
AND s.processing_status = 'succeeded'
|
|
128
|
-
ORDER BY ts_rank(ss.search_tsv, plainto_tsquery('simple', $1)) DESC
|
|
129
|
-
LIMIT 10
|
|
130
|
-
`, [q]);
|
|
131
|
-
const ftsIds = new Set(ftsResult.rows.map(r => r.session_id));
|
|
132
|
-
|
|
133
|
-
// vector search (if embedding available)
|
|
134
|
-
const embResult = await pool.query(`
|
|
135
|
-
SELECT ss.session_id
|
|
136
|
-
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
137
|
-
JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
|
|
138
|
-
WHERE ss.embedding IS NOT NULL
|
|
139
|
-
AND s.processing_status = 'succeeded'
|
|
140
|
-
ORDER BY ss.embedding <=> (
|
|
141
|
-
SELECT ss2.embedding FROM ${qi(SCHEMA)}.session_summaries ss2
|
|
142
|
-
WHERE ss2.search_tsv @@ plainto_tsquery('simple', $1)
|
|
143
|
-
ORDER BY ts_rank(ss2.search_tsv, plainto_tsquery('simple', $1)) DESC
|
|
144
|
-
LIMIT 1
|
|
92
|
+
// -------------------------------------------------------------------------
|
|
93
|
+
// 2. 主路徑 vs fallback:binary match 比對
|
|
94
|
+
//
|
|
95
|
+
// Ground truth = search_text ILIKE '%q%'(所有源欄位拼出的純文字 superset)
|
|
96
|
+
// 主路徑 = search_text ILIKE(GIN trgm 加速,語意等價 ILIKE)
|
|
97
|
+
// Fallback = search_tsv @@ plainto_tsquery('simple', q)
|
|
98
|
+
// -------------------------------------------------------------------------
|
|
99
|
+
console.log('--- 2. 主路徑(trigram)vs fallback(tsvector)binary match ---');
|
|
100
|
+
console.log(' query | truth | trgm | tsv | trgm% | tsv% | tsv-extra');
|
|
101
|
+
console.log(' ' + '-'.repeat(82));
|
|
102
|
+
|
|
103
|
+
const rowCount = await pool.query(
|
|
104
|
+
`SELECT COUNT(*) AS n FROM ${qi(SCHEMA)}.session_summaries WHERE search_text IS NOT NULL`
|
|
105
|
+
);
|
|
106
|
+
const withTextN = parseInt(rowCount.rows[0].n, 10);
|
|
107
|
+
console.log(` (ground truth 基數:含 search_text 的 row ${withTextN})`);
|
|
108
|
+
|
|
109
|
+
const summary = [];
|
|
110
|
+
for (const q of QUERIES) {
|
|
111
|
+
const r = await pool.query(
|
|
112
|
+
`
|
|
113
|
+
WITH base AS (
|
|
114
|
+
SELECT search_text,
|
|
115
|
+
search_tsv,
|
|
116
|
+
(search_text ILIKE '%' || $1 || '%') AS trgm_hit,
|
|
117
|
+
(search_tsv @@ plainto_tsquery('simple', $2)) AS tsv_hit
|
|
118
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
119
|
+
WHERE search_text IS NOT NULL
|
|
145
120
|
)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
121
|
+
SELECT
|
|
122
|
+
COUNT(*) FILTER (WHERE trgm_hit) AS truth,
|
|
123
|
+
COUNT(*) FILTER (WHERE trgm_hit) AS trgm,
|
|
124
|
+
COUNT(*) FILTER (WHERE tsv_hit) AS tsv,
|
|
125
|
+
COUNT(*) FILTER (WHERE tsv_hit AND NOT trgm_hit) AS tsv_extra
|
|
126
|
+
FROM base
|
|
127
|
+
`,
|
|
128
|
+
[q.replace(/[%_\\]/g, '\\$&'), q]
|
|
129
|
+
);
|
|
130
|
+
const { truth, trgm, tsv, tsv_extra } = r.rows[0];
|
|
131
|
+
const T = parseInt(truth, 10);
|
|
132
|
+
const A = parseInt(trgm, 10);
|
|
133
|
+
const B = parseInt(tsv, 10);
|
|
134
|
+
const E = parseInt(tsv_extra, 10);
|
|
135
|
+
summary.push({ q, T, A, B, E });
|
|
136
|
+
console.log(
|
|
137
|
+
` ${q.padEnd(19)} | ${String(T).padStart(5)} | ${String(A).padStart(5)} | ${String(B).padStart(5)} | ${pct(A, T).padStart(5)} | ${pct(B, T).padStart(5)} | ${String(E).padStart(5)}`
|
|
138
|
+
);
|
|
139
|
+
}
|
|
140
|
+
console.log(' (tsv-extra = tsvector 命中但 trigram 沒命中 → 通常是 0,代表 tsv 對整體搜尋無額外貢獻)\n');
|
|
141
|
+
|
|
142
|
+
// -------------------------------------------------------------------------
|
|
143
|
+
// 3. Ranking 品質對比:舊 ranking (similarity only) vs 新 ranking (substr-hit first)
|
|
144
|
+
// -------------------------------------------------------------------------
|
|
145
|
+
console.log('--- 3. Ranking 品質對比:top-5 substring-hit 命中率 ---');
|
|
146
|
+
console.log(' query | truth | old (sim only) | new (hit+sim)');
|
|
147
|
+
console.log(' ' + '-'.repeat(70));
|
|
148
|
+
for (const q of QUERIES) {
|
|
149
|
+
const like = q.replace(/[%_\\]/g, '\\$&');
|
|
150
|
+
const truthR = await pool.query(
|
|
151
|
+
`SELECT COUNT(*) AS n
|
|
152
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
153
|
+
WHERE search_text ILIKE '%' || $1 || '%'`,
|
|
154
|
+
[like]
|
|
155
|
+
);
|
|
156
|
+
const T = parseInt(truthR.rows[0].n, 10);
|
|
157
|
+
|
|
158
|
+
const oldR = await pool.query(
|
|
159
|
+
`
|
|
160
|
+
SELECT (search_text ILIKE '%' || $1 || '%') AS substr_hit
|
|
161
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
162
|
+
WHERE search_text IS NOT NULL
|
|
163
|
+
ORDER BY similarity(search_text, $2) DESC
|
|
164
|
+
LIMIT 5
|
|
165
|
+
`,
|
|
166
|
+
[like, q]
|
|
167
|
+
);
|
|
168
|
+
const oldHits = oldR.rows.filter(x => x.substr_hit).length;
|
|
169
|
+
|
|
170
|
+
const newR = await pool.query(
|
|
171
|
+
`
|
|
172
|
+
SELECT (search_text ILIKE '%' || $1 || '%') AS substr_hit
|
|
173
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
174
|
+
WHERE search_text IS NOT NULL
|
|
175
|
+
ORDER BY
|
|
176
|
+
(search_text ILIKE '%' || $1 || '%') DESC,
|
|
177
|
+
similarity(search_text, $2) DESC
|
|
178
|
+
LIMIT 5
|
|
179
|
+
`,
|
|
180
|
+
[like, q]
|
|
181
|
+
);
|
|
182
|
+
const newHits = newR.rows.filter(x => x.substr_hit).length;
|
|
183
|
+
|
|
184
|
+
const expected = Math.min(5, T);
|
|
185
|
+
console.log(
|
|
186
|
+
` ${q.padEnd(19)} | ${String(T).padStart(5)} | ${String(oldHits).padStart(3)}/5 → ${String(expected).padStart(1)}/5 ${oldHits < expected ? '✗' : '✓'} | ${String(newHits).padStart(3)}/5 ${newHits < expected ? '✗' : '✓'}`
|
|
187
|
+
);
|
|
155
188
|
}
|
|
189
|
+
console.log(' (truth = 含該字串的 row 數;ideal top-5 substr-hit = min(truth, 5))');
|
|
156
190
|
|
|
157
191
|
await pool.end();
|
|
158
192
|
console.log('\n=== 完成 ===');
|
|
159
193
|
}
|
|
160
194
|
|
|
161
|
-
|
|
195
|
+
main().catch(err => { console.error(err); process.exit(1); });
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Aquifer vector recall 診斷
|
|
5
|
+
*
|
|
6
|
+
* 驗 summary-vector + turn-vector 兩路 infrastructure:
|
|
7
|
+
* - embedding coverage
|
|
8
|
+
* - vector dim 是否一致(summary vs turn)
|
|
9
|
+
* - self-retrieval sanity(拿自己 embedding 當 query,top-1 distance 應 ≈ 0)
|
|
10
|
+
*
|
|
11
|
+
* env:
|
|
12
|
+
* DATABASE_URL — required
|
|
13
|
+
* AQUIFER_SCHEMA — default 'public'
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const { Pool } = require('pg');
|
|
17
|
+
|
|
18
|
+
const DB_URL = process.env.DATABASE_URL;
|
|
19
|
+
if (!DB_URL) {
|
|
20
|
+
console.error('DATABASE_URL is required');
|
|
21
|
+
process.exit(2);
|
|
22
|
+
}
|
|
23
|
+
const SCHEMA = process.env.AQUIFER_SCHEMA || 'public';
|
|
24
|
+
|
|
25
|
+
const pool = new Pool({ connectionString: DB_URL });
|
|
26
|
+
// Produce a double-quoted SQL identifier; any double quote inside the name is
// escaped by doubling it before the surrounding quotes are added.
const qi = (name) => {
  const doubled = name.replace(/"/g, '""');
  return '"' + doubled + '"';
};
|
|
27
|
+
/**
 * Format the ratio n/d as a whole-number percentage string.
 *
 * Operands are coerced with Number() because counts taken directly from a
 * node-postgres row are strings (COUNT returns bigint); comparing the string
 * '0' with `=== 0` never matches and would yield "NaN%" on an empty table.
 *
 * @param {number|string} n - numerator (numeric or numeric string)
 * @param {number|string} d - denominator (numeric or numeric string)
 * @returns {string} '—' when both are zero, '∞%' when only the denominator is
 *   zero, otherwise the rounded percentage followed by '%'.
 */
const pct = (n, d) => {
  const num = Number(n);
  const den = Number(d);
  if (den === 0) return num === 0 ? '—' : '∞%';
  return `${Math.round((num / den) * 100)}%`;
};
|
|
28
|
+
// Turn a value into a one-line preview for console output: falsy input gives
// an empty string; otherwise every whitespace run becomes a single space and
// the result is cut to at most 70 characters.
const clean = (s) => {
  if (!s) return '';
  const singleLine = String(s).replace(/\s+/g, ' ');
  return singleLine.slice(0, 70);
};
|
|
29
|
+
|
|
30
|
+
async function main() {
|
|
31
|
+
console.log(`=== Aquifer vector recall 診斷 (schema=${SCHEMA}) ===\n`);
|
|
32
|
+
|
|
33
|
+
// -------------------------------------------------------------------------
|
|
34
|
+
// 1. Summary embedding coverage + dim
|
|
35
|
+
// -------------------------------------------------------------------------
|
|
36
|
+
const s = (await pool.query(`
|
|
37
|
+
SELECT
|
|
38
|
+
COUNT(*) AS total,
|
|
39
|
+
COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS with_emb,
|
|
40
|
+
MIN(vector_dims(embedding)) AS min_dim,
|
|
41
|
+
MAX(vector_dims(embedding)) AS max_dim
|
|
42
|
+
FROM ${qi(SCHEMA)}.session_summaries
|
|
43
|
+
`)).rows[0];
|
|
44
|
+
console.log('--- 1. session_summaries.embedding ---');
|
|
45
|
+
console.log(` total ${s.total} | with_emb ${s.with_emb} (${pct(s.with_emb, s.total)})`);
|
|
46
|
+
const summaryDim = s.min_dim;
|
|
47
|
+
console.log(` dim min=${s.min_dim} max=${s.max_dim}${s.min_dim !== s.max_dim ? ' ⚠ 不一致' : ''}\n`);
|
|
48
|
+
|
|
49
|
+
// -------------------------------------------------------------------------
|
|
50
|
+
// 2. Turn embedding coverage + dim
|
|
51
|
+
// -------------------------------------------------------------------------
|
|
52
|
+
const t = (await pool.query(`
|
|
53
|
+
SELECT
|
|
54
|
+
COUNT(*) AS total,
|
|
55
|
+
COUNT(DISTINCT session_row_id) AS distinct_sessions,
|
|
56
|
+
MIN(vector_dims(embedding)) AS min_dim,
|
|
57
|
+
MAX(vector_dims(embedding)) AS max_dim
|
|
58
|
+
FROM ${qi(SCHEMA)}.turn_embeddings
|
|
59
|
+
`)).rows[0];
|
|
60
|
+
console.log('--- 2. turn_embeddings.embedding ---');
|
|
61
|
+
console.log(` total turns ${t.total} | distinct sessions ${t.distinct_sessions}`);
|
|
62
|
+
console.log(` dim min=${t.min_dim} max=${t.max_dim}${t.min_dim !== t.max_dim ? ' ⚠ 不一致' : ''}`);
|
|
63
|
+
const turnDim = t.min_dim;
|
|
64
|
+
if (turnDim && summaryDim && turnDim !== summaryDim) {
|
|
65
|
+
console.log(` ⚠ summary dim ${summaryDim} != turn dim ${turnDim} → query embedding 只會對得上其中一條`);
|
|
66
|
+
}
|
|
67
|
+
console.log();
|
|
68
|
+
|
|
69
|
+
// -------------------------------------------------------------------------
|
|
70
|
+
// 3. 缺 turn 但有 summary 的 session 比例
|
|
71
|
+
// -------------------------------------------------------------------------
|
|
72
|
+
const gap = (await pool.query(`
|
|
73
|
+
SELECT
|
|
74
|
+
COUNT(DISTINCT ss.session_row_id) AS with_summary_emb,
|
|
75
|
+
COUNT(DISTINCT te.session_row_id) AS with_turn_emb,
|
|
76
|
+
COUNT(DISTINCT ss.session_row_id) FILTER (
|
|
77
|
+
WHERE te.session_row_id IS NULL
|
|
78
|
+
) AS summary_no_turn
|
|
79
|
+
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
80
|
+
LEFT JOIN ${qi(SCHEMA)}.turn_embeddings te
|
|
81
|
+
ON te.session_row_id = ss.session_row_id
|
|
82
|
+
WHERE ss.embedding IS NOT NULL
|
|
83
|
+
`)).rows[0];
|
|
84
|
+
console.log('--- 3. 兩路覆蓋差 ---');
|
|
85
|
+
console.log(` sessions with summary emb : ${gap.with_summary_emb}`);
|
|
86
|
+
console.log(` sessions with turn emb : ${gap.with_turn_emb}`);
|
|
87
|
+
console.log(` summary-only (no turns) : ${gap.summary_no_turn} (${pct(gap.summary_no_turn, gap.with_summary_emb)})`);
|
|
88
|
+
console.log(' (summary-only 是常見的—某些 session 沒有合適的 user turn 可 embed)\n');
|
|
89
|
+
|
|
90
|
+
// -------------------------------------------------------------------------
|
|
91
|
+
// 4. Self-retrieval sanity: summary vector
|
|
92
|
+
// 拿最近一筆 summary.embedding 當 query,top-1 應該是自己且 distance ≈ 0
|
|
93
|
+
// -------------------------------------------------------------------------
|
|
94
|
+
console.log('--- 4. Summary vector self-retrieval sanity ---');
|
|
95
|
+
const seedS = (await pool.query(`
|
|
96
|
+
SELECT s.session_id, ss.summary_text, ss.embedding
|
|
97
|
+
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
98
|
+
JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
|
|
99
|
+
WHERE ss.embedding IS NOT NULL
|
|
100
|
+
ORDER BY ss.updated_at DESC
|
|
101
|
+
LIMIT 1
|
|
102
|
+
`)).rows[0];
|
|
103
|
+
|
|
104
|
+
if (!seedS) {
|
|
105
|
+
console.log(' (no summary with embedding)\n');
|
|
106
|
+
} else {
|
|
107
|
+
const r = await pool.query(`
|
|
108
|
+
SELECT s.session_id,
|
|
109
|
+
(ss.embedding <=> $1::vector) AS distance,
|
|
110
|
+
ss.summary_text
|
|
111
|
+
FROM ${qi(SCHEMA)}.session_summaries ss
|
|
112
|
+
JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
|
|
113
|
+
WHERE ss.embedding IS NOT NULL
|
|
114
|
+
ORDER BY ss.embedding <=> $1::vector ASC
|
|
115
|
+
LIMIT 5
|
|
116
|
+
`, [seedS.embedding]);
|
|
117
|
+
console.log(` seed : ${String(seedS.session_id).slice(0, 8)} | ${clean(seedS.summary_text)}`);
|
|
118
|
+
for (const row of r.rows) {
|
|
119
|
+
const mark = String(row.session_id) === String(seedS.session_id) ? ' ← self' : '';
|
|
120
|
+
console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} | ${clean(row.summary_text)}${mark}`);
|
|
121
|
+
}
|
|
122
|
+
const top = r.rows[0];
|
|
123
|
+
const selfOK = top && String(top.session_id) === String(seedS.session_id) && Number(top.distance) < 0.001;
|
|
124
|
+
console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// -------------------------------------------------------------------------
|
|
128
|
+
// 5. Self-retrieval sanity: turn vector
|
|
129
|
+
// -------------------------------------------------------------------------
|
|
130
|
+
console.log('--- 5. Turn vector self-retrieval sanity ---');
|
|
131
|
+
const seedT = (await pool.query(`
|
|
132
|
+
SELECT te.session_row_id, te.turn_index, te.content_text, te.embedding,
|
|
133
|
+
s.session_id
|
|
134
|
+
FROM ${qi(SCHEMA)}.turn_embeddings te
|
|
135
|
+
JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
|
|
136
|
+
ORDER BY te.created_at DESC
|
|
137
|
+
LIMIT 1
|
|
138
|
+
`)).rows[0];
|
|
139
|
+
|
|
140
|
+
if (!seedT) {
|
|
141
|
+
console.log(' (no turn embeddings)\n');
|
|
142
|
+
} else {
|
|
143
|
+
const r = await pool.query(`
|
|
144
|
+
SELECT s.session_id, te.turn_index, te.content_text,
|
|
145
|
+
(te.embedding <=> $1::vector) AS distance
|
|
146
|
+
FROM ${qi(SCHEMA)}.turn_embeddings te
|
|
147
|
+
JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
|
|
148
|
+
ORDER BY te.embedding <=> $1::vector ASC
|
|
149
|
+
LIMIT 5
|
|
150
|
+
`, [seedT.embedding]);
|
|
151
|
+
console.log(` seed : ${String(seedT.session_id).slice(0, 8)} turn=${seedT.turn_index} | ${clean(seedT.content_text)}`);
|
|
152
|
+
for (const row of r.rows) {
|
|
153
|
+
const self = String(row.session_id) === String(seedT.session_id) && row.turn_index === seedT.turn_index;
|
|
154
|
+
console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${self ? ' ← self' : ''}`);
|
|
155
|
+
}
|
|
156
|
+
const top = r.rows[0];
|
|
157
|
+
const selfOK = top && Number(top.distance) < 0.001;
|
|
158
|
+
console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// -------------------------------------------------------------------------
|
|
162
|
+
// 6. 跨路比較:用同一筆 summary embedding 去 turn table 找鄰居
|
|
163
|
+
// 只在 dim 一致時做;看 summary 代表 vs 其最近 turn 的距離分佈
|
|
164
|
+
// -------------------------------------------------------------------------
|
|
165
|
+
if (summaryDim && turnDim && summaryDim === turnDim && seedS) {
|
|
166
|
+
console.log('--- 6. Cross-path:summary emb → turn search (dim 相同才跑) ---');
|
|
167
|
+
const r = await pool.query(`
|
|
168
|
+
SELECT DISTINCT ON (te.session_row_id)
|
|
169
|
+
s.session_id, te.turn_index,
|
|
170
|
+
(te.embedding <=> $1::vector) AS distance,
|
|
171
|
+
te.content_text
|
|
172
|
+
FROM ${qi(SCHEMA)}.turn_embeddings te
|
|
173
|
+
JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
|
|
174
|
+
ORDER BY te.session_row_id, te.embedding <=> $1::vector ASC
|
|
175
|
+
`, [seedS.embedding]);
|
|
176
|
+
r.rows.sort((a, b) => Number(a.distance) - Number(b.distance));
|
|
177
|
+
for (const row of r.rows.slice(0, 5)) {
|
|
178
|
+
const mark = String(row.session_id) === String(seedS.session_id) ? ' ← same session' : '';
|
|
179
|
+
console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${mark}`);
|
|
180
|
+
}
|
|
181
|
+
console.log(' (不要求 top-1 是 seed session,兩路語意不同;只看距離是否合理 ≪ 1)\n');
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
await pool.end();
|
|
185
|
+
console.log('=== 完成 ===');
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
main().catch(err => { console.error(err); process.exit(1); });
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Aquifer — Install drop-in OpenClaw extension
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# bash scripts/install-openclaw.sh [OPENCLAW_HOME]
|
|
6
|
+
#
|
|
7
|
+
# Default OPENCLAW_HOME: $HOME/.openclaw
|
|
8
|
+
#
|
|
9
|
+
# What it does:
|
|
10
|
+
# 1. Creates / overwrites $OPENCLAW_HOME/extensions/aquifer-memory/
|
|
11
|
+
# as a symlink to <this_package>/consumers/openclaw-ext/
|
|
12
|
+
# 2. Prints follow-up instructions: set the .env keys, restart the gateway.
|
|
13
|
+
#
|
|
14
|
+
# Idempotent; safe to re-run.
|
|
15
|
+
|
|
16
|
+
set -euo pipefail
|
|
17
|
+
|
|
18
|
+
OPENCLAW_HOME="${1:-${OPENCLAW_HOME:-$HOME/.openclaw}}"
|
|
19
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
20
|
+
PKG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
21
|
+
EXT_SRC="$PKG_ROOT/consumers/openclaw-ext"
|
|
22
|
+
EXT_DEST="$OPENCLAW_HOME/extensions/aquifer-memory"
|
|
23
|
+
|
|
24
|
+
if [[ ! -d "$EXT_SRC" ]]; then
|
|
25
|
+
echo "error: $EXT_SRC not found (expected inside the Aquifer package)" >&2
|
|
26
|
+
exit 1
|
|
27
|
+
fi
|
|
28
|
+
|
|
29
|
+
if [[ ! -d "$OPENCLAW_HOME" ]]; then
|
|
30
|
+
echo "error: OPENCLAW_HOME=$OPENCLAW_HOME not found" >&2
|
|
31
|
+
exit 1
|
|
32
|
+
fi
|
|
33
|
+
|
|
34
|
+
mkdir -p "$OPENCLAW_HOME/extensions"
|
|
35
|
+
|
|
36
|
+
if [[ -L "$EXT_DEST" || -e "$EXT_DEST" ]]; then
|
|
37
|
+
echo "note: $EXT_DEST already exists — replacing"
|
|
38
|
+
rm -rf "$EXT_DEST"
|
|
39
|
+
fi
|
|
40
|
+
|
|
41
|
+
ln -s "$EXT_SRC" "$EXT_DEST"
|
|
42
|
+
echo "ok: linked $EXT_DEST → $EXT_SRC"
|
|
43
|
+
|
|
44
|
+
cat <<'EOF'
|
|
45
|
+
|
|
46
|
+
Next steps:
|
|
47
|
+
1. Edit $OPENCLAW_HOME/.env and set:
|
|
48
|
+
DATABASE_URL=postgresql://user:pass@host:5432/db
|
|
49
|
+
EMBED_PROVIDER=ollama # or openai
|
|
50
|
+
AQUIFER_LLM_PROVIDER=minimax # or openai / openrouter / opencode
|
|
51
|
+
MINIMAX_API_KEY=... # (or the key for your chosen provider)
|
|
52
|
+
# Optional:
|
|
53
|
+
AQUIFER_SCHEMA=my_namespace
|
|
54
|
+
AQUIFER_PERSONA=/path/to/host-local/persona-module
|
|
55
|
+
2. Restart OpenClaw:
|
|
56
|
+
systemctl --user restart openclaw-gateway
|
|
57
|
+
3. Verify:
|
|
58
|
+
journalctl --user -u openclaw-gateway -f | grep aquifer-memory
|
|
59
|
+
EOF
|
package/scripts/smoke.mjs
CHANGED
|
@@ -99,8 +99,8 @@ try {
|
|
|
99
99
|
const { Pool } = require('pg');
|
|
100
100
|
const pool = new Pool({ connectionString: config.db.url });
|
|
101
101
|
const schema = config.schema || 'aquifer';
|
|
102
|
-
await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id
|
|
103
|
-
await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id
|
|
102
|
+
await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id = $1`, [SESSION_ID]);
|
|
103
|
+
await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id = $1`, [SESSION_ID]);
|
|
104
104
|
await pool.query(`DELETE FROM ${schema}.sessions WHERE session_id = $1`, [SESSION_ID]);
|
|
105
105
|
await pool.end();
|
|
106
106
|
console.log(' OK');
|