@shadowforge0/aquifer-memory 1.0.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -20
- package/consumers/claude-code.js +117 -0
- package/consumers/cli.js +17 -0
- package/consumers/default/daily-entries.js +196 -0
- package/consumers/default/index.js +282 -0
- package/consumers/default/prompts/summary.js +153 -0
- package/consumers/mcp.js +3 -23
- package/consumers/miranda/context-inject.js +119 -0
- package/consumers/miranda/daily-entries.js +224 -0
- package/consumers/miranda/index.js +353 -0
- package/consumers/miranda/instance.js +55 -0
- package/consumers/miranda/llm.js +99 -0
- package/consumers/miranda/prompts/summary.js +303 -0
- package/consumers/miranda/recall-format.js +74 -0
- package/consumers/miranda/workspace-files.js +91 -0
- package/consumers/openclaw-ext/index.js +38 -0
- package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
- package/consumers/openclaw-ext/package.json +10 -0
- package/consumers/openclaw-plugin.js +66 -74
- package/consumers/opencode.js +21 -24
- package/consumers/shared/autodetect.js +64 -0
- package/consumers/shared/entity-parser.js +119 -0
- package/consumers/shared/ingest.js +148 -0
- package/consumers/shared/llm-autodetect.js +137 -0
- package/consumers/shared/normalize.js +129 -0
- package/consumers/shared/recall-format.js +110 -0
- package/core/aquifer.js +200 -82
- package/core/entity.js +29 -17
- package/core/storage.js +116 -45
- package/docs/postprocess-contract.md +132 -0
- package/index.js +9 -1
- package/package.json +23 -2
- package/pipeline/_http.js +1 -1
- package/pipeline/consolidation/apply.js +176 -0
- package/pipeline/consolidation/index.js +21 -0
- package/pipeline/extract-entities.js +2 -2
- package/pipeline/rerank.js +1 -1
- package/pipeline/summarize.js +4 -1
- package/schema/001-base.sql +61 -24
- package/schema/002-entities.sql +17 -3
- package/schema/004-facts.sql +67 -0
- package/scripts/diagnose-fts-zh.js +168 -134
- package/scripts/diagnose-vector.js +188 -0
- package/scripts/install-openclaw.sh +59 -0
- package/scripts/smoke.mjs +2 -2
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Aquifer vector recall 診斷
|
|
5
|
+
*
|
|
6
|
+
* 驗 summary-vector + turn-vector 兩路 infrastructure:
|
|
7
|
+
* - embedding coverage
|
|
8
|
+
* - vector dim 是否一致(summary vs turn)
|
|
9
|
+
* - self-retrieval sanity(拿自己 embedding 當 query,top-1 distance 應 ≈ 0)
|
|
10
|
+
*
|
|
11
|
+
* env:
|
|
12
|
+
* DATABASE_URL — required
|
|
13
|
+
* AQUIFER_SCHEMA — default 'public'
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const { Pool } = require('pg');
|
|
17
|
+
|
|
18
|
+
// Runtime configuration comes from the environment:
//   DATABASE_URL   — connection string, mandatory
//   AQUIFER_SCHEMA — schema holding the Aquifer tables, falls back to 'public'
const { DATABASE_URL: DB_URL, AQUIFER_SCHEMA } = process.env;

// Without a connection string nothing below can run; exit with status 2.
if (!DB_URL) {
  console.error('DATABASE_URL is required');
  process.exit(2);
}

const SCHEMA = AQUIFER_SCHEMA || 'public';

// Single shared pool used by every diagnostic query.
const pool = new Pool({ connectionString: DB_URL });
|
|
26
|
+
// Quote a SQL identifier: wrap it in double quotes and double every embedded double quote.
const qi = (s) => `"${s.split('"').join('""')}"`;
|
|
27
|
+
/**
 * Format n/d as a whole-number percentage string.
 *
 * Both arguments are coerced with Number() first: node-postgres hands back
 * COUNT(*) (bigint) as a string, and '0' === 0 is false, so without coercion
 * the zero-denominator guard is skipped and the output becomes 'NaN%'.
 *
 * @param {number|string} n - numerator
 * @param {number|string} d - denominator
 * @returns {string} '—' for 0/0, '∞%' for non-zero/0, otherwise e.g. '25%'
 */
const pct = (n, d) => {
  const num = Number(n);
  const den = Number(d);
  if (den === 0) {
    return num === 0 ? '—' : '∞%';
  }
  return `${Math.round((num / den) * 100)}%`;
};
|
|
28
|
+
// One-line preview of a text value: falsy input yields '', otherwise every
// whitespace run is collapsed to a single space and the result is cut at 70 chars.
const clean = (s) => {
  if (!s) {
    return '';
  }
  const singleLine = String(s).replace(/\s+/g, ' ');
  return singleLine.slice(0, 70);
};
|
|
29
|
+
|
|
30
|
+
/**
 * Run the vector-recall diagnostics against the configured schema and print a report.
 *
 * Sections, in order:
 *   1. session_summaries embedding coverage and dimension
 *   2. turn_embeddings row counts and dimension
 *   3. sessions that have a summary embedding but no turn embeddings
 *   4. self-retrieval check using the most recently updated summary embedding
 *   5. self-retrieval check using the most recently created turn embedding
 *   6. cross-path search (summary embedding against turn embeddings), only when dims match
 *
 * Uses the module-level `pool`, `SCHEMA`, `qi`, `pct` and `clean`, and ends the
 * pool after the last section.
 *
 * @returns {Promise<void>} rejects if any query fails; the pool is not ended in that case.
 */
async function main() {
  console.log(`=== Aquifer vector recall 診斷 (schema=${SCHEMA}) ===\n`);

  // -------------------------------------------------------------------------
  // 1. Summary embedding coverage + dim
  // -------------------------------------------------------------------------
  const s = (await pool.query(`
    SELECT
      COUNT(*) AS total,
      COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS with_emb,
      MIN(vector_dims(embedding)) AS min_dim,
      MAX(vector_dims(embedding)) AS max_dim
    FROM ${qi(SCHEMA)}.session_summaries
  `)).rows[0];
  console.log('--- 1. session_summaries.embedding ---');
  // COUNT(*) is bigint, which node-postgres returns as a string; convert before
  // handing the values to pct() so its zero-denominator guard can match.
  console.log(` total ${s.total} | with_emb ${s.with_emb} (${pct(Number(s.with_emb), Number(s.total))})`);
  const summaryDim = s.min_dim;
  console.log(` dim min=${s.min_dim} max=${s.max_dim}${s.min_dim !== s.max_dim ? ' ⚠ 不一致' : ''}\n`);

  // -------------------------------------------------------------------------
  // 2. Turn embedding coverage + dim
  // -------------------------------------------------------------------------
  const t = (await pool.query(`
    SELECT
      COUNT(*) AS total,
      COUNT(DISTINCT session_row_id) AS distinct_sessions,
      MIN(vector_dims(embedding)) AS min_dim,
      MAX(vector_dims(embedding)) AS max_dim
    FROM ${qi(SCHEMA)}.turn_embeddings
  `)).rows[0];
  console.log('--- 2. turn_embeddings.embedding ---');
  console.log(` total turns ${t.total} | distinct sessions ${t.distinct_sessions}`);
  console.log(` dim min=${t.min_dim} max=${t.max_dim}${t.min_dim !== t.max_dim ? ' ⚠ 不一致' : ''}`);
  const turnDim = t.min_dim;
  // A query embedding has a single dimension, so it can only match one of the two tables.
  if (turnDim && summaryDim && turnDim !== summaryDim) {
    console.log(` ⚠ summary dim ${summaryDim} != turn dim ${turnDim} → query embedding 只會對得上其中一條`);
  }
  console.log();

  // -------------------------------------------------------------------------
  // 3. Share of sessions that have a summary embedding but no turn embeddings
  // -------------------------------------------------------------------------
  const gap = (await pool.query(`
    SELECT
      COUNT(DISTINCT ss.session_row_id) AS with_summary_emb,
      COUNT(DISTINCT te.session_row_id) AS with_turn_emb,
      COUNT(DISTINCT ss.session_row_id) FILTER (
        WHERE te.session_row_id IS NULL
      ) AS summary_no_turn
    FROM ${qi(SCHEMA)}.session_summaries ss
    LEFT JOIN ${qi(SCHEMA)}.turn_embeddings te
      ON te.session_row_id = ss.session_row_id
    WHERE ss.embedding IS NOT NULL
  `)).rows[0];
  console.log('--- 3. 兩路覆蓋差 ---');
  console.log(` sessions with summary emb : ${gap.with_summary_emb}`);
  console.log(` sessions with turn emb : ${gap.with_turn_emb}`);
  console.log(` summary-only (no turns) : ${gap.summary_no_turn} (${pct(Number(gap.summary_no_turn), Number(gap.with_summary_emb))})`);
  console.log(' (summary-only 是常見的—某些 session 沒有合適的 user turn 可 embed)\n');

  // -------------------------------------------------------------------------
  // 4. Self-retrieval sanity: summary vector
  //    Use the latest summary embedding as the query; top-1 should be that same
  //    row with distance ≈ 0.
  // -------------------------------------------------------------------------
  console.log('--- 4. Summary vector self-retrieval sanity ---');
  const seedS = (await pool.query(`
    SELECT s.session_id, ss.summary_text, ss.embedding
    FROM ${qi(SCHEMA)}.session_summaries ss
    JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
    WHERE ss.embedding IS NOT NULL
    ORDER BY ss.updated_at DESC
    LIMIT 1
  `)).rows[0];

  if (!seedS) {
    console.log(' (no summary with embedding)\n');
  } else {
    const r = await pool.query(`
      SELECT s.session_id,
             (ss.embedding <=> $1::vector) AS distance,
             ss.summary_text
      FROM ${qi(SCHEMA)}.session_summaries ss
      JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
      WHERE ss.embedding IS NOT NULL
      ORDER BY ss.embedding <=> $1::vector ASC
      LIMIT 5
    `, [seedS.embedding]);
    console.log(` seed : ${String(seedS.session_id).slice(0, 8)} | ${clean(seedS.summary_text)}`);
    for (const row of r.rows) {
      const mark = String(row.session_id) === String(seedS.session_id) ? ' ← self' : '';
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} | ${clean(row.summary_text)}${mark}`);
    }
    const top = r.rows[0];
    const selfOK = top && String(top.session_id) === String(seedS.session_id) && Number(top.distance) < 0.001;
    console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
  }

  // -------------------------------------------------------------------------
  // 5. Self-retrieval sanity: turn vector
  // -------------------------------------------------------------------------
  console.log('--- 5. Turn vector self-retrieval sanity ---');
  // Rows without an embedding are excluded (as in section 4): a NULL seed would make
  // every distance NULL, and Number(null) === 0 would then report a false "YES".
  const seedT = (await pool.query(`
    SELECT te.session_row_id, te.turn_index, te.content_text, te.embedding,
           s.session_id
    FROM ${qi(SCHEMA)}.turn_embeddings te
    JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
    WHERE te.embedding IS NOT NULL
    ORDER BY te.created_at DESC
    LIMIT 1
  `)).rows[0];

  if (!seedT) {
    console.log(' (no turn embeddings)\n');
  } else {
    const r = await pool.query(`
      SELECT s.session_id, te.turn_index, te.content_text,
             (te.embedding <=> $1::vector) AS distance
      FROM ${qi(SCHEMA)}.turn_embeddings te
      JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
      WHERE te.embedding IS NOT NULL
      ORDER BY te.embedding <=> $1::vector ASC
      LIMIT 5
    `, [seedT.embedding]);
    console.log(` seed : ${String(seedT.session_id).slice(0, 8)} turn=${seedT.turn_index} | ${clean(seedT.content_text)}`);
    for (const row of r.rows) {
      const self = String(row.session_id) === String(seedT.session_id) && row.turn_index === seedT.turn_index;
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${self ? ' ← self' : ''}`);
    }
    const top = r.rows[0];
    // NOTE(review): unlike section 4 this only checks the top-1 distance, not that top-1
    // is the seed row itself — presumably because identical turn texts can tie at
    // distance 0; confirm before tightening.
    const selfOK = top && Number(top.distance) < 0.001;
    console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
  }

  // -------------------------------------------------------------------------
  // 6. Cross-path comparison: search the turn table with the same summary embedding.
  //    Only runs when both dims match; shows, per session, its closest turn.
  // -------------------------------------------------------------------------
  if (summaryDim && turnDim && summaryDim === turnDim && seedS) {
    console.log('--- 6. Cross-path:summary emb → turn search (dim 相同才跑) ---');
    // Inner query keeps the nearest turn of each session; the outer query ranks those
    // and keeps the five closest, so only the rows that get printed leave the database.
    const r = await pool.query(`
      SELECT session_id, turn_index, distance, content_text
      FROM (
        SELECT DISTINCT ON (te.session_row_id)
               s.session_id, te.turn_index,
               (te.embedding <=> $1::vector) AS distance,
               te.content_text
        FROM ${qi(SCHEMA)}.turn_embeddings te
        JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
        WHERE te.embedding IS NOT NULL
        ORDER BY te.session_row_id, te.embedding <=> $1::vector ASC
      ) nearest
      ORDER BY distance ASC
      LIMIT 5
    `, [seedS.embedding]);
    for (const row of r.rows) {
      const mark = String(row.session_id) === String(seedS.session_id) ? ' ← same session' : '';
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${mark}`);
    }
    console.log(' (不要求 top-1 是 seed session,兩路語意不同;只看距離是否合理 ≪ 1)\n');
  }

  await pool.end();
  console.log('=== 完成 ===');
}
|
|
187
|
+
|
|
188
|
+
main().catch(err => { console.error(err); process.exit(1); });
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Aquifer — Install drop-in OpenClaw extension
#
# Usage:
#   bash scripts/install-openclaw.sh [OPENCLAW_HOME]
#
# Default OPENCLAW_HOME: $HOME/.openclaw
#
# What it does:
#   1. Creates / overwrites $OPENCLAW_HOME/extensions/aquifer-memory/
#      as a symlink to <this_package>/consumers/openclaw-ext/
#   2. Prints follow-up instructions: set the .env keys, restart the gateway.
#
# Idempotent; safe to re-run.

set -euo pipefail

# Resolution order: first CLI argument, then the OPENCLAW_HOME env var, then ~/.openclaw.
OPENCLAW_HOME="${1:-${OPENCLAW_HOME:-$HOME/.openclaw}}"

# Locate the package root relative to this script (scripts/ sits one level below it).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PKG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
EXT_SRC="$PKG_ROOT/consumers/openclaw-ext"
EXT_DEST="$OPENCLAW_HOME/extensions/aquifer-memory"

if [[ ! -d "$EXT_SRC" ]]; then
  echo "error: $EXT_SRC not found (expected inside the Aquifer package)" >&2
  exit 1
fi

if [[ ! -d "$OPENCLAW_HOME" ]]; then
  echo "error: OPENCLAW_HOME=$OPENCLAW_HOME not found" >&2
  exit 1
fi

mkdir -p "$OPENCLAW_HOME/extensions"

# -L is tested as well as -e so that a dangling symlink (for which -e is false)
# is still detected and replaced.
if [[ -L "$EXT_DEST" || -e "$EXT_DEST" ]]; then
  echo "note: $EXT_DEST already exists — replacing"
  rm -rf "$EXT_DEST"
fi

ln -s "$EXT_SRC" "$EXT_DEST"
echo "ok: linked $EXT_DEST → $EXT_SRC"

# The heredoc is intentionally unquoted so $OPENCLAW_HOME expands to the directory
# that was actually used; when it was passed as an argument the variable is not set
# in the user's own shell, so a literal "$OPENCLAW_HOME" would point nowhere.
cat <<EOF

Next steps:
1. Edit $OPENCLAW_HOME/.env and set:
DATABASE_URL=postgresql://user:pass@host:5432/db
EMBED_PROVIDER=ollama # or openai
AQUIFER_LLM_PROVIDER=minimax # or openai / openrouter / opencode
MINIMAX_API_KEY=... # (or the key for your chosen provider)
# Optional:
AQUIFER_SCHEMA=my_namespace
AQUIFER_PERSONA=/path/to/host-local/persona-module
2. Restart OpenClaw:
systemctl --user restart openclaw-gateway
3. Verify:
journalctl --user -u openclaw-gateway -f | grep aquifer-memory
EOF
|
package/scripts/smoke.mjs
CHANGED
|
@@ -99,8 +99,8 @@ try {
|
|
|
99
99
|
const { Pool } = require('pg');
|
|
100
100
|
const pool = new Pool({ connectionString: config.db.url });
|
|
101
101
|
const schema = config.schema || 'aquifer';
|
|
102
|
-
await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id
|
|
103
|
-
await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id
|
|
102
|
+
await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id = $1`, [SESSION_ID]);
|
|
103
|
+
await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id = $1`, [SESSION_ID]);
|
|
104
104
|
await pool.query(`DELETE FROM ${schema}.sessions WHERE session_id = $1`, [SESSION_ID]);
|
|
105
105
|
await pool.end();
|
|
106
106
|
console.log(' OK');
|