@shadowforge0/aquifer-memory 1.0.2 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +29 -20
  2. package/consumers/claude-code.js +117 -0
  3. package/consumers/cli.js +17 -0
  4. package/consumers/default/daily-entries.js +196 -0
  5. package/consumers/default/index.js +282 -0
  6. package/consumers/default/prompts/summary.js +153 -0
  7. package/consumers/mcp.js +3 -23
  8. package/consumers/miranda/context-inject.js +119 -0
  9. package/consumers/miranda/daily-entries.js +224 -0
  10. package/consumers/miranda/index.js +353 -0
  11. package/consumers/miranda/instance.js +55 -0
  12. package/consumers/miranda/llm.js +99 -0
  13. package/consumers/miranda/prompts/summary.js +303 -0
  14. package/consumers/miranda/recall-format.js +74 -0
  15. package/consumers/miranda/workspace-files.js +91 -0
  16. package/consumers/openclaw-ext/index.js +38 -0
  17. package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
  18. package/consumers/openclaw-ext/package.json +10 -0
  19. package/consumers/openclaw-plugin.js +66 -74
  20. package/consumers/opencode.js +21 -24
  21. package/consumers/shared/autodetect.js +64 -0
  22. package/consumers/shared/entity-parser.js +119 -0
  23. package/consumers/shared/ingest.js +148 -0
  24. package/consumers/shared/llm-autodetect.js +137 -0
  25. package/consumers/shared/normalize.js +129 -0
  26. package/consumers/shared/recall-format.js +110 -0
  27. package/core/aquifer.js +200 -82
  28. package/core/entity.js +29 -17
  29. package/core/storage.js +116 -45
  30. package/docs/postprocess-contract.md +132 -0
  31. package/index.js +9 -1
  32. package/package.json +23 -2
  33. package/pipeline/_http.js +1 -1
  34. package/pipeline/consolidation/apply.js +176 -0
  35. package/pipeline/consolidation/index.js +21 -0
  36. package/pipeline/extract-entities.js +2 -2
  37. package/pipeline/rerank.js +1 -1
  38. package/pipeline/summarize.js +4 -1
  39. package/schema/001-base.sql +61 -24
  40. package/schema/002-entities.sql +17 -3
  41. package/schema/004-facts.sql +67 -0
  42. package/scripts/diagnose-fts-zh.js +168 -134
  43. package/scripts/diagnose-vector.js +188 -0
  44. package/scripts/install-openclaw.sh +59 -0
  45. package/scripts/smoke.mjs +2 -2
@@ -0,0 +1,188 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Aquifer vector recall 診斷
5
+ *
6
+ * 驗 summary-vector + turn-vector 兩路 infrastructure:
7
+ * - embedding coverage
8
+ * - vector dim 是否一致(summary vs turn)
9
+ * - self-retrieval sanity(拿自己 embedding 當 query,top-1 distance 應 ≈ 0)
10
+ *
11
+ * env:
12
+ * DATABASE_URL — required
13
+ * AQUIFER_SCHEMA — default 'public'
14
+ */
15
+
16
+ const { Pool } = require('pg');
17
+
18
// Connection settings come from the environment; DATABASE_URL is mandatory,
// so the script exits with status 2 before creating a pool when it is missing.
const { DATABASE_URL: DB_URL } = process.env;
if (!DB_URL) {
  console.error('DATABASE_URL is required');
  process.exit(2);
}
// Schema that holds the Aquifer tables; falls back to 'public' when unset or empty.
const SCHEMA = process.env.AQUIFER_SCHEMA || 'public';

// Connection pool used by every query below.
const pool = new Pool({ connectionString: DB_URL });
26
/**
 * Quote a SQL identifier: double every embedded double quote and wrap the
 * result in double quotes so it can be spliced into a query string.
 *
 * @param {string} s - Raw identifier (this script passes the schema name).
 * @returns {string} The double-quoted identifier.
 */
const qi = (s) => {
  const escaped = s.split('"').join('""');
  return `"${escaped}"`;
};
27
/**
 * Format n / d as a whole-number percentage for the report.
 *
 * By default node-postgres returns COUNT(*) (a bigint) as a string, so both
 * arguments are coerced to numbers first. Comparing the raw string '0' with
 * `=== 0` would miss the empty-table case and print "NaN%".
 *
 * @param {number|string} n - Numerator.
 * @param {number|string} d - Denominator.
 * @returns {string} '—' for 0/0, '∞%' for a non-zero numerator over 0,
 *   otherwise the rounded percentage such as '42%'.
 */
const pct = (n, d) => {
  const numerator = Number(n);
  const denominator = Number(d);
  if (denominator === 0) {
    return numerator === 0 ? '—' : '∞%';
  }
  return `${Math.round((numerator / denominator) * 100)}%`;
};
28
/**
 * Collapse a text value into a short single-line preview for console output.
 *
 * Runs of whitespace become one space and the result is cut to at most 70
 * characters. The cut is made on code points rather than UTF-16 code units so
 * a character outside the BMP (emoji, rare CJK) is never split into a lone
 * surrogate.
 *
 * @param {*} s - Value to preview; any falsy value yields ''.
 * @returns {string} The flattened, truncated preview.
 */
const clean = (s) => {
  if (!s) {
    return '';
  }
  const flattened = String(s).replace(/\s+/g, ' ');
  return Array.from(flattened).slice(0, 70).join('');
};
29
+
30
/**
 * Run the vector-recall diagnostics and print the report to stdout.
 *
 * Sections, in order:
 *   1. session_summaries embedding coverage and dimensions
 *   2. turn_embeddings row counts and dimensions
 *   3. sessions that have a summary embedding but no turn embeddings
 *   4. self-retrieval check on the most recently updated summary embedding
 *   5. self-retrieval check on the most recently created turn embedding
 *   6. cross-path search of the summary seed against turn embeddings
 *      (only when both tables report the same dimension and a seed exists)
 *
 * @returns {Promise<void>} Resolves after the pool has been closed; rejects if
 *   any query fails.
 */
async function main() {
  console.log(`=== Aquifer vector recall 診斷 (schema=${SCHEMA}) ===\n`);

  // -------------------------------------------------------------------------
  // 1. Summary embedding coverage + dim
  // -------------------------------------------------------------------------
  const summaryStats = (await pool.query(`
    SELECT
      COUNT(*) AS total,
      COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS with_emb,
      MIN(vector_dims(embedding)) AS min_dim,
      MAX(vector_dims(embedding)) AS max_dim
    FROM ${qi(SCHEMA)}.session_summaries
  `)).rows[0];
  console.log('--- 1. session_summaries.embedding ---');
  // COUNT(*) is a bigint, which node-postgres returns as a string by default;
  // convert before doing arithmetic so an empty table does not print "NaN%".
  const summaryCoverage = pct(Number(summaryStats.with_emb), Number(summaryStats.total));
  console.log(` total ${summaryStats.total} | with_emb ${summaryStats.with_emb} (${summaryCoverage})`);
  const summaryDim = summaryStats.min_dim;
  console.log(` dim min=${summaryStats.min_dim} max=${summaryStats.max_dim}${summaryStats.min_dim !== summaryStats.max_dim ? ' ⚠ 不一致' : ''}\n`);

  // -------------------------------------------------------------------------
  // 2. Turn embedding coverage + dim
  // -------------------------------------------------------------------------
  const turnStats = (await pool.query(`
    SELECT
      COUNT(*) AS total,
      COUNT(DISTINCT session_row_id) AS distinct_sessions,
      MIN(vector_dims(embedding)) AS min_dim,
      MAX(vector_dims(embedding)) AS max_dim
    FROM ${qi(SCHEMA)}.turn_embeddings
  `)).rows[0];
  console.log('--- 2. turn_embeddings.embedding ---');
  console.log(` total turns ${turnStats.total} | distinct sessions ${turnStats.distinct_sessions}`);
  console.log(` dim min=${turnStats.min_dim} max=${turnStats.max_dim}${turnStats.min_dim !== turnStats.max_dim ? ' ⚠ 不一致' : ''}`);
  const turnDim = turnStats.min_dim;
  // Differing dimensions mean one query embedding can only match one of the two tables.
  if (turnDim && summaryDim && turnDim !== summaryDim) {
    console.log(` ⚠ summary dim ${summaryDim} != turn dim ${turnDim} → query embedding 只會對得上其中一條`);
  }
  console.log();

  // -------------------------------------------------------------------------
  // 3. Share of sessions that have a summary embedding but no turn embeddings
  // -------------------------------------------------------------------------
  const gap = (await pool.query(`
    SELECT
      COUNT(DISTINCT ss.session_row_id) AS with_summary_emb,
      COUNT(DISTINCT te.session_row_id) AS with_turn_emb,
      COUNT(DISTINCT ss.session_row_id) FILTER (
        WHERE te.session_row_id IS NULL
      ) AS summary_no_turn
    FROM ${qi(SCHEMA)}.session_summaries ss
    LEFT JOIN ${qi(SCHEMA)}.turn_embeddings te
      ON te.session_row_id = ss.session_row_id
    WHERE ss.embedding IS NOT NULL
  `)).rows[0];
  console.log('--- 3. 兩路覆蓋差 ---');
  console.log(` sessions with summary emb : ${gap.with_summary_emb}`);
  console.log(` sessions with turn emb : ${gap.with_turn_emb}`);
  // Same string-to-number conversion as in section 1.
  const summaryOnlyShare = pct(Number(gap.summary_no_turn), Number(gap.with_summary_emb));
  console.log(` summary-only (no turns) : ${gap.summary_no_turn} (${summaryOnlyShare})`);
  console.log(' (summary-only 是常見的—某些 session 沒有合適的 user turn 可 embed)\n');

  // -------------------------------------------------------------------------
  // 4. Self-retrieval sanity: summary vector
  //    Use the most recently updated summary embedding as the query; the top-1
  //    hit should be that same session with distance ≈ 0.
  // -------------------------------------------------------------------------
  console.log('--- 4. Summary vector self-retrieval sanity ---');
  const seedS = (await pool.query(`
    SELECT s.session_id, ss.summary_text, ss.embedding
    FROM ${qi(SCHEMA)}.session_summaries ss
    JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
    WHERE ss.embedding IS NOT NULL
    ORDER BY ss.updated_at DESC
    LIMIT 1
  `)).rows[0];

  if (!seedS) {
    console.log(' (no summary with embedding)\n');
  } else {
    const neighbours = await pool.query(`
      SELECT s.session_id,
             (ss.embedding <=> $1::vector) AS distance,
             ss.summary_text
      FROM ${qi(SCHEMA)}.session_summaries ss
      JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
      WHERE ss.embedding IS NOT NULL
      ORDER BY ss.embedding <=> $1::vector ASC
      LIMIT 5
    `, [seedS.embedding]);
    console.log(` seed : ${String(seedS.session_id).slice(0, 8)} | ${clean(seedS.summary_text)}`);
    for (const row of neighbours.rows) {
      const mark = String(row.session_id) === String(seedS.session_id) ? ' ← self' : '';
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} | ${clean(row.summary_text)}${mark}`);
    }
    const top = neighbours.rows[0];
    const selfOK = top && String(top.session_id) === String(seedS.session_id) && Number(top.distance) < 0.001;
    console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
  }

  // -------------------------------------------------------------------------
  // 5. Self-retrieval sanity: turn vector
  // -------------------------------------------------------------------------
  console.log('--- 5. Turn vector self-retrieval sanity ---');
  const seedT = (await pool.query(`
    SELECT te.session_row_id, te.turn_index, te.content_text, te.embedding,
           s.session_id
    FROM ${qi(SCHEMA)}.turn_embeddings te
    JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
    ORDER BY te.created_at DESC
    LIMIT 1
  `)).rows[0];

  if (!seedT) {
    console.log(' (no turn embeddings)\n');
  } else {
    const neighbours = await pool.query(`
      SELECT s.session_id, te.turn_index, te.content_text,
             (te.embedding <=> $1::vector) AS distance
      FROM ${qi(SCHEMA)}.turn_embeddings te
      JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
      ORDER BY te.embedding <=> $1::vector ASC
      LIMIT 5
    `, [seedT.embedding]);
    console.log(` seed : ${String(seedT.session_id).slice(0, 8)} turn=${seedT.turn_index} | ${clean(seedT.content_text)}`);
    for (const row of neighbours.rows) {
      const self = String(row.session_id) === String(seedT.session_id) && row.turn_index === seedT.turn_index;
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${self ? ' ← self' : ''}`);
    }
    const top = neighbours.rows[0];
    // Unlike section 4, only the distance is checked here, not that the top
    // row is the seed itself.
    // NOTE(review): presumably so that turns with identical text do not fail
    // the check — confirm this is intentional.
    const selfOK = top && Number(top.distance) < 0.001;
    console.log(` → self top-1 @ distance≈0: ${selfOK ? 'YES ✓' : 'NO ✗'}\n`);
  }

  // -------------------------------------------------------------------------
  // 6. Cross-path comparison: search the turn table with the summary seed
  //    embedding. Runs only when both dimensions match; shows how far the
  //    summary vector is from the nearest turn of each session.
  // -------------------------------------------------------------------------
  if (summaryDim && turnDim && summaryDim === turnDim && seedS) {
    console.log('--- 6. Cross-path:summary emb → turn search (dim 相同才跑) ---');
    // The inner query keeps the nearest turn per session; the outer query then
    // orders those by distance and keeps the closest five, so the database
    // returns only the rows that are printed instead of one row per session.
    const nearestPerSession = await pool.query(`
      SELECT nearest.session_id, nearest.turn_index, nearest.distance, nearest.content_text
      FROM (
        SELECT DISTINCT ON (te.session_row_id)
               s.session_id, te.turn_index,
               (te.embedding <=> $1::vector) AS distance,
               te.content_text
        FROM ${qi(SCHEMA)}.turn_embeddings te
        JOIN ${qi(SCHEMA)}.sessions s ON s.id = te.session_row_id
        ORDER BY te.session_row_id, te.embedding <=> $1::vector ASC
      ) nearest
      ORDER BY nearest.distance ASC
      LIMIT 5
    `, [seedS.embedding]);
    for (const row of nearestPerSession.rows) {
      const mark = String(row.session_id) === String(seedS.session_id) ? ' ← same session' : '';
      console.log(` [${Number(row.distance).toFixed(4)}] ${String(row.session_id).slice(0, 8)} turn=${row.turn_index} | ${clean(row.content_text)}${mark}`);
    }
    console.log(' (不要求 top-1 是 seed session,兩路語意不同;只看距離是否合理 ≪ 1)\n');
  }

  await pool.end();
  console.log('=== 完成 ===');
}

// Any failure is printed and turned into a non-zero exit status.
main().catch(err => { console.error(err); process.exit(1); });
@@ -0,0 +1,59 @@
1
#!/usr/bin/env bash
# Aquifer — Install drop-in OpenClaw extension
#
# Usage:
#   bash scripts/install-openclaw.sh [OPENCLAW_HOME]
#
# Default OPENCLAW_HOME: $HOME/.openclaw
# (the first argument wins, then the OPENCLAW_HOME environment variable)
#
# What it does:
#   1. Creates / overwrites $OPENCLAW_HOME/extensions/aquifer-memory/
#      as a symlink to <this_package>/consumers/openclaw-ext/
#   2. Prints follow-up instructions: set the .env keys, restart the gateway.
#
# Idempotent; safe to re-run.

set -euo pipefail

OPENCLAW_HOME="${1:-${OPENCLAW_HOME:-$HOME/.openclaw}}"
# Resolve the package root from this script's own location so the script works
# from any current directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PKG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
EXT_SRC="$PKG_ROOT/consumers/openclaw-ext"
EXT_DEST="$OPENCLAW_HOME/extensions/aquifer-memory"

if [[ ! -d "$EXT_SRC" ]]; then
  echo "error: $EXT_SRC not found (expected inside the Aquifer package)" >&2
  exit 1
fi

if [[ ! -d "$OPENCLAW_HOME" ]]; then
  echo "error: OPENCLAW_HOME=$OPENCLAW_HOME not found" >&2
  exit 1
fi

mkdir -p "$OPENCLAW_HOME/extensions"

# -L is tested as well as -e so that a dangling symlink is also replaced.
if [[ -L "$EXT_DEST" || -e "$EXT_DEST" ]]; then
  echo "note: $EXT_DEST already exists — replacing"
  rm -rf "$EXT_DEST"
fi

ln -s "$EXT_SRC" "$EXT_DEST"
echo "ok: linked $EXT_DEST → $EXT_SRC"

# The heredoc delimiter is unquoted on purpose: $OPENCLAW_HOME below expands to
# the directory that was actually used, instead of printing a variable name
# that may not be set in the user's shell. The text contains no other
# expansions.
cat <<EOF

Next steps:
1. Edit $OPENCLAW_HOME/.env and set:
DATABASE_URL=postgresql://user:pass@host:5432/db
EMBED_PROVIDER=ollama # or openai
AQUIFER_LLM_PROVIDER=minimax # or openai / openrouter / opencode
MINIMAX_API_KEY=... # (or the key for your chosen provider)
# Optional:
AQUIFER_SCHEMA=my_namespace
AQUIFER_PERSONA=/path/to/host-local/persona-module
2. Restart OpenClaw:
systemctl --user restart openclaw-gateway
3. Verify:
journalctl --user -u openclaw-gateway -f | grep aquifer-memory
EOF
package/scripts/smoke.mjs CHANGED
@@ -99,8 +99,8 @@ try {
99
99
  const { Pool } = require('pg');
100
100
  const pool = new Pool({ connectionString: config.db.url });
101
101
  const schema = config.schema || 'aquifer';
102
- await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id IN (SELECT id FROM ${schema}.sessions WHERE session_id = $1)`, [SESSION_ID]);
103
- await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id IN (SELECT id FROM ${schema}.sessions WHERE session_id = $1)`, [SESSION_ID]);
102
+ await pool.query(`DELETE FROM ${schema}.turn_embeddings WHERE session_id = $1`, [SESSION_ID]);
103
+ await pool.query(`DELETE FROM ${schema}.session_summaries WHERE session_id = $1`, [SESSION_ID]);
104
104
  await pool.query(`DELETE FROM ${schema}.sessions WHERE session_id = $1`, [SESSION_ID]);
105
105
  await pool.end();
106
106
  console.log(' OK');