@shadowforge0/aquifer-memory 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/aquifer.js CHANGED
@@ -100,19 +100,6 @@ function createAquifer(config) {
100
100
  const entityPromptFn = config.entities && config.entities.prompt ? config.entities.prompt : null;
101
101
  const entityScope = (config.entities && config.entities.scope) || 'default';
102
102
 
103
- // FTS config — locked to 'simple'.
104
- // The search_tsv trigger always uses to_tsvector('simple', ...), so query-time
105
- // config must match. Warn and override if someone passes anything else.
106
- const _rawFtsConfig = config.ftsConfig || 'simple';
107
- if (_rawFtsConfig !== 'simple') {
108
- console.warn(
109
- `[aquifer] ftsConfig '${_rawFtsConfig}' is not currently supported. ` +
110
- `The search_tsv index is built with 'simple'; only 'simple' is valid at query time. ` +
111
- `Overriding to 'simple'.`
112
- );
113
- }
114
- const ftsConfig = 'simple';
115
-
116
103
  // Rank weights
117
104
  const rankWeights = {
118
105
  rrf: 0.65,
@@ -706,7 +693,7 @@ function createAquifer(config) {
706
693
  const [ftsRows, embRows, turnResult] = await Promise.all([
707
694
  runFts
708
695
  ? storage.searchSessions(pool, query, {
709
- schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit, ftsConfig,
696
+ schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
710
697
  }).catch((err) => {
711
698
  recordSearchError('fts', err);
712
699
  return [];
package/core/storage.js CHANGED
@@ -211,7 +211,7 @@ async function getMessages(pool, sessionId, agentId, { schema, tenantId } = {})
211
211
  }
212
212
 
213
213
  // ---------------------------------------------------------------------------
214
- // searchSessions (FTS)
214
+ // searchSessions (trigram + FTS fallback)
215
215
  // ---------------------------------------------------------------------------
216
216
 
217
217
  async function searchSessions(pool, query, {
@@ -220,34 +220,27 @@ async function searchSessions(pool, query, {
220
220
  agentId,
221
221
  agentIds: rawAgentIds,
222
222
  source,
223
- dateFrom, // m1: add date filtering
223
+ dateFrom,
224
224
  dateTo,
225
225
  limit = 20,
226
- ftsConfig = 'simple',
227
226
  } = {}) {
228
227
  const clampedLimit = Math.max(1, Math.min(100, limit));
229
- // FTS config is locked to 'simple' — the search_tsv trigger always uses
230
- // to_tsvector('simple', ...) so query semantics must match. Warn callers
231
- // that pass a different value rather than silently honouring it.
232
- if (ftsConfig !== 'simple') {
233
- console.warn(
234
- `[aquifer/storage] searchSessions: ftsConfig '${ftsConfig}' ignored. ` +
235
- `Only 'simple' is supported (index is built with simple tokenizer). ` +
236
- `Using 'simple'.`
237
- );
238
- }
239
- const safeFts = 'simple';
240
228
 
241
229
  // Normalize agentId/agentIds
242
230
  const agentIds = rawAgentIds && rawAgentIds.length > 0
243
231
  ? rawAgentIds
244
232
  : (agentId ? [agentId] : null);
245
233
 
234
+ // Escape LIKE special characters in query
235
+ const likeQuery = query.replace(/[%_\\]/g, '\\$&');
236
+
237
+ // Primary: trigram ILIKE on search_text (works for CJK + Latin)
238
+ // Fallback: tsvector FTS (for installations without search_text populated)
246
239
  const where = [
247
- `ss.search_tsv @@ plainto_tsquery('${safeFts}', $1)`,
248
- `s.tenant_id = $2`,
240
+ `(ss.search_text ILIKE '%' || $1 || '%' OR ss.search_tsv @@ plainto_tsquery('simple', $2))`,
241
+ `s.tenant_id = $3`,
249
242
  ];
250
- const params = [query, tenantId];
243
+ const params = [likeQuery, query, tenantId];
251
244
 
252
245
  if (agentIds) {
253
246
  params.push(agentIds);
@@ -281,8 +274,10 @@ async function searchSessions(pool, query, {
281
274
  ss.access_count,
282
275
  ss.last_accessed_at,
283
276
  ss.trust_score,
284
- ts_headline('${safeFts}', COALESCE(ss.summary_text, ''), plainto_tsquery('${safeFts}', $1)) AS summary_snippet,
285
- ts_rank(ss.search_tsv, plainto_tsquery('${safeFts}', $1)) AS fts_rank
277
+ CASE WHEN ss.search_text IS NOT NULL
278
+ THEN similarity(ss.search_text, $2)
279
+ ELSE ts_rank(ss.search_tsv, plainto_tsquery('simple', $2))
280
+ END AS fts_rank
286
281
  FROM ${qi(schema)}.sessions s
287
282
  LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
288
283
  WHERE ${where.join(' AND ')}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shadowforge0/aquifer-memory",
3
- "version": "1.0.1",
3
+ "version": "1.0.2",
4
4
  "description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. MCP server, CLI, and library API.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -2,6 +2,7 @@
2
2
  -- Usage: replace ${schema} with actual schema name (e.g., 'aquifer')
3
3
 
4
4
  CREATE EXTENSION IF NOT EXISTS vector;
5
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
5
6
  CREATE SCHEMA IF NOT EXISTS ${schema};
6
7
 
7
8
  -- =========================================================================
@@ -85,6 +86,7 @@ CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
85
86
  structured_summary JSONB NOT NULL DEFAULT '{}',
86
87
  embedding vector,
87
88
  search_tsv TSVECTOR,
89
+ search_text TEXT,
88
90
  access_count INT NOT NULL DEFAULT 0,
89
91
  last_accessed_at TIMESTAMPTZ,
90
92
  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
@@ -96,6 +98,9 @@ CREATE INDEX IF NOT EXISTS idx_summaries_tenant
96
98
  CREATE INDEX IF NOT EXISTS idx_summaries_search_tsv
97
99
  ON ${schema}.session_summaries USING GIN (search_tsv);
98
100
 
101
+ CREATE INDEX IF NOT EXISTS idx_summaries_search_text_trgm
102
+ ON ${schema}.session_summaries USING GIN (search_text gin_trgm_ops);
103
+
99
104
  CREATE INDEX IF NOT EXISTS idx_summaries_embedding
100
105
  ON ${schema}.session_summaries (session_row_id)
101
106
  WHERE embedding IS NOT NULL;
@@ -141,6 +146,11 @@ BEGIN
141
146
  setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
142
147
  setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');
143
148
 
149
+ NEW.search_text :=
150
+ title_text || ' ' || overview_text || ' ' || topics_text || ' ' ||
151
+ decisions_text || ' ' || COALESCE(NEW.summary_text, '') || ' ' ||
152
+ open_loops_text || ' ' || facts_text;
153
+
144
154
  RETURN NEW;
145
155
  END;
146
156
  $$;
@@ -149,7 +159,7 @@ DROP TRIGGER IF EXISTS trg_session_summaries_search_tsv
149
159
  ON ${schema}.session_summaries;
150
160
 
151
161
  CREATE TRIGGER trg_session_summaries_search_tsv
152
- BEFORE INSERT OR UPDATE OF summary_text, structured_summary
162
+ BEFORE INSERT OR UPDATE OF summary_text, structured_summary, search_text
153
163
  ON ${schema}.session_summaries
154
164
  FOR EACH ROW
155
165
  EXECUTE FUNCTION ${schema}.session_summaries_search_tsv_update();
@@ -0,0 +1,161 @@
'use strict';

/**
 * FTS diagnostics for Chinese text: checks how the 'simple' text-search
 * configuration behaves on real session summaries.
 *
 * Sections printed:
 *   1. Token counts for the ten most recently updated summaries.
 *   2. Token sample — what the tokens of the most recent summary look like.
 *   3. FTS recall — hit counts of common queries against an ILIKE baseline.
 *   4. FTS vs. vector — whether FTS contributes results the vector ranking
 *      does not (overlap of the two top-10 lists).
 *
 * Environment:
 *   DATABASE_URL    (required) PostgreSQL connection string. No default is
 *                   provided so that credentials never live in the source.
 *   AQUIFER_SCHEMA  (optional) schema to inspect; defaults to 'miranda'.
 */

const { Pool } = require('pg');

const DB_URL = process.env.DATABASE_URL;
const SCHEMA = process.env.AQUIFER_SCHEMA || 'miranda';

// Token classifiers used by the token sample section. Neither regex carries
// the g/y flag, so repeated .test() calls are stateless.
const CJK_RE = /[\u4e00-\u9fff]/;
const LATIN_START_RE = /^[a-z0-9]/;

/**
 * Quote an SQL identifier, doubling embedded double quotes so a schema name
 * taken from the environment cannot terminate the quoted identifier early.
 *
 * @param {string} identifier - Raw identifier (e.g. a schema name).
 * @returns {string} The identifier wrapped in double quotes.
 */
function qi(identifier) {
  return `"${String(identifier).replace(/"/g, '""')}"`;
}

/**
 * Escape LIKE/ILIKE metacharacters (%, _ and backslash) so the term is
 * matched literally.
 *
 * @param {string} term - Raw search term.
 * @returns {string} Term safe to embed between '%' wildcards.
 */
function escapeLike(term) {
  return term.replace(/[%_\\]/g, '\\$&');
}

/**
 * Section 1: print the token count and a short preview for the ten most
 * recently updated summaries, followed by the average token count.
 *
 * @param {object} pool - pg connection pool.
 * @param {string} schemaSql - Already-quoted schema identifier.
 * @returns {Promise<void>}
 */
async function reportTokenCounts(pool, schemaSql) {
  console.log('--- 1. Token 分析 ---');
  const { rows } = await pool.query(`
    SELECT ss.session_id,
           array_length(tsvector_to_array(ss.search_tsv), 1) as token_count,
           left(ss.summary_text, 80) as preview
    FROM ${schemaSql}.session_summaries ss
    WHERE ss.search_tsv IS NOT NULL
    ORDER BY ss.updated_at DESC
    LIMIT 10
  `);

  let totalTokens = 0;
  for (const row of rows) {
    // array_length() yields NULL for an empty tsvector; count that as zero.
    const tokenCount = row.token_count || 0;
    totalTokens += tokenCount;
    console.log(` ${row.session_id?.slice(0, 8)} | ${tokenCount} tokens | ${row.preview}`);
  }
  const average = rows.length ? Math.round(totalTokens / rows.length) : 0;
  console.log(` avg: ${average} tokens/session\n`);
}

/**
 * Section 2: print the actual tokens of the most recent summary, split into
 * CJK-containing tokens, tokens starting with a Latin letter or digit, and
 * everything else.
 *
 * @param {object} pool - pg connection pool.
 * @param {string} schemaSql - Already-quoted schema identifier.
 * @returns {Promise<void>}
 */
async function reportTokenSample(pool, schemaSql) {
  console.log('--- 2. Token 範例（最近 session）---');
  const { rows } = await pool.query(`
    SELECT ss.session_id,
           array_to_string(tsvector_to_array(ss.search_tsv), ' | ') as tokens
    FROM ${schemaSql}.session_summaries ss
    WHERE ss.search_tsv IS NOT NULL
    ORDER BY ss.updated_at DESC
    LIMIT 1
  `);

  const sample = rows[0];
  if (!sample) {
    return;
  }

  console.log(` session: ${sample.session_id?.slice(0, 8)}`);
  // ''.split(' | ') would yield [''] and report one phantom token, so an
  // empty token string maps to an empty list.
  const all = sample.tokens ? sample.tokens.split(' | ') : [];
  const cjk = all.filter((token) => CJK_RE.test(token));
  const latin = all.filter((token) => LATIN_START_RE.test(token));
  const other = all.filter((token) => !CJK_RE.test(token) && !LATIN_START_RE.test(token));
  console.log(` total: ${all.length} | latin: ${latin.length} | cjk: ${cjk.length} | other: ${other.length}`);
  console.log(` CJK tokens (前 20): ${cjk.slice(0, 20).join(' | ')}`);
  console.log(` Latin tokens (前 20): ${latin.slice(0, 20).join(' | ')}\n`);
}

/**
 * Section 3: for a fixed list of queries, compare the number of FTS matches
 * with the number of rows a substring (ILIKE) scan finds. The ILIKE count is
 * treated as ground truth for the recall percentage.
 *
 * @param {object} pool - pg connection pool.
 * @param {string} schemaSql - Already-quoted schema identifier.
 * @returns {Promise<void>}
 */
async function reportFtsRecall(pool, schemaSql) {
  console.log('--- 3. 中文查詢 FTS 命中率 ---');
  const testQueries = [
    'afterburn',
    'bootstrap',
    'session',
    'recall',
    '記憶',
    '修復',
    '架構',
    '時區',
    '去重',
    'daily entries',
    'OpenCode',
    'entity',
    'Jenny',
    'Aquifer',
    '消化模式',
  ];

  // Total number of summaries that have an FTS vector at all.
  const totalResult = await pool.query(`
    SELECT COUNT(*) as cnt FROM ${schemaSql}.session_summaries WHERE search_tsv IS NOT NULL
  `);
  // COUNT(*) is a bigint, which pg returns as a string.
  const totalSessions = Number.parseInt(totalResult.rows[0].cnt, 10);
  console.log(` total sessions with FTS index: ${totalSessions}\n`);

  for (const q of testQueries) {
    // The two counts are independent, so run them concurrently.
    const [ftsResult, ilikeResult] = await Promise.all([
      pool.query(`
        SELECT COUNT(*) as cnt
        FROM ${schemaSql}.session_summaries ss
        WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
      `, [q]),
      // Ground truth: how many rows contain the term as a plain substring.
      pool.query(`
        SELECT COUNT(*) as cnt
        FROM ${schemaSql}.session_summaries ss
        WHERE ss.summary_text ILIKE $1
           OR ss.structured_summary::text ILIKE $1
      `, [`%${escapeLike(q)}%`]),
    ]);
    const ftsHits = Number.parseInt(ftsResult.rows[0].cnt, 10);
    const ilikeHits = Number.parseInt(ilikeResult.rows[0].cnt, 10);

    // With no ground-truth rows, recall is 100% only if FTS also found nothing.
    const ftsRecall = ilikeHits > 0
      ? Math.round(ftsHits / ilikeHits * 100)
      : (ftsHits === 0 ? 100 : 0);
    const status = ftsHits === ilikeHits ? '✓' : (ftsHits < ilikeHits ? '✗ MISS' : '?');
    console.log(` "${q}" | FTS: ${ftsHits} | ILIKE: ${ilikeHits} | recall: ${ftsRecall}% | ${status}`);
  }
}

/**
 * Section 4: for a few queries, compare the FTS top 10 with a vector top 10
 * and print how much the two lists overlap.
 *
 * No query embedding is computed here; the embedding of the best-ranked FTS
 * hit stands in for the query vector.
 *
 * @param {object} pool - pg connection pool.
 * @param {string} schemaSql - Already-quoted schema identifier.
 * @returns {Promise<void>}
 */
async function reportVectorOverlap(pool, schemaSql) {
  console.log('\n--- 4. FTS 在 hybrid search 中的貢獻度 ---');
  const overlapQueries = ['afterburn', 'bootstrap', '記憶', 'recall', 'entity'];

  for (const q of overlapQueries) {
    const [ftsResult, embResult] = await Promise.all([
      pool.query(`
        SELECT ss.session_id
        FROM ${schemaSql}.session_summaries ss
        JOIN ${schemaSql}.sessions s ON s.id = ss.session_row_id
        WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
          AND s.processing_status = 'succeeded'
        ORDER BY ts_rank(ss.search_tsv, plainto_tsquery('simple', $1)) DESC
        LIMIT 10
      `, [q]),
      // The seed CTE only considers FTS hits that actually have an embedding.
      // When it is empty the CROSS JOIN yields no rows, instead of ordering by
      // a NULL distance and returning ten arbitrary summaries.
      pool.query(`
        WITH seed AS (
          SELECT ss2.embedding
          FROM ${schemaSql}.session_summaries ss2
          WHERE ss2.search_tsv @@ plainto_tsquery('simple', $1)
            AND ss2.embedding IS NOT NULL
          ORDER BY ts_rank(ss2.search_tsv, plainto_tsquery('simple', $1)) DESC
          LIMIT 1
        )
        SELECT ss.session_id
        FROM ${schemaSql}.session_summaries ss
        JOIN ${schemaSql}.sessions s ON s.id = ss.session_row_id
        CROSS JOIN seed
        WHERE ss.embedding IS NOT NULL
          AND s.processing_status = 'succeeded'
        ORDER BY ss.embedding <=> seed.embedding
        LIMIT 10
      `, [q]),
    ]);
    const ftsIds = new Set(ftsResult.rows.map((row) => row.session_id));
    const embIds = new Set(embResult.rows.map((row) => row.session_id));

    const overlap = [...ftsIds].filter((id) => embIds.has(id)).length;
    const ftsOnly = ftsIds.size - overlap;
    const embOnly = embIds.size - overlap;

    console.log(` "${q}" | FTS top10: ${ftsIds.size} | Vec top10: ${embIds.size} | overlap: ${overlap} | FTS-only: ${ftsOnly} | Vec-only: ${embOnly}`);
  }
}

/**
 * Entry point: run all four diagnostic sections against the configured
 * database and schema, closing the pool afterwards whether or not a section
 * failed.
 *
 * @returns {Promise<void>}
 * @throws {Error} If DATABASE_URL is not set, or if any query fails.
 */
async function run() {
  if (!DB_URL) {
    throw new Error(
      'DATABASE_URL is not set. Export a PostgreSQL connection string before running this diagnostic.'
    );
  }

  const pool = new Pool({ connectionString: DB_URL });
  const schemaSql = qi(SCHEMA);

  try {
    console.log('=== FTS 中文診斷 ===\n');
    await reportTokenCounts(pool, schemaSql);
    await reportTokenSample(pool, schemaSql);
    await reportFtsRecall(pool, schemaSql);
    await reportVectorOverlap(pool, schemaSql);
  } finally {
    await pool.end();
  }
  console.log('\n=== 完成 ===');
}

run().catch((err) => {
  console.error(err);
  // The pool is already closed by run(), so the process exits on its own.
  process.exitCode = 1;
});