@shadowforge0/aquifer-memory 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/aquifer.js CHANGED
@@ -100,19 +100,6 @@ function createAquifer(config) {
100
100
  const entityPromptFn = config.entities && config.entities.prompt ? config.entities.prompt : null;
101
101
  const entityScope = (config.entities && config.entities.scope) || 'default';
102
102
 
103
- // FTS config — locked to 'simple'.
104
- // The search_tsv trigger always uses to_tsvector('simple', ...), so query-time
105
- // config must match. Warn and override if someone passes anything else.
106
- const _rawFtsConfig = config.ftsConfig || 'simple';
107
- if (_rawFtsConfig !== 'simple') {
108
- console.warn(
109
- `[aquifer] ftsConfig '${_rawFtsConfig}' is not currently supported. ` +
110
- `The search_tsv index is built with 'simple'; only 'simple' is valid at query time. ` +
111
- `Overriding to 'simple'.`
112
- );
113
- }
114
- const ftsConfig = 'simple';
115
-
116
103
  // Rank weights
117
104
  const rankWeights = {
118
105
  rrf: 0.65,
@@ -200,21 +187,30 @@ function createAquifer(config) {
200
187
  // --- lifecycle ---
201
188
 
202
189
  async migrate() {
203
- // 1. Run base DDL
204
- const baseSql = loadSql('001-base.sql', schema);
205
- await pool.query(baseSql);
206
-
207
- // 2. If entities enabled, run entity DDL
208
- if (entitiesEnabled) {
209
- const entitySql = loadSql('002-entities.sql', schema);
210
- await pool.query(entitySql);
211
- }
190
+ // Advisory lock prevents concurrent migrations across processes.
191
+ // Lock key is derived from schema name to allow parallel migration
192
+ // of different schemas in the same database.
193
+ const lockKey = Buffer.from(`aquifer:${schema}`).reduce((h, b) => (h * 31 + b) & 0x7fffffff, 0);
194
+ await pool.query('SELECT pg_advisory_lock($1)', [lockKey]);
195
+ try {
196
+ // 1. Run base DDL
197
+ const baseSql = loadSql('001-base.sql', schema);
198
+ await pool.query(baseSql);
199
+
200
+ // 2. If entities enabled, run entity DDL
201
+ if (entitiesEnabled) {
202
+ const entitySql = loadSql('002-entities.sql', schema);
203
+ await pool.query(entitySql);
204
+ }
212
205
 
213
- // 3. Trust + feedback (always, not gated by entities)
214
- const trustSql = loadSql('003-trust-feedback.sql', schema);
215
- await pool.query(trustSql);
206
+ // 3. Trust + feedback (always, not gated by entities)
207
+ const trustSql = loadSql('003-trust-feedback.sql', schema);
208
+ await pool.query(trustSql);
216
209
 
217
- migrated = true;
210
+ migrated = true;
211
+ } finally {
212
+ await pool.query('SELECT pg_advisory_unlock($1)', [lockKey]).catch(() => {});
213
+ }
218
214
  },
219
215
 
220
216
  async close() {
@@ -706,7 +702,7 @@ function createAquifer(config) {
706
702
  const [ftsRows, embRows, turnResult] = await Promise.all([
707
703
  runFts
708
704
  ? storage.searchSessions(pool, query, {
709
- schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit, ftsConfig,
705
+ schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
710
706
  }).catch((err) => {
711
707
  recordSearchError('fts', err);
712
708
  return [];
package/core/entity.js CHANGED
@@ -222,27 +222,41 @@ async function upsertEntityRelations(pool, {
222
222
  }) {
223
223
  if (!pairs || pairs.length === 0) return { upserted: 0 };
224
224
  const ts = occurredAt || new Date().toISOString();
225
- let upserted = 0;
226
225
 
226
+ // Filter and normalize pairs
227
+ const validPairs = [];
227
228
  for (const { srcEntityId, dstEntityId } of pairs) {
228
229
  if (!srcEntityId || !dstEntityId || srcEntityId === dstEntityId) continue;
230
+ validPairs.push({
231
+ lo: Math.min(srcEntityId, dstEntityId),
232
+ hi: Math.max(srcEntityId, dstEntityId),
233
+ });
234
+ }
229
235
 
230
- const lo = Math.min(srcEntityId, dstEntityId);
231
- const hi = Math.max(srcEntityId, dstEntityId);
232
-
233
- await pool.query(
234
- `INSERT INTO ${qi(schema)}.entity_relations
235
- (src_entity_id, dst_entity_id, co_occurrence_count, first_seen_at, last_seen_at)
236
- VALUES ($1, $2, 1, $3, $3)
237
- ON CONFLICT (src_entity_id, dst_entity_id) DO UPDATE SET
238
- co_occurrence_count = ${qi(schema)}.entity_relations.co_occurrence_count + 1,
239
- last_seen_at = GREATEST(${qi(schema)}.entity_relations.last_seen_at, EXCLUDED.last_seen_at)`,
240
- [lo, hi, ts]
241
- );
242
- upserted++;
236
+ if (validPairs.length === 0) return { upserted: 0 };
237
+
238
+ // Batch insert: multi-row VALUES
239
+ const COLS_PER_ROW = 3;
240
+ const valueClauses = [];
241
+ const params = [];
242
+
243
+ for (const { lo, hi } of validPairs) {
244
+ const off = params.length;
245
+ params.push(lo, hi, ts);
246
+ valueClauses.push(`($${off+1}, $${off+2}, 1, $${off+3}, $${off+3})`);
243
247
  }
244
248
 
245
- return { upserted };
249
+ await pool.query(
250
+ `INSERT INTO ${qi(schema)}.entity_relations
251
+ (src_entity_id, dst_entity_id, co_occurrence_count, first_seen_at, last_seen_at)
252
+ VALUES ${valueClauses.join(',\n')}
253
+ ON CONFLICT (src_entity_id, dst_entity_id) DO UPDATE SET
254
+ co_occurrence_count = ${qi(schema)}.entity_relations.co_occurrence_count + 1,
255
+ last_seen_at = GREATEST(${qi(schema)}.entity_relations.last_seen_at, EXCLUDED.last_seen_at)`,
256
+ params
257
+ );
258
+
259
+ return { upserted: validPairs.length };
246
260
  }
247
261
 
248
262
  // ---------------------------------------------------------------------------
package/core/storage.js CHANGED
@@ -211,7 +211,7 @@ async function getMessages(pool, sessionId, agentId, { schema, tenantId } = {})
211
211
  }
212
212
 
213
213
  // ---------------------------------------------------------------------------
214
- // searchSessions (FTS)
214
+ // searchSessions (trigram + FTS fallback)
215
215
  // ---------------------------------------------------------------------------
216
216
 
217
217
  async function searchSessions(pool, query, {
@@ -220,34 +220,27 @@ async function searchSessions(pool, query, {
220
220
  agentId,
221
221
  agentIds: rawAgentIds,
222
222
  source,
223
- dateFrom, // m1: add date filtering
223
+ dateFrom,
224
224
  dateTo,
225
225
  limit = 20,
226
- ftsConfig = 'simple',
227
226
  } = {}) {
228
227
  const clampedLimit = Math.max(1, Math.min(100, limit));
229
- // FTS config is locked to 'simple' — the search_tsv trigger always uses
230
- // to_tsvector('simple', ...) so query semantics must match. Warn callers
231
- // that pass a different value rather than silently honouring it.
232
- if (ftsConfig !== 'simple') {
233
- console.warn(
234
- `[aquifer/storage] searchSessions: ftsConfig '${ftsConfig}' ignored. ` +
235
- `Only 'simple' is supported (index is built with simple tokenizer). ` +
236
- `Using 'simple'.`
237
- );
238
- }
239
- const safeFts = 'simple';
240
228
 
241
229
  // Normalize agentId/agentIds
242
230
  const agentIds = rawAgentIds && rawAgentIds.length > 0
243
231
  ? rawAgentIds
244
232
  : (agentId ? [agentId] : null);
245
233
 
234
+ // Escape LIKE special characters in query
235
+ const likeQuery = query.replace(/[%_\\]/g, '\\$&');
236
+
237
+ // Primary: trigram ILIKE on search_text (works for CJK + Latin)
238
+ // Fallback: tsvector FTS (for installations without search_text populated)
246
239
  const where = [
247
- `ss.search_tsv @@ plainto_tsquery('${safeFts}', $1)`,
248
- `s.tenant_id = $2`,
240
+ `(ss.search_text ILIKE '%' || $1 || '%' OR ss.search_tsv @@ plainto_tsquery('simple', $2))`,
241
+ `s.tenant_id = $3`,
249
242
  ];
250
- const params = [query, tenantId];
243
+ const params = [likeQuery, query, tenantId];
251
244
 
252
245
  if (agentIds) {
253
246
  params.push(agentIds);
@@ -281,8 +274,10 @@ async function searchSessions(pool, query, {
281
274
  ss.access_count,
282
275
  ss.last_accessed_at,
283
276
  ss.trust_score,
284
- ts_headline('${safeFts}', COALESCE(ss.summary_text, ''), plainto_tsquery('${safeFts}', $1)) AS summary_snippet,
285
- ts_rank(ss.search_tsv, plainto_tsquery('${safeFts}', $1)) AS fts_rank
277
+ CASE WHEN ss.search_text IS NOT NULL
278
+ THEN similarity(ss.search_text, $2)
279
+ ELSE ts_rank(ss.search_tsv, plainto_tsquery('simple', $2))
280
+ END AS fts_rank
286
281
  FROM ${qi(schema)}.sessions s
287
282
  LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
288
283
  WHERE ${where.join(' AND ')}
@@ -365,32 +360,45 @@ async function upsertTurnEmbeddings(pool, sessionRowId, {
365
360
  throw new Error(`turns.length (${turns.length}) !== vectors.length (${vectors.length})`);
366
361
  }
367
362
 
363
+ // Batch insert: build multi-row VALUES clause
364
+ const COLS_PER_ROW = 10;
365
+ const valueClauses = [];
366
+ const params = [];
367
+
368
368
  for (let i = 0; i < turns.length; i++) {
369
369
  const t = turns[i];
370
370
  const vec = vectors[i];
371
371
  if (!vec) continue;
372
372
 
373
373
  const contentHash = crypto.createHash('sha256').update(t.text).digest('hex').slice(0, 16);
374
- await pool.query(
375
- `INSERT INTO ${qi(schema)}.turn_embeddings
376
- (session_row_id, tenant_id, session_id, agent_id, source,
377
- turn_index, message_index, role, content_text, content_hash, embedding)
378
- VALUES ($1,$2,$3,$4,$5,$6,$7,'user',$8,$9,$10::vector)
379
- ON CONFLICT (session_row_id, message_index) DO UPDATE SET
380
- content_text = EXCLUDED.content_text,
381
- content_hash = EXCLUDED.content_hash,
382
- embedding = CASE
383
- WHEN ${qi(schema)}.turn_embeddings.content_hash = EXCLUDED.content_hash
384
- THEN ${qi(schema)}.turn_embeddings.embedding
385
- ELSE EXCLUDED.embedding
386
- END`,
387
- [
388
- sessionRowId, tenantId, sessionId, agentId, source || null,
389
- t.turnIndex, t.messageIndex,
390
- t.text, contentHash, vecToStr(vec),
391
- ]
374
+ const off = params.length;
375
+ params.push(
376
+ sessionRowId, tenantId, sessionId, agentId, source || null,
377
+ t.turnIndex, t.messageIndex,
378
+ t.text, contentHash, vecToStr(vec),
379
+ );
380
+ valueClauses.push(
381
+ `($${off+1},$${off+2},$${off+3},$${off+4},$${off+5},$${off+6},$${off+7},'user',$${off+8},$${off+9},$${off+10}::vector)`
392
382
  );
393
383
  }
384
+
385
+ if (valueClauses.length === 0) return;
386
+
387
+ await pool.query(
388
+ `INSERT INTO ${qi(schema)}.turn_embeddings
389
+ (session_row_id, tenant_id, session_id, agent_id, source,
390
+ turn_index, message_index, role, content_text, content_hash, embedding)
391
+ VALUES ${valueClauses.join(',\n')}
392
+ ON CONFLICT (session_row_id, message_index) DO UPDATE SET
393
+ content_text = EXCLUDED.content_text,
394
+ content_hash = EXCLUDED.content_hash,
395
+ embedding = CASE
396
+ WHEN ${qi(schema)}.turn_embeddings.content_hash = EXCLUDED.content_hash
397
+ THEN ${qi(schema)}.turn_embeddings.embedding
398
+ ELSE EXCLUDED.embedding
399
+ END`,
400
+ params
401
+ );
394
402
  }
395
403
 
396
404
  // ---------------------------------------------------------------------------
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shadowforge0/aquifer-memory",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. MCP server, CLI, and library API.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -2,6 +2,7 @@
2
2
  -- Usage: replace ${schema} with actual schema name (e.g., 'aquifer')
3
3
 
4
4
  CREATE EXTENSION IF NOT EXISTS vector;
5
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
5
6
  CREATE SCHEMA IF NOT EXISTS ${schema};
6
7
 
7
8
  -- =========================================================================
@@ -85,6 +86,7 @@ CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
85
86
  structured_summary JSONB NOT NULL DEFAULT '{}',
86
87
  embedding vector,
87
88
  search_tsv TSVECTOR,
89
+ search_text TEXT,
88
90
  access_count INT NOT NULL DEFAULT 0,
89
91
  last_accessed_at TIMESTAMPTZ,
90
92
  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
@@ -96,6 +98,9 @@ CREATE INDEX IF NOT EXISTS idx_summaries_tenant
96
98
  CREATE INDEX IF NOT EXISTS idx_summaries_search_tsv
97
99
  ON ${schema}.session_summaries USING GIN (search_tsv);
98
100
 
101
+ CREATE INDEX IF NOT EXISTS idx_summaries_search_text_trgm
102
+ ON ${schema}.session_summaries USING GIN (search_text gin_trgm_ops);
103
+
99
104
  CREATE INDEX IF NOT EXISTS idx_summaries_embedding
100
105
  ON ${schema}.session_summaries (session_row_id)
101
106
  WHERE embedding IS NOT NULL;
@@ -141,6 +146,11 @@ BEGIN
141
146
  setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
142
147
  setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');
143
148
 
149
+ NEW.search_text :=
150
+ title_text || ' ' || overview_text || ' ' || topics_text || ' ' ||
151
+ decisions_text || ' ' || COALESCE(NEW.summary_text, '') || ' ' ||
152
+ open_loops_text || ' ' || facts_text;
153
+
144
154
  RETURN NEW;
145
155
  END;
146
156
  $$;
@@ -149,7 +159,7 @@ DROP TRIGGER IF EXISTS trg_session_summaries_search_tsv
149
159
  ON ${schema}.session_summaries;
150
160
 
151
161
  CREATE TRIGGER trg_session_summaries_search_tsv
152
- BEFORE INSERT OR UPDATE OF summary_text, structured_summary
162
+ BEFORE INSERT OR UPDATE OF summary_text, structured_summary, search_text
153
163
  ON ${schema}.session_summaries
154
164
  FOR EACH ROW
155
165
  EXECUTE FUNCTION ${schema}.session_summaries_search_tsv_update();
@@ -0,0 +1,161 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * FTS 中文診斷:檢查 'simple' tokenizer 在實際中文資料上的表現
5
+ *
6
+ * 測試項目:
7
+ * 1. FTS tokenization — 實際 token 長什麼樣
8
+ * 2. FTS recall — 常見中文查詢的命中率
9
+ * 3. FTS vs vector — FTS 有沒有在幫忙還是在拖後腿
10
+ */
11
+
12
+ const { Pool } = require('pg');
13
+
14
// Connection string for the database under diagnosis. The fallback carries no
// username or password: credentials must never be committed to a published
// package. NOTE(review): with no credentials in the URL, node-postgres is
// expected to fall back to PGUSER / PGPASSWORD — confirm for your setup, or
// set DATABASE_URL explicitly.
const DB_URL = process.env.DATABASE_URL || 'postgresql://localhost:5432/openclaw_db';
// Schema that holds the session_summaries / sessions tables queried below.
const SCHEMA = process.env.AQUIFER_SCHEMA || 'miranda';

const pool = new Pool({ connectionString: DB_URL });
18
+
19
/**
 * Run the FTS diagnostic against the configured schema and print a report.
 *
 * Sections printed, in order:
 *   1. token counts for the 10 most recently updated summaries,
 *   2. the token list of the most recent summary, split into CJK / Latin / other,
 *   3. per-query hit counts for FTS versus an ILIKE substring scan,
 *   4. overlap between the FTS top-10 and the embedding-distance top-10.
 *
 * Reads the module-level `pool` and `SCHEMA`. The pool is closed before the
 * returned promise settles, whether the run succeeds or throws.
 *
 * @returns {Promise<void>}
 */
async function run() {
  // Quote an SQL identifier, doubling embedded double quotes so an
  // environment-supplied schema name cannot break out of the quoting.
  const qi = (s) => `"${String(s).replace(/"/g, '""')}"`;
  // COUNT(*) comes back as a string; parse it as a base-10 integer.
  const toCount = (result) => Number.parseInt(result.rows[0].cnt, 10);

  const CJK_RE = /[\u4e00-\u9fff]/;
  const LATIN_RE = /^[a-z0-9]/;

  try {
    console.log('=== FTS 中文診斷 ===\n');

    // 1. Token distribution across the most recent summaries
    console.log('--- 1. Token 分析 ---');
    const tokenSample = await pool.query(`
      SELECT ss.session_id,
        array_length(tsvector_to_array(ss.search_tsv), 1) as token_count,
        left(ss.summary_text, 80) as preview
      FROM ${qi(SCHEMA)}.session_summaries ss
      WHERE ss.search_tsv IS NOT NULL
      ORDER BY ss.updated_at DESC
      LIMIT 10
    `);

    let totalTokens = 0;
    let sessionCount = 0;
    for (const r of tokenSample.rows) {
      totalTokens += r.token_count || 0;
      sessionCount++;
      console.log(` ${r.session_id?.slice(0, 8)} | ${r.token_count || 0} tokens | ${r.preview}`);
    }
    console.log(` avg: ${sessionCount ? Math.round(totalTokens / sessionCount) : 0} tokens/session\n`);

    // 2. Actual tokens of the single most recent session
    console.log('--- 2. Token 範例(最近 session)---');
    const tokenDetail = await pool.query(`
      SELECT ss.session_id,
        array_to_string(tsvector_to_array(ss.search_tsv), ' | ') as tokens
      FROM ${qi(SCHEMA)}.session_summaries ss
      WHERE ss.search_tsv IS NOT NULL
      ORDER BY ss.updated_at DESC
      LIMIT 1
    `);
    if (tokenDetail.rows[0]) {
      console.log(` session: ${tokenDetail.rows[0].session_id?.slice(0, 8)}`);
      const tokens = tokenDetail.rows[0].tokens || '';
      // Splitting an empty string yields [''], which would be reported as one
      // token; an empty tsvector must count as zero tokens.
      const all = tokens === '' ? [] : tokens.split(' | ');
      // Categories are not mutually exclusive: a token that starts with a
      // Latin character and also contains CJK is counted in both lists.
      const cjk = all.filter((t) => CJK_RE.test(t));
      const latin = all.filter((t) => LATIN_RE.test(t));
      const other = all.filter((t) => !CJK_RE.test(t) && !LATIN_RE.test(t));
      console.log(` total: ${all.length} | latin: ${latin.length} | cjk: ${cjk.length} | other: ${other.length}`);
      console.log(` CJK tokens (前 20): ${cjk.slice(0, 20).join(' | ')}`);
      console.log(` Latin tokens (前 20): ${latin.slice(0, 20).join(' | ')}\n`);
    }

    // 3. FTS hit rate for typical Chinese and Latin queries
    console.log('--- 3. 中文查詢 FTS 命中率 ---');
    const testQueries = [
      'afterburn',
      'bootstrap',
      'session',
      'recall',
      '記憶',
      '修復',
      '架構',
      '時區',
      '去重',
      'daily entries',
      'OpenCode',
      'entity',
      'Jenny',
      'Aquifer',
      '消化模式',
    ];

    // Total number of sessions that have an FTS vector at all
    const totalResult = await pool.query(`
      SELECT COUNT(*) as cnt FROM ${qi(SCHEMA)}.session_summaries WHERE search_tsv IS NOT NULL
    `);
    const totalSessions = toCount(totalResult);
    console.log(` total sessions with FTS index: ${totalSessions}\n`);

    for (const q of testQueries) {
      const ftsResult = await pool.query(`
        SELECT COUNT(*) as cnt
        FROM ${qi(SCHEMA)}.session_summaries ss
        WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
      `, [q]);
      const ftsHits = toCount(ftsResult);

      // Substring scan over the raw text serves as the ground truth
      const ilikeResult = await pool.query(`
        SELECT COUNT(*) as cnt
        FROM ${qi(SCHEMA)}.session_summaries ss
        WHERE ss.summary_text ILIKE $1
          OR ss.structured_summary::text ILIKE $1
      `, [`%${q}%`]);
      const ilikeHits = toCount(ilikeResult);

      // With no ground-truth hits, recall is 100% only if FTS also found nothing.
      const ftsRecall = ilikeHits > 0 ? Math.round(ftsHits / ilikeHits * 100) : (ftsHits === 0 ? 100 : 0);
      const status = ftsHits === ilikeHits ? '✓' : (ftsHits < ilikeHits ? '✗ MISS' : '?');
      console.log(` "${q}" | FTS: ${ftsHits} | ILIKE: ${ilikeHits} | recall: ${ftsRecall}% | ${status}`);
    }

    // 4. How much FTS contributes next to vector search
    console.log('\n--- 4. FTS 在 hybrid search 中的貢獻度 ---');
    // For a few queries, compare the session sets returned by FTS and by vector distance
    const overlapQueries = ['afterburn', 'bootstrap', '記憶', 'recall', 'entity'];
    for (const q of overlapQueries) {
      const ftsResult = await pool.query(`
        SELECT ss.session_id
        FROM ${qi(SCHEMA)}.session_summaries ss
        JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
        WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)
          AND s.processing_status = 'succeeded'
        ORDER BY ts_rank(ss.search_tsv, plainto_tsquery('simple', $1)) DESC
        LIMIT 10
      `, [q]);
      const ftsIds = new Set(ftsResult.rows.map((r) => r.session_id));

      // Vector search, using the embedding of the best FTS match as the probe.
      // The probe row must itself have an embedding; a NULL probe makes every
      // distance NULL and the resulting order arbitrary.
      // NOTE(review): when no FTS match has an embedding the probe is still
      // NULL and the vector top-10 for that query is not meaningful.
      const embResult = await pool.query(`
        SELECT ss.session_id
        FROM ${qi(SCHEMA)}.session_summaries ss
        JOIN ${qi(SCHEMA)}.sessions s ON s.id = ss.session_row_id
        WHERE ss.embedding IS NOT NULL
          AND s.processing_status = 'succeeded'
        ORDER BY ss.embedding <=> (
          SELECT ss2.embedding FROM ${qi(SCHEMA)}.session_summaries ss2
          WHERE ss2.search_tsv @@ plainto_tsquery('simple', $1)
            AND ss2.embedding IS NOT NULL
          ORDER BY ts_rank(ss2.search_tsv, plainto_tsquery('simple', $1)) DESC
          LIMIT 1
        )
        LIMIT 10
      `, [q]);
      const embIds = new Set(embResult.rows.map((r) => r.session_id));

      const overlap = [...ftsIds].filter((id) => embIds.has(id)).length;
      const ftsOnly = [...ftsIds].filter((id) => !embIds.has(id)).length;
      const embOnly = [...embIds].filter((id) => !ftsIds.has(id)).length;

      console.log(` "${q}" | FTS top10: ${ftsIds.size} | Vec top10: ${embIds.size} | overlap: ${overlap} | FTS-only: ${ftsOnly} | Vec-only: ${embOnly}`);
    }
  } finally {
    // Close the pool on the failure path too, so no connection is left open.
    await pool.end();
  }
  console.log('\n=== 完成 ===');
}
160
+
161
// Script entry point: run the diagnostic; on any failure print the error and
// exit with a non-zero status.
run().catch((failure) => {
  console.error(failure);
  process.exit(1);
});