@shadowforge0/aquifer-memory 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -0
- package/consumers/cli.js +314 -0
- package/consumers/mcp.js +135 -0
- package/consumers/openclaw-plugin.js +235 -0
- package/consumers/shared/config.js +143 -0
- package/consumers/shared/factory.js +77 -0
- package/consumers/shared/llm.js +119 -0
- package/core/aquifer.js +634 -0
- package/core/entity.js +360 -0
- package/core/hybrid-rank.js +166 -0
- package/core/storage.js +550 -0
- package/index.js +6 -0
- package/package.json +57 -0
- package/pipeline/embed.js +230 -0
- package/pipeline/extract-entities.js +73 -0
- package/pipeline/summarize.js +245 -0
- package/schema/001-base.sql +180 -0
- package/schema/002-entities.sql +120 -0
package/core/storage.js
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const crypto = require('crypto');
|
|
4
|
+
|
|
5
|
+
// C1: quote identifier for SQL safety
|
|
6
|
+
/**
 * Quote a value for use as a PostgreSQL identifier (e.g. a schema name).
 *
 * @param {string} identifier - Raw identifier text; coerced with String().
 * @returns {string} The identifier wrapped in double quotes.
 */
function qi(identifier) {
  // Inside a quoted identifier a literal `"` must be written as `""`.
  // Without this, a name containing `"` would close the identifier early
  // and let the remainder be parsed as SQL.
  return `"${String(identifier).replace(/"/g, '""')}"`;
}
|
|
7
|
+
|
|
8
|
+
// Validate vector for NaN/Infinity before pgvector cast
|
|
9
|
+
/**
 * Serialize a numeric vector to the bracketed text form (`[a,b,c]`) that the
 * queries in this file cast with `::vector`.
 *
 * Accepts plain arrays and typed arrays (e.g. Float32Array). Previously a
 * typed array failed the Array.isArray check and was silently turned into
 * null, which dropped the embedding.
 *
 * @param {number[]|Float32Array|Float64Array|null|undefined} vec
 * @returns {string|null} Text literal, or null when `vec` is missing, empty,
 *   or not an array-like of numbers.
 * @throws {Error} When any element is not a finite number (NaN, Infinity,
 *   non-number).
 */
function vecToStr(vec) {
  // DataView is an ArrayBuffer view but has no indexed elements, so exclude it.
  const isTypedArray = ArrayBuffer.isView(vec) && !(vec instanceof DataView);
  if (!Array.isArray(vec) && !isTypedArray) return null;
  if (vec.length === 0) return null;

  for (let i = 0; i < vec.length; i++) {
    if (!Number.isFinite(vec[i])) {
      throw new Error(`Vector contains non-finite value at index ${i}`);
    }
  }
  return `[${vec.join(',')}]`;
}
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Constants
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
// Trimmed user turns shorter than this are skipped by extractUserTurns.
const MIN_TURN_CHARS = 5;
// extractUserTurns truncates each kept turn to this many code points.
const MAX_TURN_CHARS = 2000;

// A user turn whose trimmed text matches ANY of these patterns is skipped by
// extractUserTurns.
const TURN_NOISE_RE = [
  // Text starting with "/" followed by a word character.
  /^\/\w/,
  // Whole-message short acknowledgements (English and Chinese), optional trailing ".".
  /^(ok(ay)?|好的?|嗯|對|是的?|yes|yep|no|y|n|got it|thanks?|thx|收到|了解|繼續|不用了?|sure|確認|確定)\.?$/i,
  // Exact marker strings.
  /^HEARTBEAT_OK$/,
  /^THINK_OK$/,
  // Messages beginning with these literal prefixes.
  /^\[Queued messages while agent was busy\]/,
  /^<<<EXTERNAL_UNTRUSTED_CONTENT/,
  /^A new session was started via \/new/,
];

// The only values markStatus accepts for `status`.
const VALID_STATUSES = new Set(['pending', 'processing', 'succeeded', 'partial', 'failed']);
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// upsertSession
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Insert a session row, or overwrite the existing row that has the same
 * (tenant_id, agent_id, session_id).
 *
 * Both paths set `ended_at = now()` and `processing_status = 'pending'`; the
 * update path also clears `processing_error`.
 *
 * @param {object} pool - Object exposing `query(sql, params)` (pg Pool/Client).
 * @param {object} fields - Session fields; `schema` selects the target schema.
 * @returns {Promise<object|null>} `{ id, tenantId, agentId, sessionId,
 *   processingStatus, isNew }`, or null when no row is returned. `isNew` is
 *   the value of `(xmax = 0)` (the usual PostgreSQL idiom for "row was
 *   inserted, not updated").
 */
async function upsertSession(pool, {
  schema,
  tenantId,
  sessionId,
  sessionKey,
  agentId,
  source,
  messages,
  msgCount,
  userCount,
  assistantCount,
  model,
  tokensIn,
  tokensOut,
  startedAt,
  lastMessageAt,
}) {
  const table = `${qi(schema)}.sessions`;
  const sql = [
    `INSERT INTO ${table}`,
    '(tenant_id, session_id, session_key, agent_id, source, messages,',
    'msg_count, user_count, assistant_count, model, tokens_in, tokens_out,',
    'started_at, ended_at, last_message_at, processing_status)',
    "VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,now(),$14,'pending')",
    'ON CONFLICT (tenant_id, agent_id, session_id) DO UPDATE SET',
    'session_key = EXCLUDED.session_key,',
    `source = COALESCE(EXCLUDED.source, ${table}.source),`,
    'messages = EXCLUDED.messages,',
    'msg_count = EXCLUDED.msg_count,',
    'user_count = EXCLUDED.user_count,',
    'assistant_count = EXCLUDED.assistant_count,',
    'model = EXCLUDED.model,',
    'tokens_in = EXCLUDED.tokens_in,',
    'tokens_out = EXCLUDED.tokens_out,',
    `started_at = COALESCE(EXCLUDED.started_at, ${table}.started_at),`,
    'ended_at = now(),',
    `last_message_at = COALESCE(EXCLUDED.last_message_at, ${table}.last_message_at),`,
    "processing_status = 'pending',",
    'processing_error = NULL',
    'RETURNING id, tenant_id, agent_id, session_id, processing_status, (xmax = 0) AS is_new',
  ].join('\n');

  // Positional values for $1..$14, in placeholder order.
  const values = [
    tenantId,
    sessionId,
    sessionKey || null,
    agentId,
    source || 'api',
    messages ? JSON.stringify(messages) : null,
    msgCount || 0,
    userCount || 0,
    assistantCount || 0,
    model || null,
    tokensIn || 0,
    tokensOut || 0,
    startedAt || null,
    lastMessageAt || null,
  ];

  const { rows } = await pool.query(sql, values);
  const [row] = rows;
  if (!row) return null;

  return {
    id: row.id,
    tenantId: row.tenant_id,
    agentId: row.agent_id,
    sessionId: row.session_id,
    processingStatus: row.processing_status,
    isNew: row.is_new,
  };
}
|
|
98
|
+
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// upsertSegments
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
|
|
103
|
+
/**
 * Upsert every segment of a session, one statement per segment, in order.
 * Rows are keyed by (session_row_id, segment_no).
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {*} sessionRowId - Value written to `session_row_id`.
 * @param {Array<object>} segments - Segment descriptors; no-op when empty/missing.
 * @param {{schema: string}} [opts]
 * @returns {Promise<void>}
 */
async function upsertSegments(pool, sessionRowId, segments, { schema } = {}) {
  if (!segments || segments.length === 0) return;

  const sql = [
    `INSERT INTO ${qi(schema)}.session_segments`,
    '(session_row_id, segment_no, start_msg_idx, end_msg_idx,',
    'started_at, ended_at, raw_msg_count, effective_msg_count,',
    'boundary_type, boundary_meta)',
    'VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)',
    'ON CONFLICT (session_row_id, segment_no) DO UPDATE SET',
    'start_msg_idx = EXCLUDED.start_msg_idx,',
    'end_msg_idx = EXCLUDED.end_msg_idx,',
    'started_at = EXCLUDED.started_at,',
    'ended_at = EXCLUDED.ended_at,',
    'raw_msg_count = EXCLUDED.raw_msg_count,',
    'effective_msg_count = EXCLUDED.effective_msg_count,',
    'boundary_type = EXCLUDED.boundary_type,',
    'boundary_meta = EXCLUDED.boundary_meta',
  ].join('\n');

  for (const segment of segments) {
    const meta = segment.boundaryMeta ? JSON.stringify(segment.boundaryMeta) : '{}';
    await pool.query(sql, [
      sessionRowId,
      segment.segmentNo,
      // `??` keeps a legitimate index of 0; only null/undefined become NULL.
      segment.startMsgIdx ?? null,
      segment.endMsgIdx ?? null,
      segment.startedAt || null,
      segment.endedAt || null,
      segment.rawMsgCount || 0,
      segment.effectiveMsgCount || 0,
      segment.boundaryType || null,
      meta,
    ]);
  }
}
|
|
136
|
+
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// upsertSummary
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
/**
 * Insert or update the summary row for a session (keyed by session_row_id).
 *
 * On conflict, most columns keep their stored value when the incoming value
 * is NULL; `structured_summary` and `summary_text` also keep the stored value
 * when the incoming one is `{}` / the empty string.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {*} sessionRowId - Value written to `session_row_id`.
 * @param {object} fields - Summary fields; `schema` selects the target schema.
 * @returns {Promise<object|null>} Row with session_row_id, tenant_id,
 *   agent_id, session_id, model; null when nothing is returned.
 * @throws {Error} From vecToStr when `embedding` holds a non-finite value.
 */
async function upsertSummary(pool, sessionRowId, {
  schema,
  tenantId,
  agentId,
  sessionId,
  summaryText,
  structuredSummary,
  model,
  sourceHash,
  msgCount,
  userCount,
  assistantCount,
  startedAt,
  endedAt,
  embedding,
}) {
  // Serialize (and validate) the embedding before touching the database.
  let embeddingLiteral = null;
  if (embedding) {
    embeddingLiteral = vecToStr(embedding);
  }

  const table = `${qi(schema)}.session_summaries`;
  const sql = [
    `INSERT INTO ${table}`,
    '(session_row_id, tenant_id, agent_id, session_id, summary_version, model, source_hash,',
    'message_count, user_message_count, assistant_message_count,',
    'boundary_count, fresh_tail_count,',
    'started_at, ended_at, structured_summary, summary_text, embedding, updated_at)',
    "VALUES ($1,$2,$3,$4,1,$5,$6,$7,$8,$9,0,0,$10,$11,COALESCE($12::jsonb,'{}'::jsonb),COALESCE($13,''),$14::vector,now())",
    'ON CONFLICT (session_row_id) DO UPDATE SET',
    'tenant_id = EXCLUDED.tenant_id,',
    'agent_id = EXCLUDED.agent_id,',
    'session_id = EXCLUDED.session_id,',
    `model = COALESCE(EXCLUDED.model, ${table}.model),`,
    `source_hash = COALESCE(EXCLUDED.source_hash, ${table}.source_hash),`,
    `message_count = COALESCE(EXCLUDED.message_count, ${table}.message_count),`,
    `user_message_count = COALESCE(EXCLUDED.user_message_count, ${table}.user_message_count),`,
    `assistant_message_count = COALESCE(EXCLUDED.assistant_message_count, ${table}.assistant_message_count),`,
    `started_at = COALESCE(EXCLUDED.started_at, ${table}.started_at),`,
    `ended_at = COALESCE(EXCLUDED.ended_at, ${table}.ended_at),`,
    `structured_summary = COALESCE(NULLIF(EXCLUDED.structured_summary, '{}'::jsonb), ${table}.structured_summary),`,
    `summary_text = COALESCE(NULLIF(EXCLUDED.summary_text, ''), ${table}.summary_text),`,
    `embedding = COALESCE(EXCLUDED.embedding, ${table}.embedding),`,
    'updated_at = now()',
    'RETURNING session_row_id, tenant_id, agent_id, session_id, model',
  ].join('\n');

  // Positional values for $1..$14, in placeholder order.
  const values = [
    sessionRowId,
    tenantId,
    agentId || null,
    sessionId || null,
    model || null,
    sourceHash || null,
    msgCount || 0,
    userCount || 0,
    assistantCount || 0,
    startedAt || null,
    endedAt || null,
    structuredSummary ? JSON.stringify(structuredSummary) : null,
    summaryText || '',
    embeddingLiteral,
  ];

  const { rows } = await pool.query(sql, values);
  return rows[0] || null;
}
|
|
193
|
+
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
// markStatus
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
|
|
198
|
+
/**
 * Set a session's processing status and error text.
 * `processed_at` is set to now() only for 'succeeded' and 'partial'; other
 * statuses leave it unchanged.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {*} sessionRowId - `sessions.id` of the row to update.
 * @param {string} status - Must be a member of VALID_STATUSES.
 * @param {string|null} error - Stored in `processing_error` (falsy -> NULL).
 * @param {{schema: string}} [opts]
 * @returns {Promise<object|null>} `{ id, processing_status, processing_error }`
 *   or null when no row matched.
 * @throws {Error} When `status` is not a known status.
 */
async function markStatus(pool, sessionRowId, status, error, { schema } = {}) {
  const isKnownStatus = VALID_STATUSES.has(status);
  if (!isKnownStatus) {
    throw new Error(`Invalid status: ${status}`);
  }

  const sql = [
    `UPDATE ${qi(schema)}.sessions`,
    'SET processing_status = $1,',
    "processed_at = CASE WHEN $1 IN ('succeeded', 'partial') THEN now() ELSE processed_at END,",
    'processing_error = $2',
    'WHERE id = $3',
    'RETURNING id, processing_status, processing_error',
  ].join('\n');

  const { rows } = await pool.query(sql, [status, error || null, sessionRowId]);
  return rows[0] || null;
}
|
|
213
|
+
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// persistProcessingResults (@internal — prefer aquifer.enrich() for full pipeline)
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
/**
 * Persist segments, summary and a 'succeeded' status inside one transaction
 * on a dedicated client from `pool.connect()`.
 *
 * On any failure the transaction is rolled back, the session is marked
 * 'failed' (best effort, through the pool rather than the transaction
 * client), and the original error is rethrown. The client is always released.
 *
 * @param {object} pool - Object exposing `connect()` and `query()`.
 * @param {*} sessionRowId - `sessions.id` of the session being processed.
 * @param {object} fields - `schema`, optional `segments`, plus the summary
 *   fields forwarded to upsertSummary.
 * @returns {Promise<void>}
 */
async function persistProcessingResults(pool, sessionRowId, {
  schema,
  segments,
  summaryText,
  structuredSummary,
  agentId,
  sessionId,
  tenantId,
  model,
  sourceHash,
  msgCount,
  userCount,
  assistantCount,
  startedAt,
  endedAt,
  embedding,
}) {
  const scope = { schema };
  const summaryFields = {
    schema, tenantId, agentId, sessionId, summaryText,
    structuredSummary, model, sourceHash,
    msgCount, userCount, assistantCount,
    startedAt, endedAt, embedding,
  };

  const client = await pool.connect();
  try {
    await client.query('BEGIN');
    if (segments) {
      await upsertSegments(client, sessionRowId, segments, scope);
    }
    await upsertSummary(client, sessionRowId, summaryFields);
    await markStatus(client, sessionRowId, 'succeeded', null, scope);
    await client.query('COMMIT');
  } catch (failure) {
    // A failed ROLLBACK must not mask the original error.
    await client.query('ROLLBACK').catch(() => {});
    try {
      // Goes through `pool`, not `client`, so it runs outside the
      // transaction that was just rolled back.
      await markStatus(pool, sessionRowId, 'failed', failure.message, scope);
    } catch (_) { /* swallow */ }
    throw failure;
  } finally {
    client.release();
  }
}
|
|
257
|
+
|
|
258
|
+
// ---------------------------------------------------------------------------
|
|
259
|
+
// getSession
|
|
260
|
+
// ---------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
/**
 * Fetch one session row by session id, agent id and tenant.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {string} sessionId
 * @param {string} agentId
 * @param {string|object|null} [options] - Legacy form: a string is treated as
 *   the `source` filter. Object form: `{ source, tenantId }`. `null` (or any
 *   other non-object value) is treated as "no options" instead of crashing.
 * @param {{schema: string, tenantId: string}} [ctx] - Target schema and the
 *   default tenant used when `options.tenantId` is absent/falsy.
 * @returns {Promise<object|null>} The full row (`SELECT *`) or null.
 */
async function getSession(pool, sessionId, agentId, options = {}, { schema, tenantId: defaultTenantId } = {}) {
  let source = null;
  let tid = defaultTenantId;
  if (typeof options === 'string') {
    // Support legacy: options can be a string (treated as source)
    source = options;
  } else if (options !== null && typeof options === 'object') {
    // `typeof null === 'object'`, hence the explicit null check: an explicit
    // `null` argument bypasses the `= {}` default.
    source = options.source || null;
    tid = options.tenantId || tid;
  }

  const sql = [
    'SELECT *',
    `FROM ${qi(schema)}.sessions`,
    'WHERE session_id = $1',
    'AND agent_id = $2',
    'AND tenant_id = $3',
    'AND ($4::text IS NULL OR source = $4)',
    'LIMIT 1',
  ].join('\n');

  const result = await pool.query(sql, [sessionId, agentId, tid, source]);
  return result.rows[0] || null;
}
|
|
284
|
+
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
// getSessionFull
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
|
|
289
|
+
/**
 * Load a session together with its segments (ordered by segment_no) and its
 * summary row.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {string} sessionId
 * @param {string} agentId
 * @param {{schema: string, tenantId: string}} [ctx]
 * @returns {Promise<{session: object, segments: object[], summary: object|null}|null>}
 *   null when the session does not exist.
 */
async function getSessionFull(pool, sessionId, agentId, { schema, tenantId } = {}) {
  const session = await getSession(pool, sessionId, agentId, { tenantId }, { schema, tenantId });
  if (!session) return null;

  const segmentsSql = [
    `SELECT * FROM ${qi(schema)}.session_segments`,
    'WHERE session_row_id = $1',
    'ORDER BY segment_no ASC',
  ].join('\n');
  const summarySql = [
    `SELECT * FROM ${qi(schema)}.session_summaries`,
    'WHERE session_row_id = $1',
    'LIMIT 1',
  ].join('\n');

  // The two lookups are independent, so they are issued concurrently.
  const segmentsQuery = pool.query(segmentsSql, [session.id]);
  const summaryQuery = pool.query(summarySql, [session.id]);
  const [segmentsResult, summaryResult] = await Promise.all([segmentsQuery, summaryQuery]);

  const summary = summaryResult.rows[0] || null;
  return { session, segments: segmentsResult.rows, summary };
}
|
|
314
|
+
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
// getMessages
|
|
317
|
+
// ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
/**
 * Return the stored messages of a session.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {string} sessionId
 * @param {string} agentId
 * @param {{schema: string, tenantId: string}} [ctx]
 * @returns {Promise<*|null>} The `normalized` member of the stored value when
 *   it is truthy, otherwise the stored value itself; null when the session or
 *   its messages are missing.
 */
async function getMessages(pool, sessionId, agentId, { schema, tenantId } = {}) {
  const session = await getSession(pool, sessionId, agentId, { tenantId }, { schema, tenantId });
  const stored = session ? session.messages : null;
  if (!stored) return null;

  // The column value may arrive as JSON text or as an already-parsed value.
  let parsed = stored;
  if (typeof stored === 'string') {
    parsed = JSON.parse(stored);
  }
  return parsed.normalized || parsed;
}
|
|
325
|
+
|
|
326
|
+
// ---------------------------------------------------------------------------
|
|
327
|
+
// searchSessions (FTS)
|
|
328
|
+
// ---------------------------------------------------------------------------
|
|
329
|
+
|
|
330
|
+
/**
 * Full-text search over session summaries (`plainto_tsquery('simple', …)`
 * against `search_tsv`), restricted to one tenant.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {string} query - Free-text search input.
 * @param {object} [opts]
 * @param {string} opts.schema
 * @param {string} opts.tenantId
 * @param {string} [opts.agentId] - Optional exact-match filter.
 * @param {string} [opts.source] - Optional exact-match filter.
 * @param {string|Date} [opts.dateFrom] - Inclusive lower bound on started_at (by date).
 * @param {string|Date} [opts.dateTo] - Inclusive upper bound on started_at (by date).
 * @param {number} [opts.limit=20] - Truncated to an integer and clamped to
 *   1..100; values that are not finite numbers fall back to 20.
 * @returns {Promise<object[]>} Rows ordered by fts_rank DESC, then
 *   last_message_at DESC (NULLs last).
 */
async function searchSessions(pool, query, {
  schema,
  tenantId,
  agentId,
  source,
  dateFrom, // m1: add date filtering
  dateTo,
  limit = 20,
} = {}) {
  // NaN or a fractional value would otherwise reach `LIMIT $7` and be
  // rejected by PostgreSQL; coerce to a sane integer first.
  const requested = Math.trunc(Number(limit));
  const clampedLimit = Number.isFinite(requested)
    ? Math.max(1, Math.min(100, requested))
    : 20;

  const schemaRef = qi(schema);
  const sql = [
    'SELECT',
    's.id,',
    's.session_id,',
    's.agent_id,',
    's.source,',
    's.started_at,',
    's.last_message_at,',
    's.msg_count,',
    'ss.summary_text,',
    'ss.structured_summary,',
    'ss.access_count,',
    'ss.last_accessed_at,',
    "ts_headline('simple', COALESCE(ss.summary_text, ''), plainto_tsquery('simple', $1)) AS summary_snippet,",
    "ts_rank(ss.search_tsv, plainto_tsquery('simple', $1)) AS fts_rank",
    `FROM ${schemaRef}.sessions s`,
    `LEFT JOIN ${schemaRef}.session_summaries ss ON ss.session_row_id = s.id`,
    // NOTE(review): this predicate on `ss` discards NULL-extended rows, so the
    // LEFT JOIN behaves like an inner join here.
    "WHERE ss.search_tsv @@ plainto_tsquery('simple', $1)",
    'AND s.tenant_id = $2',
    'AND ($3::text IS NULL OR s.agent_id = $3)',
    'AND ($4::text IS NULL OR s.source = $4)',
    'AND ($5::date IS NULL OR s.started_at::date >= $5::date)',
    'AND ($6::date IS NULL OR s.started_at::date <= $6::date)',
    'ORDER BY fts_rank DESC, s.last_message_at DESC NULLS LAST',
    'LIMIT $7',
  ].join('\n');

  const result = await pool.query(
    sql,
    [query, tenantId, agentId || null, source || null, dateFrom || null, dateTo || null, clampedLimit]
  );
  return result.rows;
}
|
|
369
|
+
|
|
370
|
+
// ---------------------------------------------------------------------------
|
|
371
|
+
// recordAccess
|
|
372
|
+
// ---------------------------------------------------------------------------
|
|
373
|
+
|
|
374
|
+
/**
 * Increment `access_count` and set `last_accessed_at = now()` on the summary
 * rows of the given sessions, in a single statement.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {Array} sessionRowIds - Session row ids; no-op when empty/missing.
 * @param {{schema: string}} [opts]
 * @returns {Promise<void>}
 */
async function recordAccess(pool, sessionRowIds, { schema } = {}) {
  const hasIds = Boolean(sessionRowIds) && sessionRowIds.length !== 0;
  if (!hasIds) return;

  const sql = [
    `UPDATE ${qi(schema)}.session_summaries`,
    'SET access_count = access_count + 1, last_accessed_at = now()',
    'WHERE session_row_id = ANY($1)',
  ].join('\n');

  // The whole id list is bound as one array parameter.
  await pool.query(sql, [sessionRowIds]);
}
|
|
383
|
+
|
|
384
|
+
// ---------------------------------------------------------------------------
|
|
385
|
+
// extractUserTurns
|
|
386
|
+
// ---------------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
/**
 * Collect the meaningful user turns from a normalized message list.
 *
 * A message contributes a turn when its role is 'user', its trimmed text has
 * at least MIN_TURN_CHARS characters, and the text matches none of
 * TURN_NOISE_RE. Text comes from `content` (string, or the 'text' parts of
 * an array joined with newlines) or, failing that, from a string `text`.
 * Null/undefined messages and content parts are skipped instead of throwing.
 *
 * @param {Array<object>} normalized - Messages; anything else yields [].
 * @returns {Array<{turnIndex: number, messageIndex: number, text: string}>}
 *   `turnIndex` is 1-based over kept turns; `messageIndex` is the position in
 *   `normalized`; `text` is truncated to MAX_TURN_CHARS code points.
 */
function extractUserTurns(normalized) {
  if (!Array.isArray(normalized)) return [];

  const turns = [];
  let turnIndex = 0;
  for (let i = 0; i < normalized.length; i++) {
    const msg = normalized[i];
    // Guard against holes / null entries in the message list.
    if (!msg || msg.role !== 'user') continue;

    let text;
    if (typeof msg.content === 'string') {
      text = msg.content;
    } else if (Array.isArray(msg.content)) {
      text = msg.content
        .filter(p => p && p.type === 'text')
        .map(p => p.text)
        .join('\n');
    } else if (typeof msg.text === 'string') {
      text = msg.text;
    } else {
      text = '';
    }

    text = text.trim();
    if (text.length < MIN_TURN_CHARS) continue;
    if (TURN_NOISE_RE.some(re => re.test(text))) continue;

    turnIndex++;
    turns.push({
      turnIndex,
      messageIndex: i,
      // Array.from splits by code point, so truncation never cuts a
      // surrogate pair in half.
      text: Array.from(text).slice(0, MAX_TURN_CHARS).join(''),
    });
  }
  return turns;
}
|
|
423
|
+
|
|
424
|
+
// ---------------------------------------------------------------------------
|
|
425
|
+
// upsertTurnEmbeddings
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
427
|
+
|
|
428
|
+
/**
 * Upsert one embedding row per user turn, keyed by
 * (session_row_id, message_index).
 *
 * On conflict the stored embedding is kept when the content hash is
 * unchanged, otherwise it is replaced. Turns whose vector is missing or
 * empty are skipped rather than written with a NULL embedding.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {*} sessionRowId - Value written to `session_row_id`.
 * @param {object} fields
 * @param {string} fields.schema
 * @param {string} fields.tenantId
 * @param {string} fields.sessionId
 * @param {string} fields.agentId
 * @param {string} [fields.source]
 * @param {Array<{turnIndex: number, messageIndex: number, text: string}>} fields.turns
 * @param {Array<number[]|Float32Array|null>} fields.vectors - Parallel to `turns`.
 * @returns {Promise<void>}
 * @throws {TypeError} When `turns` is non-empty but `vectors` is missing.
 * @throws {Error} When lengths differ, or a vector holds a non-finite value.
 */
async function upsertTurnEmbeddings(pool, sessionRowId, {
  schema,
  tenantId,
  sessionId,
  agentId,
  source,
  turns,
  vectors,
}) {
  if (!turns || turns.length === 0) return;
  if (!vectors || typeof vectors.length !== 'number') {
    throw new TypeError('vectors must be an array with one entry per turn');
  }
  if (turns.length !== vectors.length) {
    throw new Error(`turns.length (${turns.length}) !== vectors.length (${vectors.length})`);
  }

  // The statement text is the same for every turn; build it once.
  const table = `${qi(schema)}.turn_embeddings`;
  const sql = [
    `INSERT INTO ${table}`,
    '(session_row_id, tenant_id, session_id, agent_id, source,',
    'turn_index, message_index, role, content_text, content_hash, embedding)',
    "VALUES ($1,$2,$3,$4,$5,$6,$7,'user',$8,$9,$10::vector)",
    'ON CONFLICT (session_row_id, message_index) DO UPDATE SET',
    'content_text = EXCLUDED.content_text,',
    'content_hash = EXCLUDED.content_hash,',
    'embedding = CASE',
    `WHEN ${table}.content_hash = EXCLUDED.content_hash`,
    `THEN ${table}.embedding`,
    'ELSE EXCLUDED.embedding',
    'END',
  ].join('\n');

  for (let i = 0; i < turns.length; i++) {
    const t = turns[i];
    const vec = vectors[i];
    if (!vec) continue;

    // Normalize typed arrays to a plain array before serializing, then skip
    // vectors that serialize to null (e.g. an empty array, which is truthy
    // and would otherwise be inserted as a NULL embedding).
    const embedding = vecToStr(Array.isArray(vec) ? vec : Array.from(vec));
    if (embedding === null) continue;

    // Short (16 hex chars) SHA-256 prefix used to detect unchanged content.
    const contentHash = crypto.createHash('sha256').update(t.text).digest('hex').slice(0, 16);
    await pool.query(sql, [
      sessionRowId, tenantId, sessionId, agentId, source || null,
      t.turnIndex, t.messageIndex,
      t.text, contentHash, embedding,
    ]);
  }
}
|
|
469
|
+
|
|
470
|
+
// ---------------------------------------------------------------------------
|
|
471
|
+
// searchTurnEmbeddings
|
|
472
|
+
// ---------------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
/**
 * Vector search over turn embeddings: for each session keep its closest turn
 * (`DISTINCT ON (t.session_row_id)` ordered by `<=>` distance), then return
 * the sessions ordered by that distance.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema
 * @param {string} opts.tenantId
 * @param {number[]|Float32Array} opts.queryVec - Query embedding; validated
 *   through vecToStr like stored embeddings.
 * @param {string|Date} [opts.dateFrom] - Inclusive lower bound on started_at (by date).
 * @param {string|Date} [opts.dateTo] - Inclusive upper bound on started_at (by date).
 * @param {string} [opts.agentId]
 * @param {string} [opts.source]
 * @param {number} [opts.limit=15] - Truncated to an integer, minimum 1; values
 *   that are not finite numbers fall back to 15.
 * @returns {Promise<{rows: object[]}>} At most `limit` rows.
 * @throws {Error} When `queryVec` is missing, empty, or has a non-finite value.
 */
async function searchTurnEmbeddings(pool, {
  schema,
  tenantId,
  queryVec,
  dateFrom,
  dateTo,
  agentId,
  source,
  limit = 15,
}) {
  // Array.from normalizes typed arrays so validation behaves the same for
  // plain and typed inputs.
  const vecLiteral = queryVec == null ? null : vecToStr(Array.from(queryVec));
  if (vecLiteral === null) {
    throw new Error('queryVec must be a non-empty array of finite numbers');
  }

  // A NaN or negative LIMIT would be rejected by PostgreSQL.
  const requested = Math.trunc(Number(limit));
  const safeLimit = Number.isFinite(requested) ? Math.max(1, requested) : 15;

  const where = ['s.tenant_id = $1'];
  const params = [tenantId];

  // Each optional filter appends its value and references it by position.
  if (dateFrom) {
    params.push(dateFrom);
    where.push(`($${params.length}::date IS NULL OR s.started_at::date >= $${params.length}::date)`);
  }
  if (dateTo) {
    params.push(dateTo);
    where.push(`($${params.length}::date IS NULL OR s.started_at::date <= $${params.length}::date)`);
  }
  if (agentId) {
    params.push(agentId);
    where.push(`t.agent_id = $${params.length}`);
  }
  if (source) {
    params.push(source);
    where.push(`t.source = $${params.length}`);
  }

  params.push(vecLiteral);
  const vecPos = params.length;

  // The outer LIMIT over-fetches (3x); the result is trimmed to `safeLimit`
  // in JS below. The inner DISTINCT ON query itself is not limited.
  params.push(safeLimit * 3);
  const innerLimitPos = params.length;

  const schemaRef = qi(schema);
  const sql = [
    'SELECT * FROM (',
    'SELECT DISTINCT ON (t.session_row_id)',
    's.session_id, s.id AS session_row_id, s.agent_id, s.source, s.started_at,',
    'ss.summary_text, ss.structured_summary, ss.access_count, ss.last_accessed_at,',
    't.content_text AS matched_turn_text, t.turn_index AS matched_turn_index,',
    `(t.embedding <=> $${vecPos}::vector) AS turn_distance`,
    `FROM ${schemaRef}.turn_embeddings t`,
    `JOIN ${schemaRef}.sessions s ON s.id = t.session_row_id`,
    `LEFT JOIN ${schemaRef}.session_summaries ss ON ss.session_row_id = s.id`,
    `WHERE ${where.join(' AND ')}`,
    'ORDER BY t.session_row_id, turn_distance ASC',
    ') sub',
    'ORDER BY turn_distance ASC',
    `LIMIT $${innerLimitPos}`,
  ].join('\n');

  const result = await pool.query(sql, params);
  return { rows: result.rows.slice(0, safeLimit) };
}
|
|
531
|
+
|
|
532
|
+
// ---------------------------------------------------------------------------
|
|
533
|
+
// Exports
|
|
534
|
+
// ---------------------------------------------------------------------------
|
|
535
|
+
|
|
536
|
+
// Public surface of the storage layer. `qi` and `vecToStr` are intentionally
// not listed here and stay private to this module.
module.exports = {
  upsertSession,
  upsertSegments,
  upsertSummary,
  markStatus,
  persistProcessingResults,
  getSession,
  getSessionFull,
  getMessages,
  searchSessions,
  recordAccess,
  extractUserTurns,
  upsertTurnEmbeddings,
  searchTurnEmbeddings,
};
|
package/index.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@shadowforge0/aquifer-memory",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. Includes CLI, MCP server, and OpenClaw plugin.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"files": [
|
|
7
|
+
"index.js",
|
|
8
|
+
"core/",
|
|
9
|
+
"pipeline/",
|
|
10
|
+
"schema/",
|
|
11
|
+
"consumers/"
|
|
12
|
+
],
|
|
13
|
+
"bin": {
|
|
14
|
+
"aquifer": "./consumers/cli.js"
|
|
15
|
+
},
|
|
16
|
+
"exports": {
|
|
17
|
+
".": "./index.js",
|
|
18
|
+
"./consumers/mcp": "./consumers/mcp.js",
|
|
19
|
+
"./consumers/openclaw-plugin": "./consumers/openclaw-plugin.js",
|
|
20
|
+
"./consumers/shared/config": "./consumers/shared/config.js",
|
|
21
|
+
"./consumers/shared/factory": "./consumers/shared/factory.js"
|
|
22
|
+
},
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "git+https://github.com/shadowforge0/aquifer.git"
|
|
26
|
+
},
|
|
27
|
+
"homepage": "https://github.com/shadowforge0/aquifer#readme",
|
|
28
|
+
"bugs": {
|
|
29
|
+
"url": "https://github.com/shadowforge0/aquifer/issues"
|
|
30
|
+
},
|
|
31
|
+
"author": "shadowforge0",
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"pg": "^8.13.0"
|
|
34
|
+
},
|
|
35
|
+
"optionalDependencies": {
|
|
36
|
+
"@modelcontextprotocol/sdk": "^1.12.0",
|
|
37
|
+
"zod": "^3.24.0"
|
|
38
|
+
},
|
|
39
|
+
"engines": {
|
|
40
|
+
"node": ">=18.0.0"
|
|
41
|
+
},
|
|
42
|
+
"license": "MIT",
|
|
43
|
+
"keywords": [
|
|
44
|
+
"ai",
|
|
45
|
+
"memory",
|
|
46
|
+
"postgres",
|
|
47
|
+
"pgvector",
|
|
48
|
+
"agent",
|
|
49
|
+
"long-term-memory",
|
|
50
|
+
"embedding",
|
|
51
|
+
"knowledge-graph",
|
|
52
|
+
"rrf",
|
|
53
|
+
"hybrid-search",
|
|
54
|
+
"mcp",
|
|
55
|
+
"openclaw"
|
|
56
|
+
]
|
|
57
|
+
}
|