@shadowforge0/aquifer-memory 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -0
- package/consumers/cli.js +314 -0
- package/consumers/mcp.js +135 -0
- package/consumers/openclaw-plugin.js +235 -0
- package/consumers/shared/config.js +143 -0
- package/consumers/shared/factory.js +77 -0
- package/consumers/shared/llm.js +119 -0
- package/core/aquifer.js +634 -0
- package/core/entity.js +360 -0
- package/core/hybrid-rank.js +166 -0
- package/core/storage.js +550 -0
- package/index.js +6 -0
- package/package.json +57 -0
- package/pipeline/embed.js +230 -0
- package/pipeline/extract-entities.js +73 -0
- package/pipeline/summarize.js +245 -0
- package/schema/001-base.sql +180 -0
- package/schema/002-entities.sql +120 -0
package/core/entity.js
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// C1: quote identifier for SQL safety
|
|
4
|
+
/**
 * Quote a SQL identifier (such as a schema name) so it can be interpolated
 * into a statement.
 *
 * The value is wrapped in double quotes and every embedded double quote is
 * doubled, which is how a delimited identifier escapes a quote character.
 * Without the doubling, an identifier containing `"` would end the quoted
 * section early and let the remainder be parsed as SQL.
 *
 * @param {string} identifier - Raw identifier; non-strings are stringified.
 * @returns {string} The identifier wrapped in double quotes.
 */
function qi(identifier) {
  return `"${String(identifier).replace(/"/g, '""')}"`;
}
|
|
5
|
+
|
|
6
|
+
/**
 * Serialize a numeric vector into the bracketed, comma-separated text form
 * (`[1,2,3]`) that is later bound to a `::vector` parameter.
 *
 * @param {number[]} vec - Vector components.
 * @returns {string|null} The text form, or null when `vec` is missing, not an
 *   array, or empty.
 * @throws {Error} When any component is not a finite number.
 */
function vecToStr(vec) {
  const usable = Array.isArray(vec) && vec.length > 0;
  if (!usable) return null;

  // Report the first offending position so the caller can locate bad data.
  const badIndex = vec.findIndex((component) => !Number.isFinite(component));
  if (badIndex !== -1) {
    throw new Error(`Vector contains non-finite value at index ${badIndex}`);
  }

  return '[' + vec.join(',') + ']';
}
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Entity type enum
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
// Closed vocabulary of entity kinds accepted by the parser in this file.
const ENTITY_TYPES = new Set(
  'person project concept tool metric org place event doc task topic other'.split(' '),
);
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Homoglyph mapping for normalizeEntityName
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
// Lookup table from look-alike characters to an ASCII replacement.
// Hand-written entries come first: CJK brackets, dashes and middle dots.
const HOMOGLYPH_MAP = {
  '\u3010': '[', '\u3011': ']', // 【】
  '\u300C': '[', '\u300D': ']', // 「」
  '\u2014': '-', '\u2013': '-', // em-dash, en-dash
  '\u2015': '-',                // horizontal bar
  '\u00B7': '.', '\u30FB': '.', // middle dots
};

// Fullwidth forms sit exactly 0xFEE0 above their ASCII counterparts, so the
// remaining entries are generated: U+FF01..U+FF3A (punctuation, digits,
// upper-case letters) and U+FF41..U+FF5A (lower-case letters). The gap
// U+FF3B..U+FF40 is intentionally left unmapped, as in the literal table.
for (const [first, last] of [[0xFF01, 0xFF3A], [0xFF41, 0xFF5A]]) {
  for (let code = first; code <= last; code++) {
    HOMOGLYPH_MAP[String.fromCharCode(code)] = String.fromCharCode(code - 0xFEE0);
  }
}

// Single character class matching every key of the table, used for replacement.
const HOMOGLYPH_RE = new RegExp(`[${Object.keys(HOMOGLYPH_MAP).join('')}]`, 'g');
|
|
60
|
+
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// normalizeEntityName
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
/**
 * Produce the canonical comparison form of an entity name.
 *
 * Steps, in order: Unicode NFKC normalization, lower-casing, homoglyph
 * replacement via HOMOGLYPH_MAP, collapsing whitespace runs to one space, and
 * stripping whitespace/punctuation from both ends.
 *
 * @param {string} input - Raw name.
 * @returns {string} Normalized name; '' when `input` is falsy or nothing
 *   remains after stripping.
 */
function normalizeEntityName(input) {
  if (!input) return '';

  // Characters removed from the two edges only; interior punctuation is kept.
  const leadingNoise = /^[\s\-_.,;:!?'"()\[\]{}]+/;
  const trailingNoise = /[\s\-_.,;:!?'"()\[\]{}]+$/;

  return input
    .normalize('NFKC')
    .toLowerCase()
    .replace(HOMOGLYPH_RE, (glyph) => HOMOGLYPH_MAP[glyph] || glyph)
    .replace(/\s+/g, ' ')
    .replace(leadingNoise, '')
    .replace(trailingNoise, '');
}
|
|
77
|
+
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// parseEntityOutput
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
/**
 * Extract entity records from text that contains an `[ENTITIES]` section.
 *
 * Everything after the first `[ENTITIES]` marker is split into blocks on lines
 * consisting solely of `---`. Inside a block, lines starting with `name:`,
 * `type:` and `aliases:` (comma-separated) are read; other lines are ignored.
 * A type outside ENTITY_TYPES leaves the default 'other'. Blocks without a
 * name, or whose name normalizes to '', are skipped.
 *
 * @param {string} text - Raw text to parse.
 * @returns {Array<{name: string, normalizedName: string, type: string, aliases: string[]}>}
 *   Parsed entities; empty when the marker is missing or the section is empty
 *   or starts with `(none)`.
 */
function parseEntityOutput(text) {
  if (!text) return [];

  const marker = '[ENTITIES]';
  const markerAt = text.indexOf(marker);
  if (markerAt === -1) return [];

  const section = text.slice(markerAt + marker.length).trim();
  if (!section || section.startsWith('(none)')) return [];

  const parsed = [];

  section.split(/^---$/m).forEach((chunk) => {
    const record = { name: '', type: 'other', aliases: [] };

    chunk.trim().split('\n').forEach((rawLine) => {
      const line = rawLine.trim();

      if (line.startsWith('name:')) {
        record.name = line.slice(5).trim();
        return;
      }

      if (line.startsWith('type:')) {
        const candidate = line.slice(5).trim().toLowerCase();
        if (ENTITY_TYPES.has(candidate)) record.type = candidate;
        return;
      }

      if (line.startsWith('aliases:')) {
        const list = line.slice(8).trim();
        // An empty aliases line keeps whatever was collected earlier in the block.
        if (!list) return;
        record.aliases = list
          .split(',')
          .map((alias) => alias.trim())
          .filter(Boolean)
          .map((alias) => normalizeEntityName(alias))
          .filter(Boolean);
      }
    });

    if (!record.name) return;

    const normalizedName = normalizeEntityName(record.name);
    if (!normalizedName) return;

    parsed.push({
      name: record.name,
      normalizedName,
      type: record.type,
      aliases: record.aliases,
    });
  });

  return parsed;
}
|
|
130
|
+
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
// upsertEntity
|
|
133
|
+
// ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
/**
 * Insert an entity, or fold a new sighting into the existing row that shares
 * (tenant_id, normalized_name, agent_id).
 *
 * On conflict the statement increments `frequency`, merges the alias arrays
 * without duplicates, keeps the later `last_seen_at`, keeps the stored
 * embedding when no new one is supplied, and keeps the stored metadata when
 * the incoming metadata is `{}`.
 *
 * @param {object} pool - Object exposing `query(sql, params)` that resolves to `{ rows }`.
 * @param {object} entity - Entity fields; `aliases` are normalized before storage.
 * @returns {Promise<{id: *, isNew: boolean}>} Row id and the `(xmax = 0)` flag
 *   (presumably true only for a fresh insert — confirm against Postgres docs).
 * @throws {Error} When the statement returns no row, or when `embedding`
 *   contains a non-finite value.
 */
async function upsertEntity(pool, {
  schema,
  tenantId = 'default',
  name,
  normalizedName,
  aliases = [],
  type = 'other',
  status = 'active',
  agentId = 'main',
  createdBy,
  metadata = {},
  embedding,
  occurredAt,
}) {
  const aliasList = aliases
    .map((alias) => normalizeEntityName(alias))
    .filter(Boolean);
  const embeddingText = embedding ? vecToStr(embedding) : null;
  const seenAt = occurredAt || new Date().toISOString();

  const table = `${qi(schema)}.entities`;
  const statement = `INSERT INTO ${table}
      (tenant_id, name, normalized_name, aliases, type, status, agent_id,
       created_by, metadata, embedding, first_seen_at, last_seen_at, frequency)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::jsonb, $10::vector, $11, $11, 1)
    ON CONFLICT (tenant_id, normalized_name, agent_id) DO UPDATE SET
      frequency = ${table}.frequency + 1,
      aliases = ARRAY(SELECT DISTINCT unnest(${table}.aliases || EXCLUDED.aliases)),
      last_seen_at = GREATEST(${table}.last_seen_at, EXCLUDED.last_seen_at),
      embedding = COALESCE(EXCLUDED.embedding, ${table}.embedding),
      metadata = COALESCE(NULLIF(EXCLUDED.metadata, '{}'::jsonb), ${table}.metadata)
    RETURNING id, (xmax = 0) AS is_new`;

  const params = [
    tenantId,
    name,
    normalizedName,
    aliasList,
    type,
    status,
    agentId,
    createdBy || null,
    JSON.stringify(metadata),
    embeddingText,
    seenAt,
  ];

  const { rows } = await pool.query(statement, params);
  const [row] = rows;
  if (!row) throw new Error('upsertEntity returned no row');

  return { id: row.id, isNew: row.is_new };
}
|
|
179
|
+
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// upsertEntityMention
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
/**
 * Record that an entity was mentioned in a session.
 *
 * The insert does nothing when a mention for the same
 * (entity_id, session_row_id) already exists.
 *
 * @param {object} pool - Object exposing `query(sql, params)` that resolves to `{ rows }`.
 * @param {object} mention - Mention fields; optional ones are stored as NULL
 *   when falsy, and `occurredAt` defaults to the current time.
 * @returns {Promise<*|null>} Id of the inserted row, or null when nothing was inserted.
 */
async function upsertEntityMention(pool, {
  schema,
  entityId,
  sessionRowId,
  turnEmbeddingId,
  source,
  mentionText,
  confidence = 1.0,
  occurredAt,
}) {
  const statement = `INSERT INTO ${qi(schema)}.entity_mentions
      (entity_id, session_row_id, turn_embedding_id, source, mention_text, confidence, occurred_at)
    VALUES ($1, $2, $3, $4, $5, $6, $7)
    ON CONFLICT (entity_id, session_row_id) DO NOTHING
    RETURNING id`;

  const params = [
    entityId,
    sessionRowId,
    turnEmbeddingId || null,
    source || null,
    mentionText || null,
    confidence,
    occurredAt || new Date().toISOString(),
  ];

  const { rows } = await pool.query(statement, params);
  const [inserted] = rows;
  return inserted ? inserted.id : null;
}
|
|
211
|
+
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
// upsertEntityRelations
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
|
|
216
|
+
/**
 * Record co-occurrence between pairs of entities.
 *
 * Each pair is stored once regardless of direction: the smaller id goes into
 * `src_entity_id` and the larger into `dst_entity_id`. An existing pair has
 * its `co_occurrence_count` incremented and keeps the later `last_seen_at`.
 * Pairs with a missing id, or with identical ids, are skipped. Statements are
 * issued one after another.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {object} args
 * @param {string} args.schema - Schema holding `entity_relations`.
 * @param {Array<{srcEntityId: *, dstEntityId: *}>} args.pairs - Entity id pairs.
 * @param {string|Date} [args.occurredAt] - Timestamp; defaults to now.
 * @returns {Promise<{upserted: number}>} Number of statements executed.
 */
async function upsertEntityRelations(pool, {
  schema,
  pairs,
  occurredAt,
}) {
  if (!pairs || pairs.length === 0) return { upserted: 0 };

  const seenAt = occurredAt || new Date().toISOString();
  const table = `${qi(schema)}.entity_relations`;
  const statement = `INSERT INTO ${table}
      (src_entity_id, dst_entity_id, co_occurrence_count, first_seen_at, last_seen_at)
    VALUES ($1, $2, 1, $3, $3)
    ON CONFLICT (src_entity_id, dst_entity_id) DO UPDATE SET
      co_occurrence_count = ${table}.co_occurrence_count + 1,
      last_seen_at = GREATEST(${table}.last_seen_at, EXCLUDED.last_seen_at)`;

  let written = 0;

  for (const { srcEntityId, dstEntityId } of pairs) {
    const usable = srcEntityId && dstEntityId && srcEntityId !== dstEntityId;
    if (!usable) continue;

    // NOTE(review): Math.min/Math.max coerce the ids to Number, so this
    // assumes numeric ids within the safe-integer range — confirm against the schema.
    const ordered = [
      Math.min(srcEntityId, dstEntityId),
      Math.max(srcEntityId, dstEntityId),
    ];

    await pool.query(statement, [...ordered, seenAt]);
    written += 1;
  }

  return { upserted: written };
}
|
|
245
|
+
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
// upsertEntitySession
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
/**
 * Link an entity to a session, counting repeated mentions.
 *
 * A new (entity_id, session_row_id) row starts with `mention_count` 1; an
 * existing row has its `mention_count` incremented and is otherwise unchanged.
 *
 * @param {object} pool - Object exposing `query(sql, params)`.
 * @param {object} args
 * @param {string} args.schema - Schema holding `entity_sessions`.
 * @param {*} args.entityId - Entity row id.
 * @param {*} args.sessionRowId - Session row id.
 * @param {string|Date} [args.occurredAt] - Timestamp; defaults to now.
 * @returns {Promise<void>}
 */
async function upsertEntitySession(pool, {
  schema,
  entityId,
  sessionRowId,
  occurredAt,
}) {
  const table = `${qi(schema)}.entity_sessions`;
  const statement = `INSERT INTO ${table}
      (entity_id, session_row_id, mention_count, occurred_at)
    VALUES ($1, $2, 1, $3)
    ON CONFLICT (entity_id, session_row_id) DO UPDATE SET
      mention_count = ${table}.mention_count + 1`;

  const seenAt = occurredAt || new Date().toISOString();
  await pool.query(statement, [entityId, sessionRowId, seenAt]);
}
|
|
265
|
+
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
// searchEntities
|
|
268
|
+
// ---------------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
/**
 * Escape the LIKE/ILIKE metacharacters in a string so it matches literally.
 *
 * Backslash, `%` and `_` are each prefixed with a backslash, which is the
 * escape character the search query declares with `ESCAPE '\'`.
 *
 * @param {string} str - Text to escape.
 * @returns {string} Escaped text.
 */
function _escapeIlike(str) {
  // One pass over all three metacharacters gives the same result as escaping
  // the backslash first and the wildcards afterwards.
  return str.replace(/[\\%_]/g, (meta) => `\\${meta}`);
}
|
|
273
|
+
|
|
274
|
+
/**
 * Find active entities of a tenant whose name resembles `query`.
 *
 * A row matches when its normalized name reaches `similarityThreshold` under
 * `similarity()`, contains the normalized query as a substring
 * (case-insensitive, wildcards escaped), or lists the normalized query among
 * its aliases. Results are ordered by name similarity, then frequency.
 *
 * `limit` is coerced to an integer in [1, 100]. Previously a fractional or
 * NaN value passed through the clamp and was bound to `LIMIT`, which the
 * database rejects; a value that is not a number now falls back to 10.
 *
 * NOTE(review): unlike upsertEntity, `tenantId` has no default here; an
 * omitted tenantId presumably binds NULL and matches no rows — confirm intent.
 *
 * @param {object} pool - Object exposing `query(sql, params)` that resolves to `{ rows }`.
 * @param {object} args
 * @param {string} args.schema - Schema holding `entities`.
 * @param {string} args.tenantId - Tenant to search within.
 * @param {string} args.query - Free-text name to look up.
 * @param {string} [args.agentId] - Restrict to one agent; all agents when falsy.
 * @param {number} [args.limit=10] - Maximum rows, clamped to 1..100.
 * @param {number} [args.similarityThreshold=0.1] - Minimum `similarity()` score.
 * @returns {Promise<object[]>} Matching rows, each including `name_sim`;
 *   empty when the query normalizes to ''.
 */
async function searchEntities(pool, {
  schema,
  tenantId,
  query,
  agentId,
  limit = 10,
  similarityThreshold = 0.1,
}) {
  const requestedLimit = Number(limit);
  const clampedLimit = Number.isNaN(requestedLimit)
    ? 10
    : Math.trunc(Math.max(1, Math.min(100, requestedLimit)));

  const normQ = normalizeEntityName(query);
  if (!normQ) return [];

  const escaped = _escapeIlike(normQ);

  const result = await pool.query(
    `SELECT
       id, name, normalized_name, aliases, type, status, frequency, agent_id,
       last_seen_at, metadata,
       similarity(normalized_name, $1) AS name_sim
     FROM ${qi(schema)}.entities
     WHERE status = 'active'
       AND tenant_id = $2
       AND (
         similarity(normalized_name, $1) >= $3
         OR normalized_name ILIKE '%' || $4 || '%' ESCAPE '\\'
         OR $5 = ANY(aliases)
       )
       AND ($6::text IS NULL OR agent_id = $6)
     ORDER BY name_sim DESC, frequency DESC
     LIMIT $7`,
    [normQ, tenantId, similarityThreshold, escaped, normQ, agentId || null, clampedLimit]
  );

  return result.rows;
}
|
|
309
|
+
|
|
310
|
+
// ---------------------------------------------------------------------------
|
|
311
|
+
// getEntityRelations
|
|
312
|
+
// ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
/**
 * List the entities that co-occur with a given entity, strongest first.
 *
 * Relations are matched with the entity on either side; the entity on the
 * opposite side is joined in and returned as `related_*` columns. Only
 * related entities with status 'active' are returned.
 *
 * `limit` is coerced to an integer in [1, 100]. Previously a fractional or
 * NaN value passed through the clamp and was bound to `LIMIT`, which the
 * database rejects; a value that is not a number now falls back to 20.
 *
 * @param {object} pool - Object exposing `query(sql, params)` that resolves to `{ rows }`.
 * @param {object} args
 * @param {string} args.schema - Schema holding `entity_relations` and `entities`.
 * @param {*} args.entityId - Entity whose relations are requested.
 * @param {number} [args.limit=20] - Maximum rows, clamped to 1..100.
 * @returns {Promise<object[]>} Relation rows ordered by `co_occurrence_count` descending.
 */
async function getEntityRelations(pool, {
  schema,
  entityId,
  limit = 20,
}) {
  const requestedLimit = Number(limit);
  const clampedLimit = Number.isNaN(requestedLimit)
    ? 20
    : Math.trunc(Math.max(1, Math.min(100, requestedLimit)));

  const result = await pool.query(
    `SELECT
       r.id,
       r.src_entity_id,
       r.dst_entity_id,
       r.co_occurrence_count,
       r.last_seen_at,
       CASE WHEN r.src_entity_id = $1 THEN r.dst_entity_id ELSE r.src_entity_id END AS related_entity_id,
       e.name AS related_name,
       e.type AS related_type,
       e.frequency AS related_frequency
     FROM ${qi(schema)}.entity_relations r
     JOIN ${qi(schema)}.entities e ON e.id = CASE
       WHEN r.src_entity_id = $1 THEN r.dst_entity_id
       ELSE r.src_entity_id
     END
     WHERE (r.src_entity_id = $1 OR r.dst_entity_id = $1)
       AND e.status = 'active'
     ORDER BY r.co_occurrence_count DESC
     LIMIT $2`,
    [entityId, clampedLimit]
  );

  return result.rows;
}
|
|
346
|
+
|
|
347
|
+
// ---------------------------------------------------------------------------
|
|
348
|
+
// Exports
|
|
349
|
+
// ---------------------------------------------------------------------------
|
|
350
|
+
|
|
351
|
+
module.exports = {
|
|
352
|
+
normalizeEntityName,
|
|
353
|
+
parseEntityOutput,
|
|
354
|
+
upsertEntity,
|
|
355
|
+
upsertEntityMention,
|
|
356
|
+
upsertEntityRelations,
|
|
357
|
+
upsertEntitySession,
|
|
358
|
+
searchEntities,
|
|
359
|
+
getEntityRelations,
|
|
360
|
+
};
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// rrfFusion — Reciprocal Rank Fusion across 3 result lists
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
|
|
7
|
+
/**
 * Reciprocal Rank Fusion across up to three ranked result lists.
 *
 * Every row contributes `1 / (K + rank)` (rank starting at 1) to the score of
 * its key, where the key is `session_id` when present and otherwise the
 * stringified `id`. Falsy entries are skipped but still occupy their rank.
 *
 * Rows that carry neither `session_id` nor `id` are ignored. Previously they
 * were all scored under the literal key "undefined" (or "null"). A list
 * passed as `null` is treated as empty instead of throwing.
 *
 * @param {object[]} [ftsResults=[]] - Ranked full-text results.
 * @param {object[]} [embResults=[]] - Ranked embedding results.
 * @param {object[]} [turnResults=[]] - Ranked turn-level results.
 * @param {number} [K=60] - Rank smoothing constant.
 * @returns {Map<*, number>} Fused score per key.
 */
function rrfFusion(ftsResults = [], embResults = [], turnResults = [], K = 60) {
  const scores = new Map();

  for (const list of [ftsResults, embResults, turnResults]) {
    const ranked = list || [];

    for (let i = 0; i < ranked.length; i++) {
      const r = ranked[i];
      if (!r) continue;

      // Only stringify an id that actually exists; String(undefined) would
      // yield the truthy key "undefined".
      const id = r.session_id || (r.id == null ? '' : String(r.id));
      if (!id) continue;

      scores.set(id, (scores.get(id) || 0) + 1 / (K + i + 1));
    }
  }

  return scores;
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// timeDecay — sigmoid decay based on age in days
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
/**
 * Recency score: a logistic curve over the age of `startedAt` in days.
 *
 * Returns a value in (0, 1) that equals 0.5 when the age is `midpointDays`
 * and falls as the age grows. A missing or unparseable timestamp yields the
 * neutral value 0.5.
 *
 * Accepts a Date (or any object with `getTime()`), a date string, or an
 * epoch-milliseconds number. Numbers previously raised a TypeError because
 * only strings were converted to Date.
 *
 * @param {Date|string|number} startedAt - Timestamp of the item.
 * @param {number} [midpointDays=45] - Age at which the score is 0.5.
 * @param {number} [steepness=0.05] - Slope of the curve per day.
 * @returns {number} Decay score.
 */
function timeDecay(startedAt, midpointDays = 45, steepness = 0.05) {
  if (!startedAt) return 0.5;

  const dt = typeof startedAt.getTime === 'function' ? startedAt : new Date(startedAt);
  const startedMs = dt.getTime();
  if (Number.isNaN(Number(startedMs))) return 0.5;

  const msPerDay = 1000 * 60 * 60 * 24;
  const ageDays = (Date.now() - startedMs) / msPerDay;
  return 1 / (1 + Math.exp(steepness * (ageDays - midpointDays)));
}
|
|
47
|
+
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// accessScore — exponential decay on access recency (30-day half-life)
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
/**
 * Access score: the access count decayed exponentially by the time since the
 * last access.
 *
 * The result halves for every `halfLifeDays` elapsed. The decay constant is
 * `Math.LN2`, so the half-life is exact; the earlier truncated `0.693` gave a
 * slightly longer half-life than the 30 days the code states.
 *
 * Accepts a Date (or any object with `getTime()`), a date string, or an
 * epoch-milliseconds number. Numbers previously raised a TypeError because
 * only strings were converted to Date.
 *
 * @param {number} accessCount - Number of recorded accesses.
 * @param {Date|string|number} lastAccessedAt - Time of the latest access.
 * @param {number} [halfLifeDays=30] - Days after which the score halves; must be positive.
 * @returns {number} Decayed score; 0 when the count is not positive or the
 *   timestamp is missing or unparseable.
 */
function accessScore(accessCount, lastAccessedAt, halfLifeDays = 30) {
  if (!accessCount || accessCount <= 0) return 0;
  if (!lastAccessedAt) return 0;

  const dt = typeof lastAccessedAt.getTime === 'function'
    ? lastAccessedAt
    : new Date(lastAccessedAt);
  const accessedMs = dt.getTime();
  if (Number.isNaN(Number(accessedMs))) return 0;

  const msPerDay = 1000 * 60 * 60 * 24;
  const daysSince = (Date.now() - accessedMs) / msPerDay;
  return accessCount * Math.exp(-Math.LN2 * daysSince / halfLifeDays);
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// hybridRank — combine all signals into final ranked list
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
// Default mixing weights. rrf, timeDecay and access form the base score;
// entityBoost scales the bonus applied on top of it.
const DEFAULT_WEIGHTS = {
  rrf: 0.65,
  timeDecay: 0.25,
  access: 0.10,
  entityBoost: 0.18,
};

/**
 * Merge several ranked result lists into one list ordered by a blended score.
 *
 * Rows are keyed by `session_id`, falling back to the stringified `id`. The
 * first occurrence in the full-text and embedding lists supplies the row. A
 * turn-level row that matches an existing key only contributes its
 * `matched_turn_text` and `matched_turn_index`; otherwise it becomes a row of
 * its own.
 *
 * Score per row:
 *   base  = min(1, w.rrf * normalizedRRF + w.timeDecay * timeDecay(started_at)
 *                  + w.access * (1 - exp(-accessScore / 5)))
 *   final = min(1, base + w.entityBoost * entityScore * (1 - base))
 * where normalizedRRF is the fused RRF score divided by `listCount / (K + 1)`
 * and K = max(20, floor(longestList / 2)).
 *
 * @param {object[]} ftsResults - Ranked full-text results.
 * @param {object[]} embResults - Ranked embedding results.
 * @param {number} [limit=5] - Maximum number of rows returned.
 * @param {object} [weights={}] - Overrides merged over DEFAULT_WEIGHTS.
 * @param {object[]} [turnResults=[]] - Ranked turn-level results.
 * @param {Map<*, number>} [entityScoreBySession] - Entity score per row key.
 * @returns {object[]} Copies of the rows with `_score`, `_rrf`, `_timeDecay`,
 *   `_access` and `_entityScore` added, sorted by `_score` descending.
 */
function hybridRank(
  ftsResults,
  embResults,
  limit = 5,
  weights = {},
  turnResults = [],
  entityScoreBySession = new Map(),
) {
  const w = { ...DEFAULT_WEIGHTS, ...weights };

  const fts = ftsResults || [];
  const emb = embResults || [];
  const turns = turnResults || [];

  // Same key rule for every list: session_id first, then id as a string.
  const keyOf = (row) => (row ? (row.session_id || String(row.id || '')) : '');

  const bySession = new Map();

  // Session-level lists: the first row seen for a key wins.
  for (const row of [...fts, ...emb]) {
    if (!row) continue;
    const key = keyOf(row);
    if (!key || bySession.has(key)) continue;
    bySession.set(key, { ...row, session_id: key });
  }

  // Turn-level list: annotate a known session, or add a new one.
  for (const row of turns) {
    if (!row) continue;
    const key = keyOf(row);
    if (!key) continue;

    const known = bySession.get(key);
    if (known) {
      known.matched_turn_text = row.matched_turn_text;
      known.matched_turn_index = row.matched_turn_index;
    } else {
      bySession.set(key, { ...row, session_id: key });
    }
  }

  if (bySession.size === 0) return [];

  // Adaptive K: grows with the longest list, never below 20.
  const longest = Math.max(fts.length, emb.length, turns.length);
  const K = Math.max(20, Math.floor(longest / 2)) || 30;

  const fused = rrfFusion(fts, emb, turns, K);

  // A row ranked first in every list would score listCount / (K + 1).
  const listCount = turns.length > 0 ? 3 : 2;
  const rrfCeiling = listCount / (K + 1);

  const ranked = Array.from(bySession, ([sessionId, row]) => {
    const rawRrf = fused.get(sessionId) || 0;
    const normRrf = rrfCeiling > 0 ? rawRrf / rrfCeiling : 0;

    const td = timeDecay(row.started_at);

    const accessEff = accessScore(row.access_count || 0, row.last_accessed_at);
    const accessSat = 1 - Math.exp(-accessEff / 5);

    // Clamp so that (1 - base) below can never turn the entity boost negative.
    const base = Math.min(1, w.rrf * normRrf + w.timeDecay * td + w.access * accessSat);

    const entitySc = entityScoreBySession.get(sessionId) || 0;
    const finalScore = Math.min(1, base + w.entityBoost * entitySc * (1 - base));

    return {
      ...row,
      _score: finalScore,
      _rrf: normRrf,
      _timeDecay: td,
      _access: accessSat,
      _entityScore: entitySc,
    };
  });

  ranked.sort((a, b) => b._score - a._score);
  return ranked.slice(0, limit);
}
|
|
161
|
+
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
// Exports
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
module.exports = { rrfFusion, timeDecay, accessScore, hybridRank };
|