@shadowforge0/aquifer-memory 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/entity.js ADDED
@@ -0,0 +1,360 @@
1
+ 'use strict';
2
+
3
/**
 * Quote a SQL identifier so it can be interpolated into query text.
 *
 * The value is wrapped in double quotes and every embedded double quote is
 * doubled, so the identifier cannot close the quoting early and inject SQL.
 *
 * @param {string} identifier - Identifier to quote (this file passes schema names).
 * @returns {string} The double-quoted identifier.
 */
function qi(identifier) {
  return `"${String(identifier).replace(/"/g, '""')}"`;
}
5
+
6
/**
 * Serialize a numeric array as a bracketed, comma-separated string
 * (for example `[1,2.5]`).
 *
 * @param {*} vec - Candidate vector.
 * @returns {string|null} The serialized vector, or null when `vec` is not a
 *   non-empty array.
 * @throws {Error} When any element is not a finite number.
 */
function vecToStr(vec) {
  const isUsable = Array.isArray(vec) && vec.length > 0;
  if (!isUsable) return null;

  // findIndex visits every index (holes included), like an indexed scan.
  const badIndex = vec.findIndex((value) => !Number.isFinite(value));
  if (badIndex !== -1) {
    throw new Error(`Vector contains non-finite value at index ${badIndex}`);
  }

  return `[${vec.join(',')}]`;
}
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Entity type enum
16
+ // ---------------------------------------------------------------------------
17
+
18
// Closed set of entity type labels. parseEntityOutput only accepts a parsed
// `type:` value when it is a member of this set.
const ENTITY_TYPES = new Set([
  'person',
  'project',
  'concept',
  'tool',
  'metric',
  'org',
  'place',
  'event',
  'doc',
  'task',
  'topic',
  'other',
]);
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Homoglyph mapping for normalizeEntityName
25
+ // ---------------------------------------------------------------------------
26
+
27
// Look-alike character -> ASCII replacement table used by normalizeEntityName.
// The bracket, dash and dot entries are listed by hand; the fullwidth forms
// are generated, since each one sits exactly 0xFEE0 code points above its
// ASCII counterpart.
const HOMOGLYPH_MAP = (() => {
  const table = {
    '\u3010': '[', '\u3011': ']', // 【】
    '\u300C': '[', '\u300D': ']', // 「」
    '\u2014': '-', '\u2013': '-', // em-dash, en-dash
    '\u2015': '-',                // horizontal bar
    '\u00B7': '.', '\u30FB': '.', // middle dots
  };

  const FULLWIDTH_OFFSET = 0xFEE0;
  const addFullwidthRange = (first, last) => {
    for (let code = first; code <= last; code++) {
      table[String.fromCharCode(code)] = String.fromCharCode(code - FULLWIDTH_OFFSET);
    }
  };

  addFullwidthRange(0xFF01, 0xFF3A); // fullwidth '!' .. 'Z' (punctuation, digits, upper case)
  addFullwidthRange(0xFF41, 0xFF5A); // fullwidth 'a' .. 'z'

  return table;
})();

// Character class matching any single key of HOMOGLYPH_MAP, applied globally.
const HOMOGLYPH_RE = new RegExp(`[${Object.keys(HOMOGLYPH_MAP).join('')}]`, 'g');
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // normalizeEntityName
63
+ // ---------------------------------------------------------------------------
64
+
65
/**
 * Reduce an entity name to the canonical form used for matching.
 *
 * Steps, in order: Unicode NFKC normalization, lower-casing, homoglyph
 * replacement via HOMOGLYPH_MAP, collapsing whitespace runs to one space,
 * then stripping leading and trailing whitespace/punctuation.
 *
 * @param {string} input - Raw entity name.
 * @returns {string} Normalized name; '' when `input` is falsy or nothing
 *   but punctuation/whitespace remains.
 */
function normalizeEntityName(input) {
  if (!input) return '';

  return input
    .normalize('NFKC')
    .toLowerCase()
    .replace(HOMOGLYPH_RE, (glyph) => HOMOGLYPH_MAP[glyph] || glyph)
    .replace(/\s+/g, ' ')
    .replace(/^[\s\-_.,;:!?'"()\[\]{}]+/, '')
    .replace(/[\s\-_.,;:!?'"()\[\]{}]+$/, '');
}
77
+
78
+ // ---------------------------------------------------------------------------
79
+ // parseEntityOutput
80
+ // ---------------------------------------------------------------------------
81
+
82
/**
 * Parse the `[ENTITIES]` section of a text blob into entity records.
 *
 * Everything after the first `[ENTITIES]` marker is split into blocks on
 * lines containing only `---`. Each block may carry `name:`, `type:` and
 * `aliases:` lines (comma-separated aliases). Blocks without a name, or whose
 * name normalizes to an empty string, are dropped. Unknown types fall back to
 * 'other'.
 *
 * @param {string} text - Raw text that may contain an `[ENTITIES]` section.
 * @returns {Array<{name: string, normalizedName: string, type: string, aliases: string[]}>}
 *   Parsed entities in document order; empty when the marker is absent or the
 *   section is empty or starts with `(none)`.
 */
function parseEntityOutput(text) {
  if (!text) return [];

  const marker = '[ENTITIES]';
  const markerAt = text.indexOf(marker);
  if (markerAt === -1) return [];

  const section = text.slice(markerAt + marker.length).trim();
  if (!section || section.startsWith('(none)')) return [];

  // Field lines are trimmed before matching, so the separator tolerates
  // surrounding blanks too; otherwise "--- " would silently merge two
  // entities into one block.
  const blocks = section.split(/^[ \t]*---[ \t]*$/m);
  const entities = [];

  for (const block of blocks) {
    let name = '';
    let type = 'other';
    let aliases = [];

    for (const rawLine of block.trim().split('\n')) {
      const line = rawLine.trim();

      if (line.startsWith('name:')) {
        name = line.slice('name:'.length).trim();
      } else if (line.startsWith('type:')) {
        const candidate = line.slice('type:'.length).trim().toLowerCase();
        if (ENTITY_TYPES.has(candidate)) type = candidate;
      } else if (line.startsWith('aliases:')) {
        const rawAliases = line.slice('aliases:'.length).trim();
        if (rawAliases) {
          aliases = rawAliases
            .split(',')
            .map((alias) => alias.trim())
            .filter(Boolean)
            .map((alias) => normalizeEntityName(alias))
            .filter(Boolean);
        }
      }
    }

    if (!name) continue;

    const normalizedName = normalizeEntityName(name);
    if (!normalizedName) continue;

    entities.push({ name, normalizedName, type, aliases });
  }

  return entities;
}
130
+
131
+ // ---------------------------------------------------------------------------
132
+ // upsertEntity
133
+ // ---------------------------------------------------------------------------
134
+
135
/**
 * Insert an entity row, or update the existing row that shares the same
 * (tenant_id, normalized_name, agent_id).
 *
 * On conflict the statement increments `frequency`, merges and de-duplicates
 * `aliases`, keeps the later `last_seen_at`, and replaces `embedding` /
 * `metadata` only when the new value is non-null / not `{}`.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`;
 *   presumably a node-postgres Pool (not visible from here).
 * @param {object} opts
 * @param {string} opts.schema - Schema holding the `entities` table.
 * @param {string} [opts.tenantId='default']
 * @param {string} opts.name - Display name as written by the source.
 * @param {string} [opts.normalizedName] - Conflict key; derived from `name`
 *   with normalizeEntityName when omitted or empty.
 * @param {string[]} [opts.aliases=[]] - Normalized before storage; a
 *   non-array value is treated as "no aliases".
 * @param {string} [opts.type='other']
 * @param {string} [opts.status='active']
 * @param {string} [opts.agentId='main']
 * @param {string} [opts.createdBy]
 * @param {object} [opts.metadata={}] - null is treated as `{}`.
 * @param {number[]} [opts.embedding] - Serialized and cast to `vector` in SQL.
 * @param {string|Date} [opts.occurredAt] - Defaults to the current time.
 * @returns {Promise<{id: *, isNew: boolean}>} Row id, and whether the row was
 *   inserted rather than updated (taken from `xmax = 0`).
 * @throws {Error} When the statement returns no row, or when the embedding
 *   contains a non-finite value.
 */
async function upsertEntity(pool, {
  schema,
  tenantId = 'default',
  name,
  normalizedName,
  aliases = [],
  type = 'other',
  status = 'active',
  agentId = 'main',
  createdBy,
  metadata = {},
  embedding,
  occurredAt,
}) {
  const table = `${qi(schema)}.entities`;

  // Without a conflict key the upsert cannot find the existing row, so fall
  // back to normalizing the display name instead of sending NULL.
  const conflictName = normalizedName || normalizeEntityName(name) || null;

  // An explicit null bypasses the destructuring default; treat it as empty.
  const normalizedAliases = (Array.isArray(aliases) ? aliases : [])
    .map((alias) => normalizeEntityName(alias))
    .filter(Boolean);

  // JSON.stringify(null) is the JSON literal null, which the NULLIF/COALESCE
  // in the statement would accept and store over existing metadata.
  const metadataJson = JSON.stringify(metadata == null ? {} : metadata);

  const embStr = embedding ? vecToStr(embedding) : null;
  const ts = occurredAt || new Date().toISOString();

  const result = await pool.query(
    `INSERT INTO ${table}
       (tenant_id, name, normalized_name, aliases, type, status, agent_id,
        created_by, metadata, embedding, first_seen_at, last_seen_at, frequency)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::jsonb, $10::vector, $11, $11, 1)
     ON CONFLICT (tenant_id, normalized_name, agent_id) DO UPDATE SET
       frequency = ${table}.frequency + 1,
       aliases = ARRAY(SELECT DISTINCT unnest(${table}.aliases || EXCLUDED.aliases)),
       last_seen_at = GREATEST(${table}.last_seen_at, EXCLUDED.last_seen_at),
       embedding = COALESCE(EXCLUDED.embedding, ${table}.embedding),
       metadata = COALESCE(NULLIF(EXCLUDED.metadata, '{}'::jsonb), ${table}.metadata)
     RETURNING id, (xmax = 0) AS is_new`,
    [
      tenantId, name, conflictName, normalizedAliases,
      type, status, agentId,
      createdBy || null,
      metadataJson,
      embStr,
      ts,
    ],
  );

  const row = result.rows[0];
  if (!row) throw new Error('upsertEntity returned no row');
  return { id: row.id, isNew: row.is_new };
}
179
+
180
+ // ---------------------------------------------------------------------------
181
+ // upsertEntityMention
182
+ // ---------------------------------------------------------------------------
183
+
184
/**
 * Record that an entity was mentioned in a session.
 *
 * The insert is skipped (`DO NOTHING`) when a row for the same
 * (entity_id, session_row_id) already exists.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema - Schema holding the `entity_mentions` table.
 * @param {*} opts.entityId
 * @param {*} opts.sessionRowId
 * @param {*} [opts.turnEmbeddingId] - Stored as NULL when falsy.
 * @param {string} [opts.source] - Stored as NULL when falsy.
 * @param {string} [opts.mentionText] - Stored as NULL when falsy.
 * @param {number} [opts.confidence=1.0]
 * @param {string|Date} [opts.occurredAt] - Defaults to the current time.
 * @returns {Promise<*|null>} Id of the inserted row, or null when the
 *   statement returned no row (the conflict path).
 */
async function upsertEntityMention(pool, {
  schema,
  entityId,
  sessionRowId,
  turnEmbeddingId,
  source,
  mentionText,
  confidence = 1.0,
  occurredAt,
}) {
  const sql = `INSERT INTO ${qi(schema)}.entity_mentions
       (entity_id, session_row_id, turn_embedding_id, source, mention_text, confidence, occurred_at)
     VALUES ($1, $2, $3, $4, $5, $6, $7)
     ON CONFLICT (entity_id, session_row_id) DO NOTHING
     RETURNING id`;

  const values = [
    entityId,
    sessionRowId,
    turnEmbeddingId || null,
    source || null,
    mentionText || null,
    confidence,
    occurredAt || new Date().toISOString(),
  ];

  const { rows } = await pool.query(sql, values);
  const inserted = rows[0];
  if (!inserted) return null;
  return inserted.id;
}
211
+
212
+ // ---------------------------------------------------------------------------
213
+ // upsertEntityRelations
214
+ // ---------------------------------------------------------------------------
215
+
216
/**
 * Upsert co-occurrence relations for a list of entity id pairs.
 *
 * Each pair is stored in canonical order (smaller id as `src_entity_id`), so
 * (a, b) and (b, a) land on the same row. An existing row has its
 * `co_occurrence_count` incremented and keeps the later `last_seen_at`.
 * Pairs with a missing id, or whose two ids refer to the same entity, are
 * skipped.
 *
 * NOTE(review): ordering uses Math.min/Math.max, so ids are compared as JS
 * numbers — this assumes numeric ids within the safe-integer range; confirm
 * against the table definition.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema - Schema holding the `entity_relations` table.
 * @param {Array<{srcEntityId: *, dstEntityId: *}>} opts.pairs
 * @param {string|Date} [opts.occurredAt] - Defaults to the current time.
 * @returns {Promise<{upserted: number}>} Number of statements executed.
 */
async function upsertEntityRelations(pool, {
  schema,
  pairs,
  occurredAt,
}) {
  if (!pairs || pairs.length === 0) return { upserted: 0 };

  const ts = occurredAt || new Date().toISOString();

  // The statement text does not depend on the pair, so build it once.
  const table = `${qi(schema)}.entity_relations`;
  const sql = `INSERT INTO ${table}
       (src_entity_id, dst_entity_id, co_occurrence_count, first_seen_at, last_seen_at)
     VALUES ($1, $2, 1, $3, $3)
     ON CONFLICT (src_entity_id, dst_entity_id) DO UPDATE SET
       co_occurrence_count = ${table}.co_occurrence_count + 1,
       last_seen_at = GREATEST(${table}.last_seen_at, EXCLUDED.last_seen_at)`;

  let upserted = 0;

  for (const { srcEntityId, dstEntityId } of pairs) {
    if (!srcEntityId || !dstEntityId || srcEntityId === dstEntityId) continue;

    const lo = Math.min(srcEntityId, dstEntityId);
    const hi = Math.max(srcEntityId, dstEntityId);

    // Ids that differ only by representation (5 vs '5') pass the strict
    // comparison above but are the same entity once canonicalized.
    if (lo === hi) continue;

    // Statements run one at a time, in input order.
    await pool.query(sql, [lo, hi, ts]);
    upserted++;
  }

  return { upserted };
}
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // upsertEntitySession
248
+ // ---------------------------------------------------------------------------
249
+
250
/**
 * Link an entity to a session, counting repeated mentions.
 *
 * The first call for an (entity_id, session_row_id) pair inserts a row with
 * `mention_count = 1`; later calls increment that counter.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema - Schema holding the `entity_sessions` table.
 * @param {*} opts.entityId
 * @param {*} opts.sessionRowId
 * @param {string|Date} [opts.occurredAt] - Defaults to the current time.
 * @returns {Promise<void>}
 */
async function upsertEntitySession(pool, {
  schema,
  entityId,
  sessionRowId,
  occurredAt,
}) {
  const table = `${qi(schema)}.entity_sessions`;
  const sql = `INSERT INTO ${table}
       (entity_id, session_row_id, mention_count, occurred_at)
     VALUES ($1, $2, 1, $3)
     ON CONFLICT (entity_id, session_row_id) DO UPDATE SET
       mention_count = ${table}.mention_count + 1`;
  const when = occurredAt || new Date().toISOString();

  await pool.query(sql, [entityId, sessionRowId, when]);
}
265
+
266
+ // ---------------------------------------------------------------------------
267
+ // searchEntities
268
+ // ---------------------------------------------------------------------------
269
+
270
/**
 * Escape the LIKE/ILIKE metacharacters (`\`, `%`, `_`) by prefixing each
 * with a backslash, so the text matches literally when used with
 * `ESCAPE '\'`.
 *
 * @param {string} str - Text to embed in an ILIKE pattern.
 * @returns {string} The escaped text.
 */
function _escapeIlike(str) {
  // One pass: every special character gets a single backslash in front of it.
  return str.replace(/[\\%_]/g, (special) => `\\${special}`);
}
273
+
274
/**
 * Find active entities of a tenant whose name or aliases match a query.
 *
 * The query is normalized with normalizeEntityName. A row matches when its
 * `normalized_name` reaches the similarity threshold, contains the query as a
 * substring (case-insensitive, metacharacters escaped), or when the query
 * equals one of its aliases. Results are ordered by name similarity, then
 * frequency.
 *
 * NOTE(review): `similarity()` is presumably the pg_trgm function — confirm
 * the extension is installed for the target database.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema - Schema holding the `entities` table.
 * @param {string} [opts.tenantId='default'] - Same default as upsertEntity,
 *   so entities written without a tenant can be found without one.
 * @param {string} opts.query - Free-text search term.
 * @param {string} [opts.agentId] - Restrict to one agent when truthy.
 * @param {number} [opts.limit=10] - Truncated to an integer and clamped to
 *   1..100; a non-numeric value falls back to 10.
 * @param {number} [opts.similarityThreshold=0.1]
 * @returns {Promise<object[]>} Matching rows; empty when the query
 *   normalizes to an empty string.
 */
async function searchEntities(pool, {
  schema,
  tenantId = 'default',
  query,
  agentId,
  limit = 10,
  similarityThreshold = 0.1,
}) {
  // LIMIT needs an integer; a fractional or NaN value would be rejected by
  // the database instead of being clamped.
  const requested = Math.trunc(Number(limit));
  const clampedLimit = Number.isFinite(requested)
    ? Math.max(1, Math.min(100, requested))
    : 10;

  const normQ = normalizeEntityName(query);
  if (!normQ) return [];

  const escaped = _escapeIlike(normQ);

  const result = await pool.query(
    `SELECT
       id, name, normalized_name, aliases, type, status, frequency, agent_id,
       last_seen_at, metadata,
       similarity(normalized_name, $1) AS name_sim
     FROM ${qi(schema)}.entities
     WHERE status = 'active'
       AND tenant_id = $2
       AND (
         similarity(normalized_name, $1) >= $3
         OR normalized_name ILIKE '%' || $4 || '%' ESCAPE '\\'
         OR $5 = ANY(aliases)
       )
       AND ($6::text IS NULL OR agent_id = $6)
     ORDER BY name_sim DESC, frequency DESC
     LIMIT $7`,
    [normQ, tenantId, similarityThreshold, escaped, normQ, agentId || null, clampedLimit],
  );

  return result.rows;
}
309
+
310
+ // ---------------------------------------------------------------------------
311
+ // getEntityRelations
312
+ // ---------------------------------------------------------------------------
313
+
314
/**
 * List the entities related to a given entity, strongest relation first.
 *
 * Relations are matched regardless of which side holds the entity. Each row
 * is joined with the entity on the opposite side, and only related entities
 * whose status is 'active' are returned.
 *
 * @param {{query: Function}} pool - Object exposing `query(sql, params)`.
 * @param {object} opts
 * @param {string} opts.schema - Schema holding `entity_relations` and `entities`.
 * @param {*} opts.entityId - Entity whose relations are requested.
 * @param {number} [opts.limit=20] - Truncated to an integer and clamped to
 *   1..100; a non-numeric value falls back to 20.
 * @returns {Promise<object[]>} Relation rows with `related_entity_id`,
 *   `related_name`, `related_type` and `related_frequency`, ordered by
 *   `co_occurrence_count` descending.
 */
async function getEntityRelations(pool, {
  schema,
  entityId,
  limit = 20,
}) {
  // LIMIT needs an integer; a fractional or NaN value would be rejected by
  // the database instead of being clamped.
  const requested = Math.trunc(Number(limit));
  const clampedLimit = Number.isFinite(requested)
    ? Math.max(1, Math.min(100, requested))
    : 20;

  const result = await pool.query(
    `SELECT
       r.id,
       r.src_entity_id,
       r.dst_entity_id,
       r.co_occurrence_count,
       r.last_seen_at,
       CASE WHEN r.src_entity_id = $1 THEN r.dst_entity_id ELSE r.src_entity_id END AS related_entity_id,
       e.name AS related_name,
       e.type AS related_type,
       e.frequency AS related_frequency
     FROM ${qi(schema)}.entity_relations r
     JOIN ${qi(schema)}.entities e ON e.id = CASE
       WHEN r.src_entity_id = $1 THEN r.dst_entity_id
       ELSE r.src_entity_id
     END
     WHERE (r.src_entity_id = $1 OR r.dst_entity_id = $1)
       AND e.status = 'active'
     ORDER BY r.co_occurrence_count DESC
     LIMIT $2`,
    [entityId, clampedLimit],
  );

  return result.rows;
}
346
+
347
+ // ---------------------------------------------------------------------------
348
+ // Exports
349
+ // ---------------------------------------------------------------------------
350
+
351
+ module.exports = {
352
+ normalizeEntityName,
353
+ parseEntityOutput,
354
+ upsertEntity,
355
+ upsertEntityMention,
356
+ upsertEntityRelations,
357
+ upsertEntitySession,
358
+ searchEntities,
359
+ getEntityRelations,
360
+ };
@@ -0,0 +1,166 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // rrfFusion — Reciprocal Rank Fusion across 3 result lists
5
+ // ---------------------------------------------------------------------------
6
+
7
/**
 * Reciprocal Rank Fusion across up to three ranked result lists.
 *
 * Every result contributes `1 / (K + rank + 1)` (rank is the zero-based
 * position in its list) to the score of its key. The key is `session_id`
 * when truthy, otherwise the stringified `id` — the same rule hybridRank
 * uses. Falsy entries and results with neither identifier are ignored rather
 * than being pooled under a bogus key such as "undefined".
 *
 * @param {object[]} [ftsResults=[]] - Ranked list, best first.
 * @param {object[]} [embResults=[]] - Ranked list, best first.
 * @param {object[]} [turnResults=[]] - Ranked list, best first.
 * @param {number} [K=60] - Rank-smoothing constant.
 * @returns {Map<*, number>} Accumulated score per key.
 */
function rrfFusion(ftsResults = [], embResults = [], turnResults = [], K = 60) {
  const scores = new Map();

  for (const list of [ftsResults, embResults, turnResults]) {
    const ranked = list || [];
    for (let rank = 0; rank < ranked.length; rank++) {
      const r = ranked[rank];
      if (!r) continue;

      const id = r.session_id || String(r.id || '');
      if (!id) continue;

      scores.set(id, (scores.get(id) || 0) + 1 / (K + rank + 1));
    }
  }

  return scores;
}
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // timeDecay — sigmoid decay based on age in days
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * Sigmoid recency score in (0, 1): close to 1 for recent timestamps, 0.5 at
 * `midpointDays` of age, approaching 0 for old ones.
 *
 * @param {Date|string|number} startedAt - Timestamp as a Date, a date
 *   string, or epoch milliseconds.
 * @param {number} [midpointDays=45] - Age in days at which the score is 0.5.
 * @param {number} [steepness=0.05] - Slope of the sigmoid.
 * @returns {number} The decay score; 0.5 when the timestamp is missing or
 *   cannot be parsed.
 */
function timeDecay(startedAt, midpointDays = 45, steepness = 0.05) {
  if (!startedAt) return 0.5;

  // Anything exposing getTime() is used as-is; strings and epoch numbers go
  // through the Date constructor instead of failing on a missing method.
  const dt = typeof startedAt.getTime === 'function' ? startedAt : new Date(startedAt);
  const startedMs = dt.getTime();
  if (Number.isNaN(startedMs)) return 0.5;

  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const ageDays = (Date.now() - startedMs) / MS_PER_DAY;
  return 1 / (1 + Math.exp(steepness * (ageDays - midpointDays)));
}
47
+
48
+ // ---------------------------------------------------------------------------
49
+ // accessScore — exponential decay on access recency (30-day half-life)
50
+ // ---------------------------------------------------------------------------
51
+
52
/**
 * Access signal: the access count, decayed exponentially by the time since
 * the last access (factor `exp(-0.693 * days / 30)`, i.e. roughly halving
 * every 30 days).
 *
 * @param {number} accessCount - Number of recorded accesses.
 * @param {Date|string|number} lastAccessedAt - Time of the last access as a
 *   Date, a date string, or epoch milliseconds.
 * @returns {number} The decayed count; 0 when the count is missing or not
 *   positive, or when the timestamp is missing or cannot be parsed.
 */
function accessScore(accessCount, lastAccessedAt) {
  if (!accessCount || accessCount <= 0) return 0;
  if (!lastAccessedAt) return 0;

  // Anything exposing getTime() is used as-is; strings and epoch numbers go
  // through the Date constructor instead of failing on a missing method.
  const dt = typeof lastAccessedAt.getTime === 'function'
    ? lastAccessedAt
    : new Date(lastAccessedAt);
  const accessedMs = dt.getTime();
  if (Number.isNaN(accessedMs)) return 0;

  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const daysSince = (Date.now() - accessedMs) / MS_PER_DAY;
  return accessCount * Math.exp(-0.693 * daysSince / 30);
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // hybridRank — combine all signals into final ranked list
65
+ // ---------------------------------------------------------------------------
66
+
67
// Default signal weights for hybridRank; callers can override any of them.
const DEFAULT_WEIGHTS = {
  // The first three are combined linearly into the base score.
  rrf: 0.65,
  timeDecay: 0.25,
  access: 0.10,

  // Scales the entity score, which is applied to the headroom left above
  // the base score.
  entityBoost: 0.18,
};
73
+
74
/**
 * Merge full-text, embedding and turn-level hits into one ranked list.
 *
 * Hits are keyed by `session_id`, falling back to the stringified `id`. Each
 * candidate gets a base score — a weighted sum of normalized RRF, time decay
 * of `started_at`, and a saturating access signal — capped at 1. The entity
 * score for the session then fills part of the remaining headroom.
 *
 * @param {object[]} ftsResults - Ranked full-text hits (may be null).
 * @param {object[]} embResults - Ranked embedding hits (may be null).
 * @param {number} [limit=5] - Maximum number of results returned.
 * @param {object} [weights={}] - Overrides for DEFAULT_WEIGHTS.
 * @param {object[]} [turnResults=[]] - Ranked turn-level hits; for sessions
 *   already present they only contribute `matched_turn_text` and
 *   `matched_turn_index`.
 * @param {Map<*, number>} [entityScoreBySession=new Map()] - Entity score per
 *   session key.
 * @returns {object[]} Up to `limit` hits, best first, each extended with
 *   `_score`, `_rrf`, `_timeDecay`, `_access` and `_entityScore`.
 */
function hybridRank(
  ftsResults,
  embResults,
  limit = 5,
  weights = {},
  turnResults = [],
  entityScoreBySession = new Map(),
) {
  const fts = ftsResults || [];
  const emb = embResults || [];
  const turns = turnResults || [];
  const mix = { ...DEFAULT_WEIGHTS, ...weights };

  // session_id when present, otherwise the stringified id ('' if neither).
  const keyOf = (hit) => hit.session_id || String(hit.id || '');

  // First occurrence of a key wins for the session-level lists.
  const candidates = new Map();
  const seed = (hits) => {
    for (const hit of hits) {
      if (!hit) continue;
      const key = keyOf(hit);
      if (!key || candidates.has(key)) continue;
      candidates.set(key, { ...hit, session_id: key });
    }
  };
  seed(fts);
  seed(emb);

  // Turn hits annotate known sessions and introduce unknown ones.
  for (const hit of turns) {
    if (!hit) continue;
    const key = keyOf(hit);
    if (!key) continue;

    if (candidates.has(key)) {
      const known = candidates.get(key);
      known.matched_turn_text = hit.matched_turn_text;
      known.matched_turn_index = hit.matched_turn_index;
    } else {
      candidates.set(key, { ...hit, session_id: key });
    }
  }

  if (candidates.size === 0) return [];

  // Adaptive K: half the longest list, never below 20; 30 if that is not a number.
  const longest = Math.max(fts.length, emb.length, turns.length);
  const K = Math.max(20, Math.floor(longest / 2)) || 30;

  const rrfScores = rrfFusion(fts, emb, turns, K);

  // Best possible RRF: rank 0 in every contributing list.
  const listCount = turns.length > 0 ? 3 : 2;
  const maxRrf = listCount / (K + 1);

  const scored = Array.from(candidates, ([sessionId, hit]) => {
    const rawRrf = rrfScores.get(sessionId) || 0;
    const normRrf = maxRrf > 0 ? rawRrf / maxRrf : 0;

    const decay = timeDecay(hit.started_at);

    const accessEff = accessScore(hit.access_count || 0, hit.last_accessed_at);
    const accessNorm = 1 - Math.exp(-accessEff / 5);

    // Capped at 1 so the headroom term (1 - base) never goes negative.
    const base = Math.min(
      1,
      mix.rrf * normRrf + mix.timeDecay * decay + mix.access * accessNorm,
    );

    const entitySc = entityScoreBySession.get(sessionId) || 0;
    const finalScore = Math.min(1, base + mix.entityBoost * entitySc * (1 - base));

    return {
      ...hit,
      _score: finalScore,
      _rrf: normRrf,
      _timeDecay: decay,
      _access: accessNorm,
      _entityScore: entitySc,
    };
  });

  scored.sort((a, b) => b._score - a._score);
  return scored.slice(0, limit);
}
161
+
162
+ // ---------------------------------------------------------------------------
163
+ // Exports
164
+ // ---------------------------------------------------------------------------
165
+
166
+ module.exports = { rrfFusion, timeDecay, accessScore, hybridRank };