@psiclawops/hypermem 0.1.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/ARCHITECTURE.md +4 -3
  2. package/README.md +457 -174
  3. package/package.json +15 -5
  4. package/dist/background-indexer.d.ts +0 -117
  5. package/dist/background-indexer.d.ts.map +0 -1
  6. package/dist/background-indexer.js +0 -732
  7. package/dist/compaction-fence.d.ts +0 -89
  8. package/dist/compaction-fence.d.ts.map +0 -1
  9. package/dist/compaction-fence.js +0 -153
  10. package/dist/compositor.d.ts +0 -139
  11. package/dist/compositor.d.ts.map +0 -1
  12. package/dist/compositor.js +0 -1109
  13. package/dist/cross-agent.d.ts +0 -57
  14. package/dist/cross-agent.d.ts.map +0 -1
  15. package/dist/cross-agent.js +0 -254
  16. package/dist/db.d.ts +0 -131
  17. package/dist/db.d.ts.map +0 -1
  18. package/dist/db.js +0 -398
  19. package/dist/desired-state-store.d.ts +0 -100
  20. package/dist/desired-state-store.d.ts.map +0 -1
  21. package/dist/desired-state-store.js +0 -212
  22. package/dist/doc-chunk-store.d.ts +0 -115
  23. package/dist/doc-chunk-store.d.ts.map +0 -1
  24. package/dist/doc-chunk-store.js +0 -278
  25. package/dist/doc-chunker.d.ts +0 -99
  26. package/dist/doc-chunker.d.ts.map +0 -1
  27. package/dist/doc-chunker.js +0 -324
  28. package/dist/episode-store.d.ts +0 -48
  29. package/dist/episode-store.d.ts.map +0 -1
  30. package/dist/episode-store.js +0 -135
  31. package/dist/fact-store.d.ts +0 -57
  32. package/dist/fact-store.d.ts.map +0 -1
  33. package/dist/fact-store.js +0 -175
  34. package/dist/fleet-store.d.ts +0 -144
  35. package/dist/fleet-store.d.ts.map +0 -1
  36. package/dist/fleet-store.js +0 -276
  37. package/dist/hybrid-retrieval.d.ts +0 -60
  38. package/dist/hybrid-retrieval.d.ts.map +0 -1
  39. package/dist/hybrid-retrieval.js +0 -340
  40. package/dist/index.d.ts +0 -611
  41. package/dist/index.d.ts.map +0 -1
  42. package/dist/index.js +0 -1042
  43. package/dist/knowledge-graph.d.ts +0 -110
  44. package/dist/knowledge-graph.d.ts.map +0 -1
  45. package/dist/knowledge-graph.js +0 -305
  46. package/dist/knowledge-store.d.ts +0 -72
  47. package/dist/knowledge-store.d.ts.map +0 -1
  48. package/dist/knowledge-store.js +0 -241
  49. package/dist/library-schema.d.ts +0 -22
  50. package/dist/library-schema.d.ts.map +0 -1
  51. package/dist/library-schema.js +0 -717
  52. package/dist/message-store.d.ts +0 -76
  53. package/dist/message-store.d.ts.map +0 -1
  54. package/dist/message-store.js +0 -273
  55. package/dist/preference-store.d.ts +0 -54
  56. package/dist/preference-store.d.ts.map +0 -1
  57. package/dist/preference-store.js +0 -109
  58. package/dist/preservation-gate.d.ts +0 -82
  59. package/dist/preservation-gate.d.ts.map +0 -1
  60. package/dist/preservation-gate.js +0 -150
  61. package/dist/provider-translator.d.ts +0 -40
  62. package/dist/provider-translator.d.ts.map +0 -1
  63. package/dist/provider-translator.js +0 -349
  64. package/dist/rate-limiter.d.ts +0 -76
  65. package/dist/rate-limiter.d.ts.map +0 -1
  66. package/dist/rate-limiter.js +0 -179
  67. package/dist/redis.d.ts +0 -188
  68. package/dist/redis.d.ts.map +0 -1
  69. package/dist/redis.js +0 -534
  70. package/dist/schema.d.ts +0 -15
  71. package/dist/schema.d.ts.map +0 -1
  72. package/dist/schema.js +0 -203
  73. package/dist/secret-scanner.d.ts +0 -51
  74. package/dist/secret-scanner.d.ts.map +0 -1
  75. package/dist/secret-scanner.js +0 -248
  76. package/dist/seed.d.ts +0 -108
  77. package/dist/seed.d.ts.map +0 -1
  78. package/dist/seed.js +0 -177
  79. package/dist/system-store.d.ts +0 -73
  80. package/dist/system-store.d.ts.map +0 -1
  81. package/dist/system-store.js +0 -182
  82. package/dist/topic-store.d.ts +0 -45
  83. package/dist/topic-store.d.ts.map +0 -1
  84. package/dist/topic-store.js +0 -136
  85. package/dist/types.d.ts +0 -329
  86. package/dist/types.d.ts.map +0 -1
  87. package/dist/types.js +0 -9
  88. package/dist/vector-store.d.ts +0 -132
  89. package/dist/vector-store.d.ts.map +0 -1
  90. package/dist/vector-store.js +0 -498
  91. package/dist/work-store.d.ts +0 -112
  92. package/dist/work-store.d.ts.map +0 -1
  93. package/dist/work-store.js +0 -273
@@ -1,340 +0,0 @@
1
- /**
2
- * HyperMem Hybrid Retrieval — FTS5 + KNN Score Fusion
3
- *
4
- * Merges keyword (FTS5/BM25) and semantic (KNN/vector) results into a
5
- * single ranked list using Reciprocal Rank Fusion (RRF). This avoids
6
- * vocabulary mismatch (KNN-only misses exact terms) and semantic gap
7
- * (FTS5-only misses paraphrases).
8
- *
9
- * Architecture:
10
- * - FTS5 results from library.db (facts_fts, knowledge_fts, episodes_fts)
11
- * - KNN results from vectors.db via VectorStore
12
- * - RRF merges both ranked lists with configurable k constant
13
- * - Deduplication by (sourceTable, sourceId)
14
- * - Token-budgeted output for compositor consumption
15
- */
16
- // ─── FTS5 Query Building ───────────────────────────────────────
/**
 * Words that are dropped from a query before it is turned into an FTS5
 * expression. Stored as a Set because the only use is membership lookup.
 */
const STOP_WORDS = new Set(
    `
    a an the is are was were be been being
    have has had do does did will would could
    should may might shall can need dare ought
    used to of in for on with at by from
    as into through during before after above below
    between out off over under again further then
    once here there when where why how all each
    every both few more most other some such no
    nor not only own same so than too very
    just don now and but or if it its this
    that these those i me my we our you your
    he him his she her they them their what
    which who whom about up
    `.trim().split(/\s+/),
);
/**
 * Build an FTS5 MATCH expression from a natural-language string.
 *
 * The input is lower-cased, punctuation is replaced by spaces, and the
 * remaining tokens are filtered (length >= 3, not a stop word, contains at
 * least one letter or digit). Surviving terms are deduplicated, ordered
 * longest-first, capped at 8, and joined as quoted prefix terms with OR.
 *
 * @param {string} input - Free-text query.
 * @returns {string} An expression such as `"retrieval"* OR "hybrid"*`, or
 *   the empty string when no usable term remains.
 */
export function buildFtsQuery(input) {
    const words = input
        .toLowerCase()
        // Keep letters, combining marks and digits of any script, plus "_" and
        // "-". The ASCII-only \w class used previously erased non-Latin queries
        // entirely and cut accented words at the first non-ASCII letter.
        // Double quotes are still stripped, so the quoted terms emitted below
        // cannot be broken out of.
        .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, ' ')
        .split(/\s+/)
        .filter(w => w.length >= 3
            // Tokens such as "---" or "___" carry nothing searchable and would
            // only consume one of the 8 term slots.
            && /[\p{L}\p{N}]/u.test(w)
            && !STOP_WORDS.has(w));
    if (words.length === 0) {
        return '';
    }
    // Deduplicate, put longer (more specific) terms first, and cap the term
    // count to keep the query reasonable.
    const unique = [...new Set(words)]
        .sort((a, b) => b.length - a.length)
        .slice(0, 8);
    // Prefix matching (*) combined with OR: any single term may match.
    return unique.map(w => `"${w}"*`).join(' OR ');
}
/**
 * Search the `facts` table through its FTS5 index (`facts_fts`).
 *
 * Rows with a non-NULL `superseded_by` or a `decay_score` of 0.8 or more are
 * excluded. When `agentId` is truthy, only that agent's rows are returned.
 *
 * @param db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} query - FTS5 MATCH expression.
 * @param {string} [agentId] - Optional agent scope.
 * @param {number} [limit=20] - Maximum number of rows returned.
 * @returns {Array<{id, rank, content, domain, agentId}>} Rows ordered by FTS rank.
 */
function searchFactsFts(db, query, agentId, limit = 20) {
    // Two-phase query: the FTS match runs first in a bounded subquery, then the
    // ordinary predicates run on that small result set. Joining FTS + non-FTS
    // predicates + ORDER BY rank in one pass forces SQLite to materialise the
    // full FTS match set before applying LIMIT.
    //
    // Always over-fetch in the inner query: the superseded/decay filters below
    // apply whether or not an agent filter is present, so fetching exactly
    // `limit` rows could return fewer than `limit` results even though more
    // valid matches exist.
    const innerLimit = limit * 4;
    let sql = `
        SELECT f.id, sub.rank, f.content, f.domain, f.agent_id
        FROM (
            SELECT rowid, rank FROM facts_fts WHERE facts_fts MATCH ? ORDER BY rank LIMIT ?
        ) sub
        JOIN facts f ON f.id = sub.rowid
        WHERE f.superseded_by IS NULL
        AND f.decay_score < 0.8
    `;
    const params = [query, innerLimit];
    if (agentId) {
        sql += ' AND f.agent_id = ?';
        params.push(agentId);
    }
    sql += ' ORDER BY sub.rank LIMIT ?';
    params.push(limit);
    const rows = db.prepare(sql).all(...params);
    return rows.map(r => ({
        id: r.id,
        rank: r.rank,
        content: r.content,
        domain: r.domain,
        agentId: r.agent_id,
    }));
}
/**
 * Search the `knowledge` table through its FTS5 index (`knowledge_fts`).
 *
 * Rows with a non-NULL `superseded_by` are excluded. When `agentId` is
 * truthy, only that agent's rows are returned. The row's `key` column is
 * surfaced as `metadata`.
 *
 * @param db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} query - FTS5 MATCH expression.
 * @param {string} [agentId] - Optional agent scope.
 * @param {number} [limit=20] - Maximum number of rows returned.
 * @returns {Array<{id, rank, content, domain, agentId, metadata}>} Rows ordered by FTS rank.
 */
function searchKnowledgeFts(db, query, agentId, limit = 20) {
    // Always over-fetch in the bounded FTS subquery: the superseded filter
    // applies even without an agent filter, so fetching exactly `limit` rows
    // could return fewer than `limit` results while valid matches remain.
    const innerLimit = limit * 4;
    let sql = `
        SELECT k.id, sub.rank, k.content, k.domain, k.agent_id, k.key
        FROM (
            SELECT rowid, rank FROM knowledge_fts WHERE knowledge_fts MATCH ? ORDER BY rank LIMIT ?
        ) sub
        JOIN knowledge k ON k.id = sub.rowid
        WHERE k.superseded_by IS NULL
    `;
    const params = [query, innerLimit];
    if (agentId) {
        sql += ' AND k.agent_id = ?';
        params.push(agentId);
    }
    sql += ' ORDER BY sub.rank LIMIT ?';
    params.push(limit);
    const rows = db.prepare(sql).all(...params);
    return rows.map(r => ({
        id: r.id,
        rank: r.rank,
        content: r.content,
        domain: r.domain,
        agentId: r.agent_id,
        metadata: r.key,
    }));
}
/**
 * Search the `episodes` table through its FTS5 index (`episodes_fts`).
 *
 * Rows with a `decay_score` of 0.8 or more are excluded. The two paths order
 * their results differently:
 *  - agent-scoped: newest first (`created_at DESC`), and `rank` is always 0;
 *  - unscoped: by FTS rank.
 *
 * The row's `summary` is returned as `content`, `event_type` as `domain`,
 * and `participants` (when truthy) as `metadata`.
 *
 * @param db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} query - FTS5 MATCH expression.
 * @param {string} [agentId] - Optional agent scope.
 * @param {number} [limit=20] - Maximum number of rows returned.
 * @returns {Array<{id, rank, content, domain, agentId, metadata}>}
 */
function searchEpisodesFts(db, query, agentId, limit = 20) {
    let sql;
    let params;
    if (agentId) {
        // Agent-scoped: use WHERE IN (FTS5 subquery) instead of FTS5→JOIN→filter.
        // Original author's note: SQLite uses the agent_id index to narrow first,
        // then checks FTS5 membership; benchmarked at 2.3ms avg vs 8.5ms avg for
        // the post-join approach (13k+ episodes).
        sql = `
            SELECT e.id, 0 as rank, e.summary, e.event_type, e.agent_id, e.participants
            FROM episodes e
            WHERE e.agent_id = ?
            AND e.decay_score < 0.8
            AND e.id IN (SELECT rowid FROM episodes_fts WHERE episodes_fts MATCH ?)
            ORDER BY e.created_at DESC
            LIMIT ?
        `;
        params = [agentId, query, limit];
    }
    else {
        // Over-fetch in the bounded FTS subquery: the decay filter runs after it,
        // so fetching exactly `limit` rows could return fewer than `limit`
        // results even though more valid matches exist.
        const innerLimit = limit * 4;
        sql = `
            SELECT e.id, sub.rank, e.summary, e.event_type, e.agent_id, e.participants
            FROM (
                SELECT rowid, rank FROM episodes_fts WHERE episodes_fts MATCH ? ORDER BY rank LIMIT ?
            ) sub
            JOIN episodes e ON e.id = sub.rowid
            WHERE e.decay_score < 0.8
            ORDER BY sub.rank LIMIT ?
        `;
        params = [query, innerLimit, limit];
    }
    const rows = db.prepare(sql).all(...params);
    return rows.map(r => ({
        id: r.id,
        rank: r.rank,
        content: r.summary,
        domain: r.event_type,
        agentId: r.agent_id,
        // Empty/NULL participants are normalised to undefined.
        metadata: r.participants || undefined,
    }));
}
/**
 * Compose the deduplication key for a result: "<table>:<id>".
 */
function resultKey(table, id) {
    const separator = ':';
    return ''.concat(table, separator, id);
}
/**
 * Combine keyword (FTS) and vector (KNN) hits with Reciprocal Rank Fusion.
 *
 * Every hit contributes weight / (k + rank) for each list it appears in; a
 * hit present in both lists gets the sum of both contributions. A missing or
 * zero rank is treated as rank 1.
 *
 * @param ftsResults - Iterable of [key, entry] pairs carrying `ftsRank`.
 * @param knnResults - Iterable of [key, entry] pairs carrying `knnRank` / `knnDistance`.
 * @param {number} k - RRF damping constant.
 * @param {number} ftsWeight - Weight of the keyword list.
 * @param {number} knnWeight - Weight of the vector list.
 * @returns {Array} Copies of the entries with `score` and `sources` set,
 *   highest score first. The input entries are not modified.
 */
function fuseResults(ftsResults, knnResults, k, ftsWeight, knnWeight) {
    const byKey = new Map();
    // Seed the table with the keyword hits.
    for (const [key, hit] of ftsResults) {
        const contribution = ftsWeight / (k + (hit.ftsRank || 1));
        byKey.set(key, { ...hit, score: contribution, sources: ['fts'] });
    }
    // Fold the vector hits in, boosting anything both lists agree on.
    for (const [key, hit] of knnResults) {
        const contribution = knnWeight / (k + (hit.knnRank || 1));
        const prior = byKey.get(key);
        if (!prior) {
            byKey.set(key, { ...hit, score: contribution, sources: ['knn'] });
            continue;
        }
        prior.score += contribution;
        prior.knnRank = hit.knnRank;
        prior.knnDistance = hit.knnDistance;
        prior.sources = ['fts', 'knn'];
    }
    // Best fused score first.
    const ranked = Array.from(byKey.values());
    ranked.sort((left, right) => right.score - left.score);
    return ranked;
}
194
- // ─── Public API ────────────────────────────────────────────────
/**
 * Hybrid search combining FTS5 keyword search and KNN vector search.
 *
 * - When `vectorStore` is null, only FTS5 is queried.
 * - When the FTS5 query is empty (e.g. the input was all stop words) or has
 *   fewer than `minFtsTerms` terms, only KNN is queried.
 * - An exception from either backend is swallowed and whatever the other
 *   backend produced is used (best-effort retrieval).
 *
 * @param libraryDb - Database handle passed through to the FTS search helpers.
 * @param vectorStore - Object with an async `search(query, { tables, limit, maxDistance })`
 *   method, or null/undefined to disable KNN.
 * @param {string} query - Natural-language query.
 * @param {object} [opts]
 * @param {string[]} [opts.tables=['facts','knowledge','episodes']] - Tables to search.
 * @param {number} [opts.limit=10] - Maximum number of results.
 * @param {number} [opts.maxKnnDistance=1.2] - Passed to the vector store as `maxDistance`.
 * @param {number} [opts.rrfK=60] - RRF damping constant.
 * @param {number} [opts.ftsWeight=1.0] - Weight of keyword hits.
 * @param {number} [opts.knnWeight=1.0] - Weight of vector hits.
 * @param {number} [opts.minFtsTerms=1] - Minimum number of FTS terms required to run FTS.
 * @param {string} [opts.agentId] - Agent scope applied to the FTS queries.
 * @returns {Promise<Array>} Up to `limit` results, best first.
 */
export async function hybridSearch(libraryDb, vectorStore, query, opts) {
    const tables = opts?.tables || ['facts', 'knowledge', 'episodes'];
    // Numeric options use `??` so that an explicit 0 (e.g. ftsWeight: 0 to mute
    // keyword hits, rrfK: 0, maxKnnDistance: 0) is honoured instead of being
    // silently replaced by the default.
    const limit = opts?.limit ?? 10;
    const maxKnnDistance = opts?.maxKnnDistance ?? 1.2;
    const rrfK = opts?.rrfK ?? 60;
    const ftsWeight = opts?.ftsWeight ?? 1.0;
    const knnWeight = opts?.knnWeight ?? 1.0;
    const minFtsTerms = opts?.minFtsTerms ?? 1;
    const perSourceLimit = Math.ceil(limit * 1.5); // Over-fetch for fusion

    // ── FTS5 retrieval ──
    const ftsSearchers = [
        ['facts', searchFactsFts],
        ['knowledge', searchKnowledgeFts],
        ['episodes', searchEpisodesFts],
    ];
    const ftsQuery = buildFtsQuery(query);
    const ftsMap = new Map();
    if (ftsQuery && ftsQuery.split(' OR ').length >= minFtsTerms) {
        try {
            for (const [table, search] of ftsSearchers) {
                if (!tables.includes(table)) {
                    continue;
                }
                const rows = search(libraryDb, ftsQuery, opts?.agentId, perSourceLimit);
                rows.forEach((r, i) => {
                    ftsMap.set(resultKey(table, r.id), {
                        sourceTable: table,
                        sourceId: r.id,
                        content: r.content,
                        domain: r.domain,
                        agentId: r.agentId,
                        metadata: r.metadata,
                        // Rank is the 1-based position within this table's result list.
                        ftsRank: i + 1,
                        score: 0,
                        sources: ['fts'],
                    });
                });
            }
        }
        catch {
            // FTS5 failure is non-fatal — keep whatever was collected so far and
            // fall through to KNN.
        }
    }

    // ── KNN retrieval ──
    const knnMap = new Map();
    if (vectorStore) {
        try {
            // NOTE(review): opts.agentId is not forwarded here, so vector hits are
            // not agent-scoped the way the FTS hits are — confirm this is intended.
            const knnResults = await vectorStore.search(query, {
                tables,
                limit: perSourceLimit,
                maxDistance: maxKnnDistance,
            });
            knnResults.forEach((r, i) => {
                knnMap.set(resultKey(r.sourceTable, r.sourceId), {
                    sourceTable: r.sourceTable,
                    sourceId: r.sourceId,
                    content: r.content,
                    domain: r.domain,
                    agentId: r.agentId,
                    metadata: r.metadata,
                    knnRank: i + 1,
                    knnDistance: r.distance,
                    score: 0,
                    sources: ['knn'],
                });
            });
        }
        catch {
            // KNN failure is non-fatal — use FTS-only
        }
    }

    // ── Fusion ──
    if (ftsMap.size === 0 && knnMap.size === 0) {
        return [];
    }
    // If only one source has results, skip fusion but still assign
    // position-based scores on the same scale as RRF.
    if (ftsMap.size === 0) {
        // KNN-only: nearest first. `??` (not `||`) so that an exact match with
        // distance 0 sorts first rather than being treated as "unknown" (99).
        return [...knnMap.values()]
            .sort((a, b) => (a.knnDistance ?? 99) - (b.knnDistance ?? 99))
            .slice(0, limit)
            .map((entry, i) => toHybridResult({
                ...entry,
                score: knnWeight / (rrfK + i + 1),
            }));
    }
    if (knnMap.size === 0) {
        // FTS-only: order by per-table rank position.
        return [...ftsMap.values()]
            .sort((a, b) => (a.ftsRank || 99) - (b.ftsRank || 99))
            .slice(0, limit)
            .map((entry, i) => toHybridResult({
                ...entry,
                score: ftsWeight / (rrfK + i + 1),
            }));
    }
    // Both sources present — RRF fusion
    const fused = fuseResults(ftsMap, knnMap, rrfK, ftsWeight, knnWeight);
    return fused.slice(0, limit).map(toHybridResult);
}
/**
 * Project an internal working entry down to the public result shape,
 * dropping bookkeeping fields such as ftsRank, knnRank and knnDistance.
 * All eight result keys are always present, even when their value is undefined.
 */
function toHybridResult(entry) {
    const {
        sourceTable,
        sourceId,
        content,
        domain,
        agentId,
        metadata,
        score,
        sources,
    } = entry;
    return { sourceTable, sourceId, content, domain, agentId, metadata, score, sources };
}
340
- //# sourceMappingURL=hybrid-retrieval.js.map