@199-bio/engram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
+ /**
2
+ * Hybrid Search with Reciprocal Rank Fusion (RRF)
3
+ * Combines BM25 (keyword), semantic (ColBERTRetriever or SimpleRetriever) and optional knowledge-graph search
4
+ */
5
+
6
+ import { EngramDatabase, Memory } from "../storage/database.js";
7
+ import { KnowledgeGraph } from "../graph/knowledge-graph.js";
8
+ import { ColBERTRetriever, SimpleRetriever, SearchResult, Document } from "./colbert.js";
9
+ import { entityExtractor } from "../graph/extractor.js";
10
+
11
/**
 * A single fused hit produced by `HybridSearch.search`.
 */
export interface HybridSearchResult {
  /** The stored memory record this hit refers to. */
  memory: Memory;
  /** Fused RRF score; results are returned in descending order of this value. */
  score: number;
  /**
   * 1-indexed rank of this memory within each contributing source.
   * A key is left undefined when that source did not return the memory.
   */
  sources: Partial<Record<"bm25" | "semantic" | "graph", number>>;
}
20
+
21
/**
 * Fuses keyword (BM25), semantic (retriever) and knowledge-graph candidates
 * into one ranked list using Reciprocal Rank Fusion (RRF), and forwards
 * index-maintenance calls to the semantic retriever.
 */
export class HybridSearch {
  constructor(
    private readonly db: EngramDatabase,
    private readonly graph: KnowledgeGraph,
    private readonly retriever: ColBERTRetriever | SimpleRetriever
  ) {}

  /**
   * Search using all available methods and fuse the results.
   *
   * Each source contributes `weight / (k + rank)` for every memory it
   * returned (rank is 1-indexed, k = 60); contributions are summed per memory
   * and the best-scoring memories are returned. Every returned memory is also
   * passed to `db.touchMemory`.
   *
   * @param query - Free-text query handed to every source.
   * @param options - `limit` (default 10) caps the number of results; a
   *   non-positive or NaN limit yields an empty list and a fractional limit is
   *   floored. `includeGraph` (default true) toggles the graph source. The
   *   three weights (defaults 1.0 / 1.0 / 0.5) scale each source's
   *   contribution.
   * @returns Up to `limit` hits, ordered by descending fused score.
   */
  async search(
    query: string,
    options: {
      limit?: number;
      includeGraph?: boolean;
      bm25Weight?: number;
      semanticWeight?: number;
      graphWeight?: number;
    } = {}
  ): Promise<HybridSearchResult[]> {
    const {
      limit = 10,
      includeGraph = true,
      bm25Weight = 1.0,
      semanticWeight = 1.0,
      graphWeight = 0.5,
    } = options;

    // A negative limit would otherwise reach slice(0, -n) semantics and return
    // nearly everything; zero / NaN cannot produce results, so skip the work.
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
      return [];
    }

    // Fetch more candidates than needed so fusion has something to re-rank.
    const candidateLimit = Math.max(maxResults * 3, 30);

    // The three sources are independent, so run them concurrently.
    const [bm25Results, semanticResults, graphMemoryIds] = await Promise.all([
      this.searchBM25(query, candidateLimit),
      this.searchSemantic(query, candidateLimit),
      includeGraph ? this.searchGraph(query) : Promise.resolve<string[]>([]),
    ]);

    type SourceRanks = HybridSearchResult["sources"];

    // Graph ids are only candidates if the memory still exists. Keep what we
    // load here so the final pass does not fetch the same rows again.
    const fetched = new Map<string, Memory>();
    const graphHitIds: string[] = [];
    for (const id of graphMemoryIds) {
      const memory = this.db.getMemory(id);
      if (memory) {
        fetched.set(memory.id, memory);
        graphHitIds.push(memory.id);
      }
    }

    // id -> 1-indexed rank per source. Insertion order (bm25, semantic, graph)
    // is the tie-break order after the stable sort below.
    const rankings = new Map<string, SourceRanks>();
    const recordRanks = (ids: readonly string[], source: keyof SourceRanks): void => {
      ids.forEach((id, index) => {
        let entry = rankings.get(id);
        if (entry === undefined) {
          entry = {};
          rankings.set(id, entry);
        }
        // If a source lists the same id twice, keep its best (first) rank.
        if (entry[source] === undefined) {
          entry[source] = index + 1;
        }
      });
    };

    recordRanks(bm25Results.map(r => r.id), "bm25");
    recordRanks(semanticResults.map(r => r.id), "semantic");
    // Graph hits are ranked by the order the traversal returned them.
    // NOTE(review): an earlier comment described graph hits as "all equal";
    // if presence alone should count, assign a constant rank here instead.
    recordRanks(graphHitIds, "graph");

    if (rankings.size === 0) {
      return [];
    }

    const k = 60; // RRF constant
    const contribution = (weight: number, rank: number | undefined): number =>
      rank === undefined ? 0 : weight * (1 / (k + rank));

    const fused = Array.from(rankings, ([id, ranks]) => ({
      id,
      score:
        contribution(bm25Weight, ranks.bm25) +
        contribution(semanticWeight, ranks.semantic) +
        contribution(graphWeight, ranks.graph),
      sources: ranks,
    }));

    // Highest fused score first.
    fused.sort((a, b) => b.score - a.score);

    // Walk the whole ranking rather than slicing first: a candidate whose
    // memory no longer exists must not consume one of the `limit` slots.
    const results: HybridSearchResult[] = [];
    for (const { id, score, sources } of fused) {
      if (results.length >= maxResults) {
        break;
      }

      const memory = fetched.get(id) ?? this.db.getMemory(id);
      if (!memory) {
        continue;
      }

      // Update access count
      this.db.touchMemory(id);

      results.push({
        memory,
        score,
        sources: {
          bm25: sources.bm25,
          semantic: sources.semantic,
          graph: sources.graph,
        },
      });
    }

    return results;
  }

  /**
   * BM25 keyword search via SQLite FTS5.
   *
   * Best-effort: any error from the database search yields an empty list so
   * the other sources can still answer. Scores are returned as absolute
   * values (presumably because the backend reports better matches as more
   * negative numbers - confirm against `EngramDatabase.searchBM25`).
   */
  private async searchBM25(query: string, limit: number): Promise<Array<{ id: string; score: number }>> {
    try {
      const results = this.db.searchBM25(query, limit);
      return results.map(r => ({ id: r.id, score: Math.abs(r.score) }));
    } catch {
      return [];
    }
  }

  /**
   * Semantic search via the configured retriever.
   *
   * Best-effort: a retriever failure yields an empty list.
   */
  private async searchSemantic(query: string, limit: number): Promise<Array<{ id: string; score: number }>> {
    try {
      const results = await this.retriever.search(query, limit);
      return results.map(r => ({ id: r.id, score: r.score }));
    } catch {
      return [];
    }
  }

  /**
   * Graph-based search: extract entities from the query and collect the ids
   * of memories related to each entity (traversal argument 2).
   *
   * Best-effort like the other sources: an extractor or graph failure yields
   * an empty list instead of rejecting the whole search.
   *
   * @returns De-duplicated memory ids in first-seen order.
   */
  private async searchGraph(query: string): Promise<string[]> {
    try {
      const memoryIds = new Set<string>();

      for (const entity of entityExtractor.extractAll(query)) {
        const relatedIds = this.graph.findRelatedMemoryIds(entity.name, 2);
        relatedIds.forEach(id => memoryIds.add(id));
      }

      return Array.from(memoryIds);
    } catch {
      return [];
    }
  }

  /**
   * Add a memory to the semantic index.
   *
   * @param memory - Memory whose `id` and `content` are sent to the retriever.
   */
  async indexMemory(memory: Memory): Promise<void> {
    await this.retriever.add([{
      id: memory.id,
      content: memory.content,
    }]);
  }

  /**
   * Rebuild the entire semantic index from every memory in the database.
   *
   * @returns The document count reported by the retriever.
   */
  async rebuildIndex(): Promise<{ count: number }> {
    const memories = this.db.getAllMemories();

    const documents: Document[] = memories.map(m => ({
      id: m.id,
      content: m.content,
    }));

    const result = await this.retriever.index(documents);
    return { count: result.count };
  }

  /**
   * Remove a memory from the semantic index.
   *
   * @param memoryId - Id of the memory to delete from the retriever.
   */
  async removeFromIndex(memoryId: string): Promise<void> {
    await this.retriever.delete([memoryId]);
  }
}
@@ -0,0 +1,2 @@
1
+ export * from "./colbert.js";
2
+ export * from "./hybrid.js";