@199-bio/engram 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +19 -0
- package/LICENSE +21 -0
- package/LIVING_PLAN.md +180 -0
- package/PLAN.md +514 -0
- package/README.md +304 -0
- package/dist/graph/extractor.d.ts.map +1 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/knowledge-graph.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +473 -0
- package/dist/retrieval/colbert.d.ts.map +1 -0
- package/dist/retrieval/hybrid.d.ts.map +1 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/storage/database.d.ts.map +1 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +62 -0
- package/src/graph/extractor.ts +441 -0
- package/src/graph/index.ts +2 -0
- package/src/graph/knowledge-graph.ts +263 -0
- package/src/index.ts +558 -0
- package/src/retrieval/colbert-bridge.py +222 -0
- package/src/retrieval/colbert.ts +317 -0
- package/src/retrieval/hybrid.ts +218 -0
- package/src/retrieval/index.ts +2 -0
- package/src/storage/database.ts +527 -0
- package/src/storage/index.ts +1 -0
- package/tests/test-interactive.js +218 -0
- package/tests/test-mcp.sh +81 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search with Reciprocal Rank Fusion (RRF)
|
|
3
|
+
* Combines BM25 (keyword) and ColBERT (semantic) search
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { EngramDatabase, Memory } from "../storage/database.js";
|
|
7
|
+
import { KnowledgeGraph } from "../graph/knowledge-graph.js";
|
|
8
|
+
import { ColBERTRetriever, SimpleRetriever, SearchResult, Document } from "./colbert.js";
|
|
9
|
+
import { entityExtractor } from "../graph/extractor.js";
|
|
10
|
+
|
|
11
|
+
/**
 * One fused hit produced by {@link HybridSearch.search}.
 */
export interface HybridSearchResult {
  /** The memory record as returned by the database for this hit. */
  memory: Memory;

  /** Weighted Reciprocal Rank Fusion score; results are ordered by this, highest first. */
  score: number;

  /**
   * 1-indexed rank the memory held in each contributing result list.
   * These are ranks, not raw relevance scores. A field is `undefined` when
   * that search method did not return the memory.
   */
  sources: {
    /** Rank in the BM25 keyword result list. */
    bm25?: number;
    /** Rank in the semantic retriever result list. */
    semantic?: number;
    /** Rank in the graph-traversal result list. */
    graph?: number;
  };
}
|
|
20
|
+
|
|
21
|
+
/**
 * Hybrid retrieval over stored memories.
 *
 * Runs up to three independent searches (BM25 keyword search through the
 * database, semantic search through the configured retriever, and an optional
 * knowledge-graph lookup) and merges their result lists with weighted
 * Reciprocal Rank Fusion (RRF). Only the position of a hit inside each list
 * matters for fusion; the raw scores of the individual searchers are ignored.
 */
export class HybridSearch {
  /** Smoothing constant `k` in the RRF term `1 / (k + rank)`. */
  private static readonly RRF_K = 60;

  /** Lower bound on the number of candidates requested from each ranked searcher. */
  private static readonly MIN_CANDIDATES = 30;

  /**
   * Second argument handed to `KnowledgeGraph.findRelatedMemoryIds`.
   * NOTE(review): presumably the maximum traversal depth in hops; confirm
   * against the KnowledgeGraph implementation.
   */
  private static readonly GRAPH_DEPTH = 2;

  constructor(
    private readonly db: EngramDatabase,
    private readonly graph: KnowledgeGraph,
    private readonly retriever: ColBERTRetriever | SimpleRetriever
  ) {}

  /**
   * Search with every available method and fuse the results.
   *
   * @param query - Free-text query.
   * @param options - Optional tuning:
   *   - `limit`: maximum number of results (default 10). Fractions are rounded
   *     down; zero, negative or `NaN` yields an empty result without searching.
   *   - `includeGraph`: whether to add the knowledge-graph signal (default true).
   *   - `bm25Weight` / `semanticWeight` / `graphWeight`: multipliers applied to
   *     each method's RRF contribution (defaults 1.0 / 1.0 / 0.5).
   * @returns Up to `limit` hits ordered by descending fused score. Every
   *   returned memory is also passed to `db.touchMemory`.
   */
  async search(
    query: string,
    options: {
      limit?: number;
      includeGraph?: boolean;
      bm25Weight?: number;
      semanticWeight?: number;
      graphWeight?: number;
    } = {}
  ): Promise<HybridSearchResult[]> {
    const {
      limit = 10,
      includeGraph = true,
      bm25Weight = 1.0,
      semanticWeight = 1.0,
      graphWeight = 0.5,
    } = options;

    // A negative limit would otherwise reach Array.prototype.slice as a
    // negative end index and return "all but the last N" candidates.
    const maxResults = Math.floor(limit);
    if (!(maxResults > 0)) {
      return [];
    }

    // Over-fetch so that fusion has enough overlap between the lists.
    const candidateLimit = Math.max(maxResults * 3, HybridSearch.MIN_CANDIDATES);

    const [bm25Results, semanticResults, graphMemoryIds] = await Promise.all([
      this.searchBM25(query, candidateLimit),
      this.searchSemantic(query, candidateLimit),
      includeGraph ? this.searchGraph(query) : Promise.resolve<string[]>([]),
    ]);

    type Ranks = { bm25?: number; semantic?: number; graph?: number };
    const rankings = new Map<string, Ranks>();
    const recordRank = (id: string, source: keyof Ranks, rank: number): void => {
      const entry = rankings.get(id) ?? {};
      entry[source] = rank;
      rankings.set(id, entry);
    };

    // Ranks are 1-indexed. Insertion order (bm25, semantic, graph) doubles as
    // the tie-break order because the sort below is stable.
    bm25Results.forEach((hit, index) => recordRank(hit.id, "bm25", index + 1));
    semanticResults.forEach((hit, index) => recordRank(hit.id, "semantic", index + 1));

    // Graph hits are only ranked if the memory still exists. The fetched
    // records are kept so they do not have to be loaded a second time below.
    // NOTE(review): the rank is simply the order in which the traversal
    // discovered the memory, so earlier discoveries score higher. An older
    // comment here said graph hits were meant to be weighted equally;
    // confirm which behaviour is intended.
    const fetched = new Map<string, Memory>();
    let graphRank = 0;
    for (const graphId of graphMemoryIds) {
      const memory = this.db.getMemory(graphId);
      if (memory) {
        graphRank += 1;
        fetched.set(memory.id, memory);
        recordRank(memory.id, "graph", graphRank);
      }
    }

    if (rankings.size === 0) {
      return [];
    }

    const k = HybridSearch.RRF_K;
    const fused = Array.from(rankings, ([id, ranks]) => {
      let score = 0;
      if (ranks.bm25 !== undefined) {
        score += bm25Weight * (1 / (k + ranks.bm25));
      }
      if (ranks.semantic !== undefined) {
        score += semanticWeight * (1 / (k + ranks.semantic));
      }
      if (ranks.graph !== undefined) {
        score += graphWeight * (1 / (k + ranks.graph));
      }
      return { id, score, ranks };
    });

    fused.sort((a, b) => b.score - a.score);

    // Walk the whole fused list rather than a pre-cut slice: ids that no
    // longer resolve to a memory are skipped instead of shrinking the result.
    const results: HybridSearchResult[] = [];
    for (const { id, score, ranks } of fused) {
      if (results.length >= maxResults) {
        break;
      }

      const memory = fetched.get(id) ?? this.db.getMemory(id);
      if (!memory) {
        continue;
      }

      // Record the access for every memory that is actually returned.
      this.db.touchMemory(id);

      results.push({
        memory,
        score,
        sources: {
          bm25: ranks.bm25,
          semantic: ranks.semantic,
          graph: ranks.graph,
        },
      });
    }

    return results;
  }

  /**
   * BM25 keyword search via the database (SQLite FTS5).
   *
   * Scores are returned as absolute values, presumably because FTS5's
   * `bm25()` reports better matches as more negative numbers. Any error
   * raised by the database is deliberately swallowed so that the other
   * search methods can still contribute.
   *
   * @returns Hits in the order given by the database, or `[]` on failure.
   */
  private async searchBM25(query: string, limit: number): Promise<Array<{ id: string; score: number }>> {
    try {
      return this.db
        .searchBM25(query, limit)
        .map(hit => ({ id: hit.id, score: Math.abs(hit.score) }));
    } catch {
      return [];
    }
  }

  /**
   * Semantic search via the configured retriever.
   *
   * Best-effort like {@link searchBM25}: a failing or unavailable retriever
   * yields an empty list instead of an error.
   *
   * @returns Hits in the order given by the retriever, or `[]` on failure.
   */
  private async searchSemantic(query: string, limit: number): Promise<Array<{ id: string; score: number }>> {
    try {
      // The await must stay inside the try block so rejections are caught.
      const hits = await this.retriever.search(query, limit);
      return hits.map(hit => ({ id: hit.id, score: hit.score }));
    } catch {
      return [];
    }
  }

  /**
   * Graph-based search: extract entities from the query and collect the ids
   * of memories related to each of them in the knowledge graph.
   *
   * @returns De-duplicated memory ids in discovery order.
   */
  private async searchGraph(query: string): Promise<string[]> {
    const memoryIds = new Set<string>();

    for (const entity of entityExtractor.extractAll(query)) {
      this.graph
        .findRelatedMemoryIds(entity.name, HybridSearch.GRAPH_DEPTH)
        .forEach(id => memoryIds.add(id));
    }

    return Array.from(memoryIds);
  }

  /**
   * Add a single memory to the semantic index.
   *
   * @param memory - Memory whose `id` and `content` are handed to the retriever.
   */
  async indexMemory(memory: Memory): Promise<void> {
    const { id, content } = memory;
    await this.retriever.add([{ id, content }]);
  }

  /**
   * Rebuild the semantic index from every memory in the database.
   *
   * @returns The document count reported by the retriever.
   */
  async rebuildIndex(): Promise<{ count: number }> {
    const documents: Document[] = this.db
      .getAllMemories()
      .map(({ id, content }) => ({ id, content }));

    const { count } = await this.retriever.index(documents);
    return { count };
  }

  /**
   * Remove a memory from the semantic index.
   *
   * @param memoryId - Id of the memory to delete from the retriever.
   */
  async removeFromIndex(memoryId: string): Promise<void> {
    await this.retriever.delete([memoryId]);
  }
}
|