agentdb 1.5.9 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -11
- package/dist/agentdb.min.js +4 -4
- package/dist/cli/agentdb-cli.d.ts +29 -0
- package/dist/cli/agentdb-cli.d.ts.map +1 -1
- package/dist/cli/agentdb-cli.js +1009 -34
- package/dist/cli/agentdb-cli.js.map +1 -1
- package/dist/controllers/ContextSynthesizer.d.ts +65 -0
- package/dist/controllers/ContextSynthesizer.d.ts.map +1 -0
- package/dist/controllers/ContextSynthesizer.js +208 -0
- package/dist/controllers/ContextSynthesizer.js.map +1 -0
- package/dist/controllers/HNSWIndex.d.ts +128 -0
- package/dist/controllers/HNSWIndex.d.ts.map +1 -0
- package/dist/controllers/HNSWIndex.js +361 -0
- package/dist/controllers/HNSWIndex.js.map +1 -0
- package/dist/controllers/MMRDiversityRanker.d.ts +50 -0
- package/dist/controllers/MMRDiversityRanker.d.ts.map +1 -0
- package/dist/controllers/MMRDiversityRanker.js +130 -0
- package/dist/controllers/MMRDiversityRanker.js.map +1 -0
- package/dist/controllers/MetadataFilter.d.ts +70 -0
- package/dist/controllers/MetadataFilter.d.ts.map +1 -0
- package/dist/controllers/MetadataFilter.js +243 -0
- package/dist/controllers/MetadataFilter.js.map +1 -0
- package/dist/controllers/QUICClient.d.ts +109 -0
- package/dist/controllers/QUICClient.d.ts.map +1 -0
- package/dist/controllers/QUICClient.js +299 -0
- package/dist/controllers/QUICClient.js.map +1 -0
- package/dist/controllers/QUICServer.d.ts +121 -0
- package/dist/controllers/QUICServer.d.ts.map +1 -0
- package/dist/controllers/QUICServer.js +383 -0
- package/dist/controllers/QUICServer.js.map +1 -0
- package/dist/controllers/SyncCoordinator.d.ts +120 -0
- package/dist/controllers/SyncCoordinator.d.ts.map +1 -0
- package/dist/controllers/SyncCoordinator.js +441 -0
- package/dist/controllers/SyncCoordinator.js.map +1 -0
- package/dist/controllers/WASMVectorSearch.d.ts.map +1 -1
- package/dist/controllers/WASMVectorSearch.js +10 -2
- package/dist/controllers/WASMVectorSearch.js.map +1 -1
- package/dist/controllers/index.d.ts +14 -0
- package/dist/controllers/index.d.ts.map +1 -1
- package/dist/controllers/index.js +7 -0
- package/dist/controllers/index.js.map +1 -1
- package/dist/examples/quic-sync-example.d.ts +9 -0
- package/dist/examples/quic-sync-example.d.ts.map +1 -0
- package/dist/examples/quic-sync-example.js +169 -0
- package/dist/examples/quic-sync-example.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/types/quic.d.ts +518 -0
- package/dist/types/quic.d.ts.map +1 -0
- package/dist/types/quic.js +272 -0
- package/dist/types/quic.js.map +1 -0
- package/package.json +11 -3
- package/src/browser-entry.js +41 -6
- package/src/cli/agentdb-cli.ts +1114 -33
- package/src/controllers/ContextSynthesizer.ts +285 -0
- package/src/controllers/HNSWIndex.ts +495 -0
- package/src/controllers/MMRDiversityRanker.ts +187 -0
- package/src/controllers/MetadataFilter.ts +280 -0
- package/src/controllers/QUICClient.ts +413 -0
- package/src/controllers/QUICServer.ts +498 -0
- package/src/controllers/SyncCoordinator.ts +597 -0
- package/src/controllers/WASMVectorSearch.ts +11 -2
- package/src/controllers/index.ts +14 -0
- package/src/examples/quic-sync-example.ts +198 -0
- package/src/index.ts +2 -1
- package/src/types/quic.ts +772 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HNSWIndex - Hierarchical Navigable Small World Index
|
|
3
|
+
*
|
|
4
|
+
* High-performance approximate nearest neighbor (ANN) search using HNSW algorithm.
|
|
5
|
+
* Provides 10-100x speedup over brute-force search for large vector datasets.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - HNSW indexing for sub-millisecond search
|
|
9
|
+
* - Automatic index building and management
|
|
10
|
+
* - Configurable M and efConstruction parameters
|
|
11
|
+
* - Persistent index storage
|
|
12
|
+
* - Graceful fallback to brute-force
|
|
13
|
+
* - Multi-distance metric support ('cosine', 'l2' (Euclidean), 'ip' (inner product))
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import hnswlibNode from 'hnswlib-node';
|
|
17
|
+
import * as fs from 'fs';
|
|
18
|
+
import * as path from 'path';
|
|
19
|
+
|
|
20
|
+
// The hnswlib-node default export is treated as untyped here, so the index
// constructor is pulled off it through an `any` cast.
const HierarchicalNSW = (hnswlibNode as any).HierarchicalNSW;

// Database type from db-fallback (aliased to `any` in this module, so calls on
// the handle are not type-checked).
type Database = any;
|
|
24
|
+
|
|
25
|
+
/**
 * Tuning and persistence options accepted by the HNSWIndex constructor.
 * The defaults quoted below are the ones that constructor applies when a
 * field is omitted from the partial config it receives.
 */
export interface HNSWConfig {
  /** Maximum number of connections per layer (default: 16). */
  M: number;

  /** Size of the dynamic candidate list while the graph is being built (default: 200). */
  efConstruction: number;

  /** Size of the dynamic candidate list while searching (default: 100). */
  efSearch: number;

  /** Distance metric: 'cosine', 'l2' (Euclidean) or 'ip' (inner product). Default: 'cosine'. */
  metric: 'cosine' | 'l2' | 'ip';

  /** Number of components every indexed vector must have (default: 1536). */
  dimension: number;

  /**
   * Capacity requested when the index is initialised (default: 100000).
   * buildIndex() allocates the larger of this value and the number of rows
   * it read, so this acts as a minimum capacity rather than a hard cap.
   */
  maxElements: number;

  /** Enable persistent index storage. Only takes effect when `indexPath` is also set. */
  persistIndex: boolean;

  /** File the index is written to / read from; a sibling `<indexPath>.mappings.json` holds the id maps. */
  indexPath?: string;

  /**
   * Rebuild threshold expressed as a fraction of the indexed elements
   * (default: 0.1, i.e. a rebuild is suggested once updates exceed 10%).
   */
  rebuildThreshold: number;
}
|
|
53
|
+
|
|
54
|
+
/** One hit returned by HNSWIndex.search(). */
export interface HNSWSearchResult {
  /** Database id (`pattern_id`) of the matched vector. */
  id: number;
  /** Raw distance reported by the underlying index for the configured metric. */
  distance: number;
  /** Score derived from `distance`; larger means closer to the query. */
  similarity: number;
  /** Optional caller-defined payload; the search code visible in this file never sets it. */
  metadata?: any;
}
|
|
60
|
+
|
|
61
|
+
/** Snapshot of index configuration and runtime counters, as returned by HNSWIndex.getStats(). */
export interface HNSWStats {
  /** Mirrors `indexBuilt`. */
  enabled: boolean;
  /** True once an index has been built or loaded successfully. */
  indexBuilt: boolean;
  /** Number of ids currently mapped to an index label. */
  numElements: number;
  /** Configured vector dimension. */
  dimension: number;
  /** Configured distance metric. */
  metric: string;
  /** Configured M (connections per layer). */
  M: number;
  /** Configured efConstruction. */
  efConstruction: number;
  /** efSearch currently in effect. */
  efSearch: number;
  /** `Date.now()` timestamp of the last successful build, or null if none happened. */
  lastBuildTime: number | null;
  /** Duration in milliseconds of the most recent search, or null if none happened. */
  lastSearchTime: number | null;
  /** Number of searches executed so far. */
  totalSearches: number;
  /** Mean search duration in milliseconds (0 before the first search). */
  avgSearchTimeMs: number;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Approximate nearest-neighbour index over embedding vectors stored in a SQL
 * table, backed by hnswlib-node's `HierarchicalNSW`.
 *
 * Database ids (`pattern_id`) are mapped to dense integer labels inside the
 * native index; the two maps `idToLabel` / `labelToId` translate between them
 * and are persisted next to the index file as `<indexPath>.mappings.json`.
 */
export class HNSWIndex {
  /**
   * SQL identifiers (table / column names) cannot be bound as parameters, so
   * anything interpolated into statement text must match this pattern:
   * a plain identifier, optionally schema-qualified.
   */
  private static readonly SQL_IDENTIFIER =
    /^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?$/;

  private db: Database;
  private config: HNSWConfig;
  private index: any | null = null;
  private vectorCache: Map<number, Float32Array> = new Map();
  private idToLabel: Map<number, number> = new Map();
  private labelToId: Map<number, number> = new Map();
  private nextLabel: number = 0;
  private indexBuilt: boolean = false;
  private updatesSinceLastBuild: number = 0;
  private totalSearches: number = 0;
  private totalSearchTime: number = 0;
  private lastBuildTime: number | null = null;
  private lastSearchTime: number | null = null;
  /** Table the current index was built from; post-filter queries run against it. */
  private sourceTable: string = 'pattern_embeddings';

  /**
   * @param db - Database handle exposing `prepare()` whose statements offer `all()` / `get()`.
   * @param config - Overrides for the defaults listed on HNSWConfig.
   *
   * When `persistIndex` is on and `indexPath` is set, a previously saved
   * index is loaded synchronously; a failed load leaves the instance empty.
   */
  constructor(db: Database, config?: Partial<HNSWConfig>) {
    this.db = db;
    this.config = {
      M: 16,
      efConstruction: 200,
      efSearch: 100,
      metric: 'cosine',
      dimension: 1536,
      maxElements: 100000,
      persistIndex: true,
      rebuildThreshold: 0.1, // Rebuild after 10% updates
      ...config,
    };

    // Try to load existing index
    if (this.config.persistIndex && this.config.indexPath) {
      this.loadIndex();
    }
  }

  /**
   * Build HNSW index from database vectors.
   *
   * Reads every `(pattern_id, embedding)` row of `tableName`, replaces the
   * in-memory index and mappings, and persists the result when persistence
   * is configured. If the table is empty a warning is logged and the
   * current state is left untouched.
   *
   * @param tableName - Source table; must be a plain (optionally schema-qualified) identifier.
   * @throws Error if the table name is not a valid identifier, a stored
   *   embedding cannot be decoded to `dimension` floats, or the database /
   *   native index raises. On failure the index is marked as not built.
   */
  async buildIndex(tableName: string = 'pattern_embeddings'): Promise<void> {
    const start = Date.now();
    console.log(`[HNSWIndex] Building HNSW index from ${tableName}...`);

    try {
      // The table name ends up in the statement text, so reject anything
      // that is not a bare identifier before it reaches the database.
      HNSWIndex.assertSqlIdentifier(tableName, 'table name');

      // Fetch all vectors from database
      const stmt = this.db.prepare(`
        SELECT pattern_id as id, embedding
        FROM ${tableName}
      `);

      const rows = stmt.all() as any[];

      if (rows.length === 0) {
        console.warn('[HNSWIndex] No vectors found in database');
        return;
      }

      // Create new HNSW index
      this.index = new HierarchicalNSW(this.config.metric, this.config.dimension);
      this.index.initIndex(
        Math.max(rows.length, this.config.maxElements),
        this.config.M,
        this.config.efConstruction
      );
      this.index.setEf(this.config.efSearch);

      // Clear mappings
      this.vectorCache.clear();
      this.idToLabel.clear();
      this.labelToId.clear();
      this.nextLabel = 0;

      // Add vectors to index
      console.log(`[HNSWIndex] Adding ${rows.length} vectors to index...`);

      for (const row of rows) {
        const id = row.id;
        const embedding = this.decodeEmbedding(row.embedding, id);

        // Add to index with label (convert Float32Array to number[])
        const label = this.nextLabel++;
        this.index.addPoint(Array.from(embedding), label);

        // Store mappings
        this.idToLabel.set(id, label);
        this.labelToId.set(label, id);
        this.vectorCache.set(id, embedding);
      }

      this.sourceTable = tableName;
      this.indexBuilt = true;
      this.updatesSinceLastBuild = 0;
      this.lastBuildTime = Date.now();

      const duration = (Date.now() - start) / 1000;
      console.log(`[HNSWIndex] ✅ Index built successfully in ${duration.toFixed(2)}s`);
      console.log(`[HNSWIndex] - Elements: ${rows.length}`);
      console.log(`[HNSWIndex] - Dimension: ${this.config.dimension}`);
      console.log(`[HNSWIndex] - M: ${this.config.M}`);
      console.log(`[HNSWIndex] - efConstruction: ${this.config.efConstruction}`);

      // Persist index if enabled
      if (this.config.persistIndex && this.config.indexPath) {
        await this.saveIndex();
      }
    } catch (error) {
      console.error('[HNSWIndex] Failed to build index:', error);
      this.indexBuilt = false;
      throw error;
    }
  }

  /**
   * Search HNSW index for k-nearest neighbors.
   *
   * @param query - Query vector; must have exactly `dimension` components.
   * @param k - Maximum number of neighbours wanted. Values larger than the
   *   number of indexed elements are clamped; `k < 1` yields an empty result.
   * @param options.threshold - Drop hits whose similarity is below this value.
   * @param options.filters - Column/value equality filters applied after the
   *   vector search (post-filtering), so fewer than `k` hits may come back.
   * @returns Hits in the order the native index reported them.
   * @throws Error if the index is not built, the query dimension is wrong,
   *   a filter key is not a valid column identifier, or the native search /
   *   database query fails.
   */
  async search(
    query: Float32Array,
    k: number,
    options?: {
      threshold?: number;
      filters?: Record<string, any>;
    }
  ): Promise<HNSWSearchResult[]> {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    if (query.length !== this.config.dimension) {
      throw new Error(
        `Query dimension mismatch: expected ${this.config.dimension}, got ${query.length}`
      );
    }

    const searchStart = Date.now();

    try {
      // Never ask the native index for more neighbours than it holds; an
      // oversized or non-positive k would otherwise surface as a native error.
      const indexedCount =
        typeof this.index.getCurrentCount === 'function'
          ? this.index.getCurrentCount()
          : this.labelToId.size;
      const effectiveK = Math.min(Math.floor(k), indexedCount);
      if (!(effectiveK >= 1)) {
        return [];
      }

      // Perform HNSW search (convert Float32Array to number[])
      const result = this.index.searchKnn(Array.from(query), effectiveK);

      const searchTime = Date.now() - searchStart;
      this.lastSearchTime = searchTime;
      this.totalSearches++;
      this.totalSearchTime += searchTime;

      // Convert results to our format
      const results: HNSWSearchResult[] = [];

      for (let i = 0; i < result.neighbors.length; i++) {
        const label = result.neighbors[i];
        const distance = result.distances[i];
        const id = this.labelToId.get(label);

        if (id === undefined) {
          console.warn(`[HNSWIndex] Label ${label} not found in mapping`);
          continue;
        }

        // Convert distance to similarity based on metric
        const similarity = this.distanceToSimilarity(distance);

        // Apply threshold if specified
        if (options?.threshold !== undefined && similarity < options.threshold) {
          continue;
        }

        results.push({
          id,
          distance,
          similarity,
        });
      }

      // Apply filters if specified (post-filtering)
      if (options?.filters) {
        return this.applyFilters(results, options.filters);
      }

      return results;
    } catch (error) {
      console.error('[HNSWIndex] Search failed:', error);
      throw error;
    }
  }

  /**
   * Add a single vector to the index.
   *
   * Re-adding an id that is already indexed replaces its vector: the new
   * point gets a fresh label and the previous label is retired so the id is
   * never returned twice.
   *
   * @throws Error if the index is not built or `embedding` has the wrong dimension.
   */
  addVector(id: number, embedding: Float32Array): void {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    if (embedding.length !== this.config.dimension) {
      throw new Error(
        `Vector dimension mismatch for id ${id}: expected ${this.config.dimension}, got ${embedding.length}`
      );
    }

    const previousLabel = this.idToLabel.get(id);

    this.ensureCapacity();

    // Only consume the label once the native insert has succeeded.
    const label = this.nextLabel;
    this.index.addPoint(Array.from(embedding), label);
    this.nextLabel = label + 1;

    if (previousLabel !== undefined) {
      this.retireLabel(previousLabel);
    }

    this.idToLabel.set(id, label);
    this.labelToId.set(label, id);
    this.vectorCache.set(id, embedding);

    this.updatesSinceLastBuild++;

    // Check if rebuild is needed
    const totalElements = this.labelToId.size;
    const updatePercentage = this.updatesSinceLastBuild / totalElements;

    if (updatePercentage > this.config.rebuildThreshold) {
      console.log(`[HNSWIndex] Rebuild threshold reached (${(updatePercentage * 100).toFixed(1)}%)`);
    }
  }

  /**
   * Remove a vector from the index.
   *
   * The id is dropped from the mappings and, when the native index offers
   * `markDelete`, its label is flagged as deleted so it stops occupying
   * search result slots. The graph itself is only compacted by a rebuild.
   * Unknown ids are reported with a warning and otherwise ignored.
   *
   * @throws Error if the index is not built.
   */
  removeVector(id: number): void {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    const label = this.idToLabel.get(id);
    if (label === undefined) {
      console.warn(`[HNSWIndex] ID ${id} not found in index`);
      return;
    }

    this.retireLabel(label);
    this.idToLabel.delete(id);
    this.vectorCache.delete(id);

    this.updatesSinceLastBuild++;
  }

  /**
   * Check if index needs rebuilding.
   *
   * @returns true when no index is built, or when the number of adds/removes
   *   since the last build exceeds `rebuildThreshold` as a fraction of the
   *   currently mapped elements.
   */
  needsRebuild(): boolean {
    if (!this.indexBuilt) return true;

    const totalElements = this.labelToId.size;
    if (totalElements === 0) return false;

    const updatePercentage = this.updatesSinceLastBuild / totalElements;
    return updatePercentage > this.config.rebuildThreshold;
  }

  /**
   * Save index to disk.
   *
   * Writes the native index to `indexPath` and the id/label maps, next label,
   * source table and config to `<indexPath>.mappings.json`. Persistence is
   * best-effort: failures are logged, not thrown.
   */
  private async saveIndex(): Promise<void> {
    if (!this.index || !this.config.indexPath) return;

    try {
      const indexDir = path.dirname(this.config.indexPath);
      if (!fs.existsSync(indexDir)) {
        fs.mkdirSync(indexDir, { recursive: true });
      }

      // Save HNSW index. Depending on the hnswlib-node release `writeIndex`
      // may return a Promise; awaiting is a no-op when it is synchronous and
      // otherwise guarantees the file is complete before success is reported.
      await this.index.writeIndex(this.config.indexPath);

      // Save mappings
      const mappingsPath = this.config.indexPath + '.mappings.json';
      const mappings = {
        idToLabel: Array.from(this.idToLabel.entries()),
        labelToId: Array.from(this.labelToId.entries()),
        nextLabel: this.nextLabel,
        tableName: this.sourceTable,
        config: this.config,
      };

      fs.writeFileSync(mappingsPath, JSON.stringify(mappings, null, 2));

      console.log(`[HNSWIndex] Index saved to ${this.config.indexPath}`);
    } catch (error) {
      console.error('[HNSWIndex] Failed to save index:', error);
    }
  }

  /**
   * Load index from disk.
   *
   * Both the index file and its mappings file are required: without the
   * mappings no search hit could be translated back to a database id. A
   * saved index whose recorded dimension or metric differs from the current
   * config is rejected. Any failure leaves the instance empty and not built.
   * The vector cache is not persisted and stays empty after a load.
   */
  private loadIndex(): void {
    if (!this.config.indexPath || !fs.existsSync(this.config.indexPath)) {
      return;
    }

    try {
      console.log(`[HNSWIndex] Loading index from ${this.config.indexPath}...`);

      // Load and validate mappings first; they are cheap to check and decide
      // whether reading the native index is worthwhile at all.
      const mappingsPath = this.config.indexPath + '.mappings.json';
      if (!fs.existsSync(mappingsPath)) {
        throw new Error(`Mappings file not found: ${mappingsPath}`);
      }

      const mappingsData = JSON.parse(fs.readFileSync(mappingsPath, 'utf-8'));
      if (
        !mappingsData ||
        !Array.isArray(mappingsData.idToLabel) ||
        !Array.isArray(mappingsData.labelToId) ||
        !Number.isInteger(mappingsData.nextLabel)
      ) {
        throw new Error(`Mappings file is malformed: ${mappingsPath}`);
      }

      const savedConfig = mappingsData.config;
      if (
        savedConfig &&
        (savedConfig.dimension !== this.config.dimension ||
          savedConfig.metric !== this.config.metric)
      ) {
        throw new Error(
          `Saved index (${savedConfig.metric}, dim ${savedConfig.dimension}) does not match ` +
            `configuration (${this.config.metric}, dim ${this.config.dimension})`
        );
      }

      // Load HNSW index. This method runs synchronously from the constructor,
      // so prefer the synchronous reader where the library provides one and
      // fall back to `readIndex` for releases where that call is synchronous.
      const loaded = new HierarchicalNSW(this.config.metric, this.config.dimension);
      if (typeof loaded.readIndexSync === 'function') {
        loaded.readIndexSync(this.config.indexPath);
      } else {
        loaded.readIndex(this.config.indexPath);
      }
      loaded.setEf(this.config.efSearch);

      this.index = loaded;
      this.idToLabel = new Map(mappingsData.idToLabel);
      this.labelToId = new Map(mappingsData.labelToId);
      this.nextLabel = mappingsData.nextLabel;
      if (
        typeof mappingsData.tableName === 'string' &&
        HNSWIndex.SQL_IDENTIFIER.test(mappingsData.tableName)
      ) {
        this.sourceTable = mappingsData.tableName;
      }

      this.indexBuilt = true;
      console.log(`[HNSWIndex] ✅ Index loaded successfully (${this.labelToId.size} elements)`);
    } catch (error) {
      console.warn('[HNSWIndex] Failed to load index:', error);
      this.index = null;
      this.idToLabel.clear();
      this.labelToId.clear();
      this.nextLabel = 0;
      this.indexBuilt = false;
    }
  }

  /**
   * Convert distance to similarity based on metric.
   *
   * - cosine: `1 - distance`
   * - l2: `exp(-distance)`, mapping [0, ∞) onto (0, 1]
   *   NOTE(review): hnswlib's l2 space is commonly documented as returning the
   *   squared distance — confirm before interpreting thresholds.
   * - ip: `-distance`, as written in the original implementation
   *   NOTE(review): confirm against the distance hnswlib reports for 'ip'.
   */
  private distanceToSimilarity(distance: number): number {
    switch (this.config.metric) {
      case 'cosine':
        // Cosine distance is 1 - similarity
        return 1 - distance;

      case 'l2':
        // Using exponential decay: e^(-distance)
        return Math.exp(-distance);

      case 'ip':
        // Negate distance to get similarity
        return -distance;

      default:
        // Unknown metric supplied at runtime: treat it like cosine.
        return 1 - distance;
    }
  }

  /**
   * Apply post-filtering to search results.
   *
   * Keeps only the hits whose row in the source table satisfies every
   * `column = value` pair in `filters`. An empty filter object keeps all hits.
   *
   * @throws Error if a filter key is not a valid column identifier.
   */
  private applyFilters(
    results: HNSWSearchResult[],
    filters: Record<string, any>
  ): HNSWSearchResult[] {
    const entries = Object.entries(filters);

    // Nothing to test against: an empty WHERE fragment would be a syntax error.
    if (entries.length === 0 || results.length === 0) {
      return results;
    }

    // Column names are spliced into the SQL text, values are bound.
    const conditions = entries.map(([column]) => {
      HNSWIndex.assertSqlIdentifier(column, 'filter column');
      return `${column} = ?`;
    });
    const params = entries.map(([, value]) => value);

    // The statement is identical for every hit, so prepare it once.
    const stmt = this.db.prepare(`
      SELECT 1 FROM ${this.sourceTable}
      WHERE pattern_id = ? AND ${conditions.join(' AND ')}
    `);

    return results.filter(result => Boolean(stmt.get(result.id, ...params)));
  }

  /**
   * Get index statistics.
   */
  getStats(): HNSWStats {
    return {
      enabled: this.indexBuilt,
      indexBuilt: this.indexBuilt,
      numElements: this.labelToId.size,
      dimension: this.config.dimension,
      metric: this.config.metric,
      M: this.config.M,
      efConstruction: this.config.efConstruction,
      efSearch: this.config.efSearch,
      lastBuildTime: this.lastBuildTime,
      lastSearchTime: this.lastSearchTime,
      totalSearches: this.totalSearches,
      avgSearchTimeMs: this.totalSearches > 0 ? this.totalSearchTime / this.totalSearches : 0,
    };
  }

  /**
   * Update efSearch parameter for search quality/speed tradeoff.
   *
   * The value is always recorded in the config so that an index built or
   * loaded later picks it up; it is applied immediately when an index exists.
   *
   * @throws RangeError if `ef` is not a positive integer.
   */
  setEfSearch(ef: number): void {
    if (!Number.isInteger(ef) || ef <= 0) {
      throw new RangeError(`efSearch must be a positive integer, got ${ef}`);
    }

    this.config.efSearch = ef;
    if (this.index) {
      this.index.setEf(ef);
    }
    console.log(`[HNSWIndex] efSearch updated to ${ef}`);
  }

  /**
   * Clear index and free memory. Search statistics are kept.
   */
  clear(): void {
    this.index = null;
    this.vectorCache.clear();
    this.idToLabel.clear();
    this.labelToId.clear();
    this.nextLabel = 0;
    this.indexBuilt = false;
    this.updatesSinceLastBuild = 0;
    console.log('[HNSWIndex] Index cleared');
  }

  /**
   * Check if index is built and ready.
   */
  isReady(): boolean {
    return this.indexBuilt && this.index !== null;
  }

  /** Throw unless `name` is safe to interpolate into SQL text as an identifier. */
  private static assertSqlIdentifier(name: string, kind: string): void {
    if (typeof name !== 'string' || !HNSWIndex.SQL_IDENTIFIER.test(name)) {
      throw new Error(`[HNSWIndex] Invalid ${kind}: ${JSON.stringify(name)}`);
    }
  }

  /**
   * Interpret a stored embedding BLOB as float32 values and check its length.
   * A zero-copy view is used when the bytes are 4-byte aligned; otherwise the
   * bytes are copied, because a Float32Array view requires an aligned offset.
   */
  private decodeEmbedding(blob: Uint8Array, id: unknown): Float32Array {
    if (!ArrayBuffer.isView(blob)) {
      throw new Error(`Embedding for id ${String(id)} is not a binary blob`);
    }
    if (blob.byteLength % 4 !== 0) {
      throw new Error(
        `Embedding for id ${String(id)} has ${blob.byteLength} bytes, which is not a whole number of float32 values`
      );
    }

    const vector =
      blob.byteOffset % 4 === 0
        ? new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4)
        : new Float32Array(
            blob.buffer.slice(blob.byteOffset, blob.byteOffset + blob.byteLength)
          );

    if (vector.length !== this.config.dimension) {
      throw new Error(
        `Vector dimension mismatch for id ${String(id)}: expected ${this.config.dimension}, got ${vector.length}`
      );
    }
    return vector;
  }

  /** Grow the native index before an insert would exceed its capacity (when the library supports it). */
  private ensureCapacity(): void {
    const idx = this.index;
    if (
      typeof idx.getMaxElements !== 'function' ||
      typeof idx.getCurrentCount !== 'function' ||
      typeof idx.resizeIndex !== 'function'
    ) {
      return;
    }

    const capacity = idx.getMaxElements();
    if (idx.getCurrentCount() >= capacity) {
      idx.resizeIndex(Math.max(capacity * 2, capacity + 1));
    }
  }

  /** Stop a label from being returned: flag it deleted natively if possible and drop its mapping. */
  private retireLabel(label: number): void {
    if (this.index && typeof this.index.markDelete === 'function') {
      try {
        this.index.markDelete(label);
      } catch (error) {
        // The mapping removal below still hides the label from callers.
        console.warn(`[HNSWIndex] Could not mark label ${label} as deleted:`, error);
      }
    }
    this.labelToId.delete(label);
  }
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MMR (Maximal Marginal Relevance) Diversity Ranking
|
|
3
|
+
*
|
|
4
|
+
* Implements MMR algorithm to select diverse results that balance
|
|
5
|
+
* relevance to query with diversity from already-selected results.
|
|
6
|
+
*
|
|
7
|
+
* Formula: MMR = argmax [λ × Sim(Di, Q) - (1-λ) × max Sim(Di, Dj)]
|
|
8
|
+
* Di∈R\S Dj∈S
|
|
9
|
+
*
|
|
10
|
+
* Where:
|
|
11
|
+
* - Di = candidate document
|
|
12
|
+
* - Q = query
|
|
13
|
+
* - S = already selected documents
|
|
14
|
+
* - λ = balance parameter (0 = max diversity, 1 = max relevance)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** Options for MMRDiversityRanker.selectDiverse(). */
export interface MMROptions {
  /**
   * Balance between relevance and diversity (default: 0.5).
   * 0 = maximum diversity, 1 = maximum relevance. Values outside [0, 1] are
   * clamped; non-finite values fall back to the default.
   */
  lambda?: number;
  /** Number of results to return (default: 10). Values below 1 yield an empty result. */
  k?: number;
  /** Similarity metric used between embeddings (default: 'cosine'). */
  metric?: 'cosine' | 'euclidean' | 'dot';
}

/** A search hit that can be re-ranked by MMR. */
export interface MMRCandidate {
  id: number;
  /** Vector used to measure redundancy against already-selected candidates. */
  embedding: number[];
  /** Similarity to query. */
  similarity: number;
  /** Additional data, carried through untouched. */
  [key: string]: any;
}

/**
 * MMR (Maximal Marginal Relevance) re-ranking.
 *
 * At every step the candidate Di (not yet selected) maximising
 *
 *   λ · Sim(Di, Q) − (1 − λ) · max over selected Dj of Sim(Di, Dj)
 *
 * is moved into the selected set S, where Q is the query.
 */
export class MMRDiversityRanker {
  /**
   * Select diverse results using MMR algorithm.
   *
   * @param candidates - All candidate results with embeddings. Not mutated.
   * @param queryEmbedding - Query vector; only used for candidates that carry
   *   no `similarity` value of their own.
   * @param options - MMR configuration.
   * @returns At most `k` candidates. When the input already has `k` or fewer
   *   entries they are all returned in input order (as a new array);
   *   otherwise the result is in selection order and consists of shallow
   *   copies of the chosen candidates.
   * @throws Error if two compared vectors differ in length or the metric is unknown.
   */
  static selectDiverse(
    candidates: MMRCandidate[],
    queryEmbedding: number[],
    options: MMROptions = {}
  ): MMRCandidate[] {
    const k = options.k ?? 10;
    const metric = options.metric ?? 'cosine';
    const requestedLambda = options.lambda ?? 0.5;
    const lambda = Number.isFinite(requestedLambda)
      ? Math.min(1, Math.max(0, requestedLambda))
      : 0.5;

    // Nothing to pick from, or nothing asked for (also covers NaN k).
    if (candidates.length === 0 || !(k > 0)) {
      return [];
    }

    // Everything fits: hand back a copy so callers never alias the input array.
    if (candidates.length <= k) {
      return [...candidates];
    }

    // Fill in missing query similarities, then rank by relevance.
    const ranked = candidates
      .map(c => ({
        ...c,
        similarity:
          c.similarity ??
          MMRDiversityRanker.calculateSimilarity(queryEmbedding, c.embedding, metric),
      }))
      .sort((a, b) => b.similarity - a.similarity);

    // The most relevant candidate always opens the selection.
    const [mostRelevant, ...remaining] = ranked;
    const selected: MMRCandidate[] = [mostRelevant];

    // redundancy[i] = max similarity of remaining[i] to anything selected so far.
    // Keeping it incrementally avoids re-comparing against the whole selection
    // on every round.
    const redundancy = remaining.map(c =>
      MMRDiversityRanker.calculateSimilarity(c.embedding, mostRelevant.embedding, metric)
    );

    while (selected.length < k && remaining.length > 0) {
      let bestScore = -Infinity;
      let bestIdx = 0;

      for (let i = 0; i < remaining.length; i++) {
        const score = lambda * remaining[i].similarity - (1 - lambda) * redundancy[i];
        if (score > bestScore) {
          bestScore = score;
          bestIdx = i;
        }
      }

      const [chosen] = remaining.splice(bestIdx, 1);
      redundancy.splice(bestIdx, 1);
      selected.push(chosen);

      // Fold the newly selected item into each survivor's redundancy.
      if (selected.length < k) {
        for (let i = 0; i < remaining.length; i++) {
          redundancy[i] = Math.max(
            redundancy[i],
            MMRDiversityRanker.calculateSimilarity(remaining[i].embedding, chosen.embedding, metric)
          );
        }
      }
    }

    return selected;
  }

  /**
   * Calculate similarity between two vectors.
   *
   * - cosine: dot product over the product of magnitudes; 0 when either
   *   vector has zero magnitude (instead of NaN).
   * - euclidean: `1 / (1 + distance)`, i.e. 1 for identical vectors, tending to 0.
   * - dot: raw dot product (unbounded).
   *
   * @throws Error on length mismatch or unknown metric.
   */
  private static calculateSimilarity(
    vec1: number[],
    vec2: number[],
    metric: 'cosine' | 'euclidean' | 'dot'
  ): number {
    if (vec1.length !== vec2.length) {
      throw new Error(`Vector dimension mismatch: ${vec1.length} vs ${vec2.length}`);
    }

    switch (metric) {
      case 'cosine': {
        let dot = 0, mag1 = 0, mag2 = 0;
        for (let i = 0; i < vec1.length; i++) {
          dot += vec1[i] * vec2[i];
          mag1 += vec1[i] * vec1[i];
          mag2 += vec2[i] * vec2[i];
        }
        const denominator = Math.sqrt(mag1) * Math.sqrt(mag2);
        // A zero vector has no direction; report "unrelated" rather than NaN.
        return denominator === 0 ? 0 : dot / denominator;
      }

      case 'euclidean': {
        let sum = 0;
        for (let i = 0; i < vec1.length; i++) {
          const diff = vec1[i] - vec2[i];
          sum += diff * diff;
        }
        return 1 / (1 + Math.sqrt(sum)); // Normalized to 0-1
      }

      case 'dot': {
        let dot = 0;
        for (let i = 0; i < vec1.length; i++) {
          dot += vec1[i] * vec2[i];
        }
        return dot;
      }

      default:
        throw new Error(`Unknown metric: ${metric}`);
    }
  }

  /**
   * Calculate diversity score for a set of results.
   *
   * Every pair of results contributes `1 - similarity`, so for all metrics a
   * more similar pair lowers the score. For 'cosine' and 'euclidean' the
   * contribution of identical vectors is 0; for 'dot' the value is unbounded.
   *
   * @param results - Results to analyze
   * @param metric - Similarity metric
   * @returns Average pairwise distance (higher = more diverse); 1.0 when
   *   fewer than two results are given.
   * @throws Error if two embeddings differ in length.
   */
  static calculateDiversityScore(
    results: MMRCandidate[],
    metric: 'cosine' | 'euclidean' | 'dot' = 'cosine'
  ): number {
    if (results.length < 2) {
      return 1.0; // Single result is maximally diverse
    }

    let totalDistance = 0;
    let comparisons = 0;

    for (let i = 0; i < results.length; i++) {
      for (let j = i + 1; j < results.length; j++) {
        const similarity = MMRDiversityRanker.calculateSimilarity(
          results[i].embedding,
          results[j].embedding,
          metric
        );
        // Convert similarity to distance (same direction for every metric).
        totalDistance += 1 - similarity;
        comparisons++;
      }
    }

    return comparisons > 0 ? totalDistance / comparisons : 0;
  }
}
|