agentdb 1.5.9 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/README.md +11 -11
  2. package/dist/agentdb.min.js +4 -4
  3. package/dist/cli/agentdb-cli.d.ts +29 -0
  4. package/dist/cli/agentdb-cli.d.ts.map +1 -1
  5. package/dist/cli/agentdb-cli.js +1009 -34
  6. package/dist/cli/agentdb-cli.js.map +1 -1
  7. package/dist/controllers/ContextSynthesizer.d.ts +65 -0
  8. package/dist/controllers/ContextSynthesizer.d.ts.map +1 -0
  9. package/dist/controllers/ContextSynthesizer.js +208 -0
  10. package/dist/controllers/ContextSynthesizer.js.map +1 -0
  11. package/dist/controllers/HNSWIndex.d.ts +128 -0
  12. package/dist/controllers/HNSWIndex.d.ts.map +1 -0
  13. package/dist/controllers/HNSWIndex.js +361 -0
  14. package/dist/controllers/HNSWIndex.js.map +1 -0
  15. package/dist/controllers/MMRDiversityRanker.d.ts +50 -0
  16. package/dist/controllers/MMRDiversityRanker.d.ts.map +1 -0
  17. package/dist/controllers/MMRDiversityRanker.js +130 -0
  18. package/dist/controllers/MMRDiversityRanker.js.map +1 -0
  19. package/dist/controllers/MetadataFilter.d.ts +70 -0
  20. package/dist/controllers/MetadataFilter.d.ts.map +1 -0
  21. package/dist/controllers/MetadataFilter.js +243 -0
  22. package/dist/controllers/MetadataFilter.js.map +1 -0
  23. package/dist/controllers/QUICClient.d.ts +109 -0
  24. package/dist/controllers/QUICClient.d.ts.map +1 -0
  25. package/dist/controllers/QUICClient.js +299 -0
  26. package/dist/controllers/QUICClient.js.map +1 -0
  27. package/dist/controllers/QUICServer.d.ts +121 -0
  28. package/dist/controllers/QUICServer.d.ts.map +1 -0
  29. package/dist/controllers/QUICServer.js +383 -0
  30. package/dist/controllers/QUICServer.js.map +1 -0
  31. package/dist/controllers/SyncCoordinator.d.ts +120 -0
  32. package/dist/controllers/SyncCoordinator.d.ts.map +1 -0
  33. package/dist/controllers/SyncCoordinator.js +441 -0
  34. package/dist/controllers/SyncCoordinator.js.map +1 -0
  35. package/dist/controllers/WASMVectorSearch.d.ts.map +1 -1
  36. package/dist/controllers/WASMVectorSearch.js +10 -2
  37. package/dist/controllers/WASMVectorSearch.js.map +1 -1
  38. package/dist/controllers/index.d.ts +14 -0
  39. package/dist/controllers/index.d.ts.map +1 -1
  40. package/dist/controllers/index.js +7 -0
  41. package/dist/controllers/index.js.map +1 -1
  42. package/dist/examples/quic-sync-example.d.ts +9 -0
  43. package/dist/examples/quic-sync-example.d.ts.map +1 -0
  44. package/dist/examples/quic-sync-example.js +169 -0
  45. package/dist/examples/quic-sync-example.js.map +1 -0
  46. package/dist/index.d.ts +1 -0
  47. package/dist/index.d.ts.map +1 -1
  48. package/dist/index.js +2 -1
  49. package/dist/index.js.map +1 -1
  50. package/dist/types/quic.d.ts +518 -0
  51. package/dist/types/quic.d.ts.map +1 -0
  52. package/dist/types/quic.js +272 -0
  53. package/dist/types/quic.js.map +1 -0
  54. package/package.json +11 -3
  55. package/src/browser-entry.js +41 -6
  56. package/src/cli/agentdb-cli.ts +1114 -33
  57. package/src/controllers/ContextSynthesizer.ts +285 -0
  58. package/src/controllers/HNSWIndex.ts +495 -0
  59. package/src/controllers/MMRDiversityRanker.ts +187 -0
  60. package/src/controllers/MetadataFilter.ts +280 -0
  61. package/src/controllers/QUICClient.ts +413 -0
  62. package/src/controllers/QUICServer.ts +498 -0
  63. package/src/controllers/SyncCoordinator.ts +597 -0
  64. package/src/controllers/WASMVectorSearch.ts +11 -2
  65. package/src/controllers/index.ts +14 -0
  66. package/src/examples/quic-sync-example.ts +198 -0
  67. package/src/index.ts +2 -1
  68. package/src/types/quic.ts +772 -0
@@ -0,0 +1,495 @@
1
+ /**
2
+ * HNSWIndex - Hierarchical Navigable Small World Index
3
+ *
4
+ * High-performance approximate nearest neighbor (ANN) search using HNSW algorithm.
5
+ * Provides 10-100x speedup over brute-force search for large vector datasets.
6
+ *
7
+ * Features:
8
+ * - HNSW indexing for sub-millisecond search
9
+ * - Automatic index building and management
10
+ * - Configurable M and efConstruction parameters
11
+ * - Persistent index storage
12
+ * - Graceful fallback to brute-force
13
+ * - Multi-distance metric support (cosine, euclidean, ip)
14
+ */
15
+
16
+ import hnswlibNode from 'hnswlib-node';
17
+ import * as fs from 'fs';
18
+ import * as path from 'path';
19
+
20
+ const { HierarchicalNSW } = hnswlibNode as any;
21
+
22
+ // Database type from db-fallback
23
+ type Database = any;
24
+
25
/**
 * Tuning and persistence options for an HNSW index.
 *
 * Every field has a default applied by the `HNSWIndex` constructor, so callers
 * normally pass a `Partial<HNSWConfig>`.
 */
export interface HNSWConfig {
  /** Maximum number of connections per layer (default: 16) */
  M: number;

  /** Size of dynamic candidate list during construction (default: 200) */
  efConstruction: number;

  /** Size of dynamic candidate list during search (default: 100) */
  efSearch: number;

  /**
   * Distance metric (default: 'cosine').
   * - 'cosine': cosine distance
   * - 'l2': Euclidean distance (note: the accepted literal is 'l2', not 'euclidean')
   * - 'ip': inner product
   */
  metric: 'cosine' | 'l2' | 'ip';

  /** Vector dimension (default: 1536) */
  dimension: number;

  /** Maximum number of elements in index (default: 100000) */
  maxElements: number;

  /** Enable persistent index storage (default: true) */
  persistIndex: boolean;

  /** Path to store index file; persistence is skipped when this is not set */
  indexPath?: string;

  /**
   * Rebuild threshold expressed as a fraction, not a percentage: a rebuild is
   * suggested once (updates since last build / indexed elements) exceeds this
   * value (default: 0.1, i.e. 10%).
   */
  rebuildThreshold: number;
}
53
+
54
/**
 * One hit returned by an HNSW search.
 */
export interface HNSWSearchResult {
  /** Caller-facing identifier of the matched vector. */
  id: number;
  /** Raw distance reported by the underlying index for this hit. */
  distance: number;
  /** Similarity score derived from `distance` according to the configured metric. */
  similarity: number;
  /** Optional extra data attached to the hit. */
  metadata?: any;
}
60
+
61
/**
 * Snapshot of index configuration and runtime counters.
 */
export interface HNSWStats {
  /** Whether the index is usable. */
  enabled: boolean;
  /** Whether an index has been built or loaded. */
  indexBuilt: boolean;
  /** Number of vectors currently tracked by the index. */
  numElements: number;
  /** Configured vector dimension. */
  dimension: number;
  /** Configured distance metric name. */
  metric: string;
  /** Configured maximum number of connections per layer. */
  M: number;
  /** Configured construction-time candidate list size. */
  efConstruction: number;
  /** Configured search-time candidate list size. */
  efSearch: number;
  /** When the index was last built, or `null` if never. */
  lastBuildTime: number | null;
  /** Timing recorded for the most recent search, or `null` if none has run. */
  lastSearchTime: number | null;
  /** Number of searches performed. */
  totalSearches: number;
  /** Mean search time in milliseconds (0 when no searches have run). */
  avgSearchTimeMs: number;
}
75
+
76
/**
 * Approximate nearest-neighbour index backed by hnswlib-node.
 *
 * Vectors are read from a database table (`pattern_id`, `embedding` columns),
 * inserted into an HNSW graph under internal integer labels, and looked up
 * through two maps (`idToLabel` / `labelToId`). The index and its mappings can
 * be persisted to `config.indexPath` and `config.indexPath + '.mappings.json'`.
 */
export class HNSWIndex {
  /** Plain or schema-qualified SQL identifier; anything else is refused before interpolation. */
  private static readonly SQL_IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?$/;

  private db: Database;
  private config: HNSWConfig;
  private index: any | null = null;
  private vectorCache: Map<number, Float32Array> = new Map();
  private idToLabel: Map<number, number> = new Map();
  private labelToId: Map<number, number> = new Map();
  private nextLabel: number = 0;
  private indexBuilt: boolean = false;
  private updatesSinceLastBuild: number = 0;
  private totalSearches: number = 0;
  private totalSearchTime: number = 0;
  private lastBuildTime: number | null = null;
  private lastSearchTime: number | null = null;
  /** Table the current index was built from; post-filters query the same table. */
  private tableName: string = 'pattern_embeddings';

  /**
   * @param db - Database handle exposing `prepare(sql)` with `all()` / `get()`.
   * @param config - Overrides merged over the built-in defaults.
   *
   * When `persistIndex` is true and `indexPath` is set, a previously saved
   * index is loaded synchronously; a failed load leaves the index unbuilt.
   */
  constructor(db: Database, config?: Partial<HNSWConfig>) {
    this.db = db;
    this.config = {
      M: 16,
      efConstruction: 200,
      efSearch: 100,
      metric: 'cosine',
      dimension: 1536,
      maxElements: 100000,
      persistIndex: true,
      rebuildThreshold: 0.1, // Rebuild after 10% updates
      ...config,
    };

    // Try to load existing index
    if (this.config.persistIndex && this.config.indexPath) {
      this.loadIndex();
    }
  }

  /**
   * Build HNSW index from database vectors.
   *
   * @param tableName - Table to read `pattern_id` / `embedding` from. Must be a
   *   plain (optionally schema-qualified) identifier because it is interpolated
   *   into the SQL text.
   * @throws If the table name is unsafe, a row's embedding cannot be decoded or
   *   has the wrong dimension, or the database / hnswlib call fails.
   */
  async buildIndex(tableName: string = 'pattern_embeddings'): Promise<void> {
    const start = Date.now();
    console.log(`[HNSWIndex] Building HNSW index from ${tableName}...`);

    try {
      HNSWIndex.assertSafeIdentifier(tableName, 'table name');

      // Fetch all vectors from database
      const stmt = this.db.prepare(`
        SELECT pattern_id as id, embedding
        FROM ${tableName}
      `);

      const rows = stmt.all() as any[];

      if (rows.length === 0) {
        console.warn('[HNSWIndex] No vectors found in database');
        return;
      }

      // Create new HNSW index
      this.index = new HierarchicalNSW(this.config.metric, this.config.dimension);
      this.index.initIndex(
        Math.max(rows.length, this.config.maxElements),
        this.config.M,
        this.config.efConstruction
      );
      this.index.setEf(this.config.efSearch);

      // Clear mappings
      this.vectorCache.clear();
      this.idToLabel.clear();
      this.labelToId.clear();
      this.nextLabel = 0;
      this.tableName = tableName;

      // Add vectors to index
      console.log(`[HNSWIndex] Adding ${rows.length} vectors to index...`);

      for (const row of rows) {
        const id = row.id;
        const embedding = HNSWIndex.decodeEmbedding(row.embedding, this.config.dimension, id);

        // Add to index with label (convert Float32Array to number[])
        const label = this.nextLabel++;
        this.index.addPoint(Array.from(embedding), label);

        // Store mappings
        this.idToLabel.set(id, label);
        this.labelToId.set(label, id);
        this.vectorCache.set(id, embedding);
      }

      this.indexBuilt = true;
      this.updatesSinceLastBuild = 0;
      this.lastBuildTime = Date.now();

      const duration = (Date.now() - start) / 1000;
      console.log(`[HNSWIndex] ✅ Index built successfully in ${duration.toFixed(2)}s`);
      console.log(`[HNSWIndex] - Elements: ${rows.length}`);
      console.log(`[HNSWIndex] - Dimension: ${this.config.dimension}`);
      console.log(`[HNSWIndex] - M: ${this.config.M}`);
      console.log(`[HNSWIndex] - efConstruction: ${this.config.efConstruction}`);

      // Persist index if enabled
      if (this.config.persistIndex && this.config.indexPath) {
        await this.saveIndex();
      }
    } catch (error) {
      console.error('[HNSWIndex] Failed to build index:', error);
      this.indexBuilt = false;
      throw error;
    }
  }

  /**
   * Search HNSW index for k-nearest neighbors.
   *
   * @param query - Query vector; must have `config.dimension` components.
   * @param k - Maximum number of neighbours. Values below 1 yield an empty
   *   result; values above the number of live vectors are clamped.
   * @param options - `threshold` drops hits whose similarity is lower;
   *   `filters` keeps only hits whose table row matches every `column = value`.
   * @returns Hits in the order reported by the index.
   * @throws If the index is not built, the query dimension is wrong, or the
   *   underlying search / filter query fails.
   */
  async search(
    query: Float32Array,
    k: number,
    options?: {
      threshold?: number;
      filters?: Record<string, any>;
    }
  ): Promise<HNSWSearchResult[]> {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    if (query.length !== this.config.dimension) {
      throw new Error(
        `Query dimension mismatch: got ${query.length}, expected ${this.config.dimension}`
      );
    }

    // Asking for more neighbours than there are live vectors (or for none)
    // makes the native search throw, so clamp instead.
    const neighbours = Math.min(Math.floor(k), this.labelToId.size);
    if (!(neighbours > 0)) {
      return [];
    }

    const searchStart = Date.now();

    try {
      // Perform HNSW search (convert Float32Array to number[])
      const result = this.index.searchKnn(Array.from(query), neighbours);

      const searchTime = Date.now() - searchStart;
      this.lastSearchTime = searchTime;
      this.totalSearches++;
      this.totalSearchTime += searchTime;

      // Convert results to our format
      const results: HNSWSearchResult[] = [];

      for (let i = 0; i < result.neighbors.length; i++) {
        const label = result.neighbors[i];
        const distance = result.distances[i];
        const id = this.labelToId.get(label);

        if (id === undefined) {
          console.warn(`[HNSWIndex] Label ${label} not found in mapping`);
          continue;
        }

        // Convert distance to similarity based on metric
        const similarity = this.distanceToSimilarity(distance);

        // Apply threshold if specified
        if (options?.threshold !== undefined && similarity < options.threshold) {
          continue;
        }

        results.push({
          id,
          distance,
          similarity,
        });
      }

      // Apply filters if specified (post-filtering)
      if (options?.filters) {
        return this.applyFilters(results, options.filters);
      }

      return results;
    } catch (error) {
      console.error('[HNSWIndex] Search failed:', error);
      throw error;
    }
  }

  /**
   * Add a single vector to the index.
   *
   * Re-adding an existing `id` replaces it: the new vector gets a fresh label
   * and the previous label is retired so it can no longer be returned.
   *
   * @throws If the index is not built or the embedding dimension is wrong.
   */
  addVector(id: number, embedding: Float32Array): void {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    if (embedding.length !== this.config.dimension) {
      throw new Error(
        `Embedding dimension mismatch for id ${id}: got ${embedding.length}, expected ${this.config.dimension}`
      );
    }

    const previousLabel = this.idToLabel.get(id);

    this.ensureCapacity();

    const label = this.nextLabel++;
    this.index.addPoint(Array.from(embedding), label);

    // Only retire the old entry once the replacement is safely inserted.
    if (previousLabel !== undefined) {
      this.retireLabel(previousLabel);
    }

    this.idToLabel.set(id, label);
    this.labelToId.set(label, id);
    this.vectorCache.set(id, embedding);

    this.updatesSinceLastBuild++;

    // Check if rebuild is needed
    const totalElements = this.labelToId.size;
    const updatePercentage = this.updatesSinceLastBuild / totalElements;

    if (updatePercentage > this.config.rebuildThreshold) {
      console.log(`[HNSWIndex] Rebuild threshold reached (${(updatePercentage * 100).toFixed(1)}%)`);
    }
  }

  /**
   * Remove a vector from the index.
   *
   * The mappings are dropped and the label is marked deleted in the graph
   * (when the native index supports it) so it stops consuming result slots.
   * Unknown ids only produce a warning.
   *
   * @throws If the index is not built.
   */
  removeVector(id: number): void {
    if (!this.index || !this.indexBuilt) {
      throw new Error('Index not built. Call buildIndex() first.');
    }

    const label = this.idToLabel.get(id);
    if (label === undefined) {
      console.warn(`[HNSWIndex] ID ${id} not found in index`);
      return;
    }

    this.idToLabel.delete(id);
    this.retireLabel(label);
    this.vectorCache.delete(id);

    this.updatesSinceLastBuild++;
  }

  /**
   * Check if index needs rebuilding.
   *
   * @returns `true` when no index is built, or when updates since the last
   *   build exceed `rebuildThreshold` as a fraction of tracked elements.
   */
  needsRebuild(): boolean {
    if (!this.indexBuilt) return true;

    const totalElements = this.labelToId.size;
    if (totalElements === 0) return false;

    const updatePercentage = this.updatesSinceLastBuild / totalElements;
    return updatePercentage > this.config.rebuildThreshold;
  }

  /**
   * Save index and mappings to disk. Best-effort: failures are logged, not thrown.
   */
  private async saveIndex(): Promise<void> {
    if (!this.index || !this.config.indexPath) return;

    try {
      const indexDir = path.dirname(this.config.indexPath);
      if (!fs.existsSync(indexDir)) {
        fs.mkdirSync(indexDir, { recursive: true });
      }

      // Save HNSW index. Awaited because recent hnswlib-node releases return a
      // Promise here; awaiting a non-Promise result is harmless.
      await this.index.writeIndex(this.config.indexPath);

      // Save mappings
      const mappingsPath = this.config.indexPath + '.mappings.json';
      const mappings = {
        idToLabel: Array.from(this.idToLabel.entries()),
        labelToId: Array.from(this.labelToId.entries()),
        nextLabel: this.nextLabel,
        tableName: this.tableName,
        config: this.config,
      };

      fs.writeFileSync(mappingsPath, JSON.stringify(mappings, null, 2));

      console.log(`[HNSWIndex] Index saved to ${this.config.indexPath}`);
    } catch (error) {
      console.error('[HNSWIndex] Failed to save index:', error);
    }
  }

  /**
   * Load index and mappings from disk (synchronously; called from the constructor).
   *
   * The persisted index is ignored when its mappings file is missing or was
   * written for a different dimension / metric, because labels could not be
   * translated back to ids. Any failure leaves the index unbuilt.
   */
  private loadIndex(): void {
    const indexPath = this.config.indexPath;
    if (!indexPath || !fs.existsSync(indexPath)) {
      return;
    }

    try {
      console.log(`[HNSWIndex] Loading index from ${indexPath}...`);

      // Load mappings first: without them the graph is unusable.
      const mappingsPath = indexPath + '.mappings.json';
      if (!fs.existsSync(mappingsPath)) {
        console.warn(`[HNSWIndex] Mappings file ${mappingsPath} not found; ignoring persisted index`);
        return;
      }

      const mappingsData = JSON.parse(fs.readFileSync(mappingsPath, 'utf-8'));
      if (!Array.isArray(mappingsData?.idToLabel) || !Array.isArray(mappingsData?.labelToId)) {
        throw new Error(`Malformed mappings file: ${mappingsPath}`);
      }

      const savedConfig = mappingsData.config;
      if (
        savedConfig &&
        (savedConfig.dimension !== this.config.dimension || savedConfig.metric !== this.config.metric)
      ) {
        console.warn('[HNSWIndex] Persisted index was built with a different dimension/metric; ignoring it');
        return;
      }

      // Load HNSW index. Prefer the explicitly synchronous reader when the
      // binding offers one, so the index is fully loaded before it is marked ready.
      const loaded = new HierarchicalNSW(this.config.metric, this.config.dimension);
      if (typeof loaded.readIndexSync === 'function') {
        loaded.readIndexSync(indexPath);
      } else {
        loaded.readIndex(indexPath);
      }
      loaded.setEf(this.config.efSearch);

      const idToLabel = new Map<number, number>(mappingsData.idToLabel);
      const labelToId = new Map<number, number>(mappingsData.labelToId);

      // Never hand out a label that is already in use, even if the stored counter is stale.
      let nextLabel = 0;
      for (const label of labelToId.keys()) {
        if (label >= nextLabel) nextLabel = label + 1;
      }
      if (Number.isInteger(mappingsData.nextLabel) && mappingsData.nextLabel > nextLabel) {
        nextLabel = mappingsData.nextLabel;
      }

      this.index = loaded;
      this.idToLabel = idToLabel;
      this.labelToId = labelToId;
      this.nextLabel = nextLabel;
      if (
        typeof mappingsData.tableName === 'string' &&
        HNSWIndex.SQL_IDENTIFIER.test(mappingsData.tableName)
      ) {
        this.tableName = mappingsData.tableName;
      }

      this.indexBuilt = true;
      console.log(`[HNSWIndex] ✅ Index loaded successfully (${this.labelToId.size} elements)`);
    } catch (error) {
      console.warn('[HNSWIndex] Failed to load index:', error);
      this.index = null;
      this.indexBuilt = false;
    }
  }

  /**
   * Convert distance to similarity based on metric.
   *
   * - 'cosine' and 'ip': hnswlib reports `1 - (cosine | inner product)`, so the
   *   similarity is `1 - distance`.
   * - 'l2': exponential decay `e^(-distance)`, mapping into (0, 1].
   *   NOTE(review): hnswlib's 'l2' space reports the squared distance — confirm
   *   whether a square root is wanted here.
   */
  private distanceToSimilarity(distance: number): number {
    switch (this.config.metric) {
      case 'l2':
        return Math.exp(-distance);

      case 'cosine':
      case 'ip':
      default:
        return 1 - distance;
    }
  }

  /**
   * Apply post-filtering to search results.
   *
   * Keeps the hits whose row in the indexed table satisfies every
   * `column = value` pair. Column names are interpolated into SQL and must
   * therefore be plain identifiers; values are bound as parameters.
   *
   * @throws If a filter key is not a safe identifier or the query fails.
   */
  private applyFilters(
    results: HNSWSearchResult[],
    filters: Record<string, any>
  ): HNSWSearchResult[] {
    const entries = Object.entries(filters);

    // Nothing to filter on / nothing to filter: avoid building `... AND ` with no condition.
    if (entries.length === 0 || results.length === 0) {
      return results;
    }

    const conditions: string[] = [];
    const params: any[] = [];

    for (const [key, value] of entries) {
      HNSWIndex.assertSafeIdentifier(key, 'filter column');
      conditions.push(`${key} = ?`);
      params.push(value);
    }

    // The statement does not depend on the individual hit, so prepare it once.
    const stmt = this.db.prepare(`
      SELECT 1 FROM ${this.tableName}
      WHERE pattern_id = ? AND ${conditions.join(' AND ')}
    `);

    return results.filter((result) => Boolean(stmt.get(result.id, ...params)));
  }

  /**
   * Get index statistics.
   */
  getStats(): HNSWStats {
    return {
      enabled: this.indexBuilt,
      indexBuilt: this.indexBuilt,
      numElements: this.labelToId.size,
      dimension: this.config.dimension,
      metric: this.config.metric,
      M: this.config.M,
      efConstruction: this.config.efConstruction,
      efSearch: this.config.efSearch,
      lastBuildTime: this.lastBuildTime,
      lastSearchTime: this.lastSearchTime,
      totalSearches: this.totalSearches,
      avgSearchTimeMs: this.totalSearches > 0 ? this.totalSearchTime / this.totalSearches : 0,
    };
  }

  /**
   * Update efSearch parameter for search quality/speed tradeoff.
   *
   * The value is always recorded in the configuration so that an index built
   * or loaded later picks it up; it is applied immediately when an index exists.
   */
  setEfSearch(ef: number): void {
    this.config.efSearch = ef;
    if (this.index) {
      this.index.setEf(ef);
    }
    console.log(`[HNSWIndex] efSearch updated to ${ef}`);
  }

  /**
   * Clear index and free memory.
   */
  clear(): void {
    this.index = null;
    this.vectorCache.clear();
    this.idToLabel.clear();
    this.labelToId.clear();
    this.nextLabel = 0;
    this.indexBuilt = false;
    this.updatesSinceLastBuild = 0;
    console.log('[HNSWIndex] Index cleared');
  }

  /**
   * Check if index is built and ready.
   */
  isReady(): boolean {
    return this.indexBuilt && this.index !== null;
  }

  /** Throw unless `name` is a plain (optionally schema-qualified) SQL identifier. */
  private static assertSafeIdentifier(name: string, what: string): void {
    if (typeof name !== 'string' || !HNSWIndex.SQL_IDENTIFIER.test(name)) {
      throw new Error(`Unsafe ${what}: ${JSON.stringify(name)}`);
    }
  }

  /**
   * Copy a stored embedding blob into a standalone Float32Array.
   * Copying (instead of viewing the blob's memory) avoids the alignment error a
   * Float32Array view raises when the blob's byte offset is not a multiple of 4.
   */
  private static decodeEmbedding(raw: ArrayBufferView, dimension: number, id: unknown): Float32Array {
    if (!raw || !ArrayBuffer.isView(raw)) {
      throw new Error(`Embedding for id ${id} is not a binary blob`);
    }
    if (raw.byteLength % 4 !== 0) {
      throw new Error(`Embedding for id ${id} has ${raw.byteLength} bytes, not a multiple of 4`);
    }

    const embedding = new Float32Array(raw.byteLength / 4);
    new Uint8Array(embedding.buffer).set(new Uint8Array(raw.buffer, raw.byteOffset, raw.byteLength));

    if (embedding.length !== dimension) {
      throw new Error(
        `Embedding dimension mismatch for id ${id}: got ${embedding.length}, expected ${dimension}`
      );
    }
    return embedding;
  }

  /** Grow the native index when it is full, if the binding exposes the needed calls. */
  private ensureCapacity(): void {
    const index = this.index;
    if (
      typeof index?.getCurrentCount !== 'function' ||
      typeof index?.getMaxElements !== 'function' ||
      typeof index?.resizeIndex !== 'function'
    ) {
      return;
    }

    const capacity = index.getMaxElements();
    if (index.getCurrentCount() >= capacity) {
      index.resizeIndex(Math.max(capacity * 2, capacity + 1));
    }
  }

  /** Forget a label and, where supported, mark it deleted so searches skip it. */
  private retireLabel(label: number): void {
    this.labelToId.delete(label);

    if (typeof this.index?.markDelete !== 'function') {
      return;
    }
    try {
      this.index.markDelete(label);
    } catch (error) {
      // The mapping is already gone, so search() will skip the label anyway.
      console.warn(`[HNSWIndex] Could not mark label ${label} as deleted:`, error);
    }
  }
}
@@ -0,0 +1,187 @@
1
+ /**
2
+ * MMR (Maximal Marginal Relevance) Diversity Ranking
3
+ *
4
+ * Implements MMR algorithm to select diverse results that balance
5
+ * relevance to query with diversity from already-selected results.
6
+ *
7
+ * Formula: MMR = argmax over Di ∈ R\S of [λ × Sim(Di, Q) - (1-λ) × max over Dj ∈ S of Sim(Di, Dj)]
8
+ * (R = full candidate set; R\S = candidates not yet selected)
9
+ *
10
+ * Where:
11
+ * - Di = candidate document
12
+ * - Q = query
13
+ * - S = already selected documents
14
+ * - λ = balance parameter (0 = max diversity, 1 = max relevance)
15
+ */
16
+
17
/**
 * Tuning knobs for MMR selection. Every field is optional.
 */
export interface MMROptions {
  /** Balance between relevance and diversity (default: 0.5) */
  lambda?: number;
  /** Number of results to return (default: 10) */
  k?: number;
  /** Similarity metric */
  metric?: 'cosine' | 'euclidean' | 'dot';
}
22
+
23
/**
 * A search hit eligible for MMR re-ranking.
 */
export interface MMRCandidate {
  /** Identifier of the candidate. */
  id: number;
  /** Embedding vector of the candidate. */
  embedding: number[];
  /** Similarity to query */
  similarity: number;
  /** Additional data */
  [key: string]: any;
}
29
+
30
/**
 * Maximal Marginal Relevance (MMR) re-ranking.
 *
 * Greedily picks results that are relevant to the query while penalising
 * similarity to results that were already picked.
 */
export class MMRDiversityRanker {
  /**
   * Select diverse results using MMR algorithm.
   *
   * The first pick is the most relevant candidate; every following pick
   * maximises `λ × sim(candidate, query) − (1 − λ) × max sim(candidate, picked)`.
   * Ties go to the more relevant candidate (earlier input position on equal
   * relevance).
   *
   * @param candidates - All candidate results with embeddings. Not mutated.
   * @param queryEmbedding - Query vector; only used for candidates that do not
   *   already carry a `similarity`.
   * @param options - MMR configuration (`lambda` 0.5, `k` 10, `metric` 'cosine'
   *   by default).
   * @returns Up to `k` candidates (shallow copies with `similarity` filled in)
   *   in selection order. Empty when there are no candidates or `k` < 1.
   * @throws If two compared vectors differ in length or the metric is unknown.
   */
  static selectDiverse(
    candidates: MMRCandidate[],
    queryEmbedding: number[],
    options: MMROptions = {}
  ): MMRCandidate[] {
    const lambda = options.lambda ?? 0.5;
    const k = Math.floor(options.k ?? 10);
    const metric = options.metric ?? 'cosine';

    // `!(k > 0)` also covers NaN.
    if (candidates.length === 0 || !(k > 0)) {
      return [];
    }

    // Pair every candidate with its running "closest already-selected" similarity
    // so each round only has to compare against the newest pick.
    const ranked = candidates
      .map((c) => ({
        item: {
          ...c,
          similarity:
            c.similarity ??
            MMRDiversityRanker.calculateSimilarity(queryEmbedding, c.embedding, metric),
        } as MMRCandidate,
        closest: -Infinity,
      }))
      .sort((a, b) => b.item.similarity - a.item.similarity);

    // Select first item (highest relevance)
    const [first, ...remaining] = ranked;
    const selected: MMRCandidate[] = [first.item];

    const limit = Math.min(k, candidates.length);

    // Iteratively select items with highest MMR score
    while (selected.length < limit) {
      const latest = selected[selected.length - 1];
      let bestScore = -Infinity;
      let bestIdx = 0;

      for (let i = 0; i < remaining.length; i++) {
        const entry = remaining[i];

        entry.closest = Math.max(
          entry.closest,
          MMRDiversityRanker.calculateSimilarity(entry.item.embedding, latest.embedding, metric)
        );

        const mmrScore = lambda * entry.item.similarity - (1 - lambda) * entry.closest;

        if (mmrScore > bestScore) {
          bestScore = mmrScore;
          bestIdx = i;
        }
      }

      // Add item with highest MMR score
      selected.push(remaining.splice(bestIdx, 1)[0].item);
    }

    return selected;
  }

  /**
   * Calculate similarity between two vectors.
   *
   * - 'cosine': cosine of the angle; 0 when either vector has zero magnitude
   *   (instead of NaN from a 0/0 division).
   * - 'euclidean': `1 / (1 + distance)`, in (0, 1].
   * - 'dot': raw dot product.
   *
   * @throws If the vectors differ in length or the metric is unknown.
   */
  private static calculateSimilarity(
    vec1: number[],
    vec2: number[],
    metric: 'cosine' | 'euclidean' | 'dot'
  ): number {
    if (vec1.length !== vec2.length) {
      throw new Error(`Vector dimension mismatch: ${vec1.length} vs ${vec2.length}`);
    }

    switch (metric) {
      case 'cosine': {
        let dot = 0, mag1 = 0, mag2 = 0;
        for (let i = 0; i < vec1.length; i++) {
          dot += vec1[i] * vec2[i];
          mag1 += vec1[i] * vec1[i];
          mag2 += vec2[i] * vec2[i];
        }
        const denominator = Math.sqrt(mag1) * Math.sqrt(mag2);
        return denominator === 0 ? 0 : dot / denominator;
      }

      case 'euclidean': {
        let sum = 0;
        for (let i = 0; i < vec1.length; i++) {
          const diff = vec1[i] - vec2[i];
          sum += diff * diff;
        }
        return 1 / (1 + Math.sqrt(sum)); // Normalized to 0-1
      }

      case 'dot': {
        let dot = 0;
        for (let i = 0; i < vec1.length; i++) {
          dot += vec1[i] * vec2[i];
        }
        return dot;
      }

      default:
        throw new Error(`Unknown metric: ${metric}`);
    }
  }

  /**
   * Calculate diversity score for a set of results.
   *
   * The score is the mean of `1 − similarity` over all unordered pairs, for
   * every metric, so a higher value always means a more diverse set.
   *
   * @param results - Results to analyze
   * @param metric - Similarity metric
   * @returns Average pairwise dissimilarity (higher = more diverse); 1.0 when
   *   fewer than two results are given.
   */
  static calculateDiversityScore(
    results: MMRCandidate[],
    metric: 'cosine' | 'euclidean' | 'dot' = 'cosine'
  ): number {
    if (results.length < 2) {
      return 1.0; // Single result is maximally diverse
    }

    let totalDistance = 0;
    let comparisons = 0;

    for (let i = 0; i < results.length; i++) {
      for (let j = i + 1; j < results.length; j++) {
        const similarity = MMRDiversityRanker.calculateSimilarity(
          results[i].embedding,
          results[j].embedding,
          metric
        );
        // Convert similarity to distance
        totalDistance += 1 - similarity;
        comparisons++;
      }
    }

    return comparisons > 0 ? totalDistance / comparisons : 0;
  }
}