memory-lancedb-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,722 @@
1
+ /**
2
+ * Hybrid Retrieval System
3
+ * Combines vector search + BM25 full-text search with RRF fusion
4
+ */
5
+
6
+ import type { MemoryStore, MemorySearchResult } from "./store.js";
7
+ import type { Embedder } from "./embedder.js";
8
+ import { filterNoise } from "./noise-filter.js";
9
+
10
+ // ============================================================================
11
+ // Types & Configuration
12
+ // ============================================================================
13
+
14
/**
 * Tunable parameters for the retrieval pipeline.
 * NOTE(review): vectorWeight / bm25Weight are declared here but the current
 * fuseResults implementation uses a fixed +15% BM25 bonus instead — confirm
 * whether these weights should be wired in or removed.
 */
export interface RetrievalConfig {
  /** "hybrid" = vector + BM25 with fusion; "vector" = vector search only. */
  mode: "hybrid" | "vector";
  /** Intended weight of vector scores in fusion (currently not consulted — see note above). */
  vectorWeight: number;
  /** Intended weight of BM25 scores in fusion (currently not consulted — see note above). */
  bm25Weight: number;
  /** Minimum fused score a candidate needs to survive pre-rerank filtering. */
  minScore: number;
  /** Rerank strategy: cross-encoder API, lightweight cosine fallback, or none. */
  rerank: "cross-encoder" | "lightweight" | "none";
  /** Candidate pool fetched from each search before fusion (effective min: 2×limit). */
  candidatePoolSize: number;
  /** Recency boost half-life in days (default: 14). Set 0 to disable. */
  recencyHalfLifeDays: number;
  /** Max recency boost factor (default: 0.10) */
  recencyWeight: number;
  /** Filter noise from results (default: true) */
  filterNoise: boolean;
  /** Reranker API key (enables cross-encoder reranking) */
  rerankApiKey?: string;
  /** Reranker model (default: jina-reranker-v2-base-multilingual) */
  rerankModel?: string;
  /** Reranker API endpoint (default: https://api.jina.ai/v1/rerank). */
  rerankEndpoint?: string;
  /** Reranker provider format. Determines request/response shape and auth header.
   * - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score
   * - "siliconflow": same format as jina (alias, for clarity)
   * - "pinecone": Api-Key header, {text}[] documents, data[].score */
  rerankProvider?: "jina" | "siliconflow" | "pinecone";
  /**
   * Length normalization: penalize long entries that dominate via sheer keyword
   * density. Formula: score *= 1 / (1 + 0.5 * log2(max(charLen / anchor, 1))).
   * anchor = reference length in chars. Entries at or below anchor length are
   * unaffected; longer entries get penalized progressively.
   * Set 0 to disable. (default: 500)
   */
  lengthNormAnchor: number;
  /**
   * Hard cutoff after rerank: discard results below this score.
   * Applied after all scoring stages (rerank, recency, importance, length norm).
   * Higher = fewer but more relevant results. (default: 0.35)
   */
  hardMinScore: number;
  /**
   * Time decay half-life in days. Entries older than this lose score.
   * Different from recencyBoost (additive bonus for new entries):
   * this is a multiplicative penalty for old entries.
   * Formula: score *= 0.5 + 0.5 * exp(-ageDays / halfLife)
   * At halfLife days: ~0.68x. At 2*halfLife: ~0.57x. At 4*halfLife: ~0.51x.
   * Set 0 to disable. (default: 60)
   */
  timeDecayHalfLifeDays: number;
}
62
+
63
/** Parameters of a single retrieval request. */
export interface RetrievalContext {
  /** Natural-language search query. */
  query: string;
  /** Maximum number of results to return (clamped to [1, 20] in retrieve()). */
  limit: number;
  /** Optional scope identifiers restricting which memories are searched. */
  scopeFilter?: string[];
  /** Optional category; hits from other categories are filtered out. */
  category?: string;
}
69
+
70
/** A search hit annotated with per-stage scoring provenance. */
export interface RetrievalResult extends MemorySearchResult {
  sources: {
    /** Raw vector-search score and 1-based rank, when vector search matched. */
    vector?: { score: number; rank: number };
    /** Raw BM25 score and 1-based rank, when full-text search matched. */
    bm25?: { score: number; rank: number };
    /** Score after vector/BM25 fusion. */
    fused?: { score: number };
    /** Raw reranker score (cross-encoder API or cosine fallback). */
    reranked?: { score: number };
  };
}
78
+
79
+ // ============================================================================
80
+ // Default Configuration
81
+ // ============================================================================
82
+
83
+ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
84
+ mode: "hybrid",
85
+ vectorWeight: 0.7,
86
+ bm25Weight: 0.3,
87
+ minScore: 0.3,
88
+ rerank: "cross-encoder",
89
+ candidatePoolSize: 20,
90
+ recencyHalfLifeDays: 14,
91
+ recencyWeight: 0.10,
92
+ filterNoise: true,
93
+ rerankModel: "jina-reranker-v2-base-multilingual",
94
+ rerankEndpoint: "https://api.jina.ai/v1/rerank",
95
+ lengthNormAnchor: 500,
96
+ hardMinScore: 0.35,
97
+ timeDecayHalfLifeDays: 60,
98
+ };
99
+
100
+ // ============================================================================
101
+ // Utility Functions
102
+ // ============================================================================
103
+
104
+ function clampInt(value: number, min: number, max: number): number {
105
+ if (!Number.isFinite(value)) return min;
106
+ return Math.min(max, Math.max(min, Math.floor(value)));
107
+ }
108
+
109
+ function clamp01(value: number, fallback: number): number {
110
+ if (!Number.isFinite(value)) return Number.isFinite(fallback) ? fallback : 0;
111
+ return Math.min(1, Math.max(0, value));
112
+ }
113
+
114
+ // ============================================================================
115
+ // Rerank Provider Adapters
116
+ // ============================================================================
117
+
118
+ type RerankProvider = "jina" | "siliconflow" | "pinecone";
119
+
120
+ interface RerankItem { index: number; score: number }
121
+
122
+ /** Build provider-specific request headers and body */
123
+ function buildRerankRequest(
124
+ provider: RerankProvider,
125
+ apiKey: string,
126
+ model: string,
127
+ query: string,
128
+ documents: string[],
129
+ topN: number,
130
+ ): { headers: Record<string, string>; body: Record<string, unknown> } {
131
+ switch (provider) {
132
+ case "pinecone":
133
+ return {
134
+ headers: {
135
+ "Content-Type": "application/json",
136
+ "Api-Key": apiKey,
137
+ "X-Pinecone-API-Version": "2024-10",
138
+ },
139
+ body: {
140
+ model,
141
+ query,
142
+ documents: documents.map(text => ({ text })),
143
+ top_n: topN,
144
+ rank_fields: ["text"],
145
+ },
146
+ };
147
+ case "siliconflow":
148
+ case "jina":
149
+ default:
150
+ return {
151
+ headers: {
152
+ "Content-Type": "application/json",
153
+ "Authorization": `Bearer ${apiKey}`,
154
+ },
155
+ body: {
156
+ model,
157
+ query,
158
+ documents,
159
+ top_n: topN,
160
+ },
161
+ };
162
+ }
163
+ }
164
+
165
+ /** Parse provider-specific response into unified format */
166
+ function parseRerankResponse(
167
+ provider: RerankProvider,
168
+ data: Record<string, unknown>,
169
+ ): RerankItem[] | null {
170
+ switch (provider) {
171
+ case "pinecone": {
172
+ // Pinecone: { data: [{ index, score, document }] }
173
+ const items = data.data as Array<{ index: number; score: number }> | undefined;
174
+ if (!Array.isArray(items)) return null;
175
+ return items.map(r => ({ index: r.index, score: r.score }));
176
+ }
177
+ case "siliconflow":
178
+ case "jina":
179
+ default: {
180
+ // Jina / SiliconFlow: { results: [{ index, relevance_score }] }
181
+ const items = data.results as Array<{ index: number; relevance_score: number }> | undefined;
182
+ if (!Array.isArray(items)) return null;
183
+ return items.map(r => ({ index: r.index, score: r.relevance_score }));
184
+ }
185
+ }
186
+ }
187
+
188
+ // Cosine similarity for reranking fallback
189
+ function cosineSimilarity(a: number[], b: number[]): number {
190
+ if (a.length !== b.length) {
191
+ throw new Error("Vector dimensions must match for cosine similarity");
192
+ }
193
+
194
+ let dotProduct = 0;
195
+ let normA = 0;
196
+ let normB = 0;
197
+
198
+ for (let i = 0; i < a.length; i++) {
199
+ dotProduct += a[i] * b[i];
200
+ normA += a[i] * a[i];
201
+ normB += b[i] * b[i];
202
+ }
203
+
204
+ const norm = Math.sqrt(normA) * Math.sqrt(normB);
205
+ return norm === 0 ? 0 : dotProduct / norm;
206
+ }
207
+
208
+ // ============================================================================
209
+ // Memory Retriever
210
+ // ============================================================================
211
+
212
+ export class MemoryRetriever {
213
+ constructor(
214
+ private store: MemoryStore,
215
+ private embedder: Embedder,
216
+ private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG
217
+ ) {}
218
+
219
+ async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
220
+ const { query, limit, scopeFilter, category } = context;
221
+ const safeLimit = clampInt(limit, 1, 20);
222
+
223
+ // For vector-only mode, use legacy behavior
224
+ if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
225
+ return this.vectorOnlyRetrieval(query, safeLimit, scopeFilter, category);
226
+ }
227
+
228
+ // Hybrid retrieval with vector + BM25 + RRF fusion
229
+ return this.hybridRetrieval(query, safeLimit, scopeFilter, category);
230
+ }
231
+
232
+ private async vectorOnlyRetrieval(
233
+ query: string,
234
+ limit: number,
235
+ scopeFilter?: string[],
236
+ category?: string
237
+ ): Promise<RetrievalResult[]> {
238
+ const queryVector = await this.embedder.embedQuery(query);
239
+ const results = await this.store.vectorSearch(queryVector, limit, this.config.minScore, scopeFilter);
240
+
241
+ // Filter by category if specified
242
+ const filtered = category
243
+ ? results.filter(r => r.entry.category === category)
244
+ : results;
245
+
246
+ const mapped = filtered.map((result, index) => ({
247
+ ...result,
248
+ sources: {
249
+ vector: { score: result.score, rank: index + 1 },
250
+ },
251
+ } as RetrievalResult));
252
+
253
+ const boosted = this.applyRecencyBoost(mapped);
254
+ const weighted = this.applyImportanceWeight(boosted);
255
+ const lengthNormalized = this.applyLengthNormalization(weighted);
256
+ const timeDecayed = this.applyTimeDecay(lengthNormalized);
257
+ const hardFiltered = timeDecayed.filter(r => r.score >= this.config.hardMinScore);
258
+ const denoised = this.config.filterNoise
259
+ ? filterNoise(hardFiltered, r => r.entry.text)
260
+ : hardFiltered;
261
+
262
+ // MMR deduplication: avoid top-k filled with near-identical memories
263
+ const deduplicated = this.applyMMRDiversity(denoised);
264
+
265
+ return deduplicated.slice(0, limit);
266
+ }
267
+
268
+ private async hybridRetrieval(
269
+ query: string,
270
+ limit: number,
271
+ scopeFilter?: string[],
272
+ category?: string
273
+ ): Promise<RetrievalResult[]> {
274
+ const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
275
+
276
+ // Compute query embedding once, reuse for vector search + reranking
277
+ const queryVector = await this.embedder.embedQuery(query);
278
+
279
+ // Run vector and BM25 searches in parallel
280
+ const [vectorResults, bm25Results] = await Promise.all([
281
+ this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category),
282
+ this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
283
+ ]);
284
+
285
+ // Fuse results using RRF
286
+ const fusedResults = this.fuseResults(vectorResults, bm25Results);
287
+
288
+ // Apply minimum score threshold
289
+ const filtered = fusedResults.filter(r => r.score >= this.config.minScore);
290
+
291
+ // Rerank if enabled
292
+ const reranked = this.config.rerank !== "none"
293
+ ? await this.rerankResults(query, queryVector, filtered.slice(0, limit * 2))
294
+ : filtered;
295
+
296
+ // Apply temporal re-ranking (recency boost)
297
+ const temporalReranked = this.applyRecencyBoost(reranked);
298
+
299
+ // Apply importance weighting
300
+ const importanceWeighted = this.applyImportanceWeight(temporalReranked);
301
+
302
+ // Apply length normalization (penalize long entries dominating via keyword density)
303
+ const lengthNormalized = this.applyLengthNormalization(importanceWeighted);
304
+
305
+ // Apply time decay (penalize stale entries)
306
+ const timeDecayed = this.applyTimeDecay(lengthNormalized);
307
+
308
+ // Hard minimum score cutoff (post all scoring stages)
309
+ const hardFiltered = timeDecayed.filter(r => r.score >= this.config.hardMinScore);
310
+
311
+ // Filter noise
312
+ const denoised = this.config.filterNoise
313
+ ? filterNoise(hardFiltered, r => r.entry.text)
314
+ : hardFiltered;
315
+
316
+ // MMR deduplication: avoid top-k filled with near-identical memories
317
+ const deduplicated = this.applyMMRDiversity(denoised);
318
+
319
+ return deduplicated.slice(0, limit);
320
+ }
321
+
322
+ private async runVectorSearch(
323
+ queryVector: number[],
324
+ limit: number,
325
+ scopeFilter?: string[],
326
+ category?: string
327
+ ): Promise<Array<MemorySearchResult & { rank: number }>> {
328
+ const results = await this.store.vectorSearch(queryVector, limit, 0.1, scopeFilter);
329
+
330
+ // Filter by category if specified
331
+ const filtered = category
332
+ ? results.filter(r => r.entry.category === category)
333
+ : results;
334
+
335
+ return filtered.map((result, index) => ({
336
+ ...result,
337
+ rank: index + 1,
338
+ }));
339
+ }
340
+
341
+ private async runBM25Search(
342
+ query: string,
343
+ limit: number,
344
+ scopeFilter?: string[],
345
+ category?: string
346
+ ): Promise<Array<MemorySearchResult & { rank: number }>> {
347
+ const results = await this.store.bm25Search(query, limit, scopeFilter);
348
+
349
+ // Filter by category if specified
350
+ const filtered = category
351
+ ? results.filter(r => r.entry.category === category)
352
+ : results;
353
+
354
+ return filtered.map((result, index) => ({
355
+ ...result,
356
+ rank: index + 1,
357
+ }));
358
+ }
359
+
360
+ private fuseResults(
361
+ vectorResults: Array<MemorySearchResult & { rank: number }>,
362
+ bm25Results: Array<MemorySearchResult & { rank: number }>
363
+ ): RetrievalResult[] {
364
+ // Create maps for quick lookup
365
+ const vectorMap = new Map<string, MemorySearchResult & { rank: number }>();
366
+ const bm25Map = new Map<string, MemorySearchResult & { rank: number }>();
367
+
368
+ vectorResults.forEach(result => {
369
+ vectorMap.set(result.entry.id, result);
370
+ });
371
+
372
+ bm25Results.forEach(result => {
373
+ bm25Map.set(result.entry.id, result);
374
+ });
375
+
376
+ // Get all unique document IDs
377
+ const allIds = new Set([...vectorMap.keys(), ...bm25Map.keys()]);
378
+
379
+ // Calculate RRF scores
380
+ const fusedResults: RetrievalResult[] = [];
381
+
382
+ for (const id of allIds) {
383
+ const vectorResult = vectorMap.get(id);
384
+ const bm25Result = bm25Map.get(id);
385
+
386
+ // Use the result with more complete data (prefer vector result if both exist)
387
+ const baseResult = vectorResult || bm25Result!;
388
+
389
+ // Use vector similarity as the base score.
390
+ // BM25 hit acts as a bonus (keyword match confirms relevance).
391
+ const vectorScore = vectorResult ? vectorResult.score : 0;
392
+ const bm25Hit = bm25Result ? 1 : 0;
393
+
394
+ // Base = vector score; BM25 hit boosts by up to 15%
395
+ // BM25-only results use their normalized score (floor 0.5) so exact keyword
396
+ // matches aren't buried — e.g. searching "JINA_API_KEY" should surface even
397
+ // when vector distance is large.
398
+ const fusedScore = vectorResult
399
+ ? clamp01(vectorScore + (bm25Hit * 0.15 * vectorScore), 0.1)
400
+ : clamp01(Math.max(bm25Result!.score, 0.5), 0.1);
401
+
402
+ fusedResults.push({
403
+ entry: baseResult.entry,
404
+ score: fusedScore,
405
+ sources: {
406
+ vector: vectorResult ? { score: vectorResult.score, rank: vectorResult.rank } : undefined,
407
+ bm25: bm25Result ? { score: bm25Result.score, rank: bm25Result.rank } : undefined,
408
+ fused: { score: fusedScore },
409
+ },
410
+ });
411
+ }
412
+
413
+ // Sort by fused score descending
414
+ return fusedResults.sort((a, b) => b.score - a.score);
415
+ }
416
+
417
+ /**
418
+ * Rerank results using cross-encoder API (Jina, Pinecone, or compatible).
419
+ * Falls back to cosine similarity if API is unavailable or fails.
420
+ */
421
+ private async rerankResults(query: string, queryVector: number[], results: RetrievalResult[]): Promise<RetrievalResult[]> {
422
+ if (results.length === 0) {
423
+ return results;
424
+ }
425
+
426
+ // Try cross-encoder rerank via configured provider API
427
+ if (this.config.rerank === "cross-encoder" && this.config.rerankApiKey) {
428
+ try {
429
+ const provider = this.config.rerankProvider || "jina";
430
+ const model = this.config.rerankModel || "jina-reranker-v2-base-multilingual";
431
+ const endpoint = this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
432
+ const documents = results.map(r => r.entry.text);
433
+
434
+ // Build provider-specific request
435
+ const { headers, body } = buildRerankRequest(provider, this.config.rerankApiKey, model, query, documents, results.length);
436
+
437
+ // Timeout: 5 seconds to prevent stalling retrieval pipeline
438
+ const controller = new AbortController();
439
+ const timeout = setTimeout(() => controller.abort(), 5000);
440
+
441
+ const response = await fetch(endpoint, {
442
+ method: "POST",
443
+ headers,
444
+ body: JSON.stringify(body),
445
+ signal: controller.signal,
446
+ });
447
+
448
+ clearTimeout(timeout);
449
+
450
+ if (response.ok) {
451
+ const data = await response.json() as Record<string, unknown>;
452
+
453
+ // Parse provider-specific response into unified format
454
+ const parsed = parseRerankResponse(provider, data);
455
+
456
+ if (!parsed) {
457
+ console.warn("Rerank API: invalid response shape, falling back to cosine");
458
+ } else {
459
+ // Build a Set of returned indices to identify unreturned candidates
460
+ const returnedIndices = new Set(parsed.map(r => r.index));
461
+
462
+ const reranked = parsed
463
+ .filter(item => item.index >= 0 && item.index < results.length)
464
+ .map(item => {
465
+ const original = results[item.index];
466
+ // Blend: 60% cross-encoder score + 40% original fused score
467
+ const blendedScore = clamp01(
468
+ item.score * 0.6 + original.score * 0.4,
469
+ original.score * 0.5,
470
+ );
471
+ return {
472
+ ...original,
473
+ score: blendedScore,
474
+ sources: {
475
+ ...original.sources,
476
+ reranked: { score: item.score },
477
+ },
478
+ };
479
+ });
480
+
481
+ // Keep unreturned candidates with their original scores (slightly penalized)
482
+ const unreturned = results
483
+ .filter((_, idx) => !returnedIndices.has(idx))
484
+ .map(r => ({ ...r, score: r.score * 0.8 }));
485
+
486
+ return [...reranked, ...unreturned].sort((a, b) => b.score - a.score);
487
+ }
488
+ } else {
489
+ const errText = await response.text().catch(() => "");
490
+ console.warn(`Rerank API returned ${response.status}: ${errText.slice(0, 200)}, falling back to cosine`);
491
+ }
492
+ } catch (error) {
493
+ if (error instanceof Error && error.name === "AbortError") {
494
+ console.warn("Rerank API timed out (5s), falling back to cosine");
495
+ } else {
496
+ console.warn("Rerank API failed, falling back to cosine:", error);
497
+ }
498
+ }
499
+ }
500
+
501
+ // Fallback: lightweight cosine similarity rerank
502
+ try {
503
+ const reranked = results.map(result => {
504
+ const cosineScore = cosineSimilarity(queryVector, result.entry.vector);
505
+ const combinedScore = (result.score * 0.7) + (cosineScore * 0.3);
506
+
507
+ return {
508
+ ...result,
509
+ score: clamp01(combinedScore, result.score),
510
+ sources: {
511
+ ...result.sources,
512
+ reranked: { score: cosineScore },
513
+ },
514
+ };
515
+ });
516
+
517
+ return reranked.sort((a, b) => b.score - a.score);
518
+ } catch (error) {
519
+ console.warn("Reranking failed, returning original results:", error);
520
+ return results;
521
+ }
522
+ }
523
+
524
+ /**
525
+ * Apply recency boost: newer memories get a small score bonus.
526
+ * This ensures corrections/updates naturally outrank older entries
527
+ * when semantic similarity is close.
528
+ * Formula: boost = exp(-ageDays / halfLife) * weight
529
+ */
530
+ private applyRecencyBoost(results: RetrievalResult[]): RetrievalResult[] {
531
+ const { recencyHalfLifeDays, recencyWeight } = this.config;
532
+ if (!recencyHalfLifeDays || recencyHalfLifeDays <= 0 || !recencyWeight) {
533
+ return results;
534
+ }
535
+
536
+ const now = Date.now();
537
+ const boosted = results.map(r => {
538
+ const ts = (r.entry.timestamp && r.entry.timestamp > 0) ? r.entry.timestamp : now;
539
+ const ageDays = (now - ts) / 86_400_000;
540
+ const boost = Math.exp(-ageDays / recencyHalfLifeDays) * recencyWeight;
541
+ return {
542
+ ...r,
543
+ score: clamp01(r.score + boost, r.score),
544
+ };
545
+ });
546
+
547
+ return boosted.sort((a, b) => b.score - a.score);
548
+ }
549
+
550
+ /**
551
+ * Apply importance weighting: memories with higher importance get a score boost.
552
+ * This ensures critical memories (importance=1.0) outrank casual ones (importance=0.5)
553
+ * when semantic similarity is close.
554
+ * Formula: score *= (baseWeight + (1 - baseWeight) * importance)
555
+ * With baseWeight=0.7: importance=1.0 → ×1.0, importance=0.5 → ×0.85, importance=0.0 → ×0.7
556
+ */
557
+ private applyImportanceWeight(results: RetrievalResult[]): RetrievalResult[] {
558
+ const baseWeight = 0.7;
559
+ const weighted = results.map(r => {
560
+ const importance = r.entry.importance ?? 0.7;
561
+ const factor = baseWeight + (1 - baseWeight) * importance;
562
+ return {
563
+ ...r,
564
+ score: clamp01(r.score * factor, r.score * baseWeight),
565
+ };
566
+ });
567
+ return weighted.sort((a, b) => b.score - a.score);
568
+ }
569
+
570
+ /**
571
+ * Length normalization: penalize long entries that dominate search results
572
+ * via sheer keyword density and broad semantic coverage.
573
+ * Short, focused entries (< anchor) get a slight boost.
574
+ * Long, sprawling entries (> anchor) get penalized.
575
+ * Formula: score *= 1 / (1 + log2(charLen / anchor))
576
+ */
577
+ private applyLengthNormalization(results: RetrievalResult[]): RetrievalResult[] {
578
+ const anchor = this.config.lengthNormAnchor;
579
+ if (!anchor || anchor <= 0) return results;
580
+
581
+ const normalized = results.map(r => {
582
+ const charLen = r.entry.text.length;
583
+ const ratio = charLen / anchor;
584
+ // No penalty for entries at or below anchor length.
585
+ // Gentle logarithmic decay for longer entries:
586
+ // anchor (500) → 1.0, 800 → 0.75, 1000 → 0.67, 1500 → 0.56, 2000 → 0.50
587
+ // This prevents long, keyword-rich entries from dominating top-k
588
+ // while keeping their scores reasonable.
589
+ const logRatio = Math.log2(Math.max(ratio, 1)); // no boost for short entries
590
+ const factor = 1 / (1 + 0.5 * logRatio);
591
+ return {
592
+ ...r,
593
+ score: clamp01(r.score * factor, r.score * 0.3),
594
+ };
595
+ });
596
+
597
+ return normalized.sort((a, b) => b.score - a.score);
598
+ }
599
+
600
+ /**
601
+ * Time decay: multiplicative penalty for old entries.
602
+ * Unlike recencyBoost (additive bonus for new entries), this actively
603
+ * penalizes stale information so recent knowledge wins ties.
604
+ * Formula: score *= 0.5 + 0.5 * exp(-ageDays / halfLife)
605
+ * At 0 days: 1.0x (no penalty)
606
+ * At halfLife: ~0.68x
607
+ * At 2*halfLife: ~0.59x
608
+ * Floor at 0.5x (never penalize more than half)
609
+ */
610
+ private applyTimeDecay(results: RetrievalResult[]): RetrievalResult[] {
611
+ const halfLife = this.config.timeDecayHalfLifeDays;
612
+ if (!halfLife || halfLife <= 0) return results;
613
+
614
+ const now = Date.now();
615
+ const decayed = results.map(r => {
616
+ const ts = (r.entry.timestamp && r.entry.timestamp > 0) ? r.entry.timestamp : now;
617
+ const ageDays = (now - ts) / 86_400_000;
618
+ // floor at 0.5: even very old entries keep at least 50% of their score
619
+ const factor = 0.5 + 0.5 * Math.exp(-ageDays / halfLife);
620
+ return {
621
+ ...r,
622
+ score: clamp01(r.score * factor, r.score * 0.5),
623
+ };
624
+ });
625
+
626
+ return decayed.sort((a, b) => b.score - a.score);
627
+ }
628
+
629
+ /**
630
+ * MMR-inspired diversity filter: greedily select results that are both
631
+ * relevant (high score) and diverse (low similarity to already-selected).
632
+ *
633
+ * Uses cosine similarity between memory vectors. If two memories have
634
+ * cosine similarity > threshold (default 0.92), the lower-scored one
635
+ * is demoted to the end rather than removed entirely.
636
+ *
637
+ * This prevents top-k from being filled with near-identical entries
638
+ * (e.g. 3 similar "SVG style" memories) while keeping them available
639
+ * if the pool is small.
640
+ */
641
+ private applyMMRDiversity(results: RetrievalResult[], similarityThreshold = 0.85): RetrievalResult[] {
642
+ if (results.length <= 1) return results;
643
+
644
+ const selected: RetrievalResult[] = [];
645
+ const deferred: RetrievalResult[] = [];
646
+
647
+ for (const candidate of results) {
648
+ // Check if this candidate is too similar to any already-selected result
649
+ const tooSimilar = selected.some(s => {
650
+ // Both must have vectors to compare.
651
+ // LanceDB returns Arrow Vector objects (not plain arrays),
652
+ // so use .length directly and Array.from() for conversion.
653
+ const sVec = s.entry.vector;
654
+ const cVec = candidate.entry.vector;
655
+ if (!sVec?.length || !cVec?.length) return false;
656
+ const sArr = Array.from(sVec as Iterable<number>);
657
+ const cArr = Array.from(cVec as Iterable<number>);
658
+ const sim = cosineSimilarity(sArr, cArr);
659
+ return sim > similarityThreshold;
660
+ });
661
+
662
+ if (tooSimilar) {
663
+ deferred.push(candidate);
664
+ } else {
665
+ selected.push(candidate);
666
+ }
667
+ }
668
+ // Append deferred results at the end (available but deprioritized)
669
+ return [...selected, ...deferred];
670
+ }
671
+
672
+ // Update configuration
673
+ updateConfig(newConfig: Partial<RetrievalConfig>): void {
674
+ this.config = { ...this.config, ...newConfig };
675
+ }
676
+
677
+ // Get current configuration
678
+ getConfig(): RetrievalConfig {
679
+ return { ...this.config };
680
+ }
681
+
682
+ // Test retrieval system
683
+ async test(query = "test query"): Promise<{
684
+ success: boolean;
685
+ mode: string;
686
+ hasFtsSupport: boolean;
687
+ error?: string;
688
+ }> {
689
+ try {
690
+ const results = await this.retrieve({
691
+ query,
692
+ limit: 1,
693
+ });
694
+
695
+ return {
696
+ success: true,
697
+ mode: this.config.mode,
698
+ hasFtsSupport: this.store.hasFtsSupport,
699
+ };
700
+ } catch (error) {
701
+ return {
702
+ success: false,
703
+ mode: this.config.mode,
704
+ hasFtsSupport: this.store.hasFtsSupport,
705
+ error: error instanceof Error ? error.message : String(error),
706
+ };
707
+ }
708
+ }
709
+ }
710
+
711
+ // ============================================================================
712
+ // Factory Function
713
+ // ============================================================================
714
+
715
+ export function createRetriever(
716
+ store: MemoryStore,
717
+ embedder: Embedder,
718
+ config?: Partial<RetrievalConfig>
719
+ ): MemoryRetriever {
720
+ const fullConfig = { ...DEFAULT_RETRIEVAL_CONFIG, ...config };
721
+ return new MemoryRetriever(store, embedder, fullConfig);
722
+ }