memory-lancedb-pro 1.0.25 → 1.0.26

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/src/retriever.ts CHANGED
@@ -6,6 +6,11 @@
6
6
  import type { MemoryStore, MemorySearchResult } from "./store.js";
7
7
  import type { Embedder } from "./embedder.js";
8
8
  import { filterNoise } from "./noise-filter.js";
9
+ import {
10
+ AccessTracker,
11
+ parseAccessMetadata,
12
+ computeEffectiveHalfLife,
13
+ } from "./access-tracker.js";
9
14
 
10
15
  // ============================================================================
11
16
  // Types & Configuration
@@ -59,6 +64,12 @@ export interface RetrievalConfig {
59
64
  * Set 0 to disable. (default: 60)
60
65
  */
61
66
  timeDecayHalfLifeDays: number;
67
+ /** Access reinforcement factor for time decay half-life extension.
68
+ * Higher = stronger reinforcement. 0 to disable. (default: 0.5) */
69
+ reinforcementFactor: number;
70
+ /** Maximum half-life multiplier from access reinforcement.
71
+ * Prevents frequently accessed memories from becoming immortal. (default: 3) */
72
+ maxHalfLifeMultiplier: number;
62
73
  }
63
74
 
64
75
  export interface RetrievalContext {
@@ -66,6 +77,8 @@ export interface RetrievalContext {
66
77
  limit: number;
67
78
  scopeFilter?: string[];
68
79
  category?: string;
80
+ /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated. */
81
+ source?: "manual" | "auto-recall";
69
82
  }
70
83
 
71
84
  export interface RetrievalResult extends MemorySearchResult {
@@ -89,13 +102,15 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
89
102
  rerank: "cross-encoder",
90
103
  candidatePoolSize: 20,
91
104
  recencyHalfLifeDays: 14,
92
- recencyWeight: 0.10,
105
+ recencyWeight: 0.1,
93
106
  filterNoise: true,
94
107
  rerankModel: "jina-reranker-v3",
95
108
  rerankEndpoint: "https://api.jina.ai/v1/rerank",
96
109
  lengthNormAnchor: 500,
97
110
  hardMinScore: 0.35,
98
111
  timeDecayHalfLifeDays: 60,
112
+ reinforcementFactor: 0.5,
113
+ maxHalfLifeMultiplier: 3,
99
114
  };
100
115
 
101
116
  // ============================================================================
@@ -118,7 +133,10 @@ function clamp01(value: number, fallback: number): number {
118
133
 
119
134
  type RerankProvider = "jina" | "siliconflow" | "voyage" | "pinecone";
120
135
 
121
- interface RerankItem { index: number; score: number }
136
+ interface RerankItem {
137
+ index: number;
138
+ score: number;
139
+ }
122
140
 
123
141
  /** Build provider-specific request headers and body */
124
142
  function buildRerankRequest(
@@ -140,7 +158,7 @@ function buildRerankRequest(
140
158
  body: {
141
159
  model,
142
160
  query,
143
- documents: documents.map(text => ({ text })),
161
+ documents: documents.map((text) => ({ text })),
144
162
  top_n: topN,
145
163
  rank_fields: ["text"],
146
164
  },
@@ -149,7 +167,7 @@ function buildRerankRequest(
149
167
  return {
150
168
  headers: {
151
169
  "Content-Type": "application/json",
152
- "Authorization": `Bearer ${apiKey}`,
170
+ Authorization: `Bearer ${apiKey}`,
153
171
  },
154
172
  body: {
155
173
  model,
@@ -165,7 +183,7 @@ function buildRerankRequest(
165
183
  return {
166
184
  headers: {
167
185
  "Content-Type": "application/json",
168
- "Authorization": `Bearer ${apiKey}`,
186
+ Authorization: `Bearer ${apiKey}`,
169
187
  },
170
188
  body: {
171
189
  model,
@@ -189,7 +207,8 @@ function parseRerankResponse(
189
207
  if (!Array.isArray(items)) return null;
190
208
  const parsed: RerankItem[] = [];
191
209
  for (const raw of items as Array<Record<string, unknown>>) {
192
- const index = typeof raw?.index === "number" ? raw.index : Number(raw?.index);
210
+ const index =
211
+ typeof raw?.index === "number" ? raw.index : Number(raw?.index);
193
212
  if (!Number.isFinite(index)) continue;
194
213
  let score: number | null = null;
195
214
  for (const key of scoreKeys) {
@@ -261,53 +280,85 @@ function cosineSimilarity(a: number[], b: number[]): number {
261
280
  // ============================================================================
262
281
 
263
282
  export class MemoryRetriever {
283
+ private accessTracker: AccessTracker | null = null;
284
+
264
285
  constructor(
265
286
  private store: MemoryStore,
266
287
  private embedder: Embedder,
267
- private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG
288
+ private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG,
268
289
  ) {}
269
290
 
291
+ setAccessTracker(tracker: AccessTracker): void {
292
+ this.accessTracker = tracker;
293
+ }
294
+
270
295
  async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
271
- const { query, limit, scopeFilter, category } = context;
296
+ const { query, limit, scopeFilter, category, source } = context;
272
297
  const safeLimit = clampInt(limit, 1, 20);
273
298
 
274
- // For vector-only mode, use legacy behavior
299
+ let results: RetrievalResult[];
275
300
  if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
276
- return this.vectorOnlyRetrieval(query, safeLimit, scopeFilter, category);
301
+ results = await this.vectorOnlyRetrieval(
302
+ query,
303
+ safeLimit,
304
+ scopeFilter,
305
+ category,
306
+ );
307
+ } else {
308
+ results = await this.hybridRetrieval(
309
+ query,
310
+ safeLimit,
311
+ scopeFilter,
312
+ category,
313
+ );
314
+ }
315
+
316
+ // Record access for reinforcement (manual recall only)
317
+ if (this.accessTracker && source === "manual" && results.length > 0) {
318
+ this.accessTracker.recordAccess(results.map((r) => r.entry.id));
277
319
  }
278
320
 
279
- // Hybrid retrieval with vector + BM25 + RRF fusion
280
- return this.hybridRetrieval(query, safeLimit, scopeFilter, category);
321
+ return results;
281
322
  }
282
323
 
283
324
  private async vectorOnlyRetrieval(
284
325
  query: string,
285
326
  limit: number,
286
327
  scopeFilter?: string[],
287
- category?: string
328
+ category?: string,
288
329
  ): Promise<RetrievalResult[]> {
289
330
  const queryVector = await this.embedder.embedQuery(query);
290
- const results = await this.store.vectorSearch(queryVector, limit, this.config.minScore, scopeFilter);
331
+ const results = await this.store.vectorSearch(
332
+ queryVector,
333
+ limit,
334
+ this.config.minScore,
335
+ scopeFilter,
336
+ );
291
337
 
292
338
  // Filter by category if specified
293
339
  const filtered = category
294
- ? results.filter(r => r.entry.category === category)
340
+ ? results.filter((r) => r.entry.category === category)
295
341
  : results;
296
342
 
297
- const mapped = filtered.map((result, index) => ({
298
- ...result,
299
- sources: {
300
- vector: { score: result.score, rank: index + 1 },
301
- },
302
- } as RetrievalResult));
343
+ const mapped = filtered.map(
344
+ (result, index) =>
345
+ ({
346
+ ...result,
347
+ sources: {
348
+ vector: { score: result.score, rank: index + 1 },
349
+ },
350
+ }) as RetrievalResult,
351
+ );
303
352
 
304
353
  const boosted = this.applyRecencyBoost(mapped);
305
354
  const weighted = this.applyImportanceWeight(boosted);
306
355
  const lengthNormalized = this.applyLengthNormalization(weighted);
307
356
  const timeDecayed = this.applyTimeDecay(lengthNormalized);
308
- const hardFiltered = timeDecayed.filter(r => r.score >= this.config.hardMinScore);
357
+ const hardFiltered = timeDecayed.filter(
358
+ (r) => r.score >= this.config.hardMinScore,
359
+ );
309
360
  const denoised = this.config.filterNoise
310
- ? filterNoise(hardFiltered, r => r.entry.text)
361
+ ? filterNoise(hardFiltered, (r) => r.entry.text)
311
362
  : hardFiltered;
312
363
 
313
364
  // MMR deduplication: avoid top-k filled with near-identical memories
@@ -320,16 +371,24 @@ export class MemoryRetriever {
320
371
  query: string,
321
372
  limit: number,
322
373
  scopeFilter?: string[],
323
- category?: string
374
+ category?: string,
324
375
  ): Promise<RetrievalResult[]> {
325
- const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
376
+ const candidatePoolSize = Math.max(
377
+ this.config.candidatePoolSize,
378
+ limit * 2,
379
+ );
326
380
 
327
381
  // Compute query embedding once, reuse for vector search + reranking
328
382
  const queryVector = await this.embedder.embedQuery(query);
329
383
 
330
384
  // Run vector and BM25 searches in parallel
331
385
  const [vectorResults, bm25Results] = await Promise.all([
332
- this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category),
386
+ this.runVectorSearch(
387
+ queryVector,
388
+ candidatePoolSize,
389
+ scopeFilter,
390
+ category,
391
+ ),
333
392
  this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
334
393
  ]);
335
394
 
@@ -337,12 +396,19 @@ export class MemoryRetriever {
337
396
  const fusedResults = await this.fuseResults(vectorResults, bm25Results);
338
397
 
339
398
  // Apply minimum score threshold
340
- const filtered = fusedResults.filter(r => r.score >= this.config.minScore);
399
+ const filtered = fusedResults.filter(
400
+ (r) => r.score >= this.config.minScore,
401
+ );
341
402
 
342
403
  // Rerank if enabled
343
- const reranked = this.config.rerank !== "none"
344
- ? await this.rerankResults(query, queryVector, filtered.slice(0, limit * 2))
345
- : filtered;
404
+ const reranked =
405
+ this.config.rerank !== "none"
406
+ ? await this.rerankResults(
407
+ query,
408
+ queryVector,
409
+ filtered.slice(0, limit * 2),
410
+ )
411
+ : filtered;
346
412
 
347
413
  // Apply temporal re-ranking (recency boost)
348
414
  const temporalReranked = this.applyRecencyBoost(reranked);
@@ -357,11 +423,13 @@ export class MemoryRetriever {
357
423
  const timeDecayed = this.applyTimeDecay(lengthNormalized);
358
424
 
359
425
  // Hard minimum score cutoff (post all scoring stages)
360
- const hardFiltered = timeDecayed.filter(r => r.score >= this.config.hardMinScore);
426
+ const hardFiltered = timeDecayed.filter(
427
+ (r) => r.score >= this.config.hardMinScore,
428
+ );
361
429
 
362
430
  // Filter noise
363
431
  const denoised = this.config.filterNoise
364
- ? filterNoise(hardFiltered, r => r.entry.text)
432
+ ? filterNoise(hardFiltered, (r) => r.entry.text)
365
433
  : hardFiltered;
366
434
 
367
435
  // MMR deduplication: avoid top-k filled with near-identical memories
@@ -374,13 +442,18 @@ export class MemoryRetriever {
374
442
  queryVector: number[],
375
443
  limit: number,
376
444
  scopeFilter?: string[],
377
- category?: string
445
+ category?: string,
378
446
  ): Promise<Array<MemorySearchResult & { rank: number }>> {
379
- const results = await this.store.vectorSearch(queryVector, limit, 0.1, scopeFilter);
447
+ const results = await this.store.vectorSearch(
448
+ queryVector,
449
+ limit,
450
+ 0.1,
451
+ scopeFilter,
452
+ );
380
453
 
381
454
  // Filter by category if specified
382
455
  const filtered = category
383
- ? results.filter(r => r.entry.category === category)
456
+ ? results.filter((r) => r.entry.category === category)
384
457
  : results;
385
458
 
386
459
  return filtered.map((result, index) => ({
@@ -393,13 +466,13 @@ export class MemoryRetriever {
393
466
  query: string,
394
467
  limit: number,
395
468
  scopeFilter?: string[],
396
- category?: string
469
+ category?: string,
397
470
  ): Promise<Array<MemorySearchResult & { rank: number }>> {
398
471
  const results = await this.store.bm25Search(query, limit, scopeFilter);
399
472
 
400
473
  // Filter by category if specified
401
474
  const filtered = category
402
- ? results.filter(r => r.entry.category === category)
475
+ ? results.filter((r) => r.entry.category === category)
403
476
  : results;
404
477
 
405
478
  return filtered.map((result, index) => ({
@@ -410,17 +483,17 @@ export class MemoryRetriever {
410
483
 
411
484
  private async fuseResults(
412
485
  vectorResults: Array<MemorySearchResult & { rank: number }>,
413
- bm25Results: Array<MemorySearchResult & { rank: number }>
486
+ bm25Results: Array<MemorySearchResult & { rank: number }>,
414
487
  ): Promise<RetrievalResult[]> {
415
488
  // Create maps for quick lookup
416
489
  const vectorMap = new Map<string, MemorySearchResult & { rank: number }>();
417
490
  const bm25Map = new Map<string, MemorySearchResult & { rank: number }>();
418
491
 
419
- vectorResults.forEach(result => {
492
+ vectorResults.forEach((result) => {
420
493
  vectorMap.set(result.entry.id, result);
421
494
  });
422
495
 
423
- bm25Results.forEach(result => {
496
+ bm25Results.forEach((result) => {
424
497
  bm25Map.set(result.entry.id, result);
425
498
  });
426
499
 
@@ -459,15 +532,19 @@ export class MemoryRetriever {
459
532
  // (e.g. searching "JINA_API_KEY") still surface. The previous floor of 0.5
460
533
  // was too generous and allowed ghost entries to survive hardMinScore (0.35).
461
534
  const fusedScore = vectorResult
462
- ? clamp01(vectorScore + (bm25Hit * 0.15 * vectorScore), 0.1)
535
+ ? clamp01(vectorScore + bm25Hit * 0.15 * vectorScore, 0.1)
463
536
  : clamp01(bm25Result!.score, 0.1);
464
537
 
465
538
  fusedResults.push({
466
539
  entry: baseResult.entry,
467
540
  score: fusedScore,
468
541
  sources: {
469
- vector: vectorResult ? { score: vectorResult.score, rank: vectorResult.rank } : undefined,
470
- bm25: bm25Result ? { score: bm25Result.score, rank: bm25Result.rank } : undefined,
542
+ vector: vectorResult
543
+ ? { score: vectorResult.score, rank: vectorResult.rank }
544
+ : undefined,
545
+ bm25: bm25Result
546
+ ? { score: bm25Result.score, rank: bm25Result.rank }
547
+ : undefined,
471
548
  fused: { score: fusedScore },
472
549
  },
473
550
  });
@@ -481,7 +558,11 @@ export class MemoryRetriever {
481
558
  * Rerank results using cross-encoder API (Jina, Pinecone, or compatible).
482
559
  * Falls back to cosine similarity if API is unavailable or fails.
483
560
  */
484
- private async rerankResults(query: string, queryVector: number[], results: RetrievalResult[]): Promise<RetrievalResult[]> {
561
+ private async rerankResults(
562
+ query: string,
563
+ queryVector: number[],
564
+ results: RetrievalResult[],
565
+ ): Promise<RetrievalResult[]> {
485
566
  if (results.length === 0) {
486
567
  return results;
487
568
  }
@@ -491,11 +572,19 @@ export class MemoryRetriever {
491
572
  try {
492
573
  const provider = this.config.rerankProvider || "jina";
493
574
  const model = this.config.rerankModel || "jina-reranker-v3";
494
- const endpoint = this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
495
- const documents = results.map(r => r.entry.text);
575
+ const endpoint =
576
+ this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
577
+ const documents = results.map((r) => r.entry.text);
496
578
 
497
579
  // Build provider-specific request
498
- const { headers, body } = buildRerankRequest(provider, this.config.rerankApiKey, model, query, documents, results.length);
580
+ const { headers, body } = buildRerankRequest(
581
+ provider,
582
+ this.config.rerankApiKey,
583
+ model,
584
+ query,
585
+ documents,
586
+ results.length,
587
+ );
499
588
 
500
589
  // Timeout: 5 seconds to prevent stalling retrieval pipeline
501
590
  const controller = new AbortController();
@@ -511,20 +600,22 @@ export class MemoryRetriever {
511
600
  clearTimeout(timeout);
512
601
 
513
602
  if (response.ok) {
514
- const data = await response.json() as Record<string, unknown>;
603
+ const data = (await response.json()) as Record<string, unknown>;
515
604
 
516
605
  // Parse provider-specific response into unified format
517
606
  const parsed = parseRerankResponse(provider, data);
518
607
 
519
608
  if (!parsed) {
520
- console.warn("Rerank API: invalid response shape, falling back to cosine");
609
+ console.warn(
610
+ "Rerank API: invalid response shape, falling back to cosine",
611
+ );
521
612
  } else {
522
613
  // Build a Set of returned indices to identify unreturned candidates
523
- const returnedIndices = new Set(parsed.map(r => r.index));
614
+ const returnedIndices = new Set(parsed.map((r) => r.index));
524
615
 
525
616
  const reranked = parsed
526
- .filter(item => item.index >= 0 && item.index < results.length)
527
- .map(item => {
617
+ .filter((item) => item.index >= 0 && item.index < results.length)
618
+ .map((item) => {
528
619
  const original = results[item.index];
529
620
  // Blend: 60% cross-encoder score + 40% original fused score
530
621
  const blendedScore = clamp01(
@@ -544,13 +635,17 @@ export class MemoryRetriever {
544
635
  // Keep unreturned candidates with their original scores (slightly penalized)
545
636
  const unreturned = results
546
637
  .filter((_, idx) => !returnedIndices.has(idx))
547
- .map(r => ({ ...r, score: r.score * 0.8 }));
638
+ .map((r) => ({ ...r, score: r.score * 0.8 }));
548
639
 
549
- return [...reranked, ...unreturned].sort((a, b) => b.score - a.score);
640
+ return [...reranked, ...unreturned].sort(
641
+ (a, b) => b.score - a.score,
642
+ );
550
643
  }
551
644
  } else {
552
645
  const errText = await response.text().catch(() => "");
553
- console.warn(`Rerank API returned ${response.status}: ${errText.slice(0, 200)}, falling back to cosine`);
646
+ console.warn(
647
+ `Rerank API returned ${response.status}: ${errText.slice(0, 200)}, falling back to cosine`,
648
+ );
554
649
  }
555
650
  } catch (error) {
556
651
  if (error instanceof Error && error.name === "AbortError") {
@@ -563,9 +658,9 @@ export class MemoryRetriever {
563
658
 
564
659
  // Fallback: lightweight cosine similarity rerank
565
660
  try {
566
- const reranked = results.map(result => {
661
+ const reranked = results.map((result) => {
567
662
  const cosineScore = cosineSimilarity(queryVector, result.entry.vector);
568
- const combinedScore = (result.score * 0.7) + (cosineScore * 0.3);
663
+ const combinedScore = result.score * 0.7 + cosineScore * 0.3;
569
664
 
570
665
  return {
571
666
  ...result,
@@ -597,8 +692,9 @@ export class MemoryRetriever {
597
692
  }
598
693
 
599
694
  const now = Date.now();
600
- const boosted = results.map(r => {
601
- const ts = (r.entry.timestamp && r.entry.timestamp > 0) ? r.entry.timestamp : now;
695
+ const boosted = results.map((r) => {
696
+ const ts =
697
+ r.entry.timestamp && r.entry.timestamp > 0 ? r.entry.timestamp : now;
602
698
  const ageDays = (now - ts) / 86_400_000;
603
699
  const boost = Math.exp(-ageDays / recencyHalfLifeDays) * recencyWeight;
604
700
  return {
@@ -619,7 +715,7 @@ export class MemoryRetriever {
619
715
  */
620
716
  private applyImportanceWeight(results: RetrievalResult[]): RetrievalResult[] {
621
717
  const baseWeight = 0.7;
622
- const weighted = results.map(r => {
718
+ const weighted = results.map((r) => {
623
719
  const importance = r.entry.importance ?? 0.7;
624
720
  const factor = baseWeight + (1 - baseWeight) * importance;
625
721
  return {
@@ -637,11 +733,13 @@ export class MemoryRetriever {
637
733
  * Long, sprawling entries (> anchor) get penalized.
638
734
  * Formula: score *= 1 / (1 + log2(charLen / anchor))
639
735
  */
640
- private applyLengthNormalization(results: RetrievalResult[]): RetrievalResult[] {
736
+ private applyLengthNormalization(
737
+ results: RetrievalResult[],
738
+ ): RetrievalResult[] {
641
739
  const anchor = this.config.lengthNormAnchor;
642
740
  if (!anchor || anchor <= 0) return results;
643
741
 
644
- const normalized = results.map(r => {
742
+ const normalized = results.map((r) => {
645
743
  const charLen = r.entry.text.length;
646
744
  const ratio = charLen / anchor;
647
745
  // No penalty for entries at or below anchor length.
@@ -675,11 +773,25 @@ export class MemoryRetriever {
675
773
  if (!halfLife || halfLife <= 0) return results;
676
774
 
677
775
  const now = Date.now();
678
- const decayed = results.map(r => {
679
- const ts = (r.entry.timestamp && r.entry.timestamp > 0) ? r.entry.timestamp : now;
776
+ const decayed = results.map((r) => {
777
+ const ts =
778
+ r.entry.timestamp && r.entry.timestamp > 0 ? r.entry.timestamp : now;
680
779
  const ageDays = (now - ts) / 86_400_000;
780
+
781
+ // Access reinforcement: frequently recalled memories decay slower
782
+ const { accessCount, lastAccessedAt } = parseAccessMetadata(
783
+ r.entry.metadata,
784
+ );
785
+ const effectiveHL = computeEffectiveHalfLife(
786
+ halfLife,
787
+ accessCount,
788
+ lastAccessedAt,
789
+ this.config.reinforcementFactor,
790
+ this.config.maxHalfLifeMultiplier,
791
+ );
792
+
681
793
  // floor at 0.5: even very old entries keep at least 50% of their score
682
- const factor = 0.5 + 0.5 * Math.exp(-ageDays / halfLife);
794
+ const factor = 0.5 + 0.5 * Math.exp(-ageDays / effectiveHL);
683
795
  return {
684
796
  ...r,
685
797
  score: clamp01(r.score * factor, r.score * 0.5),
@@ -701,7 +813,10 @@ export class MemoryRetriever {
701
813
  * (e.g. 3 similar "SVG style" memories) while keeping them available
702
814
  * if the pool is small.
703
815
  */
704
- private applyMMRDiversity(results: RetrievalResult[], similarityThreshold = 0.85): RetrievalResult[] {
816
+ private applyMMRDiversity(
817
+ results: RetrievalResult[],
818
+ similarityThreshold = 0.85,
819
+ ): RetrievalResult[] {
705
820
  if (results.length <= 1) return results;
706
821
 
707
822
  const selected: RetrievalResult[] = [];
@@ -709,7 +824,7 @@ export class MemoryRetriever {
709
824
 
710
825
  for (const candidate of results) {
711
826
  // Check if this candidate is too similar to any already-selected result
712
- const tooSimilar = selected.some(s => {
827
+ const tooSimilar = selected.some((s) => {
713
828
  // Both must have vectors to compare.
714
829
  // LanceDB returns Arrow Vector objects (not plain arrays),
715
830
  // so use .length directly and Array.from() for conversion.
@@ -778,7 +893,7 @@ export class MemoryRetriever {
778
893
  export function createRetriever(
779
894
  store: MemoryStore,
780
895
  embedder: Embedder,
781
- config?: Partial<RetrievalConfig>
896
+ config?: Partial<RetrievalConfig>,
782
897
  ): MemoryRetriever {
783
898
  const fullConfig = { ...DEFAULT_RETRIEVAL_CONFIG, ...config };
784
899
  return new MemoryRetriever(store, embedder, fullConfig);