memory-lancedb-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +489 -0
- package/README_CN.md +406 -0
- package/cli.ts +611 -0
- package/index.ts +698 -0
- package/openclaw.plugin.json +385 -0
- package/package.json +38 -0
- package/skills/lesson/SKILL.md +28 -0
- package/src/adaptive-retrieval.ts +60 -0
- package/src/embedder.ts +354 -0
- package/src/migrate.ts +356 -0
- package/src/noise-filter.ts +78 -0
- package/src/retriever.ts +722 -0
- package/src/scopes.ts +374 -0
- package/src/store.ts +567 -0
- package/src/tools.ts +639 -0
package/src/retriever.ts
ADDED
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Retrieval System
|
|
3
|
+
 * Combines vector search + BM25 full-text search via score fusion (vector similarity as the base score, BM25 hits as a bonus)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { MemoryStore, MemorySearchResult } from "./store.js";
|
|
7
|
+
import type { Embedder } from "./embedder.js";
|
|
8
|
+
import { filterNoise } from "./noise-filter.js";
|
|
9
|
+
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Types & Configuration
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/**
 * Tunables for the retrieval pipeline.
 *
 * Defaults quoted below are the values in DEFAULT_RETRIEVAL_CONFIG.
 */
export interface RetrievalConfig {
  /**
   * "hybrid" runs vector + BM25 search and fuses the results; "vector" runs
   * vector search only. Hybrid silently degrades to vector-only when the store
   * reports no full-text-search support.
   */
  mode: "hybrid" | "vector";
  /**
   * Intended weight of the vector signal (default: 0.7).
   * NOTE(review): not read by the fusion code in retriever.ts, which uses
   * fixed constants — confirm whether another module consumes it.
   */
  vectorWeight: number;
  /**
   * Intended weight of the BM25 signal (default: 0.3).
   * NOTE(review): not read by the fusion code in retriever.ts — see vectorWeight.
   */
  bm25Weight: number;
  /**
   * Early score threshold (default: 0.3). In vector-only mode it is passed to
   * the store's vector search; in hybrid mode it is applied to the fused score
   * before reranking.
   */
  minScore: number;
  /**
   * Rerank strategy for hybrid mode (default: "cross-encoder").
   * - "cross-encoder": call the rerank API when rerankApiKey is set, otherwise
   *   (or on API failure) fall back to cosine similarity.
   * - "lightweight": cosine similarity against the query embedding.
   * - "none": skip reranking.
   */
  rerank: "cross-encoder" | "lightweight" | "none";
  /**
   * Candidates fetched from each search in hybrid mode (default: 20).
   * The effective pool is max(candidatePoolSize, limit * 2).
   */
  candidatePoolSize: number;
  /**
   * Recency boost decay constant in days (default: 14). Set 0 to disable.
   * The boost is exp(-ageDays / recencyHalfLifeDays) * recencyWeight, so the
   * boost has fallen to ~37% (1/e) of its maximum at this age.
   */
  recencyHalfLifeDays: number;
  /** Max recency boost, added to the score of a brand-new entry (default: 0.10). Set 0 to disable. */
  recencyWeight: number;
  /** Filter noise from results (default: true) */
  filterNoise: boolean;
  /** Reranker API key (enables cross-encoder reranking) */
  rerankApiKey?: string;
  /** Reranker model (default: jina-reranker-v2-base-multilingual) */
  rerankModel?: string;
  /** Reranker API endpoint (default: https://api.jina.ai/v1/rerank). */
  rerankEndpoint?: string;
  /**
   * Reranker provider format. Determines request/response shape and auth header.
   * - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score
   * - "siliconflow": same format as jina (alias, for clarity)
   * - "pinecone": Api-Key header, {text}[] documents, data[].score
   */
  rerankProvider?: "jina" | "siliconflow" | "pinecone";
  /**
   * Length normalization: penalize long entries that dominate via sheer keyword
   * density.
   * Formula: score *= 1 / (1 + 0.5 * log2(max(charLen / anchor, 1))).
   * anchor = reference length in characters (default: 500). Entries at or below
   * the anchor are left unchanged (no boost); longer entries are penalized
   * progressively (2x anchor -> ~0.67x, 4x anchor -> 0.5x).
   * Set 0 to disable.
   */
  lengthNormAnchor: number;
  /**
   * Hard cutoff: discard results below this score.
   * Applied after all scoring stages (rerank, recency, importance, length norm,
   * time decay). Higher = fewer but more relevant results. (default: 0.35)
   */
  hardMinScore: number;
  /**
   * Time decay constant in days. Older entries lose score.
   * Different from the recency boost (additive bonus for new entries):
   * this is a multiplicative penalty for old entries.
   * Formula: score *= 0.5 + 0.5 * exp(-ageDays / timeDecayHalfLifeDays)
   * At 1x the constant: ~0.68x. At 2x: ~0.57x. At 4x: ~0.51x. Never below 0.5x.
   * Set 0 to disable. (default: 60)
   */
  timeDecayHalfLifeDays: number;
}
|
|
62
|
+
|
|
63
|
+
/** A single retrieval request. */
export interface RetrievalContext {
  /** Free-text query; embedded for vector search and passed as-is to BM25 search. */
  query: string;
  /** Maximum number of results to return; the retriever clamps it to the range 1..20. */
  limit: number;
  /** Optional scope list forwarded unchanged to the store's search calls. */
  scopeFilter?: string[];
  /** When set, only entries whose category equals this value are kept. */
  category?: string;
}
|
|
69
|
+
|
|
70
|
+
/**
 * A search hit annotated with the per-stage scores that produced its final
 * `score`, so callers can see where a result came from.
 */
export interface RetrievalResult extends MemorySearchResult {
  sources: {
    /** Score and 1-based rank from the vector search, when the entry was a vector hit. */
    vector?: { score: number; rank: number };
    /** Score and 1-based rank from the BM25 search, when the entry was a keyword hit. */
    bm25?: { score: number; rank: number };
    /** Score after fusing the vector and BM25 signals (hybrid mode only). */
    fused?: { score: number };
    /** Raw reranker score: cross-encoder relevance, or cosine similarity in the fallback path. */
    reranked?: { score: number };
  };
}
|
|
78
|
+
|
|
79
|
+
// ============================================================================
|
|
80
|
+
// Default Configuration
|
|
81
|
+
// ============================================================================
|
|
82
|
+
|
|
83
|
+
/**
 * Baseline retrieval settings. createRetriever() layers caller overrides on
 * top of these, and MemoryRetriever uses them directly when no config is given.
 */
export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
  // Search + fusion
  mode: "hybrid",
  vectorWeight: 0.7,
  bm25Weight: 0.3,
  minScore: 0.3,

  // Reranking (the cross-encoder is only called when a rerankApiKey is supplied)
  rerank: "cross-encoder",
  candidatePoolSize: 20,

  // Recency boost
  recencyHalfLifeDays: 14,
  recencyWeight: 0.1,

  filterNoise: true,

  // Rerank API defaults (Jina)
  rerankModel: "jina-reranker-v2-base-multilingual",
  rerankEndpoint: "https://api.jina.ai/v1/rerank",

  // Post-scoring adjustments and final cutoff
  lengthNormAnchor: 500,
  hardMinScore: 0.35,
  timeDecayHalfLifeDays: 60,
};
|
|
99
|
+
|
|
100
|
+
// ============================================================================
|
|
101
|
+
// Utility Functions
|
|
102
|
+
// ============================================================================
|
|
103
|
+
|
|
104
|
+
/**
 * Floors `value` and clamps it into [min, max].
 * Non-finite input (NaN, +/-Infinity) yields `min`.
 */
function clampInt(value: number, min: number, max: number): number {
  if (Number.isFinite(value)) {
    const floored = Math.floor(value);
    const lowerBounded = Math.max(min, floored);
    return Math.min(max, lowerBounded);
  }
  return min;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Clamps a finite `value` into [0, 1].
 * For non-finite input, returns `fallback` as-is when it is finite (it is not
 * clamped), otherwise 0.
 */
function clamp01(value: number, fallback: number): number {
  if (Number.isFinite(value)) {
    const nonNegative = Math.max(0, value);
    return Math.min(1, nonNegative);
  }
  if (Number.isFinite(fallback)) {
    return fallback;
  }
  return 0;
}
|
|
113
|
+
|
|
114
|
+
// ============================================================================
|
|
115
|
+
// Rerank Provider Adapters
|
|
116
|
+
// ============================================================================
|
|
117
|
+
|
|
118
|
+
/** Wire formats understood by the rerank request builder and response parser. */
type RerankProvider = "jina" | "siliconflow" | "pinecone";

/** Provider-independent rerank result. */
interface RerankItem {
  /** Position of the scored document in the `documents` array that was sent. */
  index: number;
  /** Relevance score reported by the provider. */
  score: number;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Builds the HTTP headers and JSON body for a rerank call.
 *
 * Pinecone authenticates with an `Api-Key` header, wraps every document as
 * `{ text }` and names the field to rank on. Every other provider value
 * ("jina", "siliconflow", or anything unrecognised) gets the Jina-style
 * request: Bearer auth and plain string documents.
 *
 * @param provider  Wire format to produce.
 * @param apiKey    Credential placed in the provider's auth header.
 * @param model     Reranker model name.
 * @param query     Query the documents are ranked against.
 * @param documents Candidate texts, in the order their indices will be reported.
 * @param topN      Number of results requested from the API.
 * @returns Headers and an unserialised body for the request.
 */
function buildRerankRequest(
  provider: RerankProvider,
  apiKey: string,
  model: string,
  query: string,
  documents: string[],
  topN: number,
): { headers: Record<string, string>; body: Record<string, unknown> } {
  if (provider === "pinecone") {
    const pineconeHeaders: Record<string, string> = {
      "Content-Type": "application/json",
      "Api-Key": apiKey,
      "X-Pinecone-API-Version": "2024-10",
    };
    const pineconeBody: Record<string, unknown> = {
      model,
      query,
      documents: documents.map(text => ({ text })),
      top_n: topN,
      rank_fields: ["text"],
    };
    return { headers: pineconeHeaders, body: pineconeBody };
  }

  // "jina", "siliconflow" and any unknown provider share the Jina format.
  const bearerHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${apiKey}`,
  };
  const jinaBody: Record<string, unknown> = {
    model,
    query,
    documents,
    top_n: topN,
  };
  return { headers: bearerHeaders, body: jinaBody };
}
|
|
164
|
+
|
|
165
|
+
/**
 * Parses a provider-specific rerank response into a unified list.
 *
 * Expected shapes:
 * - Pinecone: `{ data: [{ index, score, ... }] }`
 * - Jina / SiliconFlow (and any other provider): `{ results: [{ index, relevance_score }] }`
 *
 * The payload comes from a remote service, so each item is validated: entries
 * that are not objects, whose `index` is not an integer, or whose score is not
 * a finite number are dropped instead of leaking `undefined`/NaN to the caller.
 *
 * @param provider Wire format the response is expected to follow.
 * @param data     Decoded JSON response body.
 * @returns The valid items in response order, or `null` when the payload is
 *          not an object or lacks the expected array.
 */
function parseRerankResponse(
  provider: RerankProvider,
  data: Record<string, unknown>,
): RerankItem[] | null {
  // A JSON body can legally be `null` or a primitive despite the declared type.
  if (data === null || typeof data !== "object") return null;

  const isPinecone = provider === "pinecone";
  const rawItems: unknown = isPinecone ? data.data : data.results;
  if (!Array.isArray(rawItems)) return null;

  const scoreKey = isPinecone ? "score" : "relevance_score";
  const items: RerankItem[] = [];

  for (const raw of rawItems as unknown[]) {
    if (raw === null || typeof raw !== "object") continue;

    const record = raw as Record<string, unknown>;
    const index = record.index;
    const score = record[scoreKey];

    if (
      typeof index === "number" &&
      Number.isInteger(index) &&
      typeof score === "number" &&
      Number.isFinite(score)
    ) {
      items.push({ index, score });
    }
  }

  return items;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Cosine similarity of two equal-length vectors.
 * Used by the lightweight rerank fallback and the diversity filter.
 *
 * @returns dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws Error when the vectors differ in length.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error("Vector dimensions must match for cosine similarity");
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < dims; i += 1) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) {
    return 0;
  }
  return dot / magnitude;
}
|
|
207
|
+
|
|
208
|
+
// ============================================================================
|
|
209
|
+
// Memory Retriever
|
|
210
|
+
// ============================================================================
|
|
211
|
+
|
|
212
|
+
/**
 * Retrieves memories for a query.
 *
 * Pipeline (hybrid mode): vector search + BM25 search -> score fusion ->
 * minScore filter -> optional rerank -> recency boost -> importance weight ->
 * length normalization -> time decay -> hardMinScore filter -> noise filter ->
 * diversity reordering -> top `limit`.
 *
 * Vector-only mode skips BM25, fusion and reranking but runs the same
 * post-scoring stages.
 */
export class MemoryRetriever {
  /** Budget for one rerank API call (request + body read), in milliseconds. */
  private static readonly RERANK_TIMEOUT_MS = 5000;
  private static readonly MS_PER_DAY = 86_400_000;

  constructor(
    private readonly store: MemoryStore,
    private readonly embedder: Embedder,
    private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG
  ) {}

  /**
   * Runs a retrieval request.
   *
   * `context.limit` is floored and clamped to 1..20. Vector-only retrieval is
   * used when the mode is "vector" or the store has no full-text-search
   * support; otherwise hybrid retrieval is used.
   *
   * @returns At most `limit` results, best first.
   */
  async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
    const { query, limit, scopeFilter, category } = context;
    const safeLimit = clampInt(limit, 1, 20);

    const useHybrid = this.config.mode !== "vector" && this.store.hasFtsSupport;
    return useHybrid
      ? this.hybridRetrieval(query, safeLimit, scopeFilter, category)
      : this.vectorOnlyRetrieval(query, safeLimit, scopeFilter, category);
  }

  /**
   * Vector search only: the store applies `minScore`, then the shared
   * post-scoring stages run. No reranking happens on this path.
   */
  private async vectorOnlyRetrieval(
    query: string,
    limit: number,
    scopeFilter?: string[],
    category?: string
  ): Promise<RetrievalResult[]> {
    const queryVector = await this.embedder.embedQuery(query);
    const hits = await this.store.vectorSearch(queryVector, limit, this.config.minScore, scopeFilter);

    // The category filter runs after the search, so fewer than `limit` hits may remain.
    const inCategory = category
      ? hits.filter(hit => hit.entry.category === category)
      : hits;

    const candidates: RetrievalResult[] = inCategory.map((hit, index) => ({
      ...hit,
      sources: {
        vector: { score: hit.score, rank: index + 1 },
      },
    }));

    return this.finalizeResults(candidates, limit);
  }

  /**
   * Hybrid retrieval: vector and BM25 searches run in parallel over a
   * candidate pool of max(candidatePoolSize, limit * 2), are fused, filtered
   * by `minScore`, optionally reranked, then post-processed.
   */
  private async hybridRetrieval(
    query: string,
    limit: number,
    scopeFilter?: string[],
    category?: string
  ): Promise<RetrievalResult[]> {
    const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);

    // Compute the query embedding once; it is reused for search and for the cosine rerank.
    const queryVector = await this.embedder.embedQuery(query);

    const [vectorResults, bm25Results] = await Promise.all([
      this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category),
      this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
    ]);

    const fused = this.fuseResults(vectorResults, bm25Results);
    const aboveMinScore = fused.filter(r => r.score >= this.config.minScore);

    // Only the top limit * 2 fused candidates are sent to the reranker.
    const reranked = this.config.rerank !== "none"
      ? await this.rerankResults(query, queryVector, aboveMinScore.slice(0, limit * 2))
      : aboveMinScore;

    return this.finalizeResults(reranked, limit);
  }

  /**
   * Shared tail of both retrieval paths: recency boost, importance weight,
   * length normalization, time decay, hard score cutoff, optional noise
   * filter, diversity reordering, then truncation to `limit`.
   */
  private finalizeResults(results: RetrievalResult[], limit: number): RetrievalResult[] {
    const boosted = this.applyRecencyBoost(results);
    const weighted = this.applyImportanceWeight(boosted);
    const lengthNormalized = this.applyLengthNormalization(weighted);
    const timeDecayed = this.applyTimeDecay(lengthNormalized);

    // Hard minimum score cutoff, applied after every scoring stage.
    const hardFiltered = timeDecayed.filter(r => r.score >= this.config.hardMinScore);

    const denoised = this.config.filterNoise
      ? filterNoise(hardFiltered, r => r.entry.text)
      : hardFiltered;

    // Diversity pass: avoid a top-k filled with near-identical memories.
    return this.applyMMRDiversity(denoised).slice(0, limit);
  }

  /** Keeps only results in `category` (when given) and assigns 1-based ranks in list order. */
  private static rankWithinCategory(
    results: MemorySearchResult[],
    category?: string
  ): Array<MemorySearchResult & { rank: number }> {
    const matching = category
      ? results.filter(r => r.entry.category === category)
      : results;
    return matching.map((result, index) => ({ ...result, rank: index + 1 }));
  }

  /** Vector search for the hybrid path, using a fixed 0.1 minimum score at the store. */
  private async runVectorSearch(
    queryVector: number[],
    limit: number,
    scopeFilter?: string[],
    category?: string
  ): Promise<Array<MemorySearchResult & { rank: number }>> {
    const results = await this.store.vectorSearch(queryVector, limit, 0.1, scopeFilter);
    return MemoryRetriever.rankWithinCategory(results, category);
  }

  /** BM25 full-text search for the hybrid path. */
  private async runBM25Search(
    query: string,
    limit: number,
    scopeFilter?: string[],
    category?: string
  ): Promise<Array<MemorySearchResult & { rank: number }>> {
    const results = await this.store.bm25Search(query, limit, scopeFilter);
    return MemoryRetriever.rankWithinCategory(results, category);
  }

  /**
   * Merges vector and BM25 hits by entry id.
   *
   * This is score fusion, not reciprocal-rank fusion:
   * - entries with a vector hit: score = vectorScore, multiplied by 1.15 when
   *   BM25 also matched (the keyword match confirms relevance);
   * - BM25-only entries: score = max(bm25Score, 0.5), so exact keyword matches
   *   (e.g. searching "JINA_API_KEY") still surface when vector distance is large.
   * Scores are clamped to [0, 1]; a non-finite score becomes 0.1.
   *
   * @returns Fused results sorted by score, best first.
   */
  private fuseResults(
    vectorResults: Array<MemorySearchResult & { rank: number }>,
    bm25Results: Array<MemorySearchResult & { rank: number }>
  ): RetrievalResult[] {
    const vectorById = new Map<string, MemorySearchResult & { rank: number }>();
    const bm25ById = new Map<string, MemorySearchResult & { rank: number }>();
    for (const hit of vectorResults) vectorById.set(hit.entry.id, hit);
    for (const hit of bm25Results) bm25ById.set(hit.entry.id, hit);

    const allIds = new Set([...vectorById.keys(), ...bm25ById.keys()]);
    const fused: RetrievalResult[] = [];

    for (const id of allIds) {
      const vectorHit = vectorById.get(id);
      const bm25Hit = bm25ById.get(id);

      // Prefer the vector hit's entry when both searches returned it.
      const base = vectorHit ?? bm25Hit;
      if (!base) continue;

      let fusedScore: number;
      if (vectorHit) {
        const keywordBonus = bm25Hit ? 0.15 * vectorHit.score : 0;
        fusedScore = clamp01(vectorHit.score + keywordBonus, 0.1);
      } else {
        fusedScore = clamp01(Math.max(base.score, 0.5), 0.1);
      }

      fused.push({
        entry: base.entry,
        score: fusedScore,
        sources: {
          vector: vectorHit ? { score: vectorHit.score, rank: vectorHit.rank } : undefined,
          bm25: bm25Hit ? { score: bm25Hit.score, rank: bm25Hit.rank } : undefined,
          fused: { score: fusedScore },
        },
      });
    }

    return fused.sort((a, b) => b.score - a.score);
  }

  /**
   * Rerank results using the cross-encoder API (Jina, Pinecone, or compatible)
   * when `rerank` is "cross-encoder" and an API key is configured. Falls back
   * to cosine similarity when the API is not configured, fails, times out or
   * returns an unusable payload.
   */
  private async rerankResults(query: string, queryVector: number[], results: RetrievalResult[]): Promise<RetrievalResult[]> {
    if (results.length === 0) {
      return results;
    }

    const apiKey = this.config.rerankApiKey;
    if (this.config.rerank === "cross-encoder" && apiKey) {
      const viaApi = await this.rerankWithCrossEncoder(query, results, apiKey);
      if (viaApi) return viaApi;
    }

    return this.rerankWithCosine(queryVector, results);
  }

  /**
   * Calls the configured rerank API and blends its scores with the fused scores.
   *
   * @returns Reranked results sorted best first, or `null` when the caller
   *          should fall back to cosine reranking. Never throws.
   */
  private async rerankWithCrossEncoder(
    query: string,
    results: RetrievalResult[],
    apiKey: string
  ): Promise<RetrievalResult[] | null> {
    const provider = this.config.rerankProvider || "jina";
    const model = this.config.rerankModel || "jina-reranker-v2-base-multilingual";
    const endpoint = this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
    const documents = results.map(r => r.entry.text);

    const { headers, body } = buildRerankRequest(provider, apiKey, model, query, documents, results.length);

    // Timeout: 5 seconds to prevent stalling the retrieval pipeline.
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), MemoryRetriever.RERANK_TIMEOUT_MS);

    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
        signal: controller.signal,
      });

      if (!response.ok) {
        const errText = await response.text().catch(() => "");
        console.warn(`Rerank API returned ${response.status}: ${errText.slice(0, 200)}, falling back to cosine`);
        return null;
      }

      const data = await response.json() as Record<string, unknown>;
      const parsed = parseRerankResponse(provider, data);
      if (!parsed) {
        console.warn("Rerank API: invalid response shape, falling back to cosine");
        return null;
      }

      // Track which candidates were scored; ignore out-of-range, non-integer
      // and repeated indices so no candidate is emitted twice.
      const scoredIndices = new Set<number>();
      const reranked: RetrievalResult[] = [];
      for (const item of parsed) {
        const original = Number.isInteger(item.index) ? results[item.index] : undefined;
        if (!original || scoredIndices.has(item.index)) continue;
        scoredIndices.add(item.index);

        // Blend: 60% cross-encoder score + 40% original fused score.
        const blendedScore = clamp01(
          item.score * 0.6 + original.score * 0.4,
          original.score * 0.5,
        );
        reranked.push({
          ...original,
          score: blendedScore,
          sources: {
            ...original.sources,
            reranked: { score: item.score },
          },
        });
      }

      // Keep unreturned candidates with their original scores (slightly penalized).
      const unreturned = results
        .filter((_, idx) => !scoredIndices.has(idx))
        .map(r => ({ ...r, score: r.score * 0.8 }));

      return [...reranked, ...unreturned].sort((a, b) => b.score - a.score);
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        console.warn("Rerank API timed out (5s), falling back to cosine");
      } else {
        console.warn("Rerank API failed, falling back to cosine:", error);
      }
      return null;
    } finally {
      // Always release the timer, including when fetch rejects. Clearing it
      // here (not right after fetch) also keeps the body read under the timeout.
      clearTimeout(timeout);
    }
  }

  /**
   * Lightweight rerank: score = 0.7 * current score + 0.3 * cosine similarity
   * between the query embedding and the entry vector.
   *
   * Entries without a usable vector (missing, empty, or a different dimension
   * than the query) keep their current score instead of aborting the rerank
   * for every candidate.
   */
  private rerankWithCosine(queryVector: number[], results: RetrievalResult[]): RetrievalResult[] {
    try {
      const rescored = results.map((result): RetrievalResult => {
        const entryVector = MemoryRetriever.toNumberArray(result.entry.vector);
        if (entryVector === null || entryVector.length !== queryVector.length) {
          return result;
        }

        const cosineScore = cosineSimilarity(queryVector, entryVector);
        const combinedScore = (result.score * 0.7) + (cosineScore * 0.3);

        return {
          ...result,
          score: clamp01(combinedScore, result.score),
          sources: {
            ...result.sources,
            reranked: { score: cosineScore },
          },
        };
      });

      return rescored.sort((a, b) => b.score - a.score);
    } catch (error) {
      console.warn("Reranking failed, returning original results:", error);
      return results;
    }
  }

  /**
   * Converts a stored vector to a plain number array.
   * LanceDB returns Arrow Vector objects (not plain arrays), which need
   * Array.from() before they can be indexed numerically.
   *
   * @returns The vector as number[], or `null` when it is missing or empty.
   */
  private static toNumberArray(
    vector: Iterable<number> | ArrayLike<number> | null | undefined
  ): number[] | null {
    if (vector == null) return null;
    const values = Array.isArray(vector) ? (vector as number[]) : Array.from(vector);
    return values.length > 0 ? values : null;
  }

  /**
   * Age of an entry in days, never negative.
   * A missing or non-positive timestamp is treated as "now" (age 0); a
   * timestamp in the future also yields 0, so clock skew cannot produce a
   * boost above the configured maximum or a decay factor above 1.
   */
  private static ageInDays(timestamp: number | null | undefined, now: number): number {
    const ts = (timestamp && timestamp > 0) ? timestamp : now;
    return Math.max(0, (now - ts) / MemoryRetriever.MS_PER_DAY);
  }

  /**
   * Apply recency boost: newer memories get a small additive score bonus, so
   * corrections/updates outrank older entries when semantic similarity is close.
   * Formula: boost = exp(-ageDays / recencyHalfLifeDays) * recencyWeight
   * Disabled when recencyHalfLifeDays or recencyWeight is 0 or unset.
   */
  private applyRecencyBoost(results: RetrievalResult[]): RetrievalResult[] {
    const { recencyHalfLifeDays, recencyWeight } = this.config;
    if (!recencyHalfLifeDays || recencyHalfLifeDays <= 0 || !recencyWeight) {
      return results;
    }

    const now = Date.now();
    const boosted = results.map(r => {
      const ageDays = MemoryRetriever.ageInDays(r.entry.timestamp, now);
      const boost = Math.exp(-ageDays / recencyHalfLifeDays) * recencyWeight;
      return {
        ...r,
        score: clamp01(r.score + boost, r.score),
      };
    });

    return boosted.sort((a, b) => b.score - a.score);
  }

  /**
   * Apply importance weighting: lower-importance memories are scaled down so
   * critical ones outrank casual ones when semantic similarity is close.
   * Formula: score *= (baseWeight + (1 - baseWeight) * importance)
   * With baseWeight=0.7: importance=1.0 → ×1.0, importance=0.5 → ×0.85, importance=0.0 → ×0.7
   * Entries without an importance value are treated as importance=0.7.
   */
  private applyImportanceWeight(results: RetrievalResult[]): RetrievalResult[] {
    const baseWeight = 0.7;
    const weighted = results.map(r => {
      const importance = r.entry.importance ?? 0.7;
      const factor = baseWeight + (1 - baseWeight) * importance;
      return {
        ...r,
        score: clamp01(r.score * factor, r.score * baseWeight),
      };
    });
    return weighted.sort((a, b) => b.score - a.score);
  }

  /**
   * Length normalization: penalize long entries that dominate search results
   * via sheer keyword density and broad semantic coverage.
   * Entries at or below the anchor length are left unchanged (no boost);
   * longer entries decay logarithmically.
   * Formula: score *= 1 / (1 + 0.5 * log2(max(charLen / anchor, 1)))
   * With anchor=500: 500 → 1.0, 800 → 0.75, 1000 → 0.67, 1500 → 0.56, 2000 → 0.50
   * Disabled when lengthNormAnchor is 0 or unset.
   */
  private applyLengthNormalization(results: RetrievalResult[]): RetrievalResult[] {
    const anchor = this.config.lengthNormAnchor;
    if (!anchor || anchor <= 0) return results;

    const normalized = results.map(r => {
      const ratio = r.entry.text.length / anchor;
      // max(ratio, 1) keeps the log non-negative: short entries get factor 1.
      const logRatio = Math.log2(Math.max(ratio, 1));
      const factor = 1 / (1 + 0.5 * logRatio);
      return {
        ...r,
        score: clamp01(r.score * factor, r.score * 0.3),
      };
    });

    return normalized.sort((a, b) => b.score - a.score);
  }

  /**
   * Time decay: multiplicative penalty for old entries.
   * Unlike the recency boost (additive bonus for new entries), this actively
   * penalizes stale information so recent knowledge wins ties.
   * Formula: score *= 0.5 + 0.5 * exp(-ageDays / timeDecayHalfLifeDays)
   *   At 0 days: 1.0x (no penalty)
   *   At 1x the constant: ~0.68x
   *   At 2x the constant: ~0.57x
   *   Floor at 0.5x (never penalize more than half)
   * Disabled when timeDecayHalfLifeDays is 0 or unset.
   */
  private applyTimeDecay(results: RetrievalResult[]): RetrievalResult[] {
    const halfLife = this.config.timeDecayHalfLifeDays;
    if (!halfLife || halfLife <= 0) return results;

    const now = Date.now();
    const decayed = results.map(r => {
      const ageDays = MemoryRetriever.ageInDays(r.entry.timestamp, now);
      // Floor at 0.5: even very old entries keep at least 50% of their score.
      const factor = 0.5 + 0.5 * Math.exp(-ageDays / halfLife);
      return {
        ...r,
        score: clamp01(r.score * factor, r.score * 0.5),
      };
    });

    return decayed.sort((a, b) => b.score - a.score);
  }

  /**
   * MMR-inspired diversity filter: walk the results in order and keep each one
   * unless it is too similar to a result already kept.
   *
   * Similarity is the cosine similarity between memory vectors. A candidate
   * whose similarity to any kept result exceeds the threshold (default 0.85)
   * is moved to the end of the list rather than removed, so near-duplicates
   * (e.g. 3 similar "SVG style" memories) stay available when the pool is
   * small. Scores are not modified.
   *
   * Candidates without a usable vector, or whose vector dimension differs from
   * the one being compared, are never considered similar.
   */
  private applyMMRDiversity(results: RetrievalResult[], similarityThreshold = 0.85): RetrievalResult[] {
    if (results.length <= 1) return results;

    // Each vector is converted once up front instead of once per comparison.
    const selected: Array<{ result: RetrievalResult; vector: number[] | null }> = [];
    const deferred: RetrievalResult[] = [];

    for (const candidate of results) {
      const candidateVector = MemoryRetriever.toNumberArray(candidate.entry.vector);

      const tooSimilar = candidateVector !== null && selected.some(kept =>
        kept.vector !== null &&
        kept.vector.length === candidateVector.length &&
        cosineSimilarity(kept.vector, candidateVector) > similarityThreshold
      );

      if (tooSimilar) {
        deferred.push(candidate);
      } else {
        selected.push({ result: candidate, vector: candidateVector });
      }
    }

    // Deferred results go last: still available, but deprioritized.
    return [...selected.map(kept => kept.result), ...deferred];
  }

  /** Shallow-merges `newConfig` over the current configuration. */
  updateConfig(newConfig: Partial<RetrievalConfig>): void {
    this.config = { ...this.config, ...newConfig };
  }

  /** Returns a shallow copy of the current configuration. */
  getConfig(): RetrievalConfig {
    return { ...this.config };
  }

  /**
   * Smoke-tests the retrieval pipeline with a single-result query.
   *
   * @returns `success: true` when retrieval completes without throwing,
   *          otherwise `success: false` with the error message. The configured
   *          mode and the store's FTS support are reported either way.
   */
  async test(query = "test query"): Promise<{
    success: boolean;
    mode: string;
    hasFtsSupport: boolean;
    error?: string;
  }> {
    try {
      await this.retrieve({ query, limit: 1 });

      return {
        success: true,
        mode: this.config.mode,
        hasFtsSupport: this.store.hasFtsSupport,
      };
    } catch (error) {
      return {
        success: false,
        mode: this.config.mode,
        hasFtsSupport: this.store.hasFtsSupport,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }
}
|
|
710
|
+
|
|
711
|
+
// ============================================================================
|
|
712
|
+
// Factory Function
|
|
713
|
+
// ============================================================================
|
|
714
|
+
|
|
715
|
+
/**
 * Creates a MemoryRetriever whose configuration is DEFAULT_RETRIEVAL_CONFIG
 * with the caller's overrides layered on top.
 *
 * Overrides whose value is `undefined` are ignored, so a partially populated
 * config object (e.g. `{ minScore: settings.minScore }` with the setting
 * absent) cannot replace a default with `undefined`.
 *
 * @param store    Memory store to search.
 * @param embedder Embedder used to vectorize queries.
 * @param config   Optional partial configuration.
 */
export function createRetriever(
  store: MemoryStore,
  embedder: Embedder,
  config?: Partial<RetrievalConfig>
): MemoryRetriever {
  const definedOverrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined),
  ) as Partial<RetrievalConfig>;

  const fullConfig: RetrievalConfig = { ...DEFAULT_RETRIEVAL_CONFIG, ...definedOverrides };
  return new MemoryRetriever(store, embedder, fullConfig);
}
|