@lov3kaizen/agentsea-memory 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +450 -0
- package/dist/chunk-GACX3FPR.js +1402 -0
- package/dist/chunk-M44NB53O.js +1226 -0
- package/dist/chunk-MQDWBPZU.js +972 -0
- package/dist/chunk-TPC7MYWK.js +1495 -0
- package/dist/chunk-XD2CQGSD.js +1540 -0
- package/dist/chunk-YI7RPDEV.js +1215 -0
- package/dist/core.types-lkxKv-bW.d.cts +242 -0
- package/dist/core.types-lkxKv-bW.d.ts +242 -0
- package/dist/debug/index.cjs +1248 -0
- package/dist/debug/index.d.cts +3 -0
- package/dist/debug/index.d.ts +3 -0
- package/dist/debug/index.js +20 -0
- package/dist/index-7SsAJ4et.d.ts +525 -0
- package/dist/index-BGxYqpFb.d.cts +601 -0
- package/dist/index-BX62efZu.d.ts +565 -0
- package/dist/index-Bbc3COw0.d.cts +748 -0
- package/dist/index-Bczz1Eyk.d.ts +637 -0
- package/dist/index-C7pEiT8L.d.cts +637 -0
- package/dist/index-CHetLTb0.d.ts +389 -0
- package/dist/index-CloeiFyx.d.ts +748 -0
- package/dist/index-DNOhq-3y.d.cts +525 -0
- package/dist/index-Da-M8FOV.d.cts +389 -0
- package/dist/index-Dy8UjRFz.d.cts +565 -0
- package/dist/index-aVcITW0B.d.ts +601 -0
- package/dist/index.cjs +8554 -0
- package/dist/index.d.cts +293 -0
- package/dist/index.d.ts +293 -0
- package/dist/index.js +742 -0
- package/dist/processing/index.cjs +1575 -0
- package/dist/processing/index.d.cts +2 -0
- package/dist/processing/index.d.ts +2 -0
- package/dist/processing/index.js +24 -0
- package/dist/retrieval/index.cjs +1262 -0
- package/dist/retrieval/index.d.cts +2 -0
- package/dist/retrieval/index.d.ts +2 -0
- package/dist/retrieval/index.js +26 -0
- package/dist/sharing/index.cjs +1003 -0
- package/dist/sharing/index.d.cts +3 -0
- package/dist/sharing/index.d.ts +3 -0
- package/dist/sharing/index.js +16 -0
- package/dist/stores/index.cjs +1445 -0
- package/dist/stores/index.d.cts +2 -0
- package/dist/stores/index.d.ts +2 -0
- package/dist/stores/index.js +20 -0
- package/dist/structures/index.cjs +1530 -0
- package/dist/structures/index.d.cts +3 -0
- package/dist/structures/index.d.ts +3 -0
- package/dist/structures/index.js +24 -0
- package/package.json +141 -0
|
@@ -0,0 +1,1226 @@
|
|
|
1
|
+
// src/retrieval/strategies/SemanticRetrieval.ts
|
|
2
|
+
var SemanticRetrieval = class {
  store;
  embedFn;
  config;
  /**
   * Embedding-based retrieval over a memory store.
   *
   * @param {object} store - Backing store. This class calls
   *   `store.search(embedding, options)` and `store.query(options)` on it.
   * @param {Function} embedFn - Async function that maps text to an embedding vector.
   * @param {object} [config] - Optional overrides (`topK`, `minScore`, `reranking`,
   *   `maxCandidates`, `rerankFn`). `maxCandidates` is stored but not read by
   *   any method of this class.
   */
  constructor(store, embedFn, config = {}) {
    this.store = store;
    this.embedFn = embedFn;
    // Spread first: an explicitly `undefined`/`null` option must not clobber
    // the default that follows it.
    this.config = {
      ...config,
      topK: config.topK ?? 10,
      minScore: config.minScore ?? 0.7,
      reranking: config.reranking ?? false,
      maxCandidates: config.maxCandidates ?? 100
    };
  }
  /**
   * Retrieve memories semantically similar to the query.
   *
   * @param {object} options - `query` (text), plus optional `topK`, `minScore`,
   *   `namespace`, `filter` and `includeEmbeddings`.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "semantic" }`; `memories[i]` pairs with `scores[i]`.
   */
  async retrieve(options) {
    const startTime = Date.now();
    const queryEmbedding = await this.embedFn(options.query);
    const results = await this.store.search(queryEmbedding, {
      topK: options.topK ?? this.config.topK,
      minScore: options.minScore ?? this.config.minScore,
      namespace: options.namespace,
      filter: options.filter
    });
    const shouldRerank = this.config.reranking && this.config.rerankFn;
    const finalResults = shouldRerank
      ? await this.rerank(options.query, results)
      : results;
    const memories = finalResults.map((r) => {
      // Shallow copy so stripping the embedding never touches the store's object.
      const entry = { ...r.entry };
      if (!options.includeEmbeddings) {
        delete entry.embedding;
      }
      return entry;
    });
    return {
      memories,
      scores: finalResults.map((r) => r.score),
      totalCandidates: results.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "semantic"
    };
  }
  /**
   * Retrieve with context - includes surrounding memories.
   *
   * @param {object} options - Same options as `retrieve`.
   * @param {number} [contextWindow=2] - Max neighbours fetched on each side.
   * @returns {Promise<object>} The `retrieve` result plus `contextMemories`,
   *   one array of neighbours per returned memory (same order).
   */
  async retrieveWithContext(options, contextWindow = 2) {
    const result = await this.retrieve(options);
    const contextMemories = [];
    // Kept sequential on purpose: the number of memories is caller-controlled,
    // so fanning out here could issue an unbounded number of store queries.
    for (const memory of result.memories) {
      contextMemories.push(
        await this.getSurroundingMemories(memory, contextWindow)
      );
    }
    return {
      ...result,
      contextMemories
    };
  }
  /**
   * Get memories surrounding a given memory by timestamp.
   *
   * @param {object} memory - Entry with a numeric `timestamp` and optional
   *   `metadata.namespace`.
   * @param {number} windowSize - `limit` passed to each of the two store queries.
   * @returns {Promise<object[]>} Earlier entries (store order reversed)
   *   followed by later entries.
   */
  async getSurroundingMemories(memory, windowSize) {
    const namespace = memory.metadata?.namespace;
    // The two lookups are independent, so run them concurrently.
    const [before, after] = await Promise.all([
      this.store.query({
        endTime: memory.timestamp - 1,
        limit: windowSize,
        namespace
      }),
      this.store.query({
        startTime: memory.timestamp + 1,
        limit: windowSize,
        namespace
      })
    ]);
    // Copy before reversing: `reverse()` works in place and the array belongs
    // to the store.
    return [...before.entries].reverse().concat(after.entries);
  }
  /**
   * Rerank results using the configured rerank function.
   *
   * @param {string} query - Original query text.
   * @param {object[]} results - `{ entry, score }` search hits.
   * @returns {Promise<object[]>} Reranked hits sorted by descending score, or
   *   `results` unchanged when no `rerankFn` is configured.
   */
  async rerank(query, results) {
    if (!this.config.rerankFn) {
      return results;
    }
    const reranked = await this.config.rerankFn(query, results);
    // Sort a copy so the array owned by `rerankFn` is left untouched.
    return [...reranked].sort((a, b) => b.score - a.score);
  }
  /**
   * Find memories similar to a given memory.
   *
   * The source memory is excluded by id after the search, and the result is
   * trimmed so it never exceeds the requested `topK`.
   *
   * @param {object} memory - Source entry; its `embedding` is used when
   *   present, otherwise `content` is embedded via `embedFn`.
   * @param {object} [options] - Optional `topK`, `minScore`, `namespace`, `filter`.
   * @returns {Promise<object[]>} Up to `topK` `{ entry, score }` hits.
   */
  async findSimilar(memory, options) {
    const limit = options?.topK ?? this.config.topK;
    const embedding = memory.embedding
      ? memory.embedding
      : await this.embedFn(memory.content);
    const results = await this.store.search(embedding, {
      // +1 leaves room for the source memory, which is dropped below.
      topK: limit + 1,
      minScore: options?.minScore ?? this.config.minScore,
      namespace: options?.namespace ?? memory.metadata?.namespace,
      filter: options?.filter
    });
    return results.filter((r) => r.entry.id !== memory.id).slice(0, limit);
  }
  /**
   * Cluster memories by semantic similarity (k-means style, cosine similarity,
   * randomly chosen initial centers).
   *
   * @param {object[]} memories - Entries; those without `embedding` are
   *   embedded from `content`.
   * @param {number} [numClusters=5] - Number of cluster keys in the result.
   * @param {number} [maxIterations=10] - Upper bound on refinement passes.
   * @returns {Promise<Map<number, object[]>>} Cluster index -> members.
   *   Clusters may be empty.
   */
  async cluster(memories, numClusters = 5, maxIterations = 10) {
    const clusters = /* @__PURE__ */ new Map();
    const resetClusters = () => {
      for (let i = 0; i < numClusters; i++) {
        clusters.set(i, []);
      }
    };
    const memoriesWithEmbeddings = await Promise.all(
      memories.map(async (m) => {
        if (m.embedding) return m;
        return {
          ...m,
          embedding: await this.embedFn(m.content)
        };
      })
    );
    const centers = this.randomSample(
      memoriesWithEmbeddings.length,
      numClusters
    ).map((i) => memoriesWithEmbeddings[i].embedding);
    if (centers.length === 0) {
      // Nothing to assign (no memories, or a non-positive cluster count).
      resetClusters();
      return clusters;
    }
    let previousAssignment = null;
    for (let iteration = 0; iteration < maxIterations; iteration++) {
      resetClusters();
      const assignment = memoriesWithEmbeddings.map((memory) => {
        let bestCluster = 0;
        let bestSimilarity = -Infinity;
        for (let i = 0; i < centers.length; i++) {
          const similarity = this.cosineSimilarity(memory.embedding, centers[i]);
          if (similarity > bestSimilarity) {
            bestSimilarity = similarity;
            bestCluster = i;
          }
        }
        clusters.get(bestCluster).push(memory);
        return bestCluster;
      });
      // Unchanged assignments mean the centers would be recomputed to the same
      // values, so further passes cannot change the outcome.
      const converged =
        previousAssignment !== null &&
        assignment.every((c, idx) => c === previousAssignment[idx]);
      if (converged) {
        break;
      }
      previousAssignment = assignment;
      for (let i = 0; i < centers.length; i++) {
        const clusterMemories = clusters.get(i);
        if (clusterMemories.length > 0) {
          centers[i] = this.averageEmbedding(
            clusterMemories.map((m) => m.embedding)
          );
        }
      }
    }
    return clusters;
  }
  /**
   * Calculate cosine similarity between two vectors.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number} Cosine similarity; 0 when lengths differ or either
   *   vector has zero magnitude.
   */
  cosineSimilarity(a, b) {
    if (a.length !== b.length) return 0;
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }
  /**
   * Calculate the element-wise average of a list of embeddings.
   *
   * @param {number[][]} embeddings - Vectors; the first one fixes the dimension.
   * @returns {number[]} Mean vector, or `[]` for an empty list.
   */
  averageEmbedding(embeddings) {
    if (embeddings.length === 0) return [];
    const dim = embeddings[0].length;
    const avg = new Array(dim).fill(0);
    for (const emb of embeddings) {
      for (let i = 0; i < dim; i++) {
        avg[i] += emb[i];
      }
    }
    for (let i = 0; i < dim; i++) {
      avg[i] /= embeddings.length;
    }
    return avg;
  }
  /**
   * Random sample without replacement.
   *
   * @param {number} max - Exclusive upper bound of the index range `[0, max)`.
   * @param {number} count - Desired sample size.
   * @returns {number[]} `min(count, max)` distinct indices in draw order.
   */
  randomSample(max, count) {
    const result = [];
    const used = /* @__PURE__ */ new Set();
    while (result.length < count && result.length < max) {
      const index = Math.floor(Math.random() * max);
      if (!used.has(index)) {
        used.add(index);
        result.push(index);
      }
    }
    return result;
  }
  /**
   * Update configuration (shallow merge over the current one).
   *
   * @param {object} config - Partial configuration.
   */
  configure(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get current configuration.
   *
   * @returns {object} Shallow copy of the configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
|
|
230
|
+
/**
 * Factory for `SemanticRetrieval`; forwards all three arguments to the
 * constructor unchanged.
 *
 * @param {object} store - Backing store handed to the constructor.
 * @param {Function} embedFn - Embedding function handed to the constructor.
 * @param {object} [config] - Optional configuration handed to the constructor.
 * @returns {SemanticRetrieval} The newly constructed instance.
 */
function createSemanticRetrieval(store, embedFn, config) {
  const retrieval = new SemanticRetrieval(store, embedFn, config);
  return retrieval;
}
|
|
233
|
+
|
|
234
|
+
// src/retrieval/strategies/HybridRetrieval.ts
|
|
235
|
+
var HybridRetrieval = class {
  store;
  semanticRetrieval;
  config;
  /**
   * Hybrid retrieval: fuses embedding-based candidates with keyword-scored
   * candidates.
   *
   * @param {object} store - Backing store; `store.query(options)` is called
   *   for keyword candidates.
   * @param {Function} embedFn - Embedding function handed to the internal
   *   `SemanticRetrieval`.
   * @param {object} [config] - Optional overrides: `semanticWeight`,
   *   `keywordWeight`, `topK`, `minScore`, `fusionMethod` ("rrf" selects
   *   reciprocal rank fusion, any other value selects weighted fusion).
   */
  constructor(store, embedFn, config = {}) {
    this.store = store;
    // Spread first: an explicitly `undefined`/`null` option must not clobber
    // the default that follows it.
    this.config = {
      ...config,
      semanticWeight: config.semanticWeight ?? 0.7,
      keywordWeight: config.keywordWeight ?? 0.3,
      topK: config.topK ?? 10,
      minScore: config.minScore ?? 0.5,
      fusionMethod: config.fusionMethod ?? "rrf"
    };
    this.semanticRetrieval = new SemanticRetrieval(store, embedFn, {
      topK: config.topK ?? 10,
      // Lower threshold for candidates
      minScore: 0
    });
  }
  /**
   * Retrieve memories using hybrid search.
   *
   * @param {object} options - `query` (text), plus optional `topK`,
   *   `minScore`, `semanticWeight`, `keywordWeight`, `namespace`, `filter`.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "hybrid", metadata }`.
   */
  async retrieve(options) {
    const startTime = Date.now();
    const topK = options.topK ?? this.config.topK;
    const minScore = options.minScore ?? this.config.minScore;
    const semanticWeight = options.semanticWeight ?? this.config.semanticWeight;
    const keywordWeight = options.keywordWeight ?? this.config.keywordWeight;
    // Over-fetch from each source so fusion has more than topK items to rank.
    const candidateMultiplier = 3;
    // The two candidate lookups are independent, so run them concurrently.
    const [semanticResult, keywordResult] = await Promise.all([
      this.semanticRetrieval.retrieve({
        query: options.query,
        topK: topK * candidateMultiplier,
        // Get all candidates
        minScore: 0,
        namespace: options.namespace,
        filter: options.filter
      }),
      // NOTE(review): `options.filter` is only applied to the semantic
      // candidates; keyword candidates are limited by namespace alone.
      // Confirm whether `store.query` can honour a filter.
      this.keywordSearch(options.query, {
        limit: topK * candidateMultiplier,
        namespace: options.namespace
      })
    ]);
    const fusedResults = this.fuseResults(
      semanticResult.memories.map((m, i) => ({
        entry: m,
        score: semanticResult.scores?.[i] ?? 0
      })),
      keywordResult,
      semanticWeight,
      keywordWeight
    );
    const filtered = fusedResults
      .filter((r) => r.score >= minScore)
      .slice(0, topK);
    return {
      memories: filtered.map((r) => r.entry),
      scores: filtered.map((r) => r.score),
      totalCandidates: semanticResult.memories.length + keywordResult.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "hybrid",
      metadata: {
        semanticCandidates: semanticResult.memories.length,
        keywordCandidates: keywordResult.length,
        fusionMethod: this.config.fusionMethod
      }
    };
  }
  /**
   * Keyword-based search using text matching.
   *
   * @param {string} query - Query text; also forwarded to `store.query`.
   * @param {object} options - `limit` (max results) and optional `namespace`.
   * @returns {Promise<object[]>} Up to `limit` `{ entry, score }` items with a
   *   positive keyword score, best first.
   */
  async keywordSearch(query, options) {
    const queryTokens = this.tokenize(query);
    const { entries } = await this.store.query({
      query,
      // Get more for better recall
      limit: options.limit * 2,
      namespace: options.namespace
    });
    const results = [];
    for (const entry of entries) {
      const score = this.calculateKeywordScore(queryTokens, entry.content);
      if (score > 0) {
        results.push({ entry, score });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, options.limit);
  }
  /**
   * Tokenize text into terms: lower-cased, non-word characters replaced by
   * spaces, split on whitespace, terms of two characters or fewer dropped.
   *
   * @param {string} text
   * @returns {string[]} Terms in order of appearance (duplicates kept).
   */
  tokenize(text) {
    return text
      .toLowerCase()
      .replace(/[^\w\s]/g, " ")
      .split(/\s+/)
      .filter((t) => t.length > 2);
  }
  /**
   * Calculate a BM25-like keyword score. The inverse document frequency and
   * the average document length are fixed constants here, not corpus
   * statistics.
   *
   * @param {string[]} queryTokens - Tokenized query.
   * @param {string} content - Document text.
   * @returns {number} Score averaged over the query tokens; 0 when the query
   *   has no tokens.
   */
  calculateKeywordScore(queryTokens, content) {
    // Without this guard the final division is 0/0 and the score is NaN.
    if (queryTokens.length === 0) return 0;
    const contentTokens = this.tokenize(content);
    const termFreq = /* @__PURE__ */ new Map();
    for (const token of contentTokens) {
      termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
    }
    const k1 = 1.2;
    const b = 0.75;
    const avgDocLength = 100;
    const idf = 1.5;
    const docLength = contentTokens.length;
    const lengthNorm = k1 * (1 - b + b * docLength / avgDocLength);
    let score = 0;
    for (const token of queryTokens) {
      const tf = termFreq.get(token) ?? 0;
      if (tf > 0) {
        score += idf * (tf * (k1 + 1) / (tf + lengthNorm));
      }
    }
    return score / queryTokens.length;
  }
  /**
   * Fuse semantic and keyword results using the configured fusion method.
   *
   * @param {object[]} semanticResults - `{ entry, score }`, best first.
   * @param {object[]} keywordResults - `{ entry, score }`, best first.
   * @param {number} semanticWeight - Used by weighted fusion only.
   * @param {number} keywordWeight - Used by weighted fusion only.
   * @returns {object[]} Fused `{ entry, score }` items, best first.
   */
  fuseResults(semanticResults, keywordResults, semanticWeight, keywordWeight) {
    if (this.config.fusionMethod === "rrf") {
      return this.reciprocalRankFusion(semanticResults, keywordResults);
    }
    return this.weightedFusion(
      semanticResults,
      keywordResults,
      semanticWeight,
      keywordWeight
    );
  }
  /**
   * Reciprocal Rank Fusion (RRF).
   *
   * Each list contributes `1 / (k + rank)` (rank starting at 1, k = 60) for an
   * entry's best position in that list. The sum is divided by the best
   * achievable value (rank 1 in both lists) so scores fall in (0, 1]. Raw RRF
   * sums never exceed 2/61, which is below the default `minScore` of 0.5, so
   * without this scaling the default configuration filters out every result.
   *
   * @param {object[]} list1 - `{ entry, score }`, best first.
   * @param {object[]} list2 - `{ entry, score }`, best first.
   * @returns {object[]} `{ entry, score }` sorted by descending fused score.
   */
  reciprocalRankFusion(list1, list2) {
    const k = 60;
    const maxScore = 2 / (k + 1);
    const scoreMap = /* @__PURE__ */ new Map();
    const accumulate = (list) => {
      // Only an entry's first (best) position in a given list counts.
      const seen = /* @__PURE__ */ new Set();
      list.forEach(({ entry }, index) => {
        if (seen.has(entry.id)) return;
        seen.add(entry.id);
        const contribution = 1 / (k + index + 1);
        const existing = scoreMap.get(entry.id);
        if (existing) {
          existing.score += contribution;
        } else {
          scoreMap.set(entry.id, { entry, score: contribution });
        }
      });
    };
    accumulate(list1);
    accumulate(list2);
    const results = Array.from(scoreMap.values(), (item) => ({
      entry: item.entry,
      score: item.score / maxScore
    }));
    results.sort((a, b) => b.score - a.score);
    return results;
  }
  /**
   * Weighted score fusion. Each list's scores are divided by
   * `max(largest score in the list, 1)` before being combined.
   *
   * @param {object[]} semanticResults - `{ entry, score }` items.
   * @param {object[]} keywordResults - `{ entry, score }` items.
   * @param {number} semanticWeight - Multiplier for the semantic part.
   * @param {number} keywordWeight - Multiplier for the keyword part.
   * @returns {object[]} `{ entry, score }` sorted by descending combined score.
   */
  weightedFusion(semanticResults, keywordResults, semanticWeight, keywordWeight) {
    const scoreMap = /* @__PURE__ */ new Map();
    const maxSemantic = Math.max(...semanticResults.map((r) => r.score), 1);
    for (const result of semanticResults) {
      scoreMap.set(result.entry.id, {
        entry: result.entry,
        semanticScore: result.score / maxSemantic,
        keywordScore: 0
      });
    }
    const maxKeyword = Math.max(...keywordResults.map((r) => r.score), 1);
    for (const result of keywordResults) {
      const normalizedScore = result.score / maxKeyword;
      const existing = scoreMap.get(result.entry.id);
      if (existing) {
        existing.keywordScore = normalizedScore;
      } else {
        scoreMap.set(result.entry.id, {
          entry: result.entry,
          semanticScore: 0,
          keywordScore: normalizedScore
        });
      }
    }
    const results = Array.from(scoreMap.values(), (item) => ({
      entry: item.entry,
      score: item.semanticScore * semanticWeight + item.keywordScore * keywordWeight
    }));
    results.sort((a, b) => b.score - a.score);
    return results;
  }
  /**
   * Retrieve with an explanation of why each result matched. Unlike
   * `retrieve`, no `minScore` cut-off is applied here.
   *
   * @param {object} options - `query` (text), plus optional `topK`,
   *   `semanticWeight`, `keywordWeight`, `namespace`, `filter`.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "hybrid", explanations }`, where
   *   `explanations[i]` describes `memories[i]`.
   */
  async retrieveWithExplanation(options) {
    const startTime = Date.now();
    const topK = options.topK ?? this.config.topK;
    const [semanticResult, keywordResult] = await Promise.all([
      this.semanticRetrieval.retrieve({
        query: options.query,
        topK: topK * 3,
        minScore: 0,
        namespace: options.namespace,
        // Same filter handling as `retrieve`.
        filter: options.filter
      }),
      this.keywordSearch(options.query, {
        limit: topK * 3,
        namespace: options.namespace
      })
    ]);
    const semanticCandidates = semanticResult.memories.map((m, i) => ({
      entry: m,
      score: semanticResult.scores?.[i] ?? 0
    }));
    const semanticScores = new Map(
      semanticCandidates.map((c) => [c.entry.id, c.score])
    );
    const keywordScores = new Map(
      keywordResult.map((r) => [r.entry.id, r.score])
    );
    const fusedResults = this.fuseResults(
      semanticCandidates,
      keywordResult,
      options.semanticWeight ?? this.config.semanticWeight,
      options.keywordWeight ?? this.config.keywordWeight
    ).slice(0, topK);
    const queryTokens = this.tokenize(options.query);
    const explanations = fusedResults.map((r) => {
      const semScore = semanticScores.get(r.entry.id);
      const kwScore = keywordScores.get(r.entry.id);
      const parts = [];
      if (semScore !== void 0 && semScore > 0.5) {
        parts.push(`semantically similar (${(semScore * 100).toFixed(0)}%)`);
      }
      if (kwScore !== void 0 && kwScore > 0) {
        const loweredContent = r.entry.content.toLowerCase();
        const matchingTokens = queryTokens.filter((t) => loweredContent.includes(t));
        if (matchingTokens.length > 0) {
          parts.push(`keyword matches: "${matchingTokens.join('", "')}"`);
        }
      }
      return parts.length > 0 ? parts.join("; ") : "matched via fuzzy matching";
    });
    return {
      memories: fusedResults.map((r) => r.entry),
      scores: fusedResults.map((r) => r.score),
      totalCandidates: semanticResult.memories.length + keywordResult.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "hybrid",
      explanations
    };
  }
  /**
   * Update configuration (shallow merge over the current one).
   *
   * @param {object} config - Partial configuration.
   */
  configure(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get current configuration.
   *
   * @returns {object} Shallow copy of the configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
|
|
508
|
+
/**
 * Factory for `HybridRetrieval`; forwards all three arguments to the
 * constructor unchanged.
 *
 * @param {object} store - Backing store handed to the constructor.
 * @param {Function} embedFn - Embedding function handed to the constructor.
 * @param {object} [config] - Optional configuration handed to the constructor.
 * @returns {HybridRetrieval} The newly constructed instance.
 */
function createHybridRetrieval(store, embedFn, config) {
  const retrieval = new HybridRetrieval(store, embedFn, config);
  return retrieval;
}
|
|
511
|
+
|
|
512
|
+
// src/retrieval/strategies/TemporalRetrieval.ts
|
|
513
|
+
var TemporalRetrieval = class {
  store;
  config;
  /**
   * Time-aware retrieval: ranks entries by a weighted mix of recency decay,
   * importance and access count.
   *
   * @param {object} store - Backing store; `store.query(options)` is called on it.
   * @param {object} [config] - Optional overrides: `recencyWeight`,
   *   `importanceWeight`, `accessWeight`, `decayFunction` ("exponential",
   *   "linear", "step" or "logarithmic"), `decayHalfLife` (ms), `topK`.
   */
  constructor(store, config = {}) {
    this.store = store;
    // Spread first: an explicitly `undefined`/`null` option must not clobber
    // the default that follows it.
    this.config = {
      ...config,
      recencyWeight: config.recencyWeight ?? 0.5,
      importanceWeight: config.importanceWeight ?? 0.3,
      accessWeight: config.accessWeight ?? 0.2,
      decayFunction: config.decayFunction ?? "exponential",
      // 24 hours
      decayHalfLife: config.decayHalfLife ?? 24 * 60 * 60 * 1e3,
      topK: config.topK ?? 10
    };
  }
  /**
   * Retrieve memories with temporal scoring.
   *
   * @param {object} options - Optional `startTime`, `endTime`, `namespace`,
   *   `types`, `filter`, `topK`, and per-call weight overrides.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "temporal" }`.
   */
  async retrieve(options) {
    const startTime = Date.now();
    const { entries } = await this.store.query({
      startTime: options.startTime,
      endTime: options.endTime,
      namespace: options.namespace,
      types: options.types,
      // Get many candidates for scoring
      limit: 1e3
    });
    const filtered = options.filter
      ? this.applyFilter(entries, options.filter)
      : entries;
    const now = Date.now();
    const scored = filtered.map((entry) => ({
      entry,
      score: this.calculateTemporalScore(entry, now, options)
    }));
    scored.sort((a, b) => b.score - a.score);
    const results = scored.slice(0, options.topK ?? this.config.topK);
    return {
      memories: results.map((r) => r.entry),
      scores: results.map((r) => r.score),
      totalCandidates: filtered.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "temporal"
    };
  }
  /**
   * Retrieve memories from specific time windows.
   *
   * @param {object[]} windows - `{ start, end, label? }` items. A window
   *   without a label is keyed as `"<start>-<end>"`.
   * @param {object} [options] - Options forwarded to `retrieve` for each window.
   * @returns {Promise<Map<string, object>>} Label -> `retrieve` result. A
   *   later window with the same label replaces an earlier one.
   */
  async retrieveFromWindows(windows, options) {
    const results = /* @__PURE__ */ new Map();
    for (const window of windows) {
      const label = window.label ?? `${window.start}-${window.end}`;
      const result = await this.retrieve({
        ...options,
        startTime: window.start,
        endTime: window.end
      });
      results.set(label, result);
    }
    return results;
  }
  /**
   * Get memories from a relative time period ending now.
   *
   * @param {string} period - "hour", "day", "week" or "month" (30 days).
   * @param {object} [options] - Options forwarded to `retrieve`.
   * @returns {Promise<object>} The `retrieve` result for `[now - period, now]`.
   * @throws {RangeError} When `period` is not one of the supported names.
   */
  async retrieveRecent(period, options) {
    const periodMs = /* @__PURE__ */ new Map([
      ["hour", 60 * 60 * 1e3],
      ["day", 24 * 60 * 60 * 1e3],
      ["week", 7 * 24 * 60 * 60 * 1e3],
      ["month", 30 * 24 * 60 * 60 * 1e3]
    ]);
    const span = periodMs.get(period);
    if (span === void 0) {
      // An unknown name would otherwise turn into a NaN start time.
      throw new RangeError(`Unknown period: ${String(period)}`);
    }
    const now = Date.now();
    return this.retrieve({
      ...options,
      startTime: now - span,
      endTime: now
    });
  }
  /**
   * Get trending memories (high access in recent time). Score is accesses per
   * hour of age (age counted as at least one hour) times importance.
   *
   * @param {number} [windowMs] - Look-back window in ms (default 24 hours).
   * @param {object} [options] - Optional `namespace` and `topK`.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "temporal-trending" }`.
   */
  async retrieveTrending(windowMs = 24 * 60 * 60 * 1e3, options) {
    const startTime = Date.now();
    const hourMs = 60 * 60 * 1e3;
    const { entries } = await this.store.query({
      startTime: startTime - windowMs,
      namespace: options?.namespace,
      limit: 1e3
    });
    const scored = entries.map((entry) => {
      const ageHours = Math.max((startTime - entry.timestamp) / hourMs, 1);
      // Missing counters count as 0 so scores stay comparable (never NaN).
      const accessRate = (entry.accessCount ?? 0) / ageHours;
      return {
        entry,
        score: accessRate * (entry.importance ?? 0)
      };
    });
    scored.sort((a, b) => b.score - a.score);
    const results = scored.slice(0, options?.topK ?? this.config.topK);
    return {
      memories: results.map((r) => r.entry),
      scores: results.map((r) => r.score),
      totalCandidates: entries.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "temporal-trending"
    };
  }
  /**
   * Get memories matching a temporal pattern.
   *
   * Walks back `lookbackPeriods` consecutive windows of `pattern.interval` ms
   * from now. When `peakHours`/`peakDays` are given, a window is kept only if
   * the local hour / weekday of its start is listed.
   *
   * @param {object} pattern - `{ interval, type, peakHours?, peakDays? }`.
   * @param {number} [lookbackPeriods=4] - Number of windows to consider.
   * @param {object} [options] - Optional `namespace`, `topK`, weight overrides.
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "temporal-pattern", metadata }`.
   */
  async retrieveByPattern(pattern, lookbackPeriods = 4, options) {
    const now = Date.now();
    const windows = [];
    for (let i = 0; i < lookbackPeriods; i++) {
      const periodStart = now - (i + 1) * pattern.interval;
      const periodEnd = now - i * pattern.interval;
      if (pattern.peakHours || pattern.peakDays) {
        const date = new Date(periodStart);
        if (pattern.peakHours && !pattern.peakHours.includes(date.getHours())) {
          continue;
        }
        if (pattern.peakDays && !pattern.peakDays.includes(date.getDay())) {
          continue;
        }
      }
      windows.push({ start: periodStart, end: periodEnd });
    }
    const allEntries = [];
    for (const window of windows) {
      const { entries } = await this.store.query({
        startTime: window.start,
        endTime: window.end,
        namespace: options?.namespace,
        limit: 100
      });
      allEntries.push(...entries);
    }
    // Adjacent windows share a boundary instant, so the same entry can come
    // back from two queries.
    const uniqueEntries = this.deduplicateEntries(allEntries);
    const scored = uniqueEntries.map((entry) => ({
      entry,
      score: this.calculateTemporalScore(entry, now, options ?? {})
    }));
    scored.sort((a, b) => b.score - a.score);
    const results = scored.slice(0, options?.topK ?? this.config.topK);
    return {
      memories: results.map((r) => r.entry),
      scores: results.map((r) => r.score),
      totalCandidates: uniqueEntries.length,
      retrievalTimeMs: Date.now() - now,
      strategy: "temporal-pattern",
      metadata: {
        pattern: pattern.type,
        windowsMatched: windows.length
      }
    };
  }
  /**
   * Get a timeline of memories grouped into fixed-size buckets. Buckets are
   * aligned to multiples of the bucket size since the Unix epoch.
   *
   * @param {object} options - `bucketSize` ("hour", "day" or "week"), plus
   *   optional `startTime`, `endTime`, `namespace`.
   * @returns {Promise<Map<string, object[]>>} ISO timestamp of the bucket
   *   start -> entries in that bucket.
   * @throws {RangeError} When `bucketSize` is not one of the supported names.
   */
  async getTimeline(options) {
    const bucketMs = /* @__PURE__ */ new Map([
      ["hour", 60 * 60 * 1e3],
      ["day", 24 * 60 * 60 * 1e3],
      ["week", 7 * 24 * 60 * 60 * 1e3]
    ]);
    const size = bucketMs.get(options.bucketSize);
    if (size === void 0) {
      // Fail before querying instead of on the first NaN bucket key.
      throw new RangeError(`Unknown bucket size: ${String(options.bucketSize)}`);
    }
    const { entries } = await this.store.query({
      startTime: options.startTime,
      endTime: options.endTime,
      namespace: options.namespace,
      limit: 1e4
    });
    const timeline = /* @__PURE__ */ new Map();
    for (const entry of entries) {
      const bucketStart = Math.floor(entry.timestamp / size) * size;
      const key = new Date(bucketStart).toISOString();
      const bucket = timeline.get(key);
      if (bucket) {
        bucket.push(entry);
      } else {
        timeline.set(key, [entry]);
      }
    }
    return timeline;
  }
  /**
   * Calculate the temporal score for a memory: a weighted sum of recency
   * decay, importance, and access count (capped at 100 accesses).
   *
   * @param {object} entry - Memory with `timestamp` and optional `importance`
   *   and `accessCount` (missing values count as 0).
   * @param {number} now - Reference time in ms.
   * @param {object} options - Optional per-call `recencyWeight`,
   *   `importanceWeight`, `accessWeight`.
   * @returns {number} Combined score.
   */
  calculateTemporalScore(entry, now, options) {
    const recencyWeight = options.recencyWeight ?? this.config.recencyWeight;
    const importanceWeight = options.importanceWeight ?? this.config.importanceWeight;
    const accessWeight = options.accessWeight ?? this.config.accessWeight;
    const recencyScore = this.calculateDecay(now - entry.timestamp);
    const importanceScore = entry.importance ?? 0;
    const maxAccessCount = 100;
    const accessScore = Math.min((entry.accessCount ?? 0) / maxAccessCount, 1);
    return recencyScore * recencyWeight + importanceScore * importanceWeight + accessScore * accessWeight;
  }
  /**
   * Calculate decay based on the configured function.
   *
   * @param {number} ageMs - Age in ms; negative ages (timestamps ahead of
   *   the reference time) are treated as 0.
   * @returns {number} Decay factor in [0, 1].
   */
  calculateDecay(ageMs) {
    const halfLife = this.config.decayHalfLife;
    // Clamp so entries stamped in the future cannot score above 1.
    const age = Math.max(0, ageMs);
    if (!(halfLife > 0)) {
      // A non-positive half-life would divide by zero or invert the curve;
      // treat it as "anything older than now has fully decayed".
      return age === 0 ? 1 : 0;
    }
    switch (this.config.decayFunction) {
      case "linear":
        return Math.max(0, 1 - age / (halfLife * 2));
      case "step":
        return age <= halfLife ? 1 : 0;
      case "logarithmic":
        return 1 / (1 + Math.log2(1 + age / halfLife));
      case "exponential":
      default:
        return Math.exp(-Math.LN2 * age / halfLife);
    }
  }
  /**
   * Apply a metadata filter to entries. Every key in `filter` must match
   * `entry.metadata[key]`: array values match by membership, other values by
   * strict equality.
   *
   * @param {object[]} entries
   * @param {object} filter - Key -> required value or array of allowed values.
   * @returns {object[]} Entries that satisfy every filter key.
   */
  applyFilter(entries, filter) {
    const conditions = Object.entries(filter);
    return entries.filter((entry) =>
      conditions.every(([key, value]) => {
        // Entries without metadata simply fail to match instead of throwing.
        const entryValue = entry.metadata?.[key];
        return Array.isArray(value)
          ? value.includes(entryValue)
          : entryValue === value;
      })
    );
  }
  /**
   * Deduplicate entries by ID, keeping the first occurrence.
   *
   * @param {object[]} entries
   * @returns {object[]} Entries with unique `id`, original order preserved.
   */
  deduplicateEntries(entries) {
    const seen = /* @__PURE__ */ new Set();
    return entries.filter((entry) => {
      if (seen.has(entry.id)) return false;
      seen.add(entry.id);
      return true;
    });
  }
  /**
   * Update configuration (shallow merge over the current one).
   *
   * @param {object} config - Partial configuration.
   */
  configure(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get current configuration.
   *
   * @returns {object} Shallow copy of the configuration.
   */
  getConfig() {
    return { ...this.config };
  }
};
|
|
776
|
+
/**
 * Factory for `TemporalRetrieval`; forwards both arguments to the
 * constructor unchanged.
 *
 * @param {object} store - Backing store handed to the constructor.
 * @param {object} [config] - Optional configuration handed to the constructor.
 * @returns {TemporalRetrieval} The newly constructed instance.
 */
function createTemporalRetrieval(store, config) {
  const retrieval = new TemporalRetrieval(store, config);
  return retrieval;
}
|
|
779
|
+
/**
 * Factories for common time windows.
 *
 * Every factory returns `{ start, end, label }` where `start` and `end` are
 * epoch milliseconds. Each window is computed from a single clock reading,
 * so the rolling windows always span exactly their nominal duration.
 */
var TimeWindows = {
  /** The 60 minutes ending now. */
  lastHour: () => {
    const now = Date.now();
    return { start: now - 60 * 60 * 1e3, end: now, label: "last-hour" };
  },
  /** The 24 hours ending now. */
  lastDay: () => {
    const now = Date.now();
    return { start: now - 24 * 60 * 60 * 1e3, end: now, label: "last-day" };
  },
  /** The 7 x 24 hours ending now. */
  lastWeek: () => {
    const now = Date.now();
    return { start: now - 7 * 24 * 60 * 60 * 1e3, end: now, label: "last-week" };
  },
  /** The 30 x 24 hours ending now (a fixed 30-day span, not a calendar month). */
  lastMonth: () => {
    const now = Date.now();
    return { start: now - 30 * 24 * 60 * 60 * 1e3, end: now, label: "last-month" };
  },
  /** From local midnight of the current day up to now. */
  today: () => {
    const now = /* @__PURE__ */ new Date();
    const startOfDay = new Date(
      now.getFullYear(),
      now.getMonth(),
      now.getDate()
    ).getTime();
    return {
      start: startOfDay,
      end: now.getTime(),
      label: "today"
    };
  },
  /** From local midnight of the previous day to local midnight of the current day. */
  yesterday: () => {
    const now = /* @__PURE__ */ new Date();
    const year = now.getFullYear();
    const month = now.getMonth();
    const day = now.getDate();
    // The Date constructor normalises day 0 to the last day of the previous
    // month, so `day - 1` is safe on the first of a month.
    return {
      start: new Date(year, month, day - 1).getTime(),
      end: new Date(year, month, day).getTime(),
      label: "yesterday"
    };
  },
  /**
   * Window between two caller-supplied Date objects.
   *
   * @param {Date} startDate Window start.
   * @param {Date} endDate   Window end.
   * @param {string} [label] Passed through unchanged.
   */
  custom: (startDate, endDate, label) => ({
    start: startDate.getTime(),
    end: endDate.getTime(),
    label
  })
};
|
|
837
|
+
|
|
838
|
+
// src/retrieval/strategies/RetrievalPipeline.ts
|
|
839
|
+
/**
 * Runs scored candidates through an ordered list of named stages.
 *
 * A candidate is `{ entry, score }`. A stage is a function
 * `(context, stageConfig) => context | Promise<context>` where the context is
 * `{ query, candidates, metadata, timing }`. Stages that fail, time out or
 * return an unusable context are logged and skipped; the pipeline continues
 * with the last good context.
 */
var RetrievalPipeline = class {
  /** Stage handlers keyed by stage name. */
  stages = /* @__PURE__ */ new Map();
  /** Effective configuration: caller values with defaults filled in. */
  config;
  /**
   * @param {object} [config]
   * @param {Array<object>} [config.stages] Ordered stage configs (`{ name, params?, enabled? }`).
   * @param {number} [config.maxCandidates=100] Cap applied to the final result.
   * @param {number} [config.minScore=0] Final score threshold (only applied when > 0).
   * @param {number} [config.timeout=30000] Per-stage timeout in milliseconds.
   */
  constructor(config = { stages: [] }) {
    this.config = {
      ...config,
      // Defaults are applied after the spread so that an explicitly
      // `undefined` value cannot erase them, and the stage list is copied so
      // addStage() never mutates an array owned by the caller.
      stages: [...config.stages ?? []],
      maxCandidates: config.maxCandidates ?? 100,
      minScore: config.minScore ?? 0,
      timeout: config.timeout ?? 3e4
    };
    this.registerBuiltInStages();
  }
  /**
   * Register built-in stages: filter, boost, rerank, dedupe, diversify,
   * truncate and enrich. Field lookups read `entry.metadata[field]` first and
   * fall back to `entry[field]` when that is null/undefined.
   */
  registerBuiltInStages() {
    // filter: keep candidates matching every key of params.filters. An array
    // value matches by inclusion, anything else by strict equality.
    this.register("filter", (ctx, config) => {
      const filters = config.params?.filters;
      if (!filters) return Promise.resolve(ctx);
      const filtered = ctx.candidates.filter((c) => {
        for (const [key, value] of Object.entries(filters)) {
          const entryValue = c.entry.metadata[key] ?? c.entry[key];
          if (Array.isArray(value)) {
            if (!value.includes(entryValue)) return false;
          } else if (entryValue !== value) {
            return false;
          }
        }
        return true;
      });
      return Promise.resolve({ ...ctx, candidates: filtered });
    });
    // boost: multiply the score by `factor` for each params.boosts item whose
    // field strictly equals `value`, then sort by score, highest first.
    this.register("boost", (ctx, config) => {
      const boosts = config.params?.boosts;
      if (!boosts) return Promise.resolve(ctx);
      const boosted = ctx.candidates.map((c) => {
        let newScore = c.score;
        for (const boost of boosts) {
          const fieldValue = c.entry.metadata[boost.field] ?? c.entry[boost.field];
          if (fieldValue === boost.value) {
            newScore *= boost.factor;
          }
        }
        return { ...c, score: newScore };
      });
      boosted.sort((a, b) => b.score - a.score);
      return Promise.resolve({ ...ctx, candidates: boosted });
    });
    // rerank: weighted average over params.weights. The key "originalScore"
    // refers to the candidate's current score. Non-numeric fields add nothing
    // to the sum, but their weight still counts towards the divisor.
    this.register("rerank", (ctx, config) => {
      const weights = config.params?.weights;
      if (!weights) return Promise.resolve(ctx);
      const reranked = ctx.candidates.map((c) => {
        let newScore = 0;
        let totalWeight = 0;
        for (const [field, weight] of Object.entries(weights)) {
          if (field === "originalScore") {
            newScore += c.score * weight;
          } else {
            const value = c.entry.metadata[field] ?? c.entry[field];
            if (typeof value === "number") {
              newScore += value * weight;
            }
          }
          totalWeight += weight;
        }
        return {
          ...c,
          score: totalWeight > 0 ? newScore / totalWeight : c.score
        };
      });
      reranked.sort((a, b) => b.score - a.score);
      return Promise.resolve({ ...ctx, candidates: reranked });
    });
    // dedupe: one candidate per dedupe key (params.field, default "content").
    // A later duplicate with a strictly higher score takes over the slot of
    // the first occurrence, so output order follows first appearance.
    this.register("dedupe", (ctx, config) => {
      const field = config.params?.field ?? "content";
      // key -> position in `deduped`; avoids rescanning the output array.
      const slotByKey = /* @__PURE__ */ new Map();
      const deduped = [];
      for (const candidate of ctx.candidates) {
        const key = this.getDedupeKey(candidate.entry, field);
        const slot = slotByKey.get(key);
        if (slot === void 0) {
          slotByKey.set(key, deduped.length);
          deduped.push(candidate);
        } else if (candidate.score > deduped[slot].score) {
          deduped[slot] = candidate;
        }
      }
      return Promise.resolve({ ...ctx, candidates: deduped });
    });
    // diversify: keep at most params.maxPerCategory (default 3) candidates per
    // value of params.field (default "type"); missing values count as "unknown".
    this.register("diversify", (ctx, config) => {
      const field = config.params?.field ?? "type";
      const maxPerCategory = config.params?.maxPerCategory ?? 3;
      const categoryCounts = /* @__PURE__ */ new Map();
      const diversified = [];
      for (const candidate of ctx.candidates) {
        const category = String(
          candidate.entry.metadata[field] ?? candidate.entry[field] ?? "unknown"
        );
        const count = categoryCounts.get(category) ?? 0;
        if (count < maxPerCategory) {
          diversified.push(candidate);
          categoryCounts.set(category, count + 1);
        }
      }
      return Promise.resolve({ ...ctx, candidates: diversified });
    });
    // truncate: keep the first params.limit candidates (default: maxCandidates).
    this.register("truncate", (ctx, config) => {
      const limit = config.params?.limit ?? this.config.maxCandidates;
      return Promise.resolve({
        ...ctx,
        candidates: ctx.candidates.slice(0, limit)
      });
    });
    // enrich: call params.enrichFn(entry) for all candidates concurrently and
    // merge each result into a copy of that entry's metadata.
    this.register("enrich", async (ctx, config) => {
      const enrichFn = config.params?.enrichFn;
      if (!enrichFn) return ctx;
      const enriched = await Promise.all(
        ctx.candidates.map(async (c) => {
          const enrichment = await enrichFn(c.entry);
          return {
            ...c,
            entry: {
              ...c.entry,
              metadata: { ...c.entry.metadata, ...enrichment }
            }
          };
        })
      );
      return { ...ctx, candidates: enriched };
    });
  }
  /**
   * Register a custom stage. Registering an existing name replaces it.
   *
   * @param {string} name Stage name referenced from stage configs.
   * @param {Function} stage `(context, stageConfig) => context | Promise<context>`.
   */
  register(name, stage) {
    this.stages.set(name, stage);
  }
  /**
   * Execute the pipeline.
   *
   * Enabled stages run in configured order. Unknown stage names are warned
   * about and skipped. A stage that throws, rejects, times out or does not
   * produce a context with a `candidates` array is logged and skipped, and
   * the previous context is carried forward. Afterwards the `minScore`
   * threshold (when > 0) and the `maxCandidates` cap are applied.
   *
   * @param query Passed to stages as `context.query`.
   * @param {Array<{entry: object, score: number}>} initialCandidates
   * @returns {Promise<object>} `{ memories, scores, totalCandidates,
   *   retrievalTimeMs, strategy: "pipeline", metadata }`. `metadata.timing`
   *   holds per-stage durations for stages that completed, and
   *   `metadata.stagesExecuted` is the number of enabled stage configs
   *   (including ones that were skipped or failed).
   */
  async execute(query, initialCandidates) {
    const startTime = Date.now();
    let context = {
      query,
      candidates: initialCandidates,
      metadata: {},
      timing: {}
    };
    for (const stageConfig of this.config.stages) {
      if (stageConfig.enabled === false) continue;
      const stage = this.stages.get(stageConfig.name);
      if (!stage) {
        console.warn(
          `Pipeline stage "${stageConfig.name}" not found, skipping`
        );
        continue;
      }
      const stageStart = Date.now();
      try {
        const next = await this.executeWithTimeout(
          stage(context, stageConfig),
          this.config.timeout
        );
        // Validate before committing: a stage that forgot to return its
        // context must not destroy the state accumulated so far.
        if (!next || !Array.isArray(next.candidates)) {
          throw new TypeError(
            `Stage "${stageConfig.name}" did not return a context with a candidates array`
          );
        }
        const timing = next.timing ?? context.timing;
        timing[stageConfig.name] = Date.now() - stageStart;
        context = {
          ...next,
          metadata: next.metadata ?? context.metadata,
          timing
        };
      } catch (error) {
        console.error(`Pipeline stage "${stageConfig.name}" failed:`, error);
      }
    }
    if (this.config.minScore > 0) {
      context.candidates = context.candidates.filter(
        (c) => c.score >= this.config.minScore
      );
    }
    context.candidates = context.candidates.slice(0, this.config.maxCandidates);
    return {
      memories: context.candidates.map((c) => c.entry),
      scores: context.candidates.map((c) => c.score),
      totalCandidates: initialCandidates.length,
      retrievalTimeMs: Date.now() - startTime,
      strategy: "pipeline",
      metadata: {
        ...context.metadata,
        timing: context.timing,
        stagesExecuted: this.config.stages.filter((s) => s.enabled !== false).length
      }
    };
  }
  /**
   * Execute promise with timeout.
   *
   * Settles like `promise`, unless `timeout` milliseconds pass first, in
   * which case it rejects with `Error("Stage timeout")`. The underlying work
   * is not cancelled. The timer is always cleared so that finished stages do
   * not leave live timers behind that keep the event loop running.
   *
   * @param promise Value or promise produced by a stage.
   * @param {number} timeout Milliseconds to wait.
   */
  async executeWithTimeout(promise, timeout) {
    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error("Stage timeout")), timeout);
    });
    try {
      return await Promise.race([promise, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }
  /**
   * Get dedupe key for an entry.
   *
   * "id" uses the entry id as-is; "content" uses the lower-cased, trimmed
   * content cut to 200 characters; any other field is looked up in metadata,
   * then on the entry, falling back to the id, and is stringified.
   */
  getDedupeKey(entry, field) {
    if (field === "id") return entry.id;
    if (field === "content") {
      return entry.content.toLowerCase().trim().slice(0, 200);
    }
    return String(
      entry.metadata[field] ?? entry[field] ?? entry.id
    );
  }
  /**
   * Add a stage to the end of the pipeline.
   *
   * @returns {this} The pipeline, for chaining.
   */
  addStage(config) {
    this.config.stages.push(config);
    return this;
  }
  /**
   * Remove every configured stage with the given name.
   *
   * @returns {this} The pipeline, for chaining.
   */
  removeStage(name) {
    this.config.stages = this.config.stages.filter((s) => s.name !== name);
    return this;
  }
  /**
   * Update pipeline configuration by shallow-merging `config` over the
   * current one.
   */
  configure(config) {
    this.config = { ...this.config, ...config };
  }
  /**
   * Get current configuration as a shallow copy.
   */
  getConfig() {
    return { ...this.config };
  }
  /**
   * Get registered stage names (built-in and custom).
   */
  getStageNames() {
    return Array.from(this.stages.keys());
  }
};
|
|
1086
|
+
/**
 * Fluent builder for RetrievalPipeline.
 *
 * Each stage method appends one stage config in call order and returns the
 * builder. build() may be called repeatedly; every pipeline receives its own
 * copy of the stage list.
 */
var PipelineBuilder = class {
  /** Stage configs in the order they were added. */
  stages = [];
  maxCandidates = 100;
  minScore = 0;
  timeout = 3e4;
  /** Handlers added through custom(), registered on the pipeline by build(). */
  customStages = /* @__PURE__ */ new Map();
  /**
   * Add a filter stage.
   *
   * @param {object} filters Field -> required value (or array of accepted values).
   */
  filter(filters) {
    this.stages.push({
      name: "filter",
      params: { filters }
    });
    return this;
  }
  /**
   * Add a boost stage.
   *
   * @param {Array<{field: string, value: *, factor: number}>} boosts
   */
  boost(boosts) {
    this.stages.push({
      name: "boost",
      params: { boosts }
    });
    return this;
  }
  /**
   * Add a rerank stage.
   *
   * @param {object} weights Field name (or "originalScore") -> weight.
   */
  rerank(weights) {
    this.stages.push({
      name: "rerank",
      params: { weights }
    });
    return this;
  }
  /**
   * Add a dedupe stage.
   *
   * @param {string} [field="content"] Field used to build the dedupe key.
   * @param {number} [similarity=0.95] Stored in the stage params. The
   *   built-in "dedupe" stage only reads `field`, so this value has no effect
   *   unless a replacement stage registered under that name uses it.
   */
  dedupe(field = "content", similarity = 0.95) {
    this.stages.push({
      name: "dedupe",
      params: { field, similarity }
    });
    return this;
  }
  /**
   * Add a diversify stage.
   *
   * @param {string} [field="type"] Field whose value defines the category.
   * @param {number} [maxPerCategory=3] Maximum candidates kept per category.
   */
  diversify(field = "type", maxPerCategory = 3) {
    this.stages.push({
      name: "diversify",
      params: { field, maxPerCategory }
    });
    return this;
  }
  /**
   * Add a truncate stage.
   *
   * @param {number} [limit] Number of candidates to keep. When omitted the
   *   built-in stage falls back to the pipeline's maxCandidates.
   */
  truncate(limit) {
    this.stages.push({
      name: "truncate",
      params: { limit }
    });
    return this;
  }
  /**
   * Add an enrich stage.
   *
   * @param {Function} enrichFn Called with each entry; its (awaited) result
   *   is merged into the entry's metadata by the built-in stage.
   */
  enrich(enrichFn) {
    this.stages.push({
      name: "enrich",
      params: { enrichFn }
    });
    return this;
  }
  /**
   * Add a custom stage.
   *
   * @param {string} name Stage name; reusing a name replaces the earlier handler.
   * @param {Function} stage Stage handler registered on the built pipeline.
   * @param {object} [params] Params stored on the stage config.
   */
  custom(name, stage, params) {
    this.customStages.set(name, stage);
    this.stages.push({ name, params });
    return this;
  }
  /**
   * Set maximum candidates.
   */
  withMaxCandidates(max) {
    this.maxCandidates = max;
    return this;
  }
  /**
   * Set minimum score.
   */
  withMinScore(min) {
    this.minScore = min;
    return this;
  }
  /**
   * Set timeout (milliseconds, per stage).
   */
  withTimeout(ms) {
    this.timeout = ms;
    return this;
  }
  /**
   * Build the pipeline.
   *
   * The stage list is copied, so stages added to the builder afterwards, or
   * added to the returned pipeline, do not leak into each other or into
   * other pipelines built from this builder.
   *
   * @returns {RetrievalPipeline}
   */
  build() {
    const pipeline = new RetrievalPipeline({
      stages: [...this.stages],
      maxCandidates: this.maxCandidates,
      minScore: this.minScore,
      timeout: this.timeout
    });
    for (const [name, stage] of this.customStages) {
      pipeline.register(name, stage);
    }
    return pipeline;
  }
};
|
|
1207
|
+
/**
 * Factory for PipelineBuilder.
 *
 * @returns A new, empty PipelineBuilder.
 */
function createPipelineBuilder() {
  const builder = new PipelineBuilder();
  return builder;
}
|
|
1210
|
+
/**
 * Factory for RetrievalPipeline.
 *
 * @param config Forwarded unchanged to the RetrievalPipeline constructor.
 * @returns A new RetrievalPipeline instance.
 */
function createRetrievalPipeline(config) {
  const pipeline = new RetrievalPipeline(config);
  return pipeline;
}
|
|
1213
|
+
|
|
1214
|
+
export {
|
|
1215
|
+
SemanticRetrieval,
|
|
1216
|
+
createSemanticRetrieval,
|
|
1217
|
+
HybridRetrieval,
|
|
1218
|
+
createHybridRetrieval,
|
|
1219
|
+
TemporalRetrieval,
|
|
1220
|
+
createTemporalRetrieval,
|
|
1221
|
+
TimeWindows,
|
|
1222
|
+
RetrievalPipeline,
|
|
1223
|
+
PipelineBuilder,
|
|
1224
|
+
createPipelineBuilder,
|
|
1225
|
+
createRetrievalPipeline
|
|
1226
|
+
};
|