@mnemoai/core 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +3395 -0
- package/openclaw.plugin.json +815 -0
- package/package.json +59 -0
- package/src/access-tracker.ts +341 -0
- package/src/adapters/README.md +78 -0
- package/src/adapters/chroma.ts +206 -0
- package/src/adapters/lancedb.ts +237 -0
- package/src/adapters/pgvector.ts +218 -0
- package/src/adapters/qdrant.ts +191 -0
- package/src/adaptive-retrieval.ts +90 -0
- package/src/audit-log.ts +238 -0
- package/src/chunker.ts +254 -0
- package/src/config.ts +271 -0
- package/src/decay-engine.ts +238 -0
- package/src/embedder.ts +735 -0
- package/src/extraction-prompts.ts +339 -0
- package/src/license.ts +258 -0
- package/src/llm-client.ts +125 -0
- package/src/mcp-server.ts +415 -0
- package/src/memory-categories.ts +71 -0
- package/src/memory-upgrader.ts +388 -0
- package/src/migrate.ts +364 -0
- package/src/mnemo.ts +142 -0
- package/src/noise-filter.ts +97 -0
- package/src/noise-prototypes.ts +164 -0
- package/src/observability.ts +81 -0
- package/src/query-tracker.ts +57 -0
- package/src/reflection-event-store.ts +98 -0
- package/src/reflection-item-store.ts +112 -0
- package/src/reflection-mapped-metadata.ts +84 -0
- package/src/reflection-metadata.ts +23 -0
- package/src/reflection-ranking.ts +33 -0
- package/src/reflection-retry.ts +181 -0
- package/src/reflection-slices.ts +265 -0
- package/src/reflection-store.ts +602 -0
- package/src/resonance-state.ts +85 -0
- package/src/retriever.ts +1510 -0
- package/src/scopes.ts +375 -0
- package/src/self-improvement-files.ts +143 -0
- package/src/semantic-gate.ts +121 -0
- package/src/session-recovery.ts +138 -0
- package/src/smart-extractor.ts +923 -0
- package/src/smart-metadata.ts +561 -0
- package/src/storage-adapter.ts +153 -0
- package/src/store.ts +1330 -0
- package/src/tier-manager.ts +189 -0
- package/src/tools.ts +1292 -0
- package/src/wal-recovery.ts +172 -0
- package/test/core.test.mjs +301 -0
package/src/retriever.ts
ADDED
|
@@ -0,0 +1,1510 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
/**
|
|
3
|
+
* Hybrid Retrieval System
|
|
4
|
+
* Combines vector search + BM25 full-text search with RRF fusion
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { MemoryEntry, MemoryStore, MemorySearchResult } from "./store.js";
import type { Embedder } from "./embedder.js";
import type { DecayEngine, DecayableMemory } from "./decay-engine.js";
import type { TierManager } from "./tier-manager.js";
import { filterNoise } from "./noise-filter.js";
import { toLifecycleMemory, getDecayableFromEntry } from "./smart-metadata.js";
import { getAdaptiveThreshold, recordResonanceScore } from "./resonance-state.js";
import { requirePro } from "./license.js";
|
|
16
|
+
|
|
17
|
+
// Pro: Access tracking & query tracking — graceful degradation without license
|
|
18
|
+
type AccessTracker = { bumpAccess: (id: string) => Promise<void> };
|
|
19
|
+
let _parseAccessMetadata: ((m: any) => { accessCount: number; lastAccessedAt: number }) | null = null;
|
|
20
|
+
let _computeEffectiveHalfLife: ((hl: number, ac: number, la: number, rf?: number, mx?: number) => number) | null = null;
|
|
21
|
+
let _recordQuery: ((...args: any[]) => void) | null = null;
|
|
22
|
+
|
|
23
|
+
if (requirePro("access-tracking")) {
|
|
24
|
+
import("./access-tracker.js").then((mod) => {
|
|
25
|
+
_parseAccessMetadata = mod.parseAccessMetadata;
|
|
26
|
+
_computeEffectiveHalfLife = mod.computeEffectiveHalfLife;
|
|
27
|
+
}).catch(() => {});
|
|
28
|
+
}
|
|
29
|
+
if (requirePro("query-tracking")) {
|
|
30
|
+
import("./query-tracker.js").then((mod) => {
|
|
31
|
+
_recordQuery = mod.recordQuery;
|
|
32
|
+
}).catch(() => {});
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Fallbacks for Core mode (no access reinforcement)
|
|
36
|
+
function parseAccessMetadata(m: any): { accessCount: number; lastAccessedAt: number } {
|
|
37
|
+
if (_parseAccessMetadata) return _parseAccessMetadata(m);
|
|
38
|
+
return { accessCount: 0, lastAccessedAt: 0 };
|
|
39
|
+
}
|
|
40
|
+
function computeEffectiveHalfLife(hl: number, _ac: number, _la: number, _rf?: number, _mx?: number): number {
|
|
41
|
+
if (_computeEffectiveHalfLife) return _computeEffectiveHalfLife(hl, _ac, _la, _rf, _mx);
|
|
42
|
+
return hl; // Core: use fixed half-life without reinforcement
|
|
43
|
+
}
|
|
44
|
+
function recordQuery(data: any): void {
|
|
45
|
+
_recordQuery?.(data);
|
|
46
|
+
}
|
|
47
|
+
import { appendFile } from "node:fs/promises";
|
|
48
|
+
import { homedir } from "node:os";
|
|
49
|
+
import { join } from "node:path";
|
|
50
|
+
|
|
51
|
+
// ============================================================================
|
|
52
|
+
// Graphiti Graph Search Integration (3rd retrieval path)
|
|
53
|
+
// ============================================================================
|
|
54
|
+
|
|
55
|
+
/** Lazy-read env at call time — openclaw.json env may inject after module load. */
|
|
56
|
+
function getGraphitiConfig() {
|
|
57
|
+
return {
|
|
58
|
+
enabled: process.env.GRAPHITI_ENABLED === "true",
|
|
59
|
+
baseUrl: process.env.GRAPHITI_BASE_URL || "http://127.0.0.1:18799",
|
|
60
|
+
timeoutMs: 3000,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/** Shape of a fact row returned by the Graphiti /search and /spread endpoints. */
interface GraphitiFact {
  fact: string;
  // Edge endpoint names vary by endpoint/version; any subset may be present.
  source_node?: string;
  target_node?: string;
  from_node?: string;
  to_node?: string;
  created_at?: string;
  valid_at?: string | null;
  score?: number | null;
  /** Graph degree of the fact's node — used as a small relevance boost. */
  degree?: number;
  /** "search" = direct hit, "spread" = reached via graph traversal. */
  source?: "search" | "spread";
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Query Graphiti /spread for graph-traversal results.
|
|
79
|
+
* Returns synthetic MemorySearchResult entries. Fails silently (returns []).
|
|
80
|
+
*/
|
|
81
|
+
async function graphitiSpreadSearch(
|
|
82
|
+
query: string,
|
|
83
|
+
groupId: string = "default",
|
|
84
|
+
searchLimit: number = 3,
|
|
85
|
+
spreadLimit: number = 3,
|
|
86
|
+
): Promise<Array<MemorySearchResult & { rank: number }>> {
|
|
87
|
+
const cfg = getGraphitiConfig();
|
|
88
|
+
if (!cfg.enabled) return [];
|
|
89
|
+
|
|
90
|
+
try {
|
|
91
|
+
const controller = new AbortController();
|
|
92
|
+
const timeout = setTimeout(() => controller.abort(), cfg.timeoutMs);
|
|
93
|
+
const resp = await fetch(`${cfg.baseUrl}/spread`, {
|
|
94
|
+
method: "POST",
|
|
95
|
+
headers: { "Content-Type": "application/json" },
|
|
96
|
+
body: JSON.stringify({
|
|
97
|
+
query,
|
|
98
|
+
group_id: groupId,
|
|
99
|
+
search_limit: searchLimit,
|
|
100
|
+
spread_depth: 1,
|
|
101
|
+
spread_limit: spreadLimit,
|
|
102
|
+
}),
|
|
103
|
+
signal: controller.signal,
|
|
104
|
+
});
|
|
105
|
+
clearTimeout(timeout);
|
|
106
|
+
if (!resp.ok) return [];
|
|
107
|
+
|
|
108
|
+
const facts: GraphitiFact[] = await resp.json();
|
|
109
|
+
if (!Array.isArray(facts) || facts.length === 0) return [];
|
|
110
|
+
|
|
111
|
+
const seen = new Set<string>();
|
|
112
|
+
const results: Array<MemorySearchResult & { rank: number }> = [];
|
|
113
|
+
|
|
114
|
+
for (let i = 0; i < facts.length; i++) {
|
|
115
|
+
const f = facts[i];
|
|
116
|
+
const factText = f.fact?.trim();
|
|
117
|
+
if (!factText || factText.length < 5 || seen.has(factText)) continue;
|
|
118
|
+
seen.add(factText);
|
|
119
|
+
|
|
120
|
+
const nodes = [f.source_node, f.target_node, f.from_node, f.to_node]
|
|
121
|
+
.filter(Boolean).join(" → ");
|
|
122
|
+
const text = nodes ? `[图谱] ${factText} (${nodes})` : `[图谱] ${factText}`;
|
|
123
|
+
|
|
124
|
+
const baseScore = f.source === "search" ? 0.75 : 0.45;
|
|
125
|
+
const degreeBoost = f.degree ? Math.min(0.15, Math.log1p(f.degree) * 0.02) : 0;
|
|
126
|
+
|
|
127
|
+
results.push({
|
|
128
|
+
entry: {
|
|
129
|
+
id: `graphiti-${i}-${Date.now()}`,
|
|
130
|
+
text,
|
|
131
|
+
category: "entity" as const,
|
|
132
|
+
importance: 0.8,
|
|
133
|
+
timestamp: f.created_at ? new Date(f.created_at).getTime() : Date.now(),
|
|
134
|
+
scope: "global",
|
|
135
|
+
},
|
|
136
|
+
score: Math.min(1.0, baseScore + degreeBoost),
|
|
137
|
+
rank: i + 1,
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
return results;
|
|
141
|
+
} catch {
|
|
142
|
+
return []; // Silent fallback
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// ============================================================================
|
|
147
|
+
// Types & Configuration
|
|
148
|
+
// ============================================================================
|
|
149
|
+
|
|
150
|
+
/** Tuning knobs for the hybrid retrieval pipeline. */
export interface RetrievalConfig {
  /** "hybrid" = vector + BM25 fusion; "vector" = vector search only. */
  mode: "hybrid" | "vector";
  /** Relative weight of the vector path in score fusion. */
  vectorWeight: number;
  /** Relative weight of the BM25 full-text path in score fusion. */
  bm25Weight: number;
  /** Baseline minimum score threshold for candidates. */
  minScore: number;
  /** Reranking strategy applied to the candidate pool. */
  rerank: "cross-encoder" | "lightweight" | "none";
  /** Number of candidates gathered before rerank/trim. */
  candidatePoolSize: number;
  /** Recency boost half-life in days (default: 14). Set 0 to disable. */
  recencyHalfLifeDays: number;
  /** Max recency boost factor (default: 0.10) */
  recencyWeight: number;
  /** Filter noise from results (default: true) */
  filterNoise: boolean;
  /** Reranker API key (enables cross-encoder reranking; rerank is skipped without it) */
  rerankApiKey?: string;
  /** Reranker model (default: jina-reranker-v3) */
  rerankModel?: string;
  /** Reranker API endpoint (default: https://api.jina.ai/v1/rerank). */
  rerankEndpoint?: string;
  /** Reranker provider format. Determines request/response shape and auth header.
   * - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score
   * - "siliconflow": same format as jina (alias, for clarity)
   * - "voyage": Authorization: Bearer, string[] documents, data[].relevance_score
   * - "pinecone": Api-Key header, {text}[] documents, data[].score
   * - "ollama": no auth header (local), string[] documents, results[].relevance_score */
  rerankProvider?: "jina" | "siliconflow" | "voyage" | "pinecone" | "ollama";
  /**
   * Length normalization: penalize long entries that dominate via sheer keyword
   * density. Formula: score *= 1 / (1 + log2(charLen / anchor)).
   * anchor = reference length (default: 500 chars). Entries shorter than anchor
   * get a slight boost; longer entries get penalized progressively.
   * Set 0 to disable. (default: 500)
   */
  lengthNormAnchor: number;
  /**
   * Hard cutoff after rerank: discard results below this score.
   * Applied after all scoring stages (rerank, recency, importance, length norm).
   * Higher = fewer but more relevant results. (default: 0.35)
   */
  hardMinScore: number;
  /**
   * Time decay half-life in days. Entries older than this lose score.
   * Different from recencyBoost (additive bonus for new entries):
   * this is a multiplicative penalty for old entries.
   * Formula: score *= 0.5 + 0.5 * exp(-ageDays / halfLife)
   * At halfLife days: ~0.68x. At 2*halfLife: ~0.59x. At 4*halfLife: ~0.52x.
   * Set 0 to disable. (default: 60)
   */
  timeDecayHalfLifeDays: number;
  /** Access reinforcement factor for time decay half-life extension.
   * Higher = stronger reinforcement. 0 to disable. (default: 0.5) */
  reinforcementFactor: number;
  /** Maximum half-life multiplier from access reinforcement.
   * Prevents frequently accessed memories from becoming immortal. (default: 3) */
  maxHalfLifeMultiplier: number;
  /** Enable multi-hop query routing: detected multi-hop queries skip Graphiti
   * spread and rely on LanceDB secondary retrieval instead. (default: true) */
  multiHopRouting: boolean;
}
|
|
208
|
+
|
|
209
|
+
/** A single retrieval request. */
export interface RetrievalContext {
  /** Free-text query; auto-recall queries are stripped of metadata envelopes first. */
  query: string;
  /** Max results to return (clamped to [1, 20] by retrieve()). */
  limit: number;
  /** Optional scope allow-list; the first entry also selects the Graphiti group. */
  scopeFilter?: string[];
  /** Optional category filter. */
  category?: string;
  /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated, "cli" for CLI commands. */
  source?: "manual" | "auto-recall" | "cli";
}
|
|
217
|
+
|
|
218
|
+
export interface RetrievalResult extends MemorySearchResult {
|
|
219
|
+
sources: {
|
|
220
|
+
vector?: { score: number; rank: number };
|
|
221
|
+
bm25?: { score: number; rank: number };
|
|
222
|
+
graphiti?: { score: number; rank: number };
|
|
223
|
+
fused?: { score: number };
|
|
224
|
+
reranked?: { score: number };
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// ============================================================================
|
|
229
|
+
// Default Configuration
|
|
230
|
+
// ============================================================================
|
|
231
|
+
|
|
232
|
+
/** Production defaults; field semantics are documented on RetrievalConfig. */
export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
  mode: "hybrid",
  // Vector similarity dominates fusion 70/30 over BM25 keyword matching.
  vectorWeight: 0.7,
  bm25Weight: 0.3,
  minScore: 0.3,
  rerank: "cross-encoder",
  candidatePoolSize: 20,
  recencyHalfLifeDays: 14,
  recencyWeight: 0.1,
  filterNoise: true,
  rerankModel: "jina-reranker-v3",
  rerankEndpoint: "https://api.jina.ai/v1/rerank",
  // No rerankApiKey / rerankProvider defaults: reranking is skipped without a
  // key, and the provider format falls back to the Jina wire format.
  lengthNormAnchor: 500,
  hardMinScore: 0.35,
  timeDecayHalfLifeDays: 60,
  reinforcementFactor: 0.5,
  maxHalfLifeMultiplier: 3,
  multiHopRouting: true,
};
|
|
251
|
+
|
|
252
|
+
// ============================================================================
|
|
253
|
+
// Utility Functions
|
|
254
|
+
// ============================================================================
|
|
255
|
+
|
|
256
|
+
function clampInt(value: number, min: number, max: number): number {
|
|
257
|
+
if (!Number.isFinite(value)) return min;
|
|
258
|
+
return Math.min(max, Math.max(min, Math.floor(value)));
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
function clamp01(value: number, fallback: number): number {
|
|
262
|
+
if (!Number.isFinite(value)) return Number.isFinite(fallback) ? fallback : 0;
|
|
263
|
+
return Math.min(1, Math.max(0, value));
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
function clamp01WithFloor(value: number, floor: number): number {
|
|
267
|
+
const safeFloor = clamp01(floor, 0);
|
|
268
|
+
return Math.max(safeFloor, clamp01(value, safeFloor));
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// ============================================================================
|
|
272
|
+
// Rerank Provider Adapters
|
|
273
|
+
// ============================================================================
|
|
274
|
+
|
|
275
|
+
type RerankProvider = "jina" | "siliconflow" | "voyage" | "pinecone" | "ollama";
|
|
276
|
+
|
|
277
|
+
interface RerankItem {
|
|
278
|
+
index: number;
|
|
279
|
+
score: number;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/** Build provider-specific request headers and body */
|
|
283
|
+
function buildRerankRequest(
|
|
284
|
+
provider: RerankProvider,
|
|
285
|
+
apiKey: string,
|
|
286
|
+
model: string,
|
|
287
|
+
query: string,
|
|
288
|
+
documents: string[],
|
|
289
|
+
topN: number,
|
|
290
|
+
): { headers: Record<string, string>; body: Record<string, unknown> } {
|
|
291
|
+
switch (provider) {
|
|
292
|
+
case "pinecone":
|
|
293
|
+
return {
|
|
294
|
+
headers: {
|
|
295
|
+
"Content-Type": "application/json",
|
|
296
|
+
"Api-Key": apiKey,
|
|
297
|
+
"X-Pinecone-API-Version": "2024-10",
|
|
298
|
+
},
|
|
299
|
+
body: {
|
|
300
|
+
model,
|
|
301
|
+
query,
|
|
302
|
+
documents: documents.map((text) => ({ text })),
|
|
303
|
+
top_n: topN,
|
|
304
|
+
rank_fields: ["text"],
|
|
305
|
+
},
|
|
306
|
+
};
|
|
307
|
+
case "voyage":
|
|
308
|
+
return {
|
|
309
|
+
headers: {
|
|
310
|
+
"Content-Type": "application/json",
|
|
311
|
+
Authorization: `Bearer ${apiKey}`,
|
|
312
|
+
},
|
|
313
|
+
body: {
|
|
314
|
+
model,
|
|
315
|
+
query,
|
|
316
|
+
documents,
|
|
317
|
+
// Voyage uses top_k (not top_n) to limit reranked outputs.
|
|
318
|
+
top_k: topN,
|
|
319
|
+
},
|
|
320
|
+
};
|
|
321
|
+
case "ollama":
|
|
322
|
+
// Ollama rerank API: POST /api/rerank (available since Ollama 0.6+)
|
|
323
|
+
// Models: jina-reranker-v1-turbo, bge-reranker-v2-m3, etc.
|
|
324
|
+
// No API key needed for local Ollama
|
|
325
|
+
return {
|
|
326
|
+
headers: {
|
|
327
|
+
"Content-Type": "application/json",
|
|
328
|
+
},
|
|
329
|
+
body: {
|
|
330
|
+
model,
|
|
331
|
+
query,
|
|
332
|
+
documents,
|
|
333
|
+
top_n: topN,
|
|
334
|
+
},
|
|
335
|
+
};
|
|
336
|
+
case "siliconflow":
|
|
337
|
+
case "jina":
|
|
338
|
+
default:
|
|
339
|
+
return {
|
|
340
|
+
headers: {
|
|
341
|
+
"Content-Type": "application/json",
|
|
342
|
+
Authorization: `Bearer ${apiKey}`,
|
|
343
|
+
},
|
|
344
|
+
body: {
|
|
345
|
+
model,
|
|
346
|
+
query,
|
|
347
|
+
documents,
|
|
348
|
+
top_n: topN,
|
|
349
|
+
},
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
/** Parse provider-specific response into unified format */
|
|
355
|
+
function parseRerankResponse(
|
|
356
|
+
provider: RerankProvider,
|
|
357
|
+
data: Record<string, unknown>,
|
|
358
|
+
): RerankItem[] | null {
|
|
359
|
+
const parseItems = (
|
|
360
|
+
items: unknown,
|
|
361
|
+
scoreKeys: Array<"score" | "relevance_score">,
|
|
362
|
+
): RerankItem[] | null => {
|
|
363
|
+
if (!Array.isArray(items)) return null;
|
|
364
|
+
const parsed: RerankItem[] = [];
|
|
365
|
+
for (const raw of items as Array<Record<string, unknown>>) {
|
|
366
|
+
const index =
|
|
367
|
+
typeof raw?.index === "number" ? raw.index : Number(raw?.index);
|
|
368
|
+
if (!Number.isFinite(index)) continue;
|
|
369
|
+
let score: number | null = null;
|
|
370
|
+
for (const key of scoreKeys) {
|
|
371
|
+
const value = raw?.[key];
|
|
372
|
+
const n = typeof value === "number" ? value : Number(value);
|
|
373
|
+
if (Number.isFinite(n)) {
|
|
374
|
+
score = n;
|
|
375
|
+
break;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
if (score === null) continue;
|
|
379
|
+
parsed.push({ index, score });
|
|
380
|
+
}
|
|
381
|
+
return parsed.length > 0 ? parsed : null;
|
|
382
|
+
};
|
|
383
|
+
|
|
384
|
+
switch (provider) {
|
|
385
|
+
case "ollama": {
|
|
386
|
+
// Ollama: { results: [{ index, relevance_score }] }
|
|
387
|
+
// Same format as Jina, but also check data[] for compatibility
|
|
388
|
+
return (
|
|
389
|
+
parseItems(data.results, ["relevance_score", "score"]) ??
|
|
390
|
+
parseItems(data.data, ["relevance_score", "score"])
|
|
391
|
+
);
|
|
392
|
+
}
|
|
393
|
+
case "pinecone": {
|
|
394
|
+
// Pinecone: usually { data: [{ index, score, ... }] }
|
|
395
|
+
// Also tolerate results[] with score/relevance_score for robustness.
|
|
396
|
+
return (
|
|
397
|
+
parseItems(data.data, ["score", "relevance_score"]) ??
|
|
398
|
+
parseItems(data.results, ["score", "relevance_score"])
|
|
399
|
+
);
|
|
400
|
+
}
|
|
401
|
+
case "voyage": {
|
|
402
|
+
// Voyage: usually { data: [{ index, relevance_score }] }
|
|
403
|
+
// Also tolerate results[] for compatibility across gateways.
|
|
404
|
+
return (
|
|
405
|
+
parseItems(data.data, ["relevance_score", "score"]) ??
|
|
406
|
+
parseItems(data.results, ["relevance_score", "score"])
|
|
407
|
+
);
|
|
408
|
+
}
|
|
409
|
+
case "siliconflow":
|
|
410
|
+
case "jina":
|
|
411
|
+
default: {
|
|
412
|
+
// Jina / SiliconFlow: usually { results: [{ index, relevance_score }] }
|
|
413
|
+
// Also tolerate data[] for compatibility across gateways.
|
|
414
|
+
return (
|
|
415
|
+
parseItems(data.results, ["relevance_score", "score"]) ??
|
|
416
|
+
parseItems(data.data, ["relevance_score", "score"])
|
|
417
|
+
);
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
// Cosine similarity for reranking fallback
|
|
423
|
+
function cosineSimilarity(a: number[], b: number[]): number {
|
|
424
|
+
if (a.length !== b.length) {
|
|
425
|
+
throw new Error("Vector dimensions must match for cosine similarity");
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
let dotProduct = 0;
|
|
429
|
+
let normA = 0;
|
|
430
|
+
let normB = 0;
|
|
431
|
+
|
|
432
|
+
for (let i = 0; i < a.length; i++) {
|
|
433
|
+
dotProduct += a[i] * b[i];
|
|
434
|
+
normA += a[i] * a[i];
|
|
435
|
+
normB += b[i] * b[i];
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
const norm = Math.sqrt(normA) * Math.sqrt(normB);
|
|
439
|
+
return norm === 0 ? 0 : dotProduct / norm;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// ============================================================================
|
|
443
|
+
// Memory Retriever
|
|
444
|
+
// ============================================================================
|
|
445
|
+
|
|
446
|
+
// JSONL sink for fire-and-forget retrieval telemetry written by retrieve().
const RETRIEVAL_LOG_PATH = join(homedir(), ".openclaw", "memory", "retrieval-log.jsonl");
|
|
447
|
+
|
|
448
|
+
export class MemoryRetriever {
|
|
449
|
+
  // Pro access tracker (injected via setAccessTracker); null in Core mode.
  private accessTracker: AccessTracker | null = null;
  // NOTE(review): never assigned in the visible code — appears unused here; confirm.
  private tierManager: TierManager | null = null;

  constructor(
    private store: MemoryStore,
    private embedder: Embedder,
    private config: RetrievalConfig = DEFAULT_RETRIEVAL_CONFIG,
    private decayEngine: DecayEngine | null = null,
  ) { }
|
|
458
|
+
|
|
459
|
+
  /** Inject the Pro access tracker; retrieve() uses it to record accesses for
   * manually recalled memories (reinforcement). */
  setAccessTracker(tracker: AccessTracker): void {
    this.accessTracker = tracker;
  }
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* Resonance check: fast vector probe to see if the query "resonates"
|
|
465
|
+
* with any high-salience memories. Returns true if at least one memory
|
|
466
|
+
* has cosine similarity above threshold. This mimics human associative
|
|
467
|
+
* memory — most inputs don't trigger recall, only resonant ones do.
|
|
468
|
+
*/
|
|
469
|
+
private async resonanceCheck(
|
|
470
|
+
query: string,
|
|
471
|
+
scopeFilter?: string[],
|
|
472
|
+
): Promise<{ resonates: boolean; topScore: number }> {
|
|
473
|
+
try {
|
|
474
|
+
const queryVector = await this.embedder.embedQuery(query);
|
|
475
|
+
const threshold = getAdaptiveThreshold();
|
|
476
|
+
const probeResults = await this.store.vectorSearch(
|
|
477
|
+
queryVector,
|
|
478
|
+
3,
|
|
479
|
+
threshold,
|
|
480
|
+
scopeFilter,
|
|
481
|
+
);
|
|
482
|
+
if (probeResults.length === 0) return { resonates: false, topScore: 0 };
|
|
483
|
+
|
|
484
|
+
const topScore = Math.max(...probeResults.map(r => r.score));
|
|
485
|
+
|
|
486
|
+
// Record top score for adaptive threshold sliding window
|
|
487
|
+
recordResonanceScore(topScore);
|
|
488
|
+
|
|
489
|
+
// At least one result resonates — check if it has meaningful salience
|
|
490
|
+
// (not just a random match with a low-importance peripheral memory)
|
|
491
|
+
for (const r of probeResults) {
|
|
492
|
+
const importance = r.entry.importance ?? 0.5;
|
|
493
|
+
const similarity = r.score;
|
|
494
|
+
// Resonance = strong similarity OR moderate similarity + high importance
|
|
495
|
+
if (similarity >= 0.55 || (similarity >= threshold && importance >= 0.7)) {
|
|
496
|
+
return { resonates: true, topScore };
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
return { resonates: false, topScore };
|
|
500
|
+
} catch {
|
|
501
|
+
// If resonance check fails, allow recall (fail-open)
|
|
502
|
+
return { resonates: true, topScore: 0 };
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
/**
|
|
507
|
+
* Strip metadata blocks from auto-recall queries.
|
|
508
|
+
* OpenClaw core may pass the full message including Conversation info,
|
|
509
|
+
* Replied message, and Sender metadata JSON blocks. We only want the
|
|
510
|
+
* actual user text for semantic search.
|
|
511
|
+
*/
|
|
512
|
+
private cleanAutoRecallQuery(raw: string): string {
|
|
513
|
+
// Remove ```json ... ``` code blocks (metadata envelopes)
|
|
514
|
+
let cleaned = raw.replace(/```json[\s\S]*?```/g, "");
|
|
515
|
+
// Remove known metadata headers
|
|
516
|
+
cleaned = cleaned.replace(/Conversation info \(untrusted metadata\):/g, "");
|
|
517
|
+
cleaned = cleaned.replace(/Sender \(untrusted metadata\):/g, "");
|
|
518
|
+
cleaned = cleaned.replace(/Replied message \(untrusted, for context\):/g, "");
|
|
519
|
+
// Remove [Queued messages ...] headers
|
|
520
|
+
cleaned = cleaned.replace(/\[Queued messages[^\]]*\]/g, "");
|
|
521
|
+
cleaned = cleaned.replace(/---\s*\nQueued #\d+/g, "");
|
|
522
|
+
// Collapse whitespace
|
|
523
|
+
cleaned = cleaned.replace(/\n{3,}/g, "\n").trim();
|
|
524
|
+
// If nothing left after cleaning, return original (safety)
|
|
525
|
+
return cleaned.length > 2 ? cleaned : raw;
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
/**
|
|
529
|
+
* Expand date expressions in query to multiple formats for BM25 matching.
|
|
530
|
+
* "3月17日" → "3月17日 3/17 03-17 2026-03-17"
|
|
531
|
+
* "2026年3月17日" → "2026年3月17日 3/17 2026-03-17 03-17"
|
|
532
|
+
* "昨天/前天/上周" → resolved to absolute dates
|
|
533
|
+
*/
|
|
534
|
+
private expandDateFormats(query: string): string {
|
|
535
|
+
const now = new Date();
|
|
536
|
+
const year = now.getFullYear();
|
|
537
|
+
let expanded = query;
|
|
538
|
+
|
|
539
|
+
// Pattern: X月Y日 or X月Y号
|
|
540
|
+
expanded = expanded.replace(/(\d{1,2})月(\d{1,2})[日号]/g, (match, m, d) => {
|
|
541
|
+
const mm = String(m).padStart(2, "0");
|
|
542
|
+
const dd = String(d).padStart(2, "0");
|
|
543
|
+
return `${match} ${m}/${d} ${year}-${mm}-${dd} ${mm}-${dd}`;
|
|
544
|
+
});
|
|
545
|
+
|
|
546
|
+
// Pattern: YYYY年X月Y日
|
|
547
|
+
expanded = expanded.replace(/(\d{4})年(\d{1,2})月(\d{1,2})[日号]/g, (match, y, m, d) => {
|
|
548
|
+
const mm = String(m).padStart(2, "0");
|
|
549
|
+
const dd = String(d).padStart(2, "0");
|
|
550
|
+
return `${match} ${m}/${d} ${y}-${mm}-${dd} ${mm}-${dd}`;
|
|
551
|
+
});
|
|
552
|
+
|
|
553
|
+
// Pattern: X.Y or X/Y (might be dates like 3/17)
|
|
554
|
+
// Don't expand — these are already BM25-friendly
|
|
555
|
+
|
|
556
|
+
// Relative dates
|
|
557
|
+
const relMap: Record<string, number> = {
|
|
558
|
+
"今天": 0, "昨天": -1, "前天": -2, "大前天": -3,
|
|
559
|
+
};
|
|
560
|
+
for (const [word, offset] of Object.entries(relMap)) {
|
|
561
|
+
if (expanded.includes(word)) {
|
|
562
|
+
const d = new Date(now.getTime() + offset * 86400000);
|
|
563
|
+
const iso = d.toISOString().slice(0, 10);
|
|
564
|
+
const m = d.getMonth() + 1;
|
|
565
|
+
const day = d.getDate();
|
|
566
|
+
expanded = expanded.replace(word, `${word} ${iso} ${m}/${day} ${m}月${day}日`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
return expanded;
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
|
|
574
|
+
let { query, limit, scopeFilter, category, source } = context;
|
|
575
|
+
// Clean metadata pollution from auto-recall queries
|
|
576
|
+
if (source === "auto-recall") {
|
|
577
|
+
query = this.cleanAutoRecallQuery(query);
|
|
578
|
+
}
|
|
579
|
+
const safeLimit = clampInt(limit, 1, 20);
|
|
580
|
+
const t0 = performance.now();
|
|
581
|
+
|
|
582
|
+
// ── 共振门控:auto-recall 时先做快速共振检测 ──
|
|
583
|
+
let resonanceTriggered = false;
|
|
584
|
+
let resonanceTopScore = 0;
|
|
585
|
+
if (source === "auto-recall") {
|
|
586
|
+
const { resonates, topScore: probeTopScore } = await this.resonanceCheck(query, scopeFilter);
|
|
587
|
+
resonanceTriggered = resonates;
|
|
588
|
+
resonanceTopScore = probeTopScore;
|
|
589
|
+
if (!resonates) {
|
|
590
|
+
// Fire-and-forget tracking for gated-out queries
|
|
591
|
+
const trackEntry = JSON.stringify({
|
|
592
|
+
ts: new Date().toISOString(),
|
|
593
|
+
query: query.substring(0, 200),
|
|
594
|
+
source: source || "manual",
|
|
595
|
+
queryType: "gated-out",
|
|
596
|
+
resonanceScore: resonanceTopScore,
|
|
597
|
+
resonanceTriggered: false,
|
|
598
|
+
lancedbCount: 0,
|
|
599
|
+
graphitiCount: 0,
|
|
600
|
+
rerankCount: 0,
|
|
601
|
+
finalCount: 0,
|
|
602
|
+
totalLatencyMs: Math.round(performance.now() - t0),
|
|
603
|
+
lancedbLatencyMs: 0,
|
|
604
|
+
graphitiLatencyMs: 0,
|
|
605
|
+
rerankLatencyMs: 0,
|
|
606
|
+
}) + "\n";
|
|
607
|
+
appendFile(RETRIEVAL_LOG_PATH, trackEntry).catch(() => {});
|
|
608
|
+
recordQuery({
|
|
609
|
+
timestamp: new Date().toISOString(),
|
|
610
|
+
query: query.substring(0, 200),
|
|
611
|
+
source: (source === "auto-recall" ? "auto" : source || "manual") as "auto" | "manual" | "cli",
|
|
612
|
+
hitCount: 0,
|
|
613
|
+
topScore: resonanceTopScore,
|
|
614
|
+
latency_ms: Math.round(performance.now() - t0),
|
|
615
|
+
queryType: "gated-out",
|
|
616
|
+
resonancePass: false,
|
|
617
|
+
});
|
|
618
|
+
return []; // 无共振 → 不注入记忆
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
// ── Multi-hop 检测 ──
|
|
623
|
+
const isMultiHop = this.config.multiHopRouting && this.isMultiHopQuery(query);
|
|
624
|
+
|
|
625
|
+
// ── 并发查询:LanceDB + Graphiti ──
|
|
626
|
+
const tLance0 = performance.now();
|
|
627
|
+
const lanceDbPromise = (async () => {
|
|
628
|
+
if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
|
|
629
|
+
return this.vectorOnlyRetrieval(query, safeLimit, scopeFilter, category);
|
|
630
|
+
} else {
|
|
631
|
+
return this.hybridRetrieval(query, safeLimit, scopeFilter, category);
|
|
632
|
+
}
|
|
633
|
+
})();
|
|
634
|
+
|
|
635
|
+
const tGraphiti0 = performance.now();
|
|
636
|
+
const graphitiPromise = (async (): Promise<RetrievalResult[]> => {
|
|
637
|
+
if (process.env.GRAPHITI_ENABLED !== "true") return [];
|
|
638
|
+
try {
|
|
639
|
+
const graphitiBase = process.env.GRAPHITI_BASE_URL || "http://127.0.0.1:18799";
|
|
640
|
+
const scope = scopeFilter?.[0] || "default";
|
|
641
|
+
const groupId = scope.startsWith("agent:") ? scope.split(":")[1] || "default" : "default";
|
|
642
|
+
|
|
643
|
+
// Multi-hop queries skip spread (spread follows single-entity neighborhoods)
|
|
644
|
+
const useSpread = source === "auto-recall" && !isMultiHop;
|
|
645
|
+
const endpoint = useSpread ? "/spread" : "/search";
|
|
646
|
+
const body = useSpread
|
|
647
|
+
? { query, group_id: groupId, search_limit: 3, spread_depth: 1, spread_limit: 3 }
|
|
648
|
+
: { query, group_id: groupId, limit: Math.min(safeLimit, 5) };
|
|
649
|
+
|
|
650
|
+
const resp = await fetch(`${graphitiBase}${endpoint}`, {
|
|
651
|
+
method: "POST",
|
|
652
|
+
headers: { "Content-Type": "application/json" },
|
|
653
|
+
body: JSON.stringify(body),
|
|
654
|
+
signal: AbortSignal.timeout(5000),
|
|
655
|
+
});
|
|
656
|
+
if (!resp.ok) return [];
|
|
657
|
+
|
|
658
|
+
const facts = (await resp.json()) as Array<{
|
|
659
|
+
fact: string; valid_at?: string; expired_at?: string; created_at?: string;
|
|
660
|
+
degree?: number; source?: string;
|
|
661
|
+
}>;
|
|
662
|
+
return facts
|
|
663
|
+
.filter(f => f.fact && !f.expired_at)
|
|
664
|
+
.map((f, i) => {
|
|
665
|
+
const isSpread = f.source === "spread";
|
|
666
|
+
const degreeBoost = f.degree ? Math.min(0.15, Math.log1p(f.degree) * 0.03) : 0;
|
|
667
|
+
const baseScore = isSpread ? 0.45 : 0.65;
|
|
668
|
+
return {
|
|
669
|
+
entry: {
|
|
670
|
+
id: `graphiti-${Date.now()}-${i}`,
|
|
671
|
+
text: f.fact,
|
|
672
|
+
vector: [],
|
|
673
|
+
category: "fact" as const,
|
|
674
|
+
scope: scope,
|
|
675
|
+
importance: 0.7,
|
|
676
|
+
timestamp: f.valid_at ? new Date(f.valid_at).getTime() : Date.now(),
|
|
677
|
+
metadata: JSON.stringify({ source: isSpread ? "graphiti-spread" : "graphiti", valid_at: f.valid_at, degree: f.degree }),
|
|
678
|
+
},
|
|
679
|
+
score: baseScore + (0.01 * (facts.length - i)) + degreeBoost,
|
|
680
|
+
sources: { graphiti: { rank: i + 1 } },
|
|
681
|
+
} as RetrievalResult;
|
|
682
|
+
});
|
|
683
|
+
} catch {
|
|
684
|
+
return [];
|
|
685
|
+
}
|
|
686
|
+
})();
|
|
687
|
+
|
|
688
|
+
const [lanceResults, graphitiResults] = await Promise.all([lanceDbPromise, graphitiPromise]);
|
|
689
|
+
const tLanceMs = Math.round(performance.now() - tLance0);
|
|
690
|
+
const tGraphitiMs = Math.round(performance.now() - tGraphiti0);
|
|
691
|
+
|
|
692
|
+
// Merge: LanceDB results first, append non-duplicate Graphiti facts
|
|
693
|
+
const lanceTexts = new Set(lanceResults.map(r => r.entry.text.slice(0, 80)));
|
|
694
|
+
const uniqueGraphiti = graphitiResults.filter(
|
|
695
|
+
r => !lanceTexts.has(r.entry.text.slice(0, 80))
|
|
696
|
+
);
|
|
697
|
+
|
|
698
|
+
// ── 跨源统一 Rerank(所有结果都过 reranker)──
|
|
699
|
+
let merged: RetrievalResult[];
|
|
700
|
+
let rerankCount = 0;
|
|
701
|
+
const tRerank0 = performance.now();
|
|
702
|
+
const combined = [...lanceResults, ...uniqueGraphiti];
|
|
703
|
+
if (this.config.rerank !== "none" && this.config.rerankApiKey && combined.length > 0) {
|
|
704
|
+
const queryVector = await this.embedder.embedQuery(query);
|
|
705
|
+
merged = await this.rerankResults(query, queryVector, combined);
|
|
706
|
+
rerankCount = merged.length;
|
|
707
|
+
merged = merged.slice(0, safeLimit);
|
|
708
|
+
} else {
|
|
709
|
+
merged = combined.slice(0, safeLimit);
|
|
710
|
+
}
|
|
711
|
+
const tRerankMs = Math.round(performance.now() - tRerank0);
|
|
712
|
+
|
|
713
|
+
// Record access for reinforcement (manual recall only)
|
|
714
|
+
if (this.accessTracker && source === "manual" && merged.length > 0) {
|
|
715
|
+
this.accessTracker.recordAccess(
|
|
716
|
+
merged.filter(r => !r.entry.id.startsWith("graphiti-")).map(r => r.entry.id)
|
|
717
|
+
);
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
// ── Fire-and-forget tracking log ──
|
|
721
|
+
const topScore = merged.length > 0 ? Math.max(...merged.map(r => r.score)) : 0;
|
|
722
|
+
const trackEntry = JSON.stringify({
|
|
723
|
+
ts: new Date().toISOString(),
|
|
724
|
+
query: query.substring(0, 200),
|
|
725
|
+
source: source || "manual",
|
|
726
|
+
queryType: isMultiHop ? "multi-hop" : "single",
|
|
727
|
+
resonanceScore: topScore,
|
|
728
|
+
resonanceTriggered,
|
|
729
|
+
lancedbCount: lanceResults.length,
|
|
730
|
+
graphitiCount: graphitiResults.length,
|
|
731
|
+
rerankCount,
|
|
732
|
+
finalCount: merged.length,
|
|
733
|
+
totalLatencyMs: Math.round(performance.now() - t0),
|
|
734
|
+
lancedbLatencyMs: tLanceMs,
|
|
735
|
+
graphitiLatencyMs: tGraphitiMs,
|
|
736
|
+
rerankLatencyMs: tRerankMs,
|
|
737
|
+
}) + "\n";
|
|
738
|
+
appendFile(RETRIEVAL_LOG_PATH, trackEntry).catch(() => {});
|
|
739
|
+
recordQuery({
|
|
740
|
+
timestamp: new Date().toISOString(),
|
|
741
|
+
query: query.substring(0, 200),
|
|
742
|
+
source: (source === "auto-recall" ? "auto" : source || "manual") as "auto" | "manual" | "cli",
|
|
743
|
+
hitCount: merged.length,
|
|
744
|
+
topScore,
|
|
745
|
+
latency_ms: Math.round(performance.now() - t0),
|
|
746
|
+
queryType: isMultiHop ? "multi-hop" : "single",
|
|
747
|
+
resonancePass: true,
|
|
748
|
+
});
|
|
749
|
+
|
|
750
|
+
return merged;
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
  /**
   * Vector-only retrieval path (no BM25 or graph fusion).
   *
   * Pipeline: embed query → vector search → optional category filter →
   * recency/importance weighting (only when no decay engine is configured) →
   * length normalization → hard score cutoff → lifecycle or time decay →
   * optional noise filter → MMR dedup → top-`limit`.
   *
   * @param query       natural-language query text
   * @param limit       maximum number of results to return
   * @param scopeFilter optional scope identifiers restricting the search
   * @param category    optional category; applied AFTER the vector search
   */
  private async vectorOnlyRetrieval(
    query: string,
    limit: number,
    scopeFilter?: string[],
    category?: string,
  ): Promise<RetrievalResult[]> {
    const queryVector = await this.embedder.embedQuery(query);
    const results = await this.store.vectorSearch(
      queryVector,
      limit,
      this.config.minScore,
      scopeFilter,
    );

    // Filter by category if specified.
    // NOTE(review): the category filter runs after a `limit`-sized search, so
    // fewer than `limit` results may survive — confirm this is intended.
    const filtered = category
      ? results.filter((r) => r.entry.category === category)
      : results;

    // Tag each hit with its vector-search provenance (score + rank).
    const mapped = filtered.map(
      (result, index) =>
        ({
          ...result,
          sources: {
            vector: { score: result.score, rank: index + 1 },
          },
        }) as RetrievalResult,
    );

    // Legacy weighting only when no lifecycle decay engine is present;
    // with a decay engine, applyDecayBoost below handles temporal ranking.
    const weighted = this.decayEngine ? mapped : this.applyImportanceWeight(this.applyRecencyBoost(mapped));
    const lengthNormalized = this.applyLengthNormalization(weighted);
    // Hard cutoff on relevance before any decay-based re-ranking.
    const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
    const lifecycleRanked = this.decayEngine
      ? this.applyDecayBoost(hardFiltered)
      : this.applyTimeDecay(hardFiltered);
    const denoised = this.config.filterNoise
      ? filterNoise(lifecycleRanked, r => r.entry.text)
      : lifecycleRanked;

    // MMR deduplication: avoid top-k filled with near-identical memories
    const deduplicated = this.applyMMRDiversity(denoised);

    return deduplicated.slice(0, limit);
  }
|
|
797
|
+
|
|
798
|
+
  /**
   * Hybrid retrieval: vector + BM25 + Graphiti graph traversal, fused into a
   * single ranked list, optionally reranked by a cross-encoder, then passed
   * through the shared weighting / decay / noise / MMR post-processing chain.
   *
   * @param query       natural-language query text
   * @param limit       maximum number of results to return
   * @param scopeFilter optional scope identifiers restricting the search
   * @param category    optional category filter (applied inside each sub-search)
   */
  private async hybridRetrieval(
    query: string,
    limit: number,
    scopeFilter?: string[],
    category?: string,
  ): Promise<RetrievalResult[]> {
    // Over-fetch candidates so fusion/rerank have material to work with.
    const candidatePoolSize = Math.max(
      this.config.candidatePoolSize,
      limit * 2,
    );

    // Compute query embedding once, reuse for vector search + reranking
    const queryVector = await this.embedder.embedQuery(query);

    // Run vector, BM25, and Graphiti searches in parallel (3-way)
    // NOTE(review): the Graphiti group id is hard-coded to "default" here,
    // unlike the caller path that passes a real groupId — confirm intended.
    const [vectorResults, bm25Results, graphitiResults] = await Promise.all([
      this.runVectorSearch(
        queryVector,
        candidatePoolSize,
        scopeFilter,
        category,
      ),
      this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
      graphitiSpreadSearch(query, "default", 3, 3),
    ]);

    // Fuse results (async: validates BM25-only entries exist in store).
    // Despite older comments, this is weighted score fusion, not classic RRF.
    // Graphiti results merged as 3rd signal
    const fusedResults = await this.fuseResults(vectorResults, bm25Results, graphitiResults);

    // Apply minimum score threshold
    const filtered = fusedResults.filter(
      (r) => r.score >= this.config.minScore,
    );

    // Rerank if enabled (only the top 2×limit candidates to bound API cost)
    const reranked =
      this.config.rerank !== "none"
        ? await this.rerankResults(
            query,
            queryVector,
            filtered.slice(0, limit * 2),
          )
        : filtered;

    // Legacy recency/importance weighting only when no decay engine exists.
    const temporallyRanked = this.decayEngine
      ? reranked
      : this.applyImportanceWeight(this.applyRecencyBoost(reranked));

    // Apply length normalization (penalize long entries dominating via keyword density)
    const lengthNormalized = this.applyLengthNormalization(temporallyRanked);

    // Hard minimum score cutoff should be based on semantic / lexical relevance.
    // Lifecycle decay and time-decay are used for re-ranking, not for dropping
    // otherwise relevant fresh memories.
    const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);

    // Apply lifecycle-aware decay or legacy time decay after thresholding
    const lifecycleRanked = this.decayEngine
      ? this.applyDecayBoost(hardFiltered)
      : this.applyTimeDecay(hardFiltered);

    // Filter noise
    const denoised = this.config.filterNoise
      ? filterNoise(lifecycleRanked, r => r.entry.text)
      : lifecycleRanked;

    // MMR deduplication: avoid top-k filled with near-identical memories
    const deduplicated = this.applyMMRDiversity(denoised);

    return deduplicated.slice(0, limit);
  }
|
|
870
|
+
|
|
871
|
+
private async runVectorSearch(
|
|
872
|
+
queryVector: number[],
|
|
873
|
+
limit: number,
|
|
874
|
+
scopeFilter?: string[],
|
|
875
|
+
category?: string,
|
|
876
|
+
): Promise<Array<MemorySearchResult & { rank: number }>> {
|
|
877
|
+
const results = await this.store.vectorSearch(
|
|
878
|
+
queryVector,
|
|
879
|
+
limit,
|
|
880
|
+
0.1,
|
|
881
|
+
scopeFilter,
|
|
882
|
+
);
|
|
883
|
+
|
|
884
|
+
// Filter by category if specified
|
|
885
|
+
const filtered = category
|
|
886
|
+
? results.filter((r) => r.entry.category === category)
|
|
887
|
+
: results;
|
|
888
|
+
|
|
889
|
+
return filtered.map((result, index) => ({
|
|
890
|
+
...result,
|
|
891
|
+
rank: index + 1,
|
|
892
|
+
}));
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
private async runBM25Search(
|
|
896
|
+
query: string,
|
|
897
|
+
limit: number,
|
|
898
|
+
scopeFilter?: string[],
|
|
899
|
+
category?: string,
|
|
900
|
+
): Promise<Array<MemorySearchResult & { rank: number }>> {
|
|
901
|
+
// Expand date formats for better BM25 matching
|
|
902
|
+
const expandedQuery = this.expandDateFormats(query);
|
|
903
|
+
const results = await this.store.bm25Search(expandedQuery, limit, scopeFilter);
|
|
904
|
+
|
|
905
|
+
// Filter by category if specified
|
|
906
|
+
const filtered = category
|
|
907
|
+
? results.filter((r) => r.entry.category === category)
|
|
908
|
+
: results;
|
|
909
|
+
|
|
910
|
+
return filtered.map((result, index) => ({
|
|
911
|
+
...result,
|
|
912
|
+
rank: index + 1,
|
|
913
|
+
}));
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
  /**
   * Fuse vector, BM25, and Graphiti results into one ranked list.
   *
   * Scoring model (weighted fusion, not classic reciprocal-rank fusion):
   * - both signals:   max(vectorWeight·v + bm25Weight·b, strong-BM25 floor)
   * - vector only:    vectorWeight·v (clamped, min 0.1)
   * - BM25 only:      raw BM25 score (clamped, min 0.1), after validating the
   *                   entry still exists in the store (ghost-entry guard)
   * - graphiti:       injected at 0.85× their own score as a third signal
   *
   * @returns results sorted by fused score, descending
   */
  private async fuseResults(
    vectorResults: Array<MemorySearchResult & { rank: number }>,
    bm25Results: Array<MemorySearchResult & { rank: number }>,
    graphitiResults: Array<MemorySearchResult & { rank: number }> = [],
  ): Promise<RetrievalResult[]> {
    // Create maps for quick lookup by entry id
    const vectorMap = new Map<string, MemorySearchResult & { rank: number }>();
    const bm25Map = new Map<string, MemorySearchResult & { rank: number }>();

    vectorResults.forEach((result) => {
      vectorMap.set(result.entry.id, result);
    });

    bm25Results.forEach((result) => {
      bm25Map.set(result.entry.id, result);
    });

    // Get all unique document IDs
    const allIds = new Set([...vectorMap.keys(), ...bm25Map.keys()]);

    // Calculate weighted fusion scores
    const fusedResults: RetrievalResult[] = [];

    for (const id of allIds) {
      const vectorResult = vectorMap.get(id);
      const bm25Result = bm25Map.get(id);

      // FIX(#15): BM25-only results may be "ghost" entries whose vector data was
      // deleted but whose FTS index entry lingers until the next index rebuild.
      // Validate that the entry actually exists in the store before including it.
      if (!vectorResult && bm25Result) {
        try {
          const exists = await this.store.hasId(id);
          if (!exists) continue; // Skip ghost entry
        } catch {
          // If hasId fails, keep the result (fail-open)
        }
      }

      // Use the result with more complete data (prefer vector result if both exist)
      const baseResult = vectorResult || bm25Result!;

      // Use vector similarity as the base score.
      // BM25 hit acts as a bonus (keyword match confirms relevance).
      const vectorScore = vectorResult ? vectorResult.score : 0;
      const bm25Score = bm25Result ? bm25Result.score : 0;
      // Weighted fusion: vectorWeight/bm25Weight directly control score blending.
      // BM25 high-score floor (>= 0.75) preserves exact keyword matches
      // (e.g. API keys, ticket numbers) that may have low vector similarity.
      const weightedFusion = (vectorScore * this.config.vectorWeight)
        + (bm25Score * this.config.bm25Weight);
      const fusedScore = vectorResult
        ? clamp01(
            Math.max(
              weightedFusion,
              bm25Score >= 0.75 ? bm25Score * 0.92 : 0,
            ),
            0.1,
          )
        : clamp01(bm25Result!.score, 0.1);

      fusedResults.push({
        entry: baseResult.entry,
        score: fusedScore,
        sources: {
          vector: vectorResult
            ? { score: vectorResult.score, rank: vectorResult.rank }
            : undefined,
          bm25: bm25Result
            ? { score: bm25Result.score, rank: bm25Result.rank }
            : undefined,
          fused: { score: fusedScore },
        },
      });
    }

    // Inject Graphiti graph traversal results as 3rd signal.
    // Synthetic entries (not in LanceDB) — scored slightly lower to avoid dominating,
    // but provide coverage for relational queries (e.g. "Alice's clients", "who did Bob meet?").
    for (const gr of graphitiResults) {
      fusedResults.push({
        entry: gr.entry,
        score: gr.score * 0.85,
        sources: {
          graphiti: { score: gr.score, rank: gr.rank },
          fused: { score: gr.score * 0.85 },
        },
      });
    }

    // Sort by fused score descending
    return fusedResults.sort((a, b) => b.score - a.score);
  }
|
|
1009
|
+
|
|
1010
|
+
/**
|
|
1011
|
+
* Rerank results using cross-encoder API (Jina, Pinecone, or compatible).
|
|
1012
|
+
* Falls back to cosine similarity if API is unavailable or fails.
|
|
1013
|
+
*/
|
|
1014
|
+
private async rerankResults(
|
|
1015
|
+
query: string,
|
|
1016
|
+
queryVector: number[],
|
|
1017
|
+
results: RetrievalResult[],
|
|
1018
|
+
): Promise<RetrievalResult[]> {
|
|
1019
|
+
if (results.length === 0) {
|
|
1020
|
+
return results;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
// Try cross-encoder rerank via configured provider API
|
|
1024
|
+
console.warn(`[rerank-debug] rerank=${this.config.rerank}, hasKey=${!!this.config.rerankApiKey}, keyPrefix=${String(this.config.rerankApiKey || '').substring(0, 8)}, provider=${this.config.rerankProvider}, model=${this.config.rerankModel}`);
|
|
1025
|
+
const isLocalRerank = this.config.rerankProvider === "ollama";
|
|
1026
|
+
if (this.config.rerank === "cross-encoder" && (this.config.rerankApiKey || isLocalRerank)) {
|
|
1027
|
+
try {
|
|
1028
|
+
const provider = this.config.rerankProvider || "jina";
|
|
1029
|
+
const model = this.config.rerankModel || (isLocalRerank ? "bge-reranker-v2-m3" : "jina-reranker-v3");
|
|
1030
|
+
const endpoint =
|
|
1031
|
+
this.config.rerankEndpoint || (isLocalRerank ? "http://127.0.0.1:11434/api/rerank" : "https://api.jina.ai/v1/rerank");
|
|
1032
|
+
const documents = results.map((r) => r.entry.text);
|
|
1033
|
+
|
|
1034
|
+
// Build provider-specific request
|
|
1035
|
+
const { headers, body } = buildRerankRequest(
|
|
1036
|
+
provider,
|
|
1037
|
+
this.config.rerankApiKey,
|
|
1038
|
+
model,
|
|
1039
|
+
query,
|
|
1040
|
+
documents,
|
|
1041
|
+
results.length,
|
|
1042
|
+
);
|
|
1043
|
+
|
|
1044
|
+
// Timeout: 5 seconds to prevent stalling retrieval pipeline
|
|
1045
|
+
const controller = new AbortController();
|
|
1046
|
+
const timeout = setTimeout(() => controller.abort(), 5000);
|
|
1047
|
+
|
|
1048
|
+
const response = await fetch(endpoint, {
|
|
1049
|
+
method: "POST",
|
|
1050
|
+
headers,
|
|
1051
|
+
body: JSON.stringify(body),
|
|
1052
|
+
signal: controller.signal,
|
|
1053
|
+
});
|
|
1054
|
+
|
|
1055
|
+
clearTimeout(timeout);
|
|
1056
|
+
|
|
1057
|
+
if (response.ok) {
|
|
1058
|
+
const data = (await response.json()) as Record<string, unknown>;
|
|
1059
|
+
|
|
1060
|
+
// Parse provider-specific response into unified format
|
|
1061
|
+
const parsed = parseRerankResponse(provider, data);
|
|
1062
|
+
|
|
1063
|
+
if (!parsed) {
|
|
1064
|
+
console.warn(
|
|
1065
|
+
"Rerank API: invalid response shape, falling back to cosine",
|
|
1066
|
+
);
|
|
1067
|
+
} else {
|
|
1068
|
+
// Build a Set of returned indices to identify unreturned candidates
|
|
1069
|
+
const returnedIndices = new Set(parsed.map((r) => r.index));
|
|
1070
|
+
|
|
1071
|
+
const reranked = parsed
|
|
1072
|
+
.filter((item) => item.index >= 0 && item.index < results.length)
|
|
1073
|
+
.map((item) => {
|
|
1074
|
+
const original = results[item.index];
|
|
1075
|
+
const floor = this.getRerankPreservationFloor(original, false);
|
|
1076
|
+
// Blend: 60% cross-encoder score + 40% original fused score
|
|
1077
|
+
const blendedScore = clamp01WithFloor(
|
|
1078
|
+
item.score * 0.6 + original.score * 0.4,
|
|
1079
|
+
floor,
|
|
1080
|
+
);
|
|
1081
|
+
return {
|
|
1082
|
+
...original,
|
|
1083
|
+
score: blendedScore,
|
|
1084
|
+
sources: {
|
|
1085
|
+
...original.sources,
|
|
1086
|
+
reranked: { score: item.score },
|
|
1087
|
+
},
|
|
1088
|
+
};
|
|
1089
|
+
});
|
|
1090
|
+
|
|
1091
|
+
// Keep unreturned candidates with their original scores (slightly penalized)
|
|
1092
|
+
const unreturned = results
|
|
1093
|
+
.filter((_, idx) => !returnedIndices.has(idx))
|
|
1094
|
+
.map(r => ({
|
|
1095
|
+
...r,
|
|
1096
|
+
score: clamp01WithFloor(
|
|
1097
|
+
r.score * 0.8,
|
|
1098
|
+
this.getRerankPreservationFloor(r, true),
|
|
1099
|
+
),
|
|
1100
|
+
}));
|
|
1101
|
+
|
|
1102
|
+
return [...reranked, ...unreturned].sort(
|
|
1103
|
+
(a, b) => b.score - a.score,
|
|
1104
|
+
);
|
|
1105
|
+
}
|
|
1106
|
+
} else {
|
|
1107
|
+
const errText = await response.text().catch(() => "");
|
|
1108
|
+
console.warn(
|
|
1109
|
+
`Rerank API returned ${response.status}: ${errText.slice(0, 200)}, falling back to cosine`,
|
|
1110
|
+
);
|
|
1111
|
+
}
|
|
1112
|
+
} catch (error) {
|
|
1113
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
1114
|
+
console.warn("Rerank API timed out (5s), falling back to cosine");
|
|
1115
|
+
} else {
|
|
1116
|
+
console.warn("Rerank API failed, falling back to cosine:", error);
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
// Fallback: lightweight cosine similarity rerank
|
|
1122
|
+
try {
|
|
1123
|
+
const reranked = results.map((result) => {
|
|
1124
|
+
const cosineScore = cosineSimilarity(queryVector, result.entry.vector);
|
|
1125
|
+
const combinedScore = result.score * 0.7 + cosineScore * 0.3;
|
|
1126
|
+
|
|
1127
|
+
return {
|
|
1128
|
+
...result,
|
|
1129
|
+
score: clamp01(combinedScore, result.score),
|
|
1130
|
+
sources: {
|
|
1131
|
+
...result.sources,
|
|
1132
|
+
reranked: { score: cosineScore },
|
|
1133
|
+
},
|
|
1134
|
+
};
|
|
1135
|
+
});
|
|
1136
|
+
|
|
1137
|
+
return reranked.sort((a, b) => b.score - a.score);
|
|
1138
|
+
} catch (error) {
|
|
1139
|
+
console.warn("Reranking failed, returning original results:", error);
|
|
1140
|
+
return results;
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
private getRerankPreservationFloor(result: RetrievalResult, unreturned: boolean): number {
|
|
1145
|
+
const bm25Score = result.sources.bm25?.score ?? 0;
|
|
1146
|
+
|
|
1147
|
+
// Exact lexical hits (IDs, env vars, ticket numbers) should not disappear
|
|
1148
|
+
// just because a reranker under-scores symbolic or mixed-language queries.
|
|
1149
|
+
if (bm25Score >= 0.75) {
|
|
1150
|
+
return result.score * (unreturned ? 1.0 : 0.95);
|
|
1151
|
+
}
|
|
1152
|
+
if (bm25Score >= 0.6) {
|
|
1153
|
+
return result.score * (unreturned ? 0.95 : 0.9);
|
|
1154
|
+
}
|
|
1155
|
+
return result.score * (unreturned ? 0.8 : 0.5);
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
/**
|
|
1159
|
+
* Apply recency boost: newer memories get a small score bonus.
|
|
1160
|
+
* This ensures corrections/updates naturally outrank older entries
|
|
1161
|
+
* when semantic similarity is close.
|
|
1162
|
+
* Formula: boost = exp(-ageDays / halfLife) * weight
|
|
1163
|
+
*/
|
|
1164
|
+
private applyRecencyBoost(results: RetrievalResult[]): RetrievalResult[] {
|
|
1165
|
+
const { recencyHalfLifeDays, recencyWeight } = this.config;
|
|
1166
|
+
if (!recencyHalfLifeDays || recencyHalfLifeDays <= 0 || !recencyWeight) {
|
|
1167
|
+
return results;
|
|
1168
|
+
}
|
|
1169
|
+
|
|
1170
|
+
const now = Date.now();
|
|
1171
|
+
const boosted = results.map((r) => {
|
|
1172
|
+
const ts =
|
|
1173
|
+
r.entry.timestamp && r.entry.timestamp > 0 ? r.entry.timestamp : now;
|
|
1174
|
+
const ageDays = (now - ts) / 86_400_000;
|
|
1175
|
+
const boost = Math.exp(-ageDays / recencyHalfLifeDays) * recencyWeight;
|
|
1176
|
+
return {
|
|
1177
|
+
...r,
|
|
1178
|
+
score: clamp01(r.score + boost, r.score),
|
|
1179
|
+
};
|
|
1180
|
+
});
|
|
1181
|
+
|
|
1182
|
+
return boosted.sort((a, b) => b.score - a.score);
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
/**
|
|
1186
|
+
* Apply importance weighting: memories with higher importance get a score boost.
|
|
1187
|
+
* This ensures critical memories (importance=1.0) outrank casual ones (importance=0.5)
|
|
1188
|
+
* when semantic similarity is close.
|
|
1189
|
+
* Formula: score *= (baseWeight + (1 - baseWeight) * importance)
|
|
1190
|
+
* With baseWeight=0.7: importance=1.0 → ×1.0, importance=0.5 → ×0.85, importance=0.0 → ×0.7
|
|
1191
|
+
*/
|
|
1192
|
+
private applyImportanceWeight(results: RetrievalResult[]): RetrievalResult[] {
|
|
1193
|
+
const baseWeight = 0.7;
|
|
1194
|
+
const weighted = results.map((r) => {
|
|
1195
|
+
const importance = r.entry.importance ?? 0.7;
|
|
1196
|
+
const factor = baseWeight + (1 - baseWeight) * importance;
|
|
1197
|
+
return {
|
|
1198
|
+
...r,
|
|
1199
|
+
score: clamp01(r.score * factor, r.score * baseWeight),
|
|
1200
|
+
};
|
|
1201
|
+
});
|
|
1202
|
+
return weighted.sort((a, b) => b.score - a.score);
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
private applyDecayBoost(results: RetrievalResult[]): RetrievalResult[] {
|
|
1206
|
+
if (!this.decayEngine || results.length === 0) return results;
|
|
1207
|
+
|
|
1208
|
+
const scored = results.map((result) => ({
|
|
1209
|
+
memory: toLifecycleMemory(result.entry.id, result.entry),
|
|
1210
|
+
score: result.score,
|
|
1211
|
+
}));
|
|
1212
|
+
|
|
1213
|
+
this.decayEngine.applySearchBoost(scored);
|
|
1214
|
+
|
|
1215
|
+
const reranked = results.map((result, index) => ({
|
|
1216
|
+
...result,
|
|
1217
|
+
score: clamp01(scored[index].score, result.score * 0.3),
|
|
1218
|
+
}));
|
|
1219
|
+
|
|
1220
|
+
return reranked.sort((a, b) => b.score - a.score);
|
|
1221
|
+
}
|
|
1222
|
+
|
|
1223
|
+
/**
|
|
1224
|
+
* Detect multi-hop queries that involve multiple entities or relationships.
|
|
1225
|
+
* Multi-hop queries benefit from LanceDB secondary retrieval rather than
|
|
1226
|
+
* Graphiti spread (which follows single-entity neighborhoods).
|
|
1227
|
+
*/
|
|
1228
|
+
private isMultiHopQuery(query: string): boolean {
|
|
1229
|
+
// Count distinct capitalized words (potential entity names, >= 2 chars)
|
|
1230
|
+
const capitalizedWords = new Set(
|
|
1231
|
+
(query.match(/\b[A-Z\u4e00-\u9fff][a-zA-Z\u4e00-\u9fff]{1,}/g) || [])
|
|
1232
|
+
);
|
|
1233
|
+
if (capitalizedWords.size >= 2) return true;
|
|
1234
|
+
|
|
1235
|
+
// Relationship / comparison patterns
|
|
1236
|
+
const relationPatterns = [
|
|
1237
|
+
/和.{1,20}的关系/,
|
|
1238
|
+
/compared\s+to/i,
|
|
1239
|
+
/difference\s+between/i,
|
|
1240
|
+
/为什么/,
|
|
1241
|
+
/how\s+does\s+.{1,30}\s+relate\s+to/i,
|
|
1242
|
+
/between\s+.{1,30}\s+and\s+/i,
|
|
1243
|
+
/相比/,
|
|
1244
|
+
/区别/,
|
|
1245
|
+
/之间/,
|
|
1246
|
+
];
|
|
1247
|
+
for (const pat of relationPatterns) {
|
|
1248
|
+
if (pat.test(query)) return true;
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
// Long query with question mark
|
|
1252
|
+
if (query.length > 100 && /[??]/.test(query)) return true;
|
|
1253
|
+
|
|
1254
|
+
return false;
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1257
|
+
/**
|
|
1258
|
+
* via sheer keyword density and broad semantic coverage.
|
|
1259
|
+
* Short, focused entries (< anchor) get a slight boost.
|
|
1260
|
+
* Long, sprawling entries (> anchor) get penalized.
|
|
1261
|
+
* Formula: score *= 1 / (1 + log2(charLen / anchor))
|
|
1262
|
+
*/
|
|
1263
|
+
private applyLengthNormalization(
|
|
1264
|
+
results: RetrievalResult[],
|
|
1265
|
+
): RetrievalResult[] {
|
|
1266
|
+
const anchor = this.config.lengthNormAnchor;
|
|
1267
|
+
if (!anchor || anchor <= 0) return results;
|
|
1268
|
+
|
|
1269
|
+
const normalized = results.map((r) => {
|
|
1270
|
+
const charLen = r.entry.text.length;
|
|
1271
|
+
const ratio = charLen / anchor;
|
|
1272
|
+
// No penalty for entries at or below anchor length.
|
|
1273
|
+
// Gentle logarithmic decay for longer entries:
|
|
1274
|
+
// anchor (500) → 1.0, 800 → 0.75, 1000 → 0.67, 1500 → 0.56, 2000 → 0.50
|
|
1275
|
+
// This prevents long, keyword-rich entries from dominating top-k
|
|
1276
|
+
// while keeping their scores reasonable.
|
|
1277
|
+
const logRatio = Math.log2(Math.max(ratio, 1)); // no boost for short entries
|
|
1278
|
+
const factor = 1 / (1 + 0.5 * logRatio);
|
|
1279
|
+
return {
|
|
1280
|
+
...r,
|
|
1281
|
+
score: clamp01(r.score * factor, r.score * 0.3),
|
|
1282
|
+
};
|
|
1283
|
+
});
|
|
1284
|
+
|
|
1285
|
+
return normalized.sort((a, b) => b.score - a.score);
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1288
|
+
/**
|
|
1289
|
+
* Time decay: multiplicative penalty for old entries.
|
|
1290
|
+
* Unlike recencyBoost (additive bonus for new entries), this actively
|
|
1291
|
+
* penalizes stale information so recent knowledge wins ties.
|
|
1292
|
+
* Formula: score *= 0.5 + 0.5 * exp(-ageDays / halfLife)
|
|
1293
|
+
* At 0 days: 1.0x (no penalty)
|
|
1294
|
+
* At halfLife: ~0.68x
|
|
1295
|
+
* At 2*halfLife: ~0.59x
|
|
1296
|
+
* Floor at 0.5x (never penalize more than half)
|
|
1297
|
+
*/
|
|
1298
|
+
private applyTimeDecay(results: RetrievalResult[]): RetrievalResult[] {
|
|
1299
|
+
const halfLife = this.config.timeDecayHalfLifeDays;
|
|
1300
|
+
if (!halfLife || halfLife <= 0) return results;
|
|
1301
|
+
|
|
1302
|
+
const now = Date.now();
|
|
1303
|
+
const decayed = results.map((r) => {
|
|
1304
|
+
const ts =
|
|
1305
|
+
r.entry.timestamp && r.entry.timestamp > 0 ? r.entry.timestamp : now;
|
|
1306
|
+
const ageDays = (now - ts) / 86_400_000;
|
|
1307
|
+
|
|
1308
|
+
// Access reinforcement: frequently recalled memories decay slower
|
|
1309
|
+
const { accessCount, lastAccessedAt } = parseAccessMetadata(
|
|
1310
|
+
r.entry.metadata,
|
|
1311
|
+
);
|
|
1312
|
+
const effectiveHL = computeEffectiveHalfLife(
|
|
1313
|
+
halfLife,
|
|
1314
|
+
accessCount,
|
|
1315
|
+
lastAccessedAt,
|
|
1316
|
+
this.config.reinforcementFactor,
|
|
1317
|
+
this.config.maxHalfLifeMultiplier,
|
|
1318
|
+
);
|
|
1319
|
+
|
|
1320
|
+
// floor at 0.5: even very old entries keep at least 50% of their score
|
|
1321
|
+
const factor = 0.5 + 0.5 * Math.exp(-ageDays / effectiveHL);
|
|
1322
|
+
return {
|
|
1323
|
+
...r,
|
|
1324
|
+
score: clamp01(r.score * factor, r.score * 0.5),
|
|
1325
|
+
};
|
|
1326
|
+
});
|
|
1327
|
+
|
|
1328
|
+
return decayed.sort((a, b) => b.score - a.score);
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1331
|
+
/**
|
|
1332
|
+
* Apply lifecycle-aware score adjustment (decay + tier floors).
|
|
1333
|
+
*
|
|
1334
|
+
* This is intentionally lightweight:
|
|
1335
|
+
* - reads tier/access metadata (if any)
|
|
1336
|
+
* - multiplies scores by max(tierFloor, decayComposite)
|
|
1337
|
+
*/
|
|
1338
|
+
private applyLifecycleBoost(results: RetrievalResult[]): RetrievalResult[] {
|
|
1339
|
+
if (!this.decayEngine) return results;
|
|
1340
|
+
|
|
1341
|
+
const now = Date.now();
|
|
1342
|
+
const pairs = results.map(r => {
|
|
1343
|
+
const { memory } = getDecayableFromEntry(r.entry);
|
|
1344
|
+
return { r, memory };
|
|
1345
|
+
});
|
|
1346
|
+
|
|
1347
|
+
const scored = pairs.map(p => ({ memory: p.memory, score: p.r.score }));
|
|
1348
|
+
this.decayEngine.applySearchBoost(scored, now);
|
|
1349
|
+
|
|
1350
|
+
const boosted = pairs.map((p, i) => ({ ...p.r, score: scored[i].score }));
|
|
1351
|
+
return boosted.sort((a, b) => b.score - a.score);
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
  /**
   * Record access stats (access_count, last_accessed_at) and apply tier
   * promotion/demotion for the top 3 results.
   *
   * Note: this writes back to the store via `update` (delete+readd in
   * LanceDB); it is deliberately bounded to 3 entries per call and each
   * write is best-effort (failures are swallowed).
   * No-op when neither a decay engine nor a tier manager is configured.
   */
  private async recordAccessAndMaybeTransition(results: RetrievalResult[]): Promise<void> {
    if (!this.decayEngine && !this.tierManager) return;

    const now = Date.now();
    const toUpdate = results.slice(0, 3);

    for (const r of toUpdate) {
      const { memory, meta } = getDecayableFromEntry(r.entry);

      // Update access stats in-memory first
      const nextAccess = memory.accessCount + 1;
      meta.access_count = nextAccess;
      meta.last_accessed_at = now;
      // Backfill lifecycle fields into metadata if they were never persisted,
      // so the JSON written below is self-contained.
      if (meta.created_at === undefined && meta.createdAt === undefined) {
        meta.created_at = memory.createdAt;
      }
      if (meta.tier === undefined) {
        meta.tier = memory.tier;
      }
      if (meta.confidence === undefined) {
        meta.confidence = memory.confidence;
      }

      const updatedMemory: DecayableMemory = {
        ...memory,
        accessCount: nextAccess,
        lastAccessedAt: now,
      };

      // Tier transition (requires both engines; evaluated on the updated stats)
      if (this.decayEngine && this.tierManager) {
        const ds = this.decayEngine.score(updatedMemory, now);
        const transition = this.tierManager.evaluate(updatedMemory, ds, now);
        if (transition) {
          meta.tier = transition.toTier;
        }
      }

      try {
        await this.store.update(r.entry.id, {
          metadata: JSON.stringify(meta),
        });
      } catch {
        // best-effort: ignore
      }
    }
  }
|
|
1407
|
+
|
|
1408
|
+
/**
|
|
1409
|
+
* MMR-inspired diversity filter: greedily select results that are both
|
|
1410
|
+
* relevant (high score) and diverse (low similarity to already-selected).
|
|
1411
|
+
*
|
|
1412
|
+
* Uses cosine similarity between memory vectors. If two memories have
|
|
1413
|
+
* cosine similarity > threshold (default 0.92), the lower-scored one
|
|
1414
|
+
* is demoted to the end rather than removed entirely.
|
|
1415
|
+
*
|
|
1416
|
+
* This prevents top-k from being filled with near-identical entries
|
|
1417
|
+
* (e.g. 3 similar "SVG style" memories) while keeping them available
|
|
1418
|
+
* if the pool is small.
|
|
1419
|
+
*/
|
|
1420
|
+
private applyMMRDiversity(
|
|
1421
|
+
results: RetrievalResult[],
|
|
1422
|
+
similarityThreshold = 0.85,
|
|
1423
|
+
): RetrievalResult[] {
|
|
1424
|
+
if (results.length <= 1) return results;
|
|
1425
|
+
|
|
1426
|
+
const selected: RetrievalResult[] = [];
|
|
1427
|
+
const deferred: RetrievalResult[] = [];
|
|
1428
|
+
|
|
1429
|
+
for (const candidate of results) {
|
|
1430
|
+
// Check if this candidate is too similar to any already-selected result
|
|
1431
|
+
const tooSimilar = selected.some((s) => {
|
|
1432
|
+
// Both must have vectors to compare.
|
|
1433
|
+
// LanceDB returns Arrow Vector objects (not plain arrays),
|
|
1434
|
+
// so use .length directly and Array.from() for conversion.
|
|
1435
|
+
const sVec = s.entry.vector;
|
|
1436
|
+
const cVec = candidate.entry.vector;
|
|
1437
|
+
if (!sVec?.length || !cVec?.length) return false;
|
|
1438
|
+
const sArr = Array.from(sVec as Iterable<number>);
|
|
1439
|
+
const cArr = Array.from(cVec as Iterable<number>);
|
|
1440
|
+
const sim = cosineSimilarity(sArr, cArr);
|
|
1441
|
+
return sim > similarityThreshold;
|
|
1442
|
+
});
|
|
1443
|
+
|
|
1444
|
+
if (tooSimilar) {
|
|
1445
|
+
deferred.push(candidate);
|
|
1446
|
+
} else {
|
|
1447
|
+
selected.push(candidate);
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
// Append deferred results at the end (available but deprioritized)
|
|
1451
|
+
return [...selected, ...deferred];
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
// Update configuration
|
|
1455
|
+
updateConfig(newConfig: Partial<RetrievalConfig>): void {
|
|
1456
|
+
this.config = { ...this.config, ...newConfig };
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1459
|
+
// Get current configuration
|
|
1460
|
+
getConfig(): RetrievalConfig {
|
|
1461
|
+
return { ...this.config };
|
|
1462
|
+
}
|
|
1463
|
+
|
|
1464
|
+
// Test retrieval system
|
|
1465
|
+
async test(query = "test query"): Promise<{
|
|
1466
|
+
success: boolean;
|
|
1467
|
+
mode: string;
|
|
1468
|
+
hasFtsSupport: boolean;
|
|
1469
|
+
error?: string;
|
|
1470
|
+
}> {
|
|
1471
|
+
try {
|
|
1472
|
+
const results = await this.retrieve({
|
|
1473
|
+
query,
|
|
1474
|
+
limit: 1,
|
|
1475
|
+
});
|
|
1476
|
+
|
|
1477
|
+
return {
|
|
1478
|
+
success: true,
|
|
1479
|
+
mode: this.config.mode,
|
|
1480
|
+
hasFtsSupport: this.store.hasFtsSupport,
|
|
1481
|
+
};
|
|
1482
|
+
} catch (error) {
|
|
1483
|
+
return {
|
|
1484
|
+
success: false,
|
|
1485
|
+
mode: this.config.mode,
|
|
1486
|
+
hasFtsSupport: this.store.hasFtsSupport,
|
|
1487
|
+
error: error instanceof Error ? error.message : String(error),
|
|
1488
|
+
};
|
|
1489
|
+
}
|
|
1490
|
+
}
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
// ============================================================================
// Factory Function
// ============================================================================
|
|
1496
|
+
|
|
1497
|
+
/**
 * Optional lifecycle collaborators that can accompany a retriever.
 *
 * NOTE(review): `createRetriever` below takes only `decayEngine` via an
 * inline options type; `tierManager` declared here is not consumed by it —
 * confirm the intended consumer of this interface.
 */
export interface RetrieverLifecycleOptions {
  // Applies time-based decay to memory relevance, when provided.
  decayEngine?: DecayEngine;
  // Handles memory tier transitions (promotion/demotion), when provided.
  tierManager?: TierManager;
}
|
|
1501
|
+
|
|
1502
|
+
export function createRetriever(
|
|
1503
|
+
store: MemoryStore,
|
|
1504
|
+
embedder: Embedder,
|
|
1505
|
+
config?: Partial<RetrievalConfig>,
|
|
1506
|
+
options?: { decayEngine?: DecayEngine | null },
|
|
1507
|
+
): MemoryRetriever {
|
|
1508
|
+
const fullConfig = { ...DEFAULT_RETRIEVAL_CONFIG, ...config };
|
|
1509
|
+
return new MemoryRetriever(store, embedder, fullConfig, options?.decayEngine ?? null);
|
|
1510
|
+
}
|