openclaw-cortex-memory 0.1.0-Alpha.8 → 0.1.0-Alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -43
- package/SKILL.md +46 -51
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +127 -11
- package/dist/index.js.map +1 -1
- package/dist/openclaw.plugin.json +1 -1
- package/dist/src/engine/memory_engine.d.ts +2 -1
- package/dist/src/engine/memory_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.d.ts +59 -0
- package/dist/src/engine/ts_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.js +609 -0
- package/dist/src/engine/ts_engine.js.map +1 -1
- package/dist/src/engine/types.d.ts +7 -0
- package/dist/src/engine/types.d.ts.map +1 -1
- package/dist/src/session/session_end.d.ts.map +1 -1
- package/dist/src/session/session_end.js +18 -4
- package/dist/src/session/session_end.js.map +1 -1
- package/dist/src/store/archive_store.d.ts +24 -0
- package/dist/src/store/archive_store.d.ts.map +1 -1
- package/dist/src/store/archive_store.js +217 -24
- package/dist/src/store/archive_store.js.map +1 -1
- package/dist/src/store/embedding_utils.d.ts +32 -0
- package/dist/src/store/embedding_utils.d.ts.map +1 -0
- package/dist/src/store/embedding_utils.js +173 -0
- package/dist/src/store/embedding_utils.js.map +1 -0
- package/dist/src/store/read_store.d.ts +20 -0
- package/dist/src/store/read_store.d.ts.map +1 -1
- package/dist/src/store/read_store.js +336 -21
- package/dist/src/store/read_store.js.map +1 -1
- package/dist/src/store/vector_store.d.ts +13 -0
- package/dist/src/store/vector_store.d.ts.map +1 -1
- package/dist/src/store/vector_store.js +59 -1
- package/dist/src/store/vector_store.js.map +1 -1
- package/dist/src/store/write_store.d.ts +35 -0
- package/dist/src/store/write_store.d.ts.map +1 -1
- package/dist/src/store/write_store.js +163 -14
- package/dist/src/store/write_store.js.map +1 -1
- package/dist/src/sync/session_sync.d.ts +44 -2
- package/dist/src/sync/session_sync.d.ts.map +1 -1
- package/dist/src/sync/session_sync.js +364 -31
- package/dist/src/sync/session_sync.js.map +1 -1
- package/dist/src/utils/runtime_env.d.ts +4 -0
- package/dist/src/utils/runtime_env.d.ts.map +1 -0
- package/dist/src/utils/runtime_env.js +51 -0
- package/dist/src/utils/runtime_env.js.map +1 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +3 -2
- package/scripts/cli.js +13 -13
- package/scripts/uninstall.js +7 -1
|
@@ -66,6 +66,55 @@ function scoreText(query, text) {
|
|
|
66
66
|
}
|
|
67
67
|
return score;
|
|
68
68
|
}
|
|
69
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Any run of characters other than ASCII letters, ASCII digits, or CJK
 * ideographs in U+4E00..U+9FA5 acts as a separator, so a contiguous run of
 * ideographs is kept as one token.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {string[]} Non-empty tokens in order of appearance; an empty array
 *   when `text` is not a string.
 */
function tokenize(text) {
    // A record without a usable text field must not crash ranking.
    if (typeof text !== "string") {
        return [];
    }
    // Whitespace is always a separator here, so tokens never need trimming.
    return text
        .toLowerCase()
        .split(/[^a-z0-9\u4e00-\u9fa5]+/i)
        .filter(Boolean);
}
|
|
76
|
+
/**
 * Collect the corpus statistics that BM25 scoring needs.
 *
 * @param {Array<object>} docs - Corpus documents.
 * @param {string[]} queryTerms - Tokenized query. Repeated terms are counted
 *   once per document so a document frequency can never exceed `docs.length`.
 * @param {(doc: object) => string[]} [getTokens] - Optional token provider;
 *   when it is not a function, `tokenize(doc.text)` is used instead.
 * @returns {{avgDocLen: number, docFreq: Map<string, number>}} Average token
 *   count per document (never below 1) and, for each query term found in the
 *   corpus, the number of documents containing it.
 */
function buildBm25Stats(docs, queryTerms, getTokens) {
    const docFreq = new Map();
    // De-duplicate: a query such as "foo foo" would otherwise add 2 to the
    // document frequency of "foo" for every matching document.
    const uniqueTerms = [...new Set(queryTerms)];
    const readTokens = typeof getTokens === "function"
        ? getTokens
        : doc => tokenize(doc.text);
    let totalLen = 0;
    for (const doc of docs) {
        const tokens = readTokens(doc);
        totalLen += tokens.length;
        if (uniqueTerms.length === 0) {
            // Only the length statistics are needed when there is no query.
            continue;
        }
        const present = new Set(tokens);
        for (const term of uniqueTerms) {
            if (present.has(term)) {
                docFreq.set(term, (docFreq.get(term) || 0) + 1);
            }
        }
    }
    const avgDocLen = docs.length > 0 ? Math.max(1, totalLen / docs.length) : 1;
    return { avgDocLen, docFreq };
}
|
|
95
|
+
/**
 * Score one document against the query with BM25 (k1 = 1.2, b = 0.75).
 *
 * @param {object} args
 * @param {string[]} args.queryTerms - Tokenized query.
 * @param {string[]} [args.docTokens] - Pre-computed document tokens.
 * @param {string} [args.docText] - Tokenized via `tokenize` only when
 *   `docTokens` is not an array.
 * @param {number} args.docCount - Number of documents in the corpus.
 * @param {number} args.avgDocLen - Average document length in tokens.
 * @param {Map<string, number>} args.docFreq - Document frequency per term.
 * @returns {number} The summed per-term score; 0 when the document or query
 *   has no tokens, or when `docCount` is not positive.
 */
function bm25Score(args) {
    const tokens = Array.isArray(args.docTokens) ? args.docTokens : tokenize(args.docText);
    if (tokens.length === 0 || args.queryTerms.length === 0 || args.docCount <= 0) {
        return 0;
    }
    const termFreq = new Map();
    for (const token of tokens) {
        termFreq.set(token, (termFreq.get(token) || 0) + 1);
    }
    const k1 = 1.2;
    const b = 0.75;
    // The length penalty is the same for every term, so compute it once.
    const lengthPenalty = k1 * (1 - b + b * (tokens.length / Math.max(1, args.avgDocLen)));
    let score = 0;
    for (const term of args.queryTerms) {
        const tf = termFreq.get(term) || 0;
        if (tf <= 0) {
            continue;
        }
        // Clamp to [0, docCount]: an over-counted frequency would push the
        // IDF below zero and make a matching term lower the score.
        const df = Math.min(args.docCount, Math.max(0, args.docFreq.get(term) || 0));
        const idf = Math.log(1 + ((args.docCount - df + 0.5) / (df + 0.5)));
        const denominator = tf + lengthPenalty;
        score += idf * (((k1 + 1) * tf) / Math.max(1e-6, denominator));
    }
    return score;
}
|
|
69
118
|
function normalizeRecordText(record) {
|
|
70
119
|
const direct = [record.content, record.summary, record.text, record.message]
|
|
71
120
|
.find(v => typeof v === "string" && v.trim());
|
|
@@ -138,9 +187,20 @@ function parseJsonlFile(filePath, sourceLabel, logger) {
|
|
|
138
187
|
text,
|
|
139
188
|
source: sourceLabel,
|
|
140
189
|
timestamp: Number.isFinite(timestampValue) ? timestampValue : undefined,
|
|
190
|
+
layer: parsed.layer === "active" || parsed.layer === "archive"
|
|
191
|
+
? parsed.layer
|
|
192
|
+
: (sourceLabel === "sessions_active" ? "active" : (sourceLabel === "sessions_archive" ? "archive" : undefined)),
|
|
193
|
+
sourceMemoryId: typeof parsed.source_memory_id === "string"
|
|
194
|
+
? parsed.source_memory_id
|
|
195
|
+
: id,
|
|
196
|
+
sourceMemoryCanonicalId: typeof parsed.source_memory_canonical_id === "string"
|
|
197
|
+
? parsed.source_memory_canonical_id
|
|
198
|
+
: (typeof parsed.canonical_id === "string" ? parsed.canonical_id : undefined),
|
|
141
199
|
embedding: Array.isArray(parsed.embedding) ? parsed.embedding.filter(item => Number.isFinite(item)) : undefined,
|
|
142
200
|
eventType: typeof parsed.event_type === "string" ? parsed.event_type.trim() : undefined,
|
|
143
201
|
qualityScore: typeof parsed.quality_score === "number" ? parsed.quality_score : undefined,
|
|
202
|
+
charCount: typeof parsed.char_count === "number" ? parsed.char_count : undefined,
|
|
203
|
+
tokenCount: typeof parsed.token_count === "number" ? parsed.token_count : undefined,
|
|
144
204
|
sessionId: typeof parsed.session_id === "string" ? parsed.session_id : undefined,
|
|
145
205
|
entities,
|
|
146
206
|
relations,
|
|
@@ -262,6 +322,11 @@ function normalizeBaseUrl(value) {
|
|
|
262
322
|
return "";
|
|
263
323
|
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
264
324
|
}
|
|
325
|
+
// Version tag emitted as the `prompt_version=...` line at the top of the
// read-fusion prompt.
const READ_FUSION_PROMPT_VERSION = "read-fusion.v1.1.0";

// Worked examples appended after the numbered rules of the read-fusion prompt.
// The text is Chinese and is sent to the model verbatim:
//   Sample A - the same source_memory_id appears in both archive and vector:
//              keep one primary fact and keep both in the evidence chain.
//   Sample B - old and new decisions conflict: record it under `conflicts`
//              and state the priority basis in `canonical_answer`.
const READ_FUSION_REGRESSION_SAMPLES = [
    "样例A: 同一 source_memory_id 同时出现在 archive 与 vector,输出中只保留一条主事实并在证据链保留两者关联。",
    "样例B: 新旧决策冲突时,将冲突写入 conflicts,并在 canonical_answer 标注优先级依据(时间、质量、明确性)。",
];
|
|
265
330
|
function cosineSimilarity(left, right) {
|
|
266
331
|
if (left.length === 0 || right.length === 0) {
|
|
267
332
|
return 0;
|
|
@@ -425,6 +490,62 @@ function sourceWeight(source, intent) {
|
|
|
425
490
|
}
|
|
426
491
|
return 1;
|
|
427
492
|
}
|
|
493
|
+
/**
 * Build the key under which candidates for the same memory are merged.
 *
 * Preference order: trimmed `sourceMemoryCanonicalId`, then trimmed
 * `sourceMemoryId`, then the document's own `id`.
 *
 * @param {object} doc - Candidate document.
 * @returns {string} `canonical:<id>`, `source:<id>` or `id:<id>`.
 */
function mergeKeyFromDoc(doc) {
    // Non-string and whitespace-only values count as absent.
    const cleaned = value => (typeof value === "string" ? value.trim() : "");
    const canonicalId = cleaned(doc.sourceMemoryCanonicalId);
    if (canonicalId !== "") {
        return `canonical:${canonicalId}`;
    }
    const memoryId = cleaned(doc.sourceMemoryId);
    return memoryId !== "" ? `source:${memoryId}` : `id:${doc.id}`;
}
|
|
504
|
+
/**
 * Look up the user-configured weight multiplier for a retrieval channel.
 *
 * @param {string} source - Channel name used as the key into
 *   `options.channelWeights`.
 * @param {object} [options] - Fusion options; may be null or undefined.
 * @returns {number} The configured weight when it is a finite number greater
 *   than zero, otherwise the neutral multiplier 1.
 */
function customChannelWeight(source, options) {
    const table = options?.channelWeights;
    const configured = table ? table[source] : undefined;
    const usable = typeof configured === "number"
        && Number.isFinite(configured)
        && configured > 0;
    return usable ? configured : 1;
}
|
|
514
|
+
/**
 * Compute a damping factor in (0, 1] for long documents.
 *
 * Documents at or below `pivotChars` characters get 1. Beyond the pivot the
 * factor is `1 / (1 + over * strength)`, where `over` is the excess length
 * relative to the pivot, and it never drops below `minFactor`.
 *
 * @param {object} doc - Document; `doc.charCount` is used when it is a finite
 *   number, otherwise `doc.text.length`.
 * @param {object} [options] - Fusion options. `options.lengthNorm` may set
 *   `enabled` (false disables damping), `pivotChars` (default 1200),
 *   `strength` (default 0.75) and `minFactor` (default 0.45, must lie in
 *   (0, 1]); invalid values fall back to the defaults.
 * @returns {number} The multiplier.
 */
function lengthNormalizeFactor(doc, options) {
    const config = options?.lengthNorm;
    if (config?.enabled === false) {
        return 1;
    }
    const positiveOr = (value, fallback) => (typeof value === "number" && value > 0 ? value : fallback);
    const pivotChars = positiveOr(config?.pivotChars, 1200);
    const strength = positiveOr(config?.strength, 0.75);
    const configuredFloor = config?.minFactor;
    const minFactor = typeof configuredFloor === "number" && configuredFloor > 0 && configuredFloor <= 1
        ? configuredFloor
        : 0.45;
    const hasStoredCount = typeof doc.charCount === "number" && Number.isFinite(doc.charCount);
    const charCount = hasStoredCount ? doc.charCount : doc.text.length;
    if (charCount <= pivotChars) {
        return 1;
    }
    const over = (charCount - pivotChars) / pivotChars;
    const factor = 1 / (1 + over * strength);
    return Math.max(minFactor, Math.min(1, factor));
}
|
|
538
|
+
/**
 * Decide how many top candidates a retrieval channel may contribute.
 *
 * @param {string} source - Channel name.
 * @param {number} topK - Number of results requested by the caller.
 * @param {object} [options] - Fusion options; `options.channelTopK[source]`
 *   overrides the default when it is a finite number >= 1 (floored).
 * @returns {number} The quota. Defaults: "rules" max(6, 2*topK), "graph"
 *   max(8, 3*topK), any other channel max(12, 4*topK).
 */
function channelQuota(source, topK, options) {
    const configured = options?.channelTopK?.[source];
    if (typeof configured === "number" && Number.isFinite(configured) && configured >= 1) {
        return Math.floor(configured);
    }
    // A missing or NaN topK would make the quota NaN, and Array#slice(0, NaN)
    // keeps nothing; treat it as 0 so the per-channel floor still applies.
    const requested = Number(topK);
    const k = Number.isNaN(requested) ? 0 : requested;
    switch (source) {
        case "rules":
            return Math.max(6, k * 2);
        case "graph":
            return Math.max(8, k * 3);
        default:
            return Math.max(12, k * 4);
    }
}
|
|
428
549
|
async function searchLanceDb(args) {
|
|
429
550
|
try {
|
|
430
551
|
const require = (0, module_1.createRequire)(__filename);
|
|
@@ -475,9 +596,14 @@ async function searchLanceDb(args) {
|
|
|
475
596
|
text: summary,
|
|
476
597
|
source: "vector_lancedb",
|
|
477
598
|
timestamp: Number.isFinite(ts) ? ts : undefined,
|
|
599
|
+
layer: record.layer === "active" || record.layer === "archive" ? record.layer : undefined,
|
|
600
|
+
sourceMemoryId: typeof record.source_memory_id === "string" ? record.source_memory_id : undefined,
|
|
601
|
+
sourceMemoryCanonicalId: typeof record.source_memory_canonical_id === "string" ? record.source_memory_canonical_id : undefined,
|
|
478
602
|
embedding: Array.isArray(record.vector) ? record.vector.filter(item => Number.isFinite(item)) : undefined,
|
|
479
603
|
eventType: typeof record.event_type === "string" ? record.event_type : undefined,
|
|
480
604
|
qualityScore: typeof record.quality_score === "number" ? record.quality_score : undefined,
|
|
605
|
+
charCount: typeof record.char_count === "number" ? record.char_count : undefined,
|
|
606
|
+
tokenCount: typeof record.token_count === "number" ? record.token_count : undefined,
|
|
481
607
|
sessionId: typeof record.session_id === "string" ? record.session_id : undefined,
|
|
482
608
|
entities,
|
|
483
609
|
relations: Array.isArray(relations) ? relations : [],
|
|
@@ -530,9 +656,14 @@ function parseVectorFallback(filePath, logger) {
|
|
|
530
656
|
text: summary,
|
|
531
657
|
source: "vector_jsonl",
|
|
532
658
|
timestamp: Number.isFinite(ts) ? ts : undefined,
|
|
659
|
+
layer: parsed.layer === "active" || parsed.layer === "archive" ? parsed.layer : undefined,
|
|
660
|
+
sourceMemoryId: typeof parsed.source_memory_id === "string" ? parsed.source_memory_id : undefined,
|
|
661
|
+
sourceMemoryCanonicalId: typeof parsed.source_memory_canonical_id === "string" ? parsed.source_memory_canonical_id : undefined,
|
|
533
662
|
embedding: Array.isArray(parsed.embedding) ? parsed.embedding.filter(item => Number.isFinite(item)) : undefined,
|
|
534
663
|
eventType: typeof parsed.event_type === "string" ? parsed.event_type.trim() : undefined,
|
|
535
664
|
qualityScore: typeof parsed.quality_score === "number" ? parsed.quality_score : undefined,
|
|
665
|
+
charCount: typeof parsed.char_count === "number" ? parsed.char_count : undefined,
|
|
666
|
+
tokenCount: typeof parsed.token_count === "number" ? parsed.token_count : undefined,
|
|
536
667
|
sessionId: typeof parsed.session_id === "string" ? parsed.session_id : undefined,
|
|
537
668
|
entities,
|
|
538
669
|
relations,
|
|
@@ -553,6 +684,7 @@ async function requestFusion(args) {
|
|
|
553
684
|
.join("\n")
|
|
554
685
|
.slice(0, 18000);
|
|
555
686
|
const prompt = [
|
|
687
|
+
`prompt_version=${READ_FUSION_PROMPT_VERSION}`,
|
|
556
688
|
"你是记忆检索融合器。请融合多路召回结果,产出可直接给 Agent 使用的完整记忆包,不要让 Agent 再去翻历史。",
|
|
557
689
|
"必须严格返回 JSON:",
|
|
558
690
|
"{\"canonical_answer\": string, \"coverage_note\": string, \"facts\": [{\"text\": string, \"evidence_ids\": string[]}], \"timeline\": [{\"when\": string, \"event\": string, \"evidence_ids\": string[]}], \"entities\": [{\"name\": string, \"role\": string}], \"decisions\": [{\"decision\": string, \"rationale\": string, \"evidence_ids\": string[]}], \"fixes\": [{\"issue\": string, \"fix\": string, \"evidence_ids\": string[]}], \"preferences\": [{\"subject\": string, \"preference\": string, \"evidence_ids\": string[]}], \"risks\": [{\"risk\": string, \"mitigation\": string, \"evidence_ids\": string[]}], \"action_items\": [{\"item\": string, \"owner\": string, \"status\": string, \"evidence_ids\": string[]}], \"conflicts\": [{\"topic\": string, \"details\": string}], \"evidence_ids\": string[], \"confidence\": number}",
|
|
@@ -563,6 +695,8 @@ async function requestFusion(args) {
|
|
|
563
695
|
"4) 若存在冲突写入 conflicts,否则返回空数组",
|
|
564
696
|
"5) confidence 0~1",
|
|
565
697
|
"6) 不确定信息必须在 coverage_note 标注",
|
|
698
|
+
"7) 同源记录合并:同 source_memory_id/source_memory_canonical_id 的候选只保留一条主结论,其余作为证据补充",
|
|
699
|
+
...READ_FUSION_REGRESSION_SAMPLES,
|
|
566
700
|
].join("\n");
|
|
567
701
|
const body = {
|
|
568
702
|
model: args.llm.model,
|
|
@@ -634,6 +768,22 @@ function createReadStore(options) {
|
|
|
634
768
|
const memoryRoot = options.dbPath ? path.resolve(options.dbPath) : path.join(options.projectRoot, "data", "memory");
|
|
635
769
|
const vectorFallbackPath = path.join(memoryRoot, "vector", "lancedb_events.jsonl");
|
|
636
770
|
const hitStatsPath = path.join(memoryRoot, ".read_hit_stats.json");
|
|
771
|
+
let docsCache = null;
|
|
772
|
+
let vectorFallbackCache = null;
|
|
773
|
+
let bm25TokenCacheSignature = "";
|
|
774
|
+
let bm25TokenCache = new Map();
|
|
775
|
+
/**
 * Produce a change-detection string for a file.
 *
 * @param {string} filePath - Path to inspect.
 * @returns {string} `<path>:<size>:<floored mtimeMs>` when the file exists,
 *   `<path>:missing` when it does not, and `<path>:error` when inspecting it
 *   throws.
 */
function fileSignature(filePath) {
    let descriptor;
    try {
        if (fs.existsSync(filePath)) {
            const { size, mtimeMs } = fs.statSync(filePath);
            descriptor = `${size}:${Math.floor(mtimeMs)}`;
        }
        else {
            descriptor = "missing";
        }
    }
    catch {
        // The failure is reported through the signature instead of a throw.
        descriptor = "error";
    }
    return `${filePath}:${descriptor}`;
}
|
|
637
787
|
function loadHitStats() {
|
|
638
788
|
try {
|
|
639
789
|
if (!fs.existsSync(hitStatsPath)) {
|
|
@@ -696,12 +846,46 @@ function createReadStore(options) {
|
|
|
696
846
|
const memoryMdPath = path.join(memoryRoot, "MEMORY.md");
|
|
697
847
|
const activeSessionsPath = path.join(memoryRoot, "sessions", "active", "sessions.jsonl");
|
|
698
848
|
const archiveSessionsPath = path.join(memoryRoot, "sessions", "archive", "sessions.jsonl");
|
|
699
|
-
|
|
849
|
+
const signature = [
|
|
850
|
+
fileSignature(cortexRulesPath),
|
|
851
|
+
fileSignature(memoryMdPath),
|
|
852
|
+
fileSignature(activeSessionsPath),
|
|
853
|
+
fileSignature(archiveSessionsPath),
|
|
854
|
+
].join("|");
|
|
855
|
+
if (docsCache && docsCache.signature === signature) {
|
|
856
|
+
return docsCache.docs;
|
|
857
|
+
}
|
|
858
|
+
const docs = [
|
|
700
859
|
...parseMarkdownFile(cortexRulesPath, "CORTEX_RULES.md"),
|
|
701
860
|
...parseMarkdownFile(memoryMdPath, "MEMORY.md"),
|
|
702
861
|
...parseJsonlFile(activeSessionsPath, "sessions_active", options.logger),
|
|
703
862
|
...parseJsonlFile(archiveSessionsPath, "sessions_archive", options.logger),
|
|
704
863
|
];
|
|
864
|
+
docsCache = { signature, docs };
|
|
865
|
+
return docs;
|
|
866
|
+
}
|
|
867
|
+
/**
 * Return the parsed vector fallback documents, re-parsing the file only when
 * its signature (as computed by `fileSignature`) differs from the one stored
 * with the cached result.
 *
 * @returns {Array<object>} Documents parsed from `vectorFallbackPath`.
 */
function loadVectorFallbackCached() {
    const signature = fileSignature(vectorFallbackPath);
    const isFresh = Boolean(vectorFallbackCache) && vectorFallbackCache.signature === signature;
    if (!isFresh) {
        // Parsing happens before the assignment, so a throwing parse leaves
        // the previous cache entry in place.
        vectorFallbackCache = {
            signature,
            docs: parseVectorFallback(vectorFallbackPath, options.logger),
        };
    }
    return vectorFallbackCache.docs;
}
|
|
876
|
+
/**
 * Return the BM25 tokens for a document, memoized per corpus signature.
 *
 * The cache is discarded whenever `signature` differs from the one it was
 * built for. Entries are keyed by the document's source, id, text length and
 * first 64 characters of text, so documents that agree on all four share one
 * entry.
 *
 * @param {object} doc - Document with `source`, `id` and string `text`.
 * @param {string} signature - Signature of the corpus being scored.
 * @returns {string[]} Tokens of `doc.text`.
 */
function getBm25Tokens(doc, signature) {
    if (signature !== bm25TokenCacheSignature) {
        bm25TokenCache = new Map();
        bm25TokenCacheSignature = signature;
    }
    const cacheKey = `${doc.source}|${doc.id}|${doc.text.length}|${doc.text.slice(0, 64)}`;
    let tokens = bm25TokenCache.get(cacheKey);
    if (!tokens) {
        tokens = tokenize(doc.text);
        bm25TokenCache.set(cacheKey, tokens);
    }
    return tokens;
}
|
|
706
890
|
async function searchMemory(args) {
|
|
707
891
|
const query = args.query?.trim();
|
|
@@ -735,7 +919,7 @@ function createReadStore(options) {
|
|
|
735
919
|
: [];
|
|
736
920
|
const vectorDocsFallback = vectorDocsFromLance.length > 0
|
|
737
921
|
? []
|
|
738
|
-
:
|
|
922
|
+
: loadVectorFallbackCached();
|
|
739
923
|
const vectorDocs = [...vectorDocsFromLance, ...vectorDocsFallback];
|
|
740
924
|
const graphDocs = docs
|
|
741
925
|
.filter(doc => Array.isArray(doc.relations) && doc.relations.length > 0)
|
|
@@ -751,6 +935,10 @@ function createReadStore(options) {
|
|
|
751
935
|
});
|
|
752
936
|
const rulesDocs = docs.filter(doc => doc.source === "CORTEX_RULES.md");
|
|
753
937
|
const archiveDocs = docs.filter(doc => doc.source.startsWith("sessions_"));
|
|
938
|
+
const bm25Terms = tokenize(query);
|
|
939
|
+
const bm25Corpus = [...rulesDocs, ...archiveDocs, ...vectorDocs, ...graphDocs];
|
|
940
|
+
const bm25Signature = `${docsCache?.signature || "na"}|vector:${vectorDocs.length}:${vectorDocs.slice(0, 40).map(item => `${item.id}:${item.text.length}`).join(",")}`;
|
|
941
|
+
const bm25Stats = buildBm25Stats(bm25Corpus, bm25Terms, doc => getBm25Tokens(doc, bm25Signature));
|
|
754
942
|
const combinedCandidates = [];
|
|
755
943
|
const channels = {
|
|
756
944
|
rules: [],
|
|
@@ -760,10 +948,19 @@ function createReadStore(options) {
|
|
|
760
948
|
};
|
|
761
949
|
const evaluateDoc = (doc, source) => {
|
|
762
950
|
const lexical = scoreText(query, doc.text);
|
|
951
|
+
const bm25 = bm25Score({
|
|
952
|
+
queryTerms: bm25Terms,
|
|
953
|
+
docText: doc.text,
|
|
954
|
+
docTokens: getBm25Tokens(doc, bm25Signature),
|
|
955
|
+
docCount: bm25Corpus.length,
|
|
956
|
+
avgDocLen: bm25Stats.avgDocLen,
|
|
957
|
+
docFreq: bm25Stats.docFreq,
|
|
958
|
+
});
|
|
959
|
+
const lexicalCombined = lexical + bm25 * 2;
|
|
763
960
|
const semantic = queryEmbedding && Array.isArray(doc.embedding) && doc.embedding.length > 0
|
|
764
961
|
? Math.max(0, cosineSimilarity(queryEmbedding, doc.embedding) * 5)
|
|
765
962
|
: 0;
|
|
766
|
-
if (
|
|
963
|
+
if (lexicalCombined <= 0 && semantic <= 0) {
|
|
767
964
|
return null;
|
|
768
965
|
}
|
|
769
966
|
const recency = recencyScore(doc.timestamp);
|
|
@@ -772,18 +969,22 @@ function createReadStore(options) {
|
|
|
772
969
|
? (preferredTypes.includes(doc.eventType) ? 1 : 0)
|
|
773
970
|
: 0.5;
|
|
774
971
|
const graphMatch = source === "graph" ? 1 : 0;
|
|
775
|
-
const
|
|
776
|
-
|
|
972
|
+
const sourceBaseWeight = sourceWeight(source, intent);
|
|
973
|
+
const sourceConfigWeight = customChannelWeight(source, options.fusion);
|
|
974
|
+
const lengthNorm = lengthNormalizeFactor(doc, options.fusion);
|
|
975
|
+
const baseWeighted = (0.2 * lexicalCombined +
|
|
976
|
+
0.3 * (semantic * lengthNorm) +
|
|
777
977
|
0.1 * recency +
|
|
778
978
|
0.15 * quality +
|
|
779
979
|
0.15 * typeMatch +
|
|
780
|
-
0.1 * graphMatch) *
|
|
980
|
+
0.1 * graphMatch) * sourceBaseWeight * sourceConfigWeight;
|
|
781
981
|
const decayFactor = computeDecayFactor(doc.id, doc.eventType, doc.timestamp, options.memoryDecay, hitStats);
|
|
782
982
|
const weighted = baseWeighted * decayFactor;
|
|
783
983
|
return {
|
|
784
984
|
doc,
|
|
785
985
|
source,
|
|
786
|
-
lexical,
|
|
986
|
+
lexical: lexicalCombined,
|
|
987
|
+
bm25,
|
|
787
988
|
semantic,
|
|
788
989
|
recency,
|
|
789
990
|
quality,
|
|
@@ -815,7 +1016,7 @@ function createReadStore(options) {
|
|
|
815
1016
|
}
|
|
816
1017
|
for (const key of Object.keys(channels)) {
|
|
817
1018
|
channels[key].sort((a, b) => b.weighted - a.weighted);
|
|
818
|
-
combinedCandidates.push(...channels[key].slice(0,
|
|
1019
|
+
combinedCandidates.push(...channels[key].slice(0, channelQuota(key, args.topK, options.fusion)));
|
|
819
1020
|
}
|
|
820
1021
|
const rrfMap = new Map();
|
|
821
1022
|
const weightedMap = new Map();
|
|
@@ -825,22 +1026,39 @@ function createReadStore(options) {
|
|
|
825
1026
|
for (let i = 0; i < list.length; i += 1) {
|
|
826
1027
|
const candidate = list[i];
|
|
827
1028
|
const rrf = 1 / (rrfK + i + 1);
|
|
828
|
-
|
|
829
|
-
|
|
1029
|
+
const mergeKey = mergeKeyFromDoc(candidate.doc);
|
|
1030
|
+
rrfMap.set(mergeKey, (rrfMap.get(mergeKey) || 0) + rrf);
|
|
1031
|
+
const current = weightedMap.get(mergeKey);
|
|
830
1032
|
if (!current || candidate.weighted > current.weighted) {
|
|
831
|
-
weightedMap.set(
|
|
1033
|
+
weightedMap.set(mergeKey, candidate);
|
|
832
1034
|
}
|
|
833
1035
|
}
|
|
834
1036
|
}
|
|
835
|
-
const preRanked = [...weightedMap.
|
|
836
|
-
.map(candidate => ({
|
|
1037
|
+
const preRanked = [...weightedMap.entries()]
|
|
1038
|
+
.map(([mergeKey, candidate]) => ({
|
|
837
1039
|
id: candidate.doc.id,
|
|
1040
|
+
merge_key: mergeKey,
|
|
1041
|
+
source_memory_id: candidate.doc.sourceMemoryId || "",
|
|
1042
|
+
source_memory_canonical_id: candidate.doc.sourceMemoryCanonicalId || "",
|
|
838
1043
|
text: candidate.doc.text,
|
|
839
1044
|
source: candidate.doc.source,
|
|
1045
|
+
layer: candidate.doc.layer || "",
|
|
840
1046
|
event_type: candidate.doc.eventType || "",
|
|
841
1047
|
quality_score: candidate.quality,
|
|
842
1048
|
timestamp: candidate.doc.timestamp ? new Date(candidate.doc.timestamp).toISOString() : "",
|
|
843
|
-
score: candidate.weighted + (rrfMap.get(
|
|
1049
|
+
score: candidate.weighted + (rrfMap.get(mergeKey) || 0) * 1.5,
|
|
1050
|
+
score_breakdown: {
|
|
1051
|
+
lexical: Number(candidate.lexical.toFixed(4)),
|
|
1052
|
+
bm25: Number(candidate.bm25.toFixed(4)),
|
|
1053
|
+
semantic: Number(candidate.semantic.toFixed(4)),
|
|
1054
|
+
recency: Number(candidate.recency.toFixed(4)),
|
|
1055
|
+
quality: Number(candidate.quality.toFixed(4)),
|
|
1056
|
+
type: Number(candidate.typeMatch.toFixed(4)),
|
|
1057
|
+
graph: Number(candidate.graphMatch.toFixed(4)),
|
|
1058
|
+
decay: Number(candidate.decayFactor.toFixed(4)),
|
|
1059
|
+
rrf: Number(((rrfMap.get(mergeKey) || 0) * 1.5).toFixed(4)),
|
|
1060
|
+
weighted: Number(candidate.weighted.toFixed(4)),
|
|
1061
|
+
},
|
|
844
1062
|
reason_tags: [
|
|
845
1063
|
`intent:${intent.toLowerCase()}`,
|
|
846
1064
|
candidate.semantic > 0 ? "vector_hit" : "lexical_hit",
|
|
@@ -849,6 +1067,7 @@ function createReadStore(options) {
|
|
|
849
1067
|
candidate.quality >= 0.7 ? "high_quality" : "normal_quality",
|
|
850
1068
|
candidate.decayFactor < 1 ? `decay:${candidate.decayFactor.toFixed(3)}` : "decay:1.000",
|
|
851
1069
|
`source:${candidate.source}`,
|
|
1070
|
+
`merge_key:${mergeKey}`,
|
|
852
1071
|
],
|
|
853
1072
|
}))
|
|
854
1073
|
.sort((a, b) => b.score - a.score)
|
|
@@ -861,13 +1080,20 @@ function createReadStore(options) {
|
|
|
861
1080
|
const rerankerModel = options.reranker?.model || "";
|
|
862
1081
|
const rerankerApiKey = options.reranker?.apiKey || "";
|
|
863
1082
|
const rerankerBaseUrl = normalizeBaseUrl(options.reranker?.baseURL || options.reranker?.baseUrl);
|
|
1083
|
+
const fusionEnabled = options.fusion?.enabled !== false;
|
|
1084
|
+
const llmModel = options.llm?.model || "";
|
|
1085
|
+
const llmApiKey = options.llm?.apiKey || "";
|
|
1086
|
+
const llmBaseUrl = normalizeBaseUrl(options.llm?.baseURL || options.llm?.baseUrl);
|
|
1087
|
+
const fusionAuthoritative = options.fusion?.authoritative !== false;
|
|
1088
|
+
const skipRerankerForFusion = fusionEnabled && fusionAuthoritative && llmModel && llmApiKey && llmBaseUrl;
|
|
864
1089
|
let rerankedSimple = lexicalRanked.map(item => ({
|
|
865
1090
|
id: item.id,
|
|
1091
|
+
merge_key: item.merge_key,
|
|
866
1092
|
text: item.text,
|
|
867
1093
|
source: item.source,
|
|
868
1094
|
score: item.score,
|
|
869
1095
|
}));
|
|
870
|
-
if (rerankerModel && rerankerApiKey && rerankerBaseUrl && lexicalRanked.length > 1) {
|
|
1096
|
+
if (rerankerModel && rerankerApiKey && rerankerBaseUrl && lexicalRanked.length > 1 && !skipRerankerForFusion) {
|
|
871
1097
|
try {
|
|
872
1098
|
rerankedSimple = await requestRerank({
|
|
873
1099
|
query,
|
|
@@ -876,6 +1102,10 @@ function createReadStore(options) {
|
|
|
876
1102
|
apiKey: rerankerApiKey,
|
|
877
1103
|
baseUrl: rerankerBaseUrl,
|
|
878
1104
|
});
|
|
1105
|
+
rerankedSimple = rerankedSimple.map(item => {
|
|
1106
|
+
const found = lexicalRanked.find(entry => entry.id === item.id);
|
|
1107
|
+
return { ...item, merge_key: found?.merge_key || item.id };
|
|
1108
|
+
});
|
|
879
1109
|
}
|
|
880
1110
|
catch (error) {
|
|
881
1111
|
options.logger.warn(`Reranker failed, keep hybrid ranking: ${error}`);
|
|
@@ -885,19 +1115,98 @@ function createReadStore(options) {
|
|
|
885
1115
|
const hit = lexicalRanked.find(entry => entry.id === item.id);
|
|
886
1116
|
return {
|
|
887
1117
|
id: item.id,
|
|
1118
|
+
merge_key: hit?.merge_key || item.merge_key || item.id,
|
|
1119
|
+
source_memory_id: hit?.source_memory_id || "",
|
|
1120
|
+
source_memory_canonical_id: hit?.source_memory_canonical_id || "",
|
|
888
1121
|
text: item.text,
|
|
889
1122
|
source: item.source,
|
|
1123
|
+
layer: hit?.layer || "",
|
|
890
1124
|
event_type: hit?.event_type || "",
|
|
891
1125
|
quality_score: hit?.quality_score ?? 0,
|
|
892
1126
|
timestamp: hit?.timestamp || "",
|
|
893
1127
|
score: Number(item.score.toFixed(4)),
|
|
1128
|
+
score_breakdown: hit?.score_breakdown || {},
|
|
894
1129
|
reason_tags: Array.isArray(hit?.reason_tags) ? hit?.reason_tags : [],
|
|
1130
|
+
explain: {
|
|
1131
|
+
merge_key: hit?.merge_key || item.merge_key || item.id,
|
|
1132
|
+
source_memory_id: hit?.source_memory_id || "",
|
|
1133
|
+
source_memory_canonical_id: hit?.source_memory_canonical_id || "",
|
|
1134
|
+
channel: item.source,
|
|
1135
|
+
layer: hit?.layer || "",
|
|
1136
|
+
score_breakdown: hit?.score_breakdown || {},
|
|
1137
|
+
reason_tags: Array.isArray(hit?.reason_tags) ? hit?.reason_tags : [],
|
|
1138
|
+
},
|
|
895
1139
|
};
|
|
896
1140
|
});
|
|
897
|
-
const
|
|
898
|
-
const
|
|
899
|
-
const
|
|
900
|
-
const
|
|
1141
|
+
const minLexicalHits = Math.max(0, Math.floor(options.fusion?.minLexicalHits ?? 1));
|
|
1142
|
+
const minSemanticHits = Math.max(0, Math.floor(options.fusion?.minSemanticHits ?? 1));
|
|
1143
|
+
const fallbackPool = lexicalRanked.filter(item => !ranked.some(existing => existing.id === item.id));
|
|
1144
|
+
const lexicalCount = ranked.filter(item => item.reason_tags.includes("lexical_hit")).length;
|
|
1145
|
+
const semanticCount = ranked.filter(item => item.reason_tags.includes("vector_hit")).length;
|
|
1146
|
+
if (semanticCount < minSemanticHits) {
|
|
1147
|
+
const needed = minSemanticHits - semanticCount;
|
|
1148
|
+
const supplement = fallbackPool.filter(item => item.reason_tags.includes("vector_hit")).slice(0, needed);
|
|
1149
|
+
for (const item of supplement) {
|
|
1150
|
+
ranked.push({
|
|
1151
|
+
id: item.id,
|
|
1152
|
+
merge_key: item.merge_key,
|
|
1153
|
+
source_memory_id: item.source_memory_id,
|
|
1154
|
+
source_memory_canonical_id: item.source_memory_canonical_id,
|
|
1155
|
+
text: item.text,
|
|
1156
|
+
source: item.source,
|
|
1157
|
+
layer: item.layer,
|
|
1158
|
+
event_type: item.event_type,
|
|
1159
|
+
quality_score: item.quality_score,
|
|
1160
|
+
timestamp: item.timestamp,
|
|
1161
|
+
score: Number(item.score.toFixed(4)),
|
|
1162
|
+
score_breakdown: item.score_breakdown || {},
|
|
1163
|
+
reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
|
|
1164
|
+
explain: {
|
|
1165
|
+
merge_key: item.merge_key,
|
|
1166
|
+
source_memory_id: item.source_memory_id,
|
|
1167
|
+
source_memory_canonical_id: item.source_memory_canonical_id,
|
|
1168
|
+
channel: item.source,
|
|
1169
|
+
layer: item.layer,
|
|
1170
|
+
score_breakdown: item.score_breakdown || {},
|
|
1171
|
+
reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
|
|
1172
|
+
},
|
|
1173
|
+
});
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
if (lexicalCount < minLexicalHits) {
|
|
1177
|
+
const needed = minLexicalHits - lexicalCount;
|
|
1178
|
+
const supplement = fallbackPool.filter(item => item.reason_tags.includes("lexical_hit")).slice(0, needed);
|
|
1179
|
+
for (const item of supplement) {
|
|
1180
|
+
if (ranked.some(existing => existing.id === item.id)) {
|
|
1181
|
+
continue;
|
|
1182
|
+
}
|
|
1183
|
+
ranked.push({
|
|
1184
|
+
id: item.id,
|
|
1185
|
+
merge_key: item.merge_key,
|
|
1186
|
+
source_memory_id: item.source_memory_id,
|
|
1187
|
+
source_memory_canonical_id: item.source_memory_canonical_id,
|
|
1188
|
+
text: item.text,
|
|
1189
|
+
source: item.source,
|
|
1190
|
+
layer: item.layer,
|
|
1191
|
+
event_type: item.event_type,
|
|
1192
|
+
quality_score: item.quality_score,
|
|
1193
|
+
timestamp: item.timestamp,
|
|
1194
|
+
score: Number(item.score.toFixed(4)),
|
|
1195
|
+
score_breakdown: item.score_breakdown || {},
|
|
1196
|
+
reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
|
|
1197
|
+
explain: {
|
|
1198
|
+
merge_key: item.merge_key,
|
|
1199
|
+
source_memory_id: item.source_memory_id,
|
|
1200
|
+
source_memory_canonical_id: item.source_memory_canonical_id,
|
|
1201
|
+
channel: item.source,
|
|
1202
|
+
layer: item.layer,
|
|
1203
|
+
score_breakdown: item.score_breakdown || {},
|
|
1204
|
+
reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
|
|
1205
|
+
},
|
|
1206
|
+
});
|
|
1207
|
+
}
|
|
1208
|
+
}
|
|
1209
|
+
ranked.sort((a, b) => b.score - a.score);
|
|
901
1210
|
if (fusionEnabled && llmModel && llmApiKey && llmBaseUrl && ranked.length > 1) {
|
|
902
1211
|
try {
|
|
903
1212
|
const maxCandidates = Math.max(4, Math.min(20, options.fusion?.maxCandidates ?? 10));
|
|
@@ -929,6 +1238,11 @@ function createReadStore(options) {
|
|
|
929
1238
|
timestamp: new Date().toISOString(),
|
|
930
1239
|
score: Number((Math.max(...ranked.map(item => item.score)) + 1).toFixed(4)),
|
|
931
1240
|
reason_tags: ["llm_fused_authoritative", `evidence:${fusion.evidence_ids.length}`],
|
|
1241
|
+
explain: {
|
|
1242
|
+
channel: "llm_fusion",
|
|
1243
|
+
fused_from: ranked.slice(0, maxCandidates).map(item => item.id),
|
|
1244
|
+
reason_tags: ["llm_fused_authoritative", `evidence:${fusion.evidence_ids.length}`],
|
|
1245
|
+
},
|
|
932
1246
|
fused_coverage_note: fusion.coverage_note || "",
|
|
933
1247
|
fused_facts: fusion.facts,
|
|
934
1248
|
fused_timeline: fusion.timeline || [],
|
|
@@ -958,8 +1272,9 @@ function createReadStore(options) {
|
|
|
958
1272
|
options.logger.warn(`LLM fusion failed, fallback to reranked results: ${error}`);
|
|
959
1273
|
}
|
|
960
1274
|
}
|
|
961
|
-
|
|
962
|
-
|
|
1275
|
+
const finalRanked = ranked.slice(0, Math.max(1, args.topK));
|
|
1276
|
+
markHit(finalRanked.map(item => item.id));
|
|
1277
|
+
return { results: finalRanked };
|
|
963
1278
|
}
|
|
964
1279
|
async function getHotContext(args) {
|
|
965
1280
|
const limit = Math.max(1, args.limit);
|