openclaw-cortex-memory 0.1.0-Alpha.8 → 0.1.0-Alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +34 -43
  2. package/SKILL.md +46 -51
  3. package/dist/index.d.ts +24 -0
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +127 -11
  6. package/dist/index.js.map +1 -1
  7. package/dist/openclaw.plugin.json +1 -1
  8. package/dist/src/engine/memory_engine.d.ts +2 -1
  9. package/dist/src/engine/memory_engine.d.ts.map +1 -1
  10. package/dist/src/engine/ts_engine.d.ts +59 -0
  11. package/dist/src/engine/ts_engine.d.ts.map +1 -1
  12. package/dist/src/engine/ts_engine.js +609 -0
  13. package/dist/src/engine/ts_engine.js.map +1 -1
  14. package/dist/src/engine/types.d.ts +7 -0
  15. package/dist/src/engine/types.d.ts.map +1 -1
  16. package/dist/src/session/session_end.d.ts.map +1 -1
  17. package/dist/src/session/session_end.js +18 -4
  18. package/dist/src/session/session_end.js.map +1 -1
  19. package/dist/src/store/archive_store.d.ts +24 -0
  20. package/dist/src/store/archive_store.d.ts.map +1 -1
  21. package/dist/src/store/archive_store.js +217 -24
  22. package/dist/src/store/archive_store.js.map +1 -1
  23. package/dist/src/store/embedding_utils.d.ts +32 -0
  24. package/dist/src/store/embedding_utils.d.ts.map +1 -0
  25. package/dist/src/store/embedding_utils.js +173 -0
  26. package/dist/src/store/embedding_utils.js.map +1 -0
  27. package/dist/src/store/read_store.d.ts +20 -0
  28. package/dist/src/store/read_store.d.ts.map +1 -1
  29. package/dist/src/store/read_store.js +336 -21
  30. package/dist/src/store/read_store.js.map +1 -1
  31. package/dist/src/store/vector_store.d.ts +13 -0
  32. package/dist/src/store/vector_store.d.ts.map +1 -1
  33. package/dist/src/store/vector_store.js +59 -1
  34. package/dist/src/store/vector_store.js.map +1 -1
  35. package/dist/src/store/write_store.d.ts +35 -0
  36. package/dist/src/store/write_store.d.ts.map +1 -1
  37. package/dist/src/store/write_store.js +163 -14
  38. package/dist/src/store/write_store.js.map +1 -1
  39. package/dist/src/sync/session_sync.d.ts +44 -2
  40. package/dist/src/sync/session_sync.d.ts.map +1 -1
  41. package/dist/src/sync/session_sync.js +364 -31
  42. package/dist/src/sync/session_sync.js.map +1 -1
  43. package/dist/src/utils/runtime_env.d.ts +4 -0
  44. package/dist/src/utils/runtime_env.d.ts.map +1 -0
  45. package/dist/src/utils/runtime_env.js +51 -0
  46. package/dist/src/utils/runtime_env.js.map +1 -0
  47. package/openclaw.plugin.json +1 -1
  48. package/package.json +3 -2
  49. package/scripts/cli.js +13 -13
  50. package/scripts/uninstall.js +7 -1
@@ -66,6 +66,55 @@ function scoreText(query, text) {
66
66
  }
67
67
  return score;
68
68
  }
69
/**
 * Split free text into lowercase BM25 tokens.
 *
 * Tokens are runs of ASCII letters/digits or CJK ideographs
 * (U+4E00–U+9FA5); every other character acts as a separator, so a token
 * can never contain whitespace.
 *
 * @param {string} text - Raw document or query text.
 * @returns {string[]} Non-empty lowercase tokens, in document order.
 */
function tokenize(text) {
    return text
        .toLowerCase()
        // The separator class already excludes whitespace, so the former
        // per-token trim() pass was a no-op and has been dropped.
        // filter(Boolean) removes the empty edge segments that split()
        // produces when the string starts or ends with a separator.
        .split(/[^a-z0-9\u4e00-\u9fa5]+/i)
        .filter(Boolean);
}
76
/**
 * Scan a corpus once to collect the statistics BM25 scoring needs:
 * the average document length and, for each query term, the number of
 * documents containing it.
 *
 * @param {Array<{text: string}>} docs - Corpus documents.
 * @param {string[]} queryTerms - Tokenized query terms to count.
 * @param {(doc: object) => string[]} [getTokens] - Optional tokenizer
 *   override; when absent, falls back to tokenize(doc.text).
 * @returns {{avgDocLen: number, docFreq: Map<string, number>}}
 */
function buildBm25Stats(docs, queryTerms, getTokens) {
    const docFreq = new Map();
    const hasCustomTokenizer = typeof getTokens === "function";
    let totalLen = 0;
    for (const doc of docs) {
        const tokens = hasCustomTokenizer ? getTokens(doc) : tokenize(doc.text);
        totalLen += tokens.length;
        if (queryTerms.length > 0) {
            // Membership via Set so each doc counts a term at most once.
            const present = new Set(tokens);
            queryTerms
                .filter(term => present.has(term))
                .forEach(term => docFreq.set(term, (docFreq.get(term) || 0) + 1));
        }
    }
    // Clamp to >= 1 so downstream division never sees a zero average.
    const avgDocLen = docs.length === 0 ? 1 : Math.max(1, totalLen / docs.length);
    return { avgDocLen, docFreq };
}
95
/**
 * Okapi BM25 score of one document against pre-tokenized query terms.
 *
 * @param {object} args
 * @param {string[]} [args.docTokens] - Pre-computed document tokens; when
 *   not an array, the document is tokenized on demand from args.docText.
 * @param {string} args.docText - Raw document text (fallback tokenization).
 * @param {string[]} args.queryTerms - Tokenized query.
 * @param {number} args.docCount - Total number of documents in the corpus.
 * @param {number} args.avgDocLen - Average document length in tokens.
 * @param {Map<string, number>} args.docFreq - Per-term document frequency.
 * @returns {number} Non-negative BM25 score; 0 when nothing can match.
 */
function bm25Score(args) {
    const tokens = Array.isArray(args.docTokens) ? args.docTokens : tokenize(args.docText);
    if (tokens.length === 0 || args.queryTerms.length === 0 || args.docCount <= 0) {
        return 0;
    }
    const termFreq = new Map();
    for (const token of tokens) {
        termFreq.set(token, (termFreq.get(token) || 0) + 1);
    }
    const k1 = 1.2;
    const b = 0.75;
    // Length normalization depends only on the document, not on the query
    // term, so hoist it out of the loop instead of recomputing it per term.
    const lengthNorm = k1 * (1 - b + b * (tokens.length / Math.max(1, args.avgDocLen)));
    let score = 0;
    for (const term of args.queryTerms) {
        const tf = termFreq.get(term) || 0;
        if (tf <= 0)
            continue;
        const df = args.docFreq.get(term) || 0;
        // Smoothed IDF: log(1 + (N - df + 0.5) / (df + 0.5)), always >= 0.
        const idf = Math.log(1 + ((args.docCount - df + 0.5) / (df + 0.5)));
        const denominator = tf + lengthNorm;
        // Math.max guard keeps the division safe even for degenerate stats.
        score += idf * (((k1 + 1) * tf) / Math.max(1e-6, denominator));
    }
    return score;
}
69
118
  function normalizeRecordText(record) {
70
119
  const direct = [record.content, record.summary, record.text, record.message]
71
120
  .find(v => typeof v === "string" && v.trim());
@@ -138,9 +187,20 @@ function parseJsonlFile(filePath, sourceLabel, logger) {
138
187
  text,
139
188
  source: sourceLabel,
140
189
  timestamp: Number.isFinite(timestampValue) ? timestampValue : undefined,
190
+ layer: parsed.layer === "active" || parsed.layer === "archive"
191
+ ? parsed.layer
192
+ : (sourceLabel === "sessions_active" ? "active" : (sourceLabel === "sessions_archive" ? "archive" : undefined)),
193
+ sourceMemoryId: typeof parsed.source_memory_id === "string"
194
+ ? parsed.source_memory_id
195
+ : id,
196
+ sourceMemoryCanonicalId: typeof parsed.source_memory_canonical_id === "string"
197
+ ? parsed.source_memory_canonical_id
198
+ : (typeof parsed.canonical_id === "string" ? parsed.canonical_id : undefined),
141
199
  embedding: Array.isArray(parsed.embedding) ? parsed.embedding.filter(item => Number.isFinite(item)) : undefined,
142
200
  eventType: typeof parsed.event_type === "string" ? parsed.event_type.trim() : undefined,
143
201
  qualityScore: typeof parsed.quality_score === "number" ? parsed.quality_score : undefined,
202
+ charCount: typeof parsed.char_count === "number" ? parsed.char_count : undefined,
203
+ tokenCount: typeof parsed.token_count === "number" ? parsed.token_count : undefined,
144
204
  sessionId: typeof parsed.session_id === "string" ? parsed.session_id : undefined,
145
205
  entities,
146
206
  relations,
@@ -262,6 +322,11 @@ function normalizeBaseUrl(value) {
262
322
  return "";
263
323
  return value.endsWith("/") ? value.slice(0, -1) : value;
264
324
  }
325
// Version tag embedded at the top of the fusion prompt so LLM outputs can be
// traced back to the exact prompt revision that produced them.
const READ_FUSION_PROMPT_VERSION = "read-fusion.v1.1.0";
// Regression examples (zh-CN) appended verbatim to the fusion prompt; they pin
// the expected dedupe/conflict behavior of the fuser.  These are runtime
// strings sent to the model — do not translate or reformat them.
const READ_FUSION_REGRESSION_SAMPLES = [
    "样例A: 同一 source_memory_id 同时出现在 archive 与 vector,输出中只保留一条主事实并在证据链保留两者关联。",
    "样例B: 新旧决策冲突时,将冲突写入 conflicts,并在 canonical_answer 标注优先级依据(时间、质量、明确性)。",
];
265
330
  function cosineSimilarity(left, right) {
266
331
  if (left.length === 0 || right.length === 0) {
267
332
  return 0;
@@ -425,6 +490,62 @@ function sourceWeight(source, intent) {
425
490
  }
426
491
  return 1;
427
492
  }
493
/**
 * Build the deduplication key used to merge candidates that originate from
 * the same underlying memory across retrieval channels.
 *
 * Preference order: canonical memory id, then source memory id, then the
 * document's own id as a last resort.
 *
 * @param {object} doc - Retrieval candidate document.
 * @returns {string} Namespaced merge key ("canonical:", "source:" or "id:").
 */
function mergeKeyFromDoc(doc) {
    const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
    const canonicalId = trimmedOrEmpty(doc.sourceMemoryCanonicalId);
    if (canonicalId !== "") {
        return `canonical:${canonicalId}`;
    }
    const memoryId = trimmedOrEmpty(doc.sourceMemoryId);
    if (memoryId !== "") {
        return `source:${memoryId}`;
    }
    return `id:${doc.id}`;
}
504
/**
 * Resolve the operator-configured score weight for a retrieval channel.
 *
 * @param {string} source - Channel name (e.g. "rules", "graph").
 * @param {object} [options] - Fusion options; weights live in channelWeights.
 * @returns {number} The configured weight when it is a finite positive
 *   number, otherwise the neutral weight 1.
 */
function customChannelWeight(source, options) {
    const configured = options?.channelWeights?.[source];
    const usable = typeof configured === "number" && Number.isFinite(configured) && configured > 0;
    return usable ? configured : 1;
}
514
/**
 * Compute a dampening factor in (0, 1] that penalizes over-long documents,
 * so a wall of text cannot dominate the semantic score purely by length.
 *
 * Documents at or below pivotChars are untouched (factor 1); beyond the
 * pivot the factor decays hyperbolically with the relative overshoot and is
 * clamped at minFactor.
 *
 * @param {object} doc - Candidate doc; uses charCount when finite, else text length.
 * @param {object} [options] - Fusion options carrying an optional lengthNorm
 *   config ({enabled, pivotChars, strength, minFactor}).
 * @returns {number} Multiplicative factor in [minFactor, 1].
 */
function lengthNormalizeFactor(doc, options) {
    const cfg = options?.lengthNorm;
    // Explicit opt-out only: any value other than `false` keeps this enabled.
    if (cfg?.enabled === false) {
        return 1;
    }
    const positiveOr = (value, fallback) => (typeof value === "number" && value > 0 ? value : fallback);
    const pivotChars = positiveOr(cfg?.pivotChars, 1200);
    const strength = positiveOr(cfg?.strength, 0.75);
    const minFactor = typeof cfg?.minFactor === "number" && cfg.minFactor > 0 && cfg.minFactor <= 1
        ? cfg.minFactor
        : 0.45;
    const charCount = typeof doc.charCount === "number" && Number.isFinite(doc.charCount)
        ? doc.charCount
        : doc.text.length;
    if (charCount <= pivotChars) {
        return 1;
    }
    const overshoot = (charCount - pivotChars) / pivotChars;
    const dampened = 1 / (1 + overshoot * strength);
    return Math.max(minFactor, Math.min(1, dampened));
}
538
/**
 * Number of candidates a retrieval channel may contribute before fusion.
 *
 * An explicit per-channel override (options.channelTopK) wins when it is a
 * finite number >= 1 (floored to an integer); otherwise each channel gets a
 * topK-scaled default with a per-channel floor.
 *
 * @param {string} source - Channel name ("rules", "graph", or anything else).
 * @param {number} topK - Requested result count.
 * @param {object} [options] - Fusion options with optional channelTopK map.
 * @returns {number} Candidate quota for this channel.
 */
function channelQuota(source, topK, options) {
    const override = options?.channelTopK?.[source];
    if (typeof override === "number" && Number.isFinite(override) && override >= 1) {
        return Math.floor(override);
    }
    // [floor, multiplier] defaults; everything not listed uses the widest quota.
    const defaults = new Map([
        ["rules", [6, 2]],
        ["graph", [8, 3]],
    ]);
    const [floor, multiplier] = defaults.get(source) ?? [12, 4];
    return Math.max(floor, topK * multiplier);
}
428
549
  async function searchLanceDb(args) {
429
550
  try {
430
551
  const require = (0, module_1.createRequire)(__filename);
@@ -475,9 +596,14 @@ async function searchLanceDb(args) {
475
596
  text: summary,
476
597
  source: "vector_lancedb",
477
598
  timestamp: Number.isFinite(ts) ? ts : undefined,
599
+ layer: record.layer === "active" || record.layer === "archive" ? record.layer : undefined,
600
+ sourceMemoryId: typeof record.source_memory_id === "string" ? record.source_memory_id : undefined,
601
+ sourceMemoryCanonicalId: typeof record.source_memory_canonical_id === "string" ? record.source_memory_canonical_id : undefined,
478
602
  embedding: Array.isArray(record.vector) ? record.vector.filter(item => Number.isFinite(item)) : undefined,
479
603
  eventType: typeof record.event_type === "string" ? record.event_type : undefined,
480
604
  qualityScore: typeof record.quality_score === "number" ? record.quality_score : undefined,
605
+ charCount: typeof record.char_count === "number" ? record.char_count : undefined,
606
+ tokenCount: typeof record.token_count === "number" ? record.token_count : undefined,
481
607
  sessionId: typeof record.session_id === "string" ? record.session_id : undefined,
482
608
  entities,
483
609
  relations: Array.isArray(relations) ? relations : [],
@@ -530,9 +656,14 @@ function parseVectorFallback(filePath, logger) {
530
656
  text: summary,
531
657
  source: "vector_jsonl",
532
658
  timestamp: Number.isFinite(ts) ? ts : undefined,
659
+ layer: parsed.layer === "active" || parsed.layer === "archive" ? parsed.layer : undefined,
660
+ sourceMemoryId: typeof parsed.source_memory_id === "string" ? parsed.source_memory_id : undefined,
661
+ sourceMemoryCanonicalId: typeof parsed.source_memory_canonical_id === "string" ? parsed.source_memory_canonical_id : undefined,
533
662
  embedding: Array.isArray(parsed.embedding) ? parsed.embedding.filter(item => Number.isFinite(item)) : undefined,
534
663
  eventType: typeof parsed.event_type === "string" ? parsed.event_type.trim() : undefined,
535
664
  qualityScore: typeof parsed.quality_score === "number" ? parsed.quality_score : undefined,
665
+ charCount: typeof parsed.char_count === "number" ? parsed.char_count : undefined,
666
+ tokenCount: typeof parsed.token_count === "number" ? parsed.token_count : undefined,
536
667
  sessionId: typeof parsed.session_id === "string" ? parsed.session_id : undefined,
537
668
  entities,
538
669
  relations,
@@ -553,6 +684,7 @@ async function requestFusion(args) {
553
684
  .join("\n")
554
685
  .slice(0, 18000);
555
686
  const prompt = [
687
+ `prompt_version=${READ_FUSION_PROMPT_VERSION}`,
556
688
  "你是记忆检索融合器。请融合多路召回结果,产出可直接给 Agent 使用的完整记忆包,不要让 Agent 再去翻历史。",
557
689
  "必须严格返回 JSON:",
558
690
  "{\"canonical_answer\": string, \"coverage_note\": string, \"facts\": [{\"text\": string, \"evidence_ids\": string[]}], \"timeline\": [{\"when\": string, \"event\": string, \"evidence_ids\": string[]}], \"entities\": [{\"name\": string, \"role\": string}], \"decisions\": [{\"decision\": string, \"rationale\": string, \"evidence_ids\": string[]}], \"fixes\": [{\"issue\": string, \"fix\": string, \"evidence_ids\": string[]}], \"preferences\": [{\"subject\": string, \"preference\": string, \"evidence_ids\": string[]}], \"risks\": [{\"risk\": string, \"mitigation\": string, \"evidence_ids\": string[]}], \"action_items\": [{\"item\": string, \"owner\": string, \"status\": string, \"evidence_ids\": string[]}], \"conflicts\": [{\"topic\": string, \"details\": string}], \"evidence_ids\": string[], \"confidence\": number}",
@@ -563,6 +695,8 @@ async function requestFusion(args) {
563
695
  "4) 若存在冲突写入 conflicts,否则返回空数组",
564
696
  "5) confidence 0~1",
565
697
  "6) 不确定信息必须在 coverage_note 标注",
698
+ "7) 同源记录合并:同 source_memory_id/source_memory_canonical_id 的候选只保留一条主结论,其余作为证据补充",
699
+ ...READ_FUSION_REGRESSION_SAMPLES,
566
700
  ].join("\n");
567
701
  const body = {
568
702
  model: args.llm.model,
@@ -634,6 +768,22 @@ function createReadStore(options) {
634
768
  const memoryRoot = options.dbPath ? path.resolve(options.dbPath) : path.join(options.projectRoot, "data", "memory");
635
769
  const vectorFallbackPath = path.join(memoryRoot, "vector", "lancedb_events.jsonl");
636
770
  const hitStatsPath = path.join(memoryRoot, ".read_hit_stats.json");
771
// Memoized parse results for the on-disk memory files, invalidated by
// comparing file signatures (size + mtime) via fileSignature().
// NOTE(review): these live in the createReadStore closure, so each store
// instance keeps its own caches — confirm no cross-instance sharing is expected.
let docsCache = null;
let vectorFallbackCache = null;
// BM25 token cache keyed per document; the signature scopes the cache to one
// corpus snapshot so stale tokens are discarded when underlying files change.
let bm25TokenCacheSignature = "";
let bm25TokenCache = new Map();
775
/**
 * Cheap change-detection signature for a file: path + size + mtime (ms,
 * floored). Missing files and stat failures produce distinct, stable
 * signatures so callers can still compare them for cache invalidation.
 *
 * @param {string} filePath - Absolute path of the file to fingerprint.
 * @returns {string} "path:size:mtime", "path:missing", or "path:error".
 */
function fileSignature(filePath) {
    try {
        // Single statSync call instead of existsSync + statSync: avoids the
        // race where the file disappears between the two calls (which would
        // previously have reported ":error" instead of ":missing").
        const stat = fs.statSync(filePath);
        return `${filePath}:${stat.size}:${Math.floor(stat.mtimeMs)}`;
    }
    catch (error) {
        if (error?.code === "ENOENT") {
            return `${filePath}:missing`;
        }
        return `${filePath}:error`;
    }
}
637
787
  function loadHitStats() {
638
788
  try {
639
789
  if (!fs.existsSync(hitStatsPath)) {
@@ -696,12 +846,46 @@ function createReadStore(options) {
696
846
  const memoryMdPath = path.join(memoryRoot, "MEMORY.md");
697
847
  const activeSessionsPath = path.join(memoryRoot, "sessions", "active", "sessions.jsonl");
698
848
  const archiveSessionsPath = path.join(memoryRoot, "sessions", "archive", "sessions.jsonl");
699
- return [
849
+ const signature = [
850
+ fileSignature(cortexRulesPath),
851
+ fileSignature(memoryMdPath),
852
+ fileSignature(activeSessionsPath),
853
+ fileSignature(archiveSessionsPath),
854
+ ].join("|");
855
+ if (docsCache && docsCache.signature === signature) {
856
+ return docsCache.docs;
857
+ }
858
+ const docs = [
700
859
  ...parseMarkdownFile(cortexRulesPath, "CORTEX_RULES.md"),
701
860
  ...parseMarkdownFile(memoryMdPath, "MEMORY.md"),
702
861
  ...parseJsonlFile(activeSessionsPath, "sessions_active", options.logger),
703
862
  ...parseJsonlFile(archiveSessionsPath, "sessions_archive", options.logger),
704
863
  ];
864
+ docsCache = { signature, docs };
865
+ return docs;
866
+ }
867
/**
 * Return the parsed vector-fallback JSONL docs, re-parsing the file only
 * when its signature (size + mtime) differs from the cached snapshot.
 *
 * @returns {object[]} Parsed fallback documents.
 */
function loadVectorFallbackCached() {
    const currentSignature = fileSignature(vectorFallbackPath);
    const cacheHit = vectorFallbackCache !== null && vectorFallbackCache.signature === currentSignature;
    if (!cacheHit) {
        vectorFallbackCache = {
            signature: currentSignature,
            docs: parseVectorFallback(vectorFallbackPath, options.logger),
        };
    }
    return vectorFallbackCache.docs;
}
876
/**
 * Tokenize a document for BM25 with memoization. The cache is scoped to a
 * corpus signature: when the signature changes the whole cache is dropped,
 * so tokens never outlive the files they were derived from.
 *
 * @param {object} doc - Document with source, id and text.
 * @param {string} signature - Current corpus signature.
 * @returns {string[]} Cached or freshly computed tokens.
 */
function getBm25Tokens(doc, signature) {
    // Invalidate the whole token cache whenever the corpus snapshot changes.
    if (signature !== bm25TokenCacheSignature) {
        bm25TokenCacheSignature = signature;
        bm25TokenCache = new Map();
    }
    // Key includes a text-length + prefix fingerprint so edited docs that
    // keep their id still miss the cache.
    const cacheKey = [doc.source, doc.id, doc.text.length, doc.text.slice(0, 64)].join("|");
    let tokens = bm25TokenCache.get(cacheKey);
    if (!tokens) {
        tokens = tokenize(doc.text);
        bm25TokenCache.set(cacheKey, tokens);
    }
    return tokens;
}
  async function searchMemory(args) {
707
891
  const query = args.query?.trim();
@@ -735,7 +919,7 @@ function createReadStore(options) {
735
919
  : [];
736
920
  const vectorDocsFallback = vectorDocsFromLance.length > 0
737
921
  ? []
738
- : parseVectorFallback(vectorFallbackPath, options.logger);
922
+ : loadVectorFallbackCached();
739
923
  const vectorDocs = [...vectorDocsFromLance, ...vectorDocsFallback];
740
924
  const graphDocs = docs
741
925
  .filter(doc => Array.isArray(doc.relations) && doc.relations.length > 0)
@@ -751,6 +935,10 @@ function createReadStore(options) {
751
935
  });
752
936
  const rulesDocs = docs.filter(doc => doc.source === "CORTEX_RULES.md");
753
937
  const archiveDocs = docs.filter(doc => doc.source.startsWith("sessions_"));
938
+ const bm25Terms = tokenize(query);
939
+ const bm25Corpus = [...rulesDocs, ...archiveDocs, ...vectorDocs, ...graphDocs];
940
+ const bm25Signature = `${docsCache?.signature || "na"}|vector:${vectorDocs.length}:${vectorDocs.slice(0, 40).map(item => `${item.id}:${item.text.length}`).join(",")}`;
941
+ const bm25Stats = buildBm25Stats(bm25Corpus, bm25Terms, doc => getBm25Tokens(doc, bm25Signature));
754
942
  const combinedCandidates = [];
755
943
  const channels = {
756
944
  rules: [],
@@ -760,10 +948,19 @@ function createReadStore(options) {
760
948
  };
761
949
  const evaluateDoc = (doc, source) => {
762
950
  const lexical = scoreText(query, doc.text);
951
+ const bm25 = bm25Score({
952
+ queryTerms: bm25Terms,
953
+ docText: doc.text,
954
+ docTokens: getBm25Tokens(doc, bm25Signature),
955
+ docCount: bm25Corpus.length,
956
+ avgDocLen: bm25Stats.avgDocLen,
957
+ docFreq: bm25Stats.docFreq,
958
+ });
959
+ const lexicalCombined = lexical + bm25 * 2;
763
960
  const semantic = queryEmbedding && Array.isArray(doc.embedding) && doc.embedding.length > 0
764
961
  ? Math.max(0, cosineSimilarity(queryEmbedding, doc.embedding) * 5)
765
962
  : 0;
766
- if (lexical <= 0 && semantic <= 0) {
963
+ if (lexicalCombined <= 0 && semantic <= 0) {
767
964
  return null;
768
965
  }
769
966
  const recency = recencyScore(doc.timestamp);
@@ -772,18 +969,22 @@ function createReadStore(options) {
772
969
  ? (preferredTypes.includes(doc.eventType) ? 1 : 0)
773
970
  : 0.5;
774
971
  const graphMatch = source === "graph" ? 1 : 0;
775
- const baseWeighted = (0.2 * lexical +
776
- 0.3 * semantic +
972
+ const sourceBaseWeight = sourceWeight(source, intent);
973
+ const sourceConfigWeight = customChannelWeight(source, options.fusion);
974
+ const lengthNorm = lengthNormalizeFactor(doc, options.fusion);
975
+ const baseWeighted = (0.2 * lexicalCombined +
976
+ 0.3 * (semantic * lengthNorm) +
777
977
  0.1 * recency +
778
978
  0.15 * quality +
779
979
  0.15 * typeMatch +
780
- 0.1 * graphMatch) * sourceWeight(source, intent);
980
+ 0.1 * graphMatch) * sourceBaseWeight * sourceConfigWeight;
781
981
  const decayFactor = computeDecayFactor(doc.id, doc.eventType, doc.timestamp, options.memoryDecay, hitStats);
782
982
  const weighted = baseWeighted * decayFactor;
783
983
  return {
784
984
  doc,
785
985
  source,
786
- lexical,
986
+ lexical: lexicalCombined,
987
+ bm25,
787
988
  semantic,
788
989
  recency,
789
990
  quality,
@@ -815,7 +1016,7 @@ function createReadStore(options) {
815
1016
  }
816
1017
  for (const key of Object.keys(channels)) {
817
1018
  channels[key].sort((a, b) => b.weighted - a.weighted);
818
- combinedCandidates.push(...channels[key].slice(0, Math.max(20, args.topK * 5)));
1019
+ combinedCandidates.push(...channels[key].slice(0, channelQuota(key, args.topK, options.fusion)));
819
1020
  }
820
1021
  const rrfMap = new Map();
821
1022
  const weightedMap = new Map();
@@ -825,22 +1026,39 @@ function createReadStore(options) {
825
1026
  for (let i = 0; i < list.length; i += 1) {
826
1027
  const candidate = list[i];
827
1028
  const rrf = 1 / (rrfK + i + 1);
828
- rrfMap.set(candidate.doc.id, (rrfMap.get(candidate.doc.id) || 0) + rrf);
829
- const current = weightedMap.get(candidate.doc.id);
1029
+ const mergeKey = mergeKeyFromDoc(candidate.doc);
1030
+ rrfMap.set(mergeKey, (rrfMap.get(mergeKey) || 0) + rrf);
1031
+ const current = weightedMap.get(mergeKey);
830
1032
  if (!current || candidate.weighted > current.weighted) {
831
- weightedMap.set(candidate.doc.id, candidate);
1033
+ weightedMap.set(mergeKey, candidate);
832
1034
  }
833
1035
  }
834
1036
  }
835
- const preRanked = [...weightedMap.values()]
836
- .map(candidate => ({
1037
+ const preRanked = [...weightedMap.entries()]
1038
+ .map(([mergeKey, candidate]) => ({
837
1039
  id: candidate.doc.id,
1040
+ merge_key: mergeKey,
1041
+ source_memory_id: candidate.doc.sourceMemoryId || "",
1042
+ source_memory_canonical_id: candidate.doc.sourceMemoryCanonicalId || "",
838
1043
  text: candidate.doc.text,
839
1044
  source: candidate.doc.source,
1045
+ layer: candidate.doc.layer || "",
840
1046
  event_type: candidate.doc.eventType || "",
841
1047
  quality_score: candidate.quality,
842
1048
  timestamp: candidate.doc.timestamp ? new Date(candidate.doc.timestamp).toISOString() : "",
843
- score: candidate.weighted + (rrfMap.get(candidate.doc.id) || 0) * 1.5,
1049
+ score: candidate.weighted + (rrfMap.get(mergeKey) || 0) * 1.5,
1050
+ score_breakdown: {
1051
+ lexical: Number(candidate.lexical.toFixed(4)),
1052
+ bm25: Number(candidate.bm25.toFixed(4)),
1053
+ semantic: Number(candidate.semantic.toFixed(4)),
1054
+ recency: Number(candidate.recency.toFixed(4)),
1055
+ quality: Number(candidate.quality.toFixed(4)),
1056
+ type: Number(candidate.typeMatch.toFixed(4)),
1057
+ graph: Number(candidate.graphMatch.toFixed(4)),
1058
+ decay: Number(candidate.decayFactor.toFixed(4)),
1059
+ rrf: Number(((rrfMap.get(mergeKey) || 0) * 1.5).toFixed(4)),
1060
+ weighted: Number(candidate.weighted.toFixed(4)),
1061
+ },
844
1062
  reason_tags: [
845
1063
  `intent:${intent.toLowerCase()}`,
846
1064
  candidate.semantic > 0 ? "vector_hit" : "lexical_hit",
@@ -849,6 +1067,7 @@ function createReadStore(options) {
849
1067
  candidate.quality >= 0.7 ? "high_quality" : "normal_quality",
850
1068
  candidate.decayFactor < 1 ? `decay:${candidate.decayFactor.toFixed(3)}` : "decay:1.000",
851
1069
  `source:${candidate.source}`,
1070
+ `merge_key:${mergeKey}`,
852
1071
  ],
853
1072
  }))
854
1073
  .sort((a, b) => b.score - a.score)
@@ -861,13 +1080,20 @@ function createReadStore(options) {
861
1080
  const rerankerModel = options.reranker?.model || "";
862
1081
  const rerankerApiKey = options.reranker?.apiKey || "";
863
1082
  const rerankerBaseUrl = normalizeBaseUrl(options.reranker?.baseURL || options.reranker?.baseUrl);
1083
+ const fusionEnabled = options.fusion?.enabled !== false;
1084
+ const llmModel = options.llm?.model || "";
1085
+ const llmApiKey = options.llm?.apiKey || "";
1086
+ const llmBaseUrl = normalizeBaseUrl(options.llm?.baseURL || options.llm?.baseUrl);
1087
+ const fusionAuthoritative = options.fusion?.authoritative !== false;
1088
+ const skipRerankerForFusion = fusionEnabled && fusionAuthoritative && llmModel && llmApiKey && llmBaseUrl;
864
1089
  let rerankedSimple = lexicalRanked.map(item => ({
865
1090
  id: item.id,
1091
+ merge_key: item.merge_key,
866
1092
  text: item.text,
867
1093
  source: item.source,
868
1094
  score: item.score,
869
1095
  }));
870
- if (rerankerModel && rerankerApiKey && rerankerBaseUrl && lexicalRanked.length > 1) {
1096
+ if (rerankerModel && rerankerApiKey && rerankerBaseUrl && lexicalRanked.length > 1 && !skipRerankerForFusion) {
871
1097
  try {
872
1098
  rerankedSimple = await requestRerank({
873
1099
  query,
@@ -876,6 +1102,10 @@ function createReadStore(options) {
876
1102
  apiKey: rerankerApiKey,
877
1103
  baseUrl: rerankerBaseUrl,
878
1104
  });
1105
+ rerankedSimple = rerankedSimple.map(item => {
1106
+ const found = lexicalRanked.find(entry => entry.id === item.id);
1107
+ return { ...item, merge_key: found?.merge_key || item.id };
1108
+ });
879
1109
  }
880
1110
  catch (error) {
881
1111
  options.logger.warn(`Reranker failed, keep hybrid ranking: ${error}`);
@@ -885,19 +1115,98 @@ function createReadStore(options) {
885
1115
  const hit = lexicalRanked.find(entry => entry.id === item.id);
886
1116
  return {
887
1117
  id: item.id,
1118
+ merge_key: hit?.merge_key || item.merge_key || item.id,
1119
+ source_memory_id: hit?.source_memory_id || "",
1120
+ source_memory_canonical_id: hit?.source_memory_canonical_id || "",
888
1121
  text: item.text,
889
1122
  source: item.source,
1123
+ layer: hit?.layer || "",
890
1124
  event_type: hit?.event_type || "",
891
1125
  quality_score: hit?.quality_score ?? 0,
892
1126
  timestamp: hit?.timestamp || "",
893
1127
  score: Number(item.score.toFixed(4)),
1128
+ score_breakdown: hit?.score_breakdown || {},
894
1129
  reason_tags: Array.isArray(hit?.reason_tags) ? hit?.reason_tags : [],
1130
+ explain: {
1131
+ merge_key: hit?.merge_key || item.merge_key || item.id,
1132
+ source_memory_id: hit?.source_memory_id || "",
1133
+ source_memory_canonical_id: hit?.source_memory_canonical_id || "",
1134
+ channel: item.source,
1135
+ layer: hit?.layer || "",
1136
+ score_breakdown: hit?.score_breakdown || {},
1137
+ reason_tags: Array.isArray(hit?.reason_tags) ? hit?.reason_tags : [],
1138
+ },
895
1139
  };
896
1140
  });
897
- const fusionEnabled = options.fusion?.enabled !== false;
898
- const llmModel = options.llm?.model || "";
899
- const llmApiKey = options.llm?.apiKey || "";
900
- const llmBaseUrl = normalizeBaseUrl(options.llm?.baseURL || options.llm?.baseUrl);
1141
+ const minLexicalHits = Math.max(0, Math.floor(options.fusion?.minLexicalHits ?? 1));
1142
+ const minSemanticHits = Math.max(0, Math.floor(options.fusion?.minSemanticHits ?? 1));
1143
+ const fallbackPool = lexicalRanked.filter(item => !ranked.some(existing => existing.id === item.id));
1144
+ const lexicalCount = ranked.filter(item => item.reason_tags.includes("lexical_hit")).length;
1145
+ const semanticCount = ranked.filter(item => item.reason_tags.includes("vector_hit")).length;
1146
+ if (semanticCount < minSemanticHits) {
1147
+ const needed = minSemanticHits - semanticCount;
1148
+ const supplement = fallbackPool.filter(item => item.reason_tags.includes("vector_hit")).slice(0, needed);
1149
+ for (const item of supplement) {
1150
+ ranked.push({
1151
+ id: item.id,
1152
+ merge_key: item.merge_key,
1153
+ source_memory_id: item.source_memory_id,
1154
+ source_memory_canonical_id: item.source_memory_canonical_id,
1155
+ text: item.text,
1156
+ source: item.source,
1157
+ layer: item.layer,
1158
+ event_type: item.event_type,
1159
+ quality_score: item.quality_score,
1160
+ timestamp: item.timestamp,
1161
+ score: Number(item.score.toFixed(4)),
1162
+ score_breakdown: item.score_breakdown || {},
1163
+ reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
1164
+ explain: {
1165
+ merge_key: item.merge_key,
1166
+ source_memory_id: item.source_memory_id,
1167
+ source_memory_canonical_id: item.source_memory_canonical_id,
1168
+ channel: item.source,
1169
+ layer: item.layer,
1170
+ score_breakdown: item.score_breakdown || {},
1171
+ reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
1172
+ },
1173
+ });
1174
+ }
1175
+ }
1176
+ if (lexicalCount < minLexicalHits) {
1177
+ const needed = minLexicalHits - lexicalCount;
1178
+ const supplement = fallbackPool.filter(item => item.reason_tags.includes("lexical_hit")).slice(0, needed);
1179
+ for (const item of supplement) {
1180
+ if (ranked.some(existing => existing.id === item.id)) {
1181
+ continue;
1182
+ }
1183
+ ranked.push({
1184
+ id: item.id,
1185
+ merge_key: item.merge_key,
1186
+ source_memory_id: item.source_memory_id,
1187
+ source_memory_canonical_id: item.source_memory_canonical_id,
1188
+ text: item.text,
1189
+ source: item.source,
1190
+ layer: item.layer,
1191
+ event_type: item.event_type,
1192
+ quality_score: item.quality_score,
1193
+ timestamp: item.timestamp,
1194
+ score: Number(item.score.toFixed(4)),
1195
+ score_breakdown: item.score_breakdown || {},
1196
+ reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
1197
+ explain: {
1198
+ merge_key: item.merge_key,
1199
+ source_memory_id: item.source_memory_id,
1200
+ source_memory_canonical_id: item.source_memory_canonical_id,
1201
+ channel: item.source,
1202
+ layer: item.layer,
1203
+ score_breakdown: item.score_breakdown || {},
1204
+ reason_tags: Array.isArray(item.reason_tags) ? item.reason_tags : [],
1205
+ },
1206
+ });
1207
+ }
1208
+ }
1209
+ ranked.sort((a, b) => b.score - a.score);
901
1210
  if (fusionEnabled && llmModel && llmApiKey && llmBaseUrl && ranked.length > 1) {
902
1211
  try {
903
1212
  const maxCandidates = Math.max(4, Math.min(20, options.fusion?.maxCandidates ?? 10));
@@ -929,6 +1238,11 @@ function createReadStore(options) {
929
1238
  timestamp: new Date().toISOString(),
930
1239
  score: Number((Math.max(...ranked.map(item => item.score)) + 1).toFixed(4)),
931
1240
  reason_tags: ["llm_fused_authoritative", `evidence:${fusion.evidence_ids.length}`],
1241
+ explain: {
1242
+ channel: "llm_fusion",
1243
+ fused_from: ranked.slice(0, maxCandidates).map(item => item.id),
1244
+ reason_tags: ["llm_fused_authoritative", `evidence:${fusion.evidence_ids.length}`],
1245
+ },
932
1246
  fused_coverage_note: fusion.coverage_note || "",
933
1247
  fused_facts: fusion.facts,
934
1248
  fused_timeline: fusion.timeline || [],
@@ -958,8 +1272,9 @@ function createReadStore(options) {
958
1272
  options.logger.warn(`LLM fusion failed, fallback to reranked results: ${error}`);
959
1273
  }
960
1274
  }
961
- markHit(ranked.map(item => item.id));
962
- return { results: ranked };
1275
+ const finalRanked = ranked.slice(0, Math.max(1, args.topK));
1276
+ markHit(finalRanked.map(item => item.id));
1277
+ return { results: finalRanked };
963
1278
  }
964
1279
  async function getHotContext(args) {
965
1280
  const limit = Math.max(1, args.limit);