@mingxy/cerebro 1.15.4 → 1.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hooks.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { Model, UserMessage, Part } from "@opencode-ai/sdk";
2
- import type { CerebroClient, SearchResult } from "./client.js";
2
+ import type { CerebroClient, SearchResult, ShouldRecallResponse } from "./client.js";
3
3
  import { type OmemPluginConfig, resolveAgentPolicy } from "./config.js";
4
4
  import { detectSaveKeyword, KEYWORD_NUDGE } from "./keywords.js";
5
5
  import { logDebug, logInfo, logError as logErr } from "./logger.js";
@@ -175,6 +175,14 @@ const profileInjectedSessions = new Map<string, number>();
175
175
  const injectedSessions = new Set<string>();
176
176
  const compactingSummaryCooldown = new Map<string, number>();
177
177
 
178
+ // Per-session cache (keyed by session ID) holding the result of the previous fire-and-forget profile/recall fetch; read at the start of the next memoryInjectionHook call and deleted in compactingHook
179
+ const recallCache = new Map<string, {
180
+ profileBlock: string; // rendered <cerebro-profile> text, or "" when the background fetch built no profile block
181
+ recallResult: ShouldRecallResponse; // shouldRecall response captured by the background fetch
182
+ profileData: { countText: string }; // "Dynamic(n) · Static(m)" summary used in toast messages
183
+ timestamp: number; // Date.now() at write time; the oldest entry is evicted once the map holds more than 50 entries
184
+ }>();
185
+
178
186
  function hashString(str: string): string {
179
187
  let hash = 0;
180
188
  for (let i = 0; i < str.length; i++) {
@@ -615,6 +623,29 @@ export function autoRecallHook(client: CerebroClient, containerTags: string[], t
615
623
  };
616
624
  }
617
625
 
626
/**
 * Renders the stored user preferences as a `<cerebro-profile>` block and
 * summarizes how many profile entries exist.
 *
 * Only `static_facts` entries whose `tags` array contains `"preferences"` are
 * rendered. Each becomes one ` · `-prefixed line taken from `l2_content`, or
 * from `content` when `l2_content` is missing or blank. If nothing renderable
 * is found, a single placeholder line is emitted so the block is never empty.
 *
 * The payload is treated defensively: a non-object profile, non-array
 * `static_facts` / `dynamic_context`, and null or malformed entries are
 * tolerated instead of throwing.
 *
 * @param profile - Profile payload; its shape is not validated before this call.
 * @returns The rendered block plus a `Dynamic(n) · Static(m)` count summary.
 *   This implementation always returns an object; the nullable return type is
 *   kept so existing callers that guard on the result keep compiling.
 */
function buildProfileBlock(profile: unknown): { block: string; countText: string } | null {
  const source: Record<string, unknown> =
    typeof profile === "object" && profile !== null
      ? (profile as Record<string, unknown>)
      : {};
  const staticFacts: unknown[] = Array.isArray(source.static_facts) ? source.static_facts : [];
  const dynamicContext: unknown[] = Array.isArray(source.dynamic_context) ? source.dynamic_context : [];

  const preferenceLines: string[] = [];
  for (const fact of staticFacts) {
    // Skip null / primitive entries rather than crashing on property access.
    if (typeof fact !== "object" || fact === null) continue;
    const entry = fact as Record<string, unknown>;
    if (!Array.isArray(entry.tags) || !entry.tags.includes("preferences")) continue;

    // Prefer l2_content, but fall back to content when it is absent OR blank;
    // a plain `??` chain would keep an empty l2_content and lose the preference.
    const text = [entry.l2_content, entry.content].find(
      (candidate): candidate is string => typeof candidate === "string" && candidate.trim() !== "",
    );
    if (text !== undefined) preferenceLines.push(` · ${text}`);
  }

  const profileLines = preferenceLines.length > 0
    ? preferenceLines.join("\n")
    : " · (preferences queuing, will populate on next refresh)";

  const block = ["<cerebro-profile>", profileLines, "</cerebro-profile>"].join("\n");
  // Counts reflect the raw list sizes, including entries that were not rendered.
  const countText = `Dynamic(${dynamicContext.length}) · Static(${staticFacts.length})`;
  return { block, countText };
}
648
+
618
649
  export function memoryInjectionHook(
619
650
  client: CerebroClient,
620
651
  containerTags: string[],
@@ -654,43 +685,6 @@ export function memoryInjectionHook(
654
685
  const messages = sessionMessages.get(input.sessionID) ?? [];
655
686
  const userMessages = messages.filter((m) => m.role === "user");
656
687
 
657
- // --- Profile Fetch ---
658
- const profile = await client.getProfile();
659
- let profileInjected = false;
660
- let profileCountText = "";
661
- let profileBlock = "";
662
- const lastInjected = profileInjectedSessions.get(input.sessionID);
663
- const ttlExpired = !lastInjected || (Date.now() - lastInjected > 30 * 60 * 1000);
664
- const profileIsFirstInjection = !lastInjected;
665
- if (profile && ttlExpired) {
666
- const prefs = ((profile as any)?.static_facts ?? [])
667
- .filter((sf: any) => {
668
- const t: string[] = sf.tags ?? [];
669
- return t.includes("preferences");
670
- })
671
- .map((sf: any) => sf.l2_content ?? sf.content ?? "")
672
- .filter(Boolean);
673
- const profileLines = prefs.length > 0
674
- ? prefs.map((c: string) => ` · ${c}`).join("\n")
675
- : " · (preferences queuing, will populate on next refresh)";
676
- profileBlock = [
677
- "<cerebro-profile>",
678
- profileLines,
679
- "</cerebro-profile>",
680
- ].join("\n");
681
- profileInjected = true;
682
- profileInjectedSessions.set(input.sessionID, Date.now());
683
- const p = profile as any;
684
- const dynamicCount = p?.dynamic_context?.length ?? 0;
685
- const staticCount = p?.static_facts?.length ?? 0;
686
- profileCountText = `Dynamic(${dynamicCount}) · Static(${staticCount})`;
687
- if (profileIsFirstInjection) {
688
- logDebug("memoryInjectionHook profile ready (first)", { dynamicCount, staticCount });
689
- } else {
690
- logDebug("memoryInjectionHook profile ready (TTL)", { dynamicCount, staticCount });
691
- }
692
- }
693
-
694
688
  if (userMessages.length === 0) {
695
689
  logDebug("memoryInjectionHook skipped: no user messages in session (post-compacting?)", { sessionId: input.sessionID });
696
690
  return;
@@ -699,7 +693,7 @@ export function memoryInjectionHook(
699
693
  const rawQuery = userMessages[userMessages.length - 1]?.content || firstMessages.get(input.sessionID) || "";
700
694
  const query_text = extractUserRequest(rawQuery);
701
695
  if (!query_text) {
702
- logDebug("memoryInjectionHook filtered system injection (profile already injected above)", { rawQueryPrefix: rawQuery.slice(0, 60) });
696
+ logDebug("memoryInjectionHook filtered system injection", { rawQueryPrefix: rawQuery.slice(0, 60) });
703
697
  return;
704
698
  }
705
699
  const last_query_text = userMessages.length >= 2 ? userMessages[userMessages.length - 2].content : undefined;
@@ -713,228 +707,309 @@ export function memoryInjectionHook(
713
707
  })
714
708
  : undefined;
715
709
 
716
- const shouldRecallRes = await client.shouldRecall(
717
- query_text, last_query_text, input.sessionID,
718
- similarityThreshold, maxRecallResults,
719
- projectTags.length > 0 ? projectTags : undefined,
720
- conversationContext && conversationContext.length > 0 ? conversationContext : undefined,
721
- {
722
- fetch_multiplier: fetchMultiplier,
723
- topk_cap_multiplier: topkCapMultiplier,
724
- mmr_jaccard_threshold: mmrJaccardThreshold,
725
- mmr_penalty_factor: mmrPenaltyFactor,
726
- phase2_multiplier: phase2Multiplier,
727
- llm_max_eval: llmMaxEval,
728
- refine_strategy: refineStrategy,
729
- refine_medium_chars: refineMediumChars,
730
- },
731
- directory || process.env.OMEM_PROJECT_DIR,
732
- );
733
-
734
- if (!shouldRecallRes) {
735
- showToast(tui, "🧠 Cerebro Service Unavailable", "Unable to reach memory API · check connection", "error", toastDelayMs);
736
- return;
737
- }
738
- logDebug("memoryInjectionHook shouldRecall result", { shouldRecall: shouldRecallRes.should_recall, confidence: shouldRecallRes.confidence, memCount: shouldRecallRes.memories?.length ?? 0, discardedCount: shouldRecallRes.discarded?.length ?? 0, clustered: !!shouldRecallRes.clustered });
739
-
740
- const storedMemoryIds = shouldRecallRes.memories?.map((r) => r.memory.id) ?? [];
741
- const storedDiscardedIds = shouldRecallRes.discarded?.map((d) => d.memory_id) ?? [];
742
- const maxScore = storedMemoryIds.length > 0
743
- ? Math.max(...(shouldRecallRes.memories?.map((r) => r.score) ?? [0]))
744
- : 0;
745
-
746
- const createEventAndReturn = async (
747
- injectedCount: number,
748
- keptCount: number,
749
- discardedCount: number,
750
- injectedContent?: string,
751
- ): Promise<string | undefined> => {
752
- try {
753
- const items = [
754
- ...(shouldRecallRes.memories?.map((r) => ({
755
- memory_id: r.memory.id,
756
- score: r.score,
757
- refine_relevance: r.refine_relevance,
758
- refine_reasoning: r.refine_reasoning,
759
- is_kept: true,
760
- })) ?? []),
761
- ...(shouldRecallRes.discarded?.map((d) => ({
762
- memory_id: d.memory_id,
763
- score: d.score,
764
- refine_relevance: d.refine_relevance,
765
- refine_reasoning: d.refine_reasoning,
766
- is_kept: false,
767
- })) ?? []),
768
- ];
769
- const result = await client.createRecallEvent({
770
- session_id: input.sessionID!,
771
- recall_type: "auto",
772
- query_text,
773
- max_score: maxScore,
774
- llm_confidence: shouldRecallRes.confidence ?? 0,
775
- profile_injected: profileInjected,
776
- kept_count: keptCount,
777
- discarded_count: discardedCount,
778
- injected_count: injectedCount,
779
- profile_content: profileInjected && profileBlock ? profileBlock : undefined,
780
- injected_content: injectedContent,
781
- items: items.length > 0 ? items : undefined,
782
- });
783
- return result?.event_id;
784
- } catch (e) {
785
- logErr("memoryInjectionHook createRecallEvent failed", { error: String(e) });
786
- return undefined;
787
- }
788
- };
710
+ // ========== Phase A: synchronous path (zero await) ==========
711
+ const cached = recallCache.get(input.sessionID);
712
+ let profileBlock = "";
713
+ let profileInjected = false;
714
+ let profileCountText = "";
789
715
 
790
- // --- no-recall path: inject profile only ---
791
- if (!shouldRecallRes.should_recall) {
792
- const partsToInject: string[] = [];
793
- if (profileBlock) partsToInject.push(profileBlock);
794
- if (partsToInject.length > 0) {
795
- const injectText = partsToInject.join("\n\n");
796
- const contextPart: Part = {
797
- id: `prt_cerebro-context-${Date.now()}`,
798
- sessionID: input.sessionID,
799
- messageID: output.message.id,
800
- type: "text",
801
- text: injectText,
802
- synthetic: true,
803
- };
804
- output.parts.unshift(contextPart);
805
- logDebug("memoryInjectionHook profile injected (no-recall path)", { sessionId: input.sessionID });
806
- }
807
- injectedSessions.add(input.sessionID);
808
- if (profileInjected && profileIsFirstInjection) {
809
- await createEventAndReturn(0, 0, 0);
810
- showToast(tui, "👨 Profile Injected", `${profileCountText} · no memory recall needed`, "success", toastDelayMs);
716
+ if (cached) {
717
+ // Phase A: 只读 profileBlock,不更新 TTL(TTL 管理完全由 Phase B 负责)
718
+ if (cached.profileBlock) {
719
+ profileBlock = cached.profileBlock;
720
+ profileInjected = true;
721
+ profileCountText = cached.profileData?.countText ?? "";
811
722
  }
812
- return;
813
- }
814
723
 
815
- const results = shouldRecallRes.memories ?? [];
816
- const clustered = shouldRecallRes.clustered;
724
+ const shouldRecallRes = cached.recallResult;
725
+
726
+ if (!shouldRecallRes.should_recall) {
727
+ const partsToInject: string[] = [];
728
+ if (profileBlock) partsToInject.push(profileBlock);
729
+ if (partsToInject.length > 0) {
730
+ const injectText = partsToInject.join("\n\n");
731
+ const contextPart: Part = {
732
+ id: `prt_cerebro-context-${Date.now()}`,
733
+ sessionID: input.sessionID,
734
+ messageID: output.message.id,
735
+ type: "text",
736
+ text: injectText,
737
+ synthetic: true,
738
+ };
739
+ output.parts.unshift(contextPart);
740
+ logDebug("memoryInjectionHook profile injected from cache (no-recall)", { sessionId: input.sessionID });
741
+ }
742
+ injectedSessions.add(input.sessionID);
743
+ } else {
744
+ const results = shouldRecallRes.memories ?? [];
745
+ const clustered = shouldRecallRes.clustered;
746
+ const existingIds = injectedMemoryIds.get(input.sessionID) ?? new Set<string>();
747
+ const newResults = results.filter((r) => !existingIds.has(r.memory.id));
748
+ logDebug("memoryInjectionHook dedup (cached)", { totalResults: results.length, existingCount: existingIds.size, newCount: newResults.length });
749
+
750
+ if (newResults.length === 0) {
751
+ const partsToInject: string[] = [];
752
+ if (profileBlock) partsToInject.push(profileBlock);
753
+ if (partsToInject.length > 0) {
754
+ const injectText = partsToInject.join("\n\n");
755
+ const contextPart: Part = {
756
+ id: `prt_cerebro-context-${Date.now()}`,
757
+ sessionID: input.sessionID,
758
+ messageID: output.message.id,
759
+ type: "text",
760
+ text: injectText,
761
+ synthetic: true,
762
+ };
763
+ output.parts.unshift(contextPart);
764
+ logDebug("memoryInjectionHook profile injected from cache (dedup)", { sessionId: input.sessionID });
765
+ }
766
+ injectedSessions.add(input.sessionID);
767
+ } else {
768
+ const profileChars = profileInjected ? profileBlock.length : 0;
769
+ const budgetRemaining = maxContentChars - profileChars;
770
+ const itemCount = clustered
771
+ ? (clustered.cluster_summaries.length + clustered.standalone_memories.length)
772
+ : newResults.length;
773
+ const dynamicMaxContentLength = itemCount > 0
774
+ ? Math.min(maxContentLength, Math.max(MIN_ITEM_CONTENT_CHARS, Math.floor(budgetRemaining / itemCount)))
775
+ : maxContentLength;
776
+
777
+ const block = clustered
778
+ ? buildClusteredContextBlock(clustered, dynamicMaxContentLength)
779
+ : buildContextBlock(newResults, dynamicMaxContentLength);
780
+
781
+ const partsToInject: string[] = [];
782
+ if (block) partsToInject.push(block);
783
+ if (block) partsToInject.push(FETCH_POLICY);
784
+ if (profileBlock) partsToInject.push(profileBlock);
785
+ if (isSaveKeyword) partsToInject.push(KEYWORD_NUDGE);
786
+
787
+ if (partsToInject.length > 0) {
788
+ const injectText = partsToInject.join("\n\n");
789
+ const contextPart: Part = {
790
+ id: `prt_cerebro-context-${Date.now()}`,
791
+ sessionID: input.sessionID,
792
+ messageID: output.message.id,
793
+ type: "text",
794
+ text: injectText,
795
+ synthetic: true,
796
+ };
797
+ output.parts.unshift(contextPart);
798
+ logDebug("memoryInjectionHook block injected from cache", {
799
+ sessionId: input.sessionID,
800
+ injectTextLen: injectText.length,
801
+ blockPreview: block?.slice(0, 200),
802
+ });
803
+ }
817
804
 
818
- const existingIds = injectedMemoryIds.get(input.sessionID) ?? new Set<string>();
819
- const newResults = results.filter((r) => !existingIds.has(r.memory.id));
820
- logDebug("memoryInjectionHook dedup", { totalResults: results.length, existingCount: existingIds.size, newCount: newResults.length });
805
+ injectedSessions.add(input.sessionID);
821
806
 
822
- // --- dedup path: inject profile only ---
823
- if (newResults.length === 0) {
824
- const partsToInject: string[] = [];
825
- if (profileBlock) partsToInject.push(profileBlock);
826
- if (partsToInject.length > 0) {
827
- const injectText = partsToInject.join("\n\n");
828
- const contextPart: Part = {
829
- id: `prt_cerebro-context-${Date.now()}`,
830
- sessionID: input.sessionID,
831
- messageID: output.message.id,
832
- type: "text",
833
- text: injectText,
834
- synthetic: true,
835
- };
836
- output.parts.unshift(contextPart);
837
- logDebug("memoryInjectionHook profile injected (dedup path)", { sessionId: input.sessionID });
838
- }
839
- injectedSessions.add(input.sessionID);
840
- if (profileInjected && profileIsFirstInjection) {
841
- showToast(tui, "👨 Profile Injected", `${profileCountText} · all memories already injected`, "success", toastDelayMs);
842
- }
843
- return;
844
- }
807
+ if (isSaveKeyword) {
808
+ saveKeywordDetectedSessions.delete(input.sessionID);
809
+ }
845
810
 
846
- // --- Token Budget Calculation ---
847
- const profileChars = profileInjected ? profileBlock.length : 0;
848
- const budgetRemaining = maxContentChars - profileChars;
849
- if (budgetRemaining < 0) {
850
- logDebug("memoryInjectionHook budget overflow", { profileChars, maxContentChars, deficit: -budgetRemaining });
851
- }
852
- const itemCount = clustered
853
- ? (clustered.cluster_summaries.length + clustered.standalone_memories.length)
854
- : newResults.length;
855
- const dynamicMaxContentLength = itemCount > 0
856
- ? Math.min(maxContentLength, Math.max(MIN_ITEM_CONTENT_CHARS, Math.floor(budgetRemaining / itemCount)))
857
- : maxContentLength;
858
- logDebug("memoryInjectionHook budget", {
859
- maxContentChars, profileChars, budgetRemaining, itemCount,
860
- configuredMax: maxContentLength, dynamicMax: dynamicMaxContentLength,
861
- });
811
+ const newIds = newResults.map((r) => r.memory.id);
812
+ injectedMemoryIds.set(input.sessionID, new Set([...existingIds, ...newIds]));
813
+
814
+ const memDynamic = newResults.filter((r) => r.memory.memory_type === "fact" || r.memory.memory_type === "event").length;
815
+ const memStatic = newResults.filter((r) => r.memory.memory_type === "pinned" || r.memory.memory_type === "preference").length;
816
+ const memOther = newResults.length - memDynamic - memStatic;
817
+
818
+ let memCountMsg = "";
819
+ if (memDynamic > 0) memCountMsg += `Dynamic(${memDynamic}) `;
820
+ if (memStatic > 0) memCountMsg += `Static(${memStatic}) `;
821
+ if (memOther > 0) memCountMsg += `Other(${memOther}) `;
822
+
823
+ const categories = categorize(newResults);
824
+ const catSummary = Array.from(categories.entries())
825
+ .map(([label, items]) => `${label}(${items.length})`)
826
+ .join(" · ");
827
+
828
+ let toastTitle: string;
829
+ let toastMessage: string;
830
+
831
+ if (clustered) {
832
+ const clusterCount = clustered.cluster_summaries.length;
833
+ const standaloneCount = clustered.standalone_memories.length;
834
+ toastTitle = `🧠 Context Injected · ${clusterCount} 主题簇${standaloneCount > 0 ? ` · ${standaloneCount} 补充` : ""}`;
835
+ toastMessage = profileInjected
836
+ ? `Profile: ${profileCountText} · 聚合记忆展示`
837
+ : `聚合记忆展示`;
838
+ } else {
839
+ toastTitle = `🧠 Context Injected · ${newResults.length} fragments`;
840
+ toastMessage = profileInjected
841
+ ? `Profile: ${profileCountText} · Memories: ${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`
842
+ : `${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`;
843
+ }
862
844
 
863
- const block = clustered
864
- ? buildClusteredContextBlock(clustered, dynamicMaxContentLength)
865
- : buildContextBlock(newResults, dynamicMaxContentLength);
845
+ showToast(tui, toastTitle, toastMessage, "success", toastDelayMs);
846
+ }
847
+ }
866
848
 
867
- // ★★★ Core change: inject via output.parts.unshift + synthetic:true ★★★
868
- const partsToInject: string[] = [];
869
- if (profileBlock) partsToInject.push(profileBlock);
870
- if (block) partsToInject.push(block);
871
- if (block) partsToInject.push(FETCH_POLICY);
872
- if (isSaveKeyword) partsToInject.push(KEYWORD_NUDGE);
873
-
874
- if (partsToInject.length > 0) {
875
- const injectText = partsToInject.join("\n\n");
876
- const contextPart: Part = {
877
- id: `prt_cerebro-context-${Date.now()}`,
878
- sessionID: input.sessionID,
879
- messageID: output.message.id,
880
- type: "text",
881
- text: injectText,
882
- synthetic: true,
883
- };
884
- output.parts.unshift(contextPart);
885
- logDebug("memoryInjectionHook block injected to output.parts", {
886
- sessionId: input.sessionID,
887
- injectTextLen: injectText.length,
888
- blockPreview: block?.slice(0, 200),
889
- });
849
+ logDebug("memoryInjectionHook cache hit, injection complete", { sessionId: input.sessionID });
890
850
  } else {
891
- logDebug("memoryInjectionHook no content to inject", { sessionId: input.sessionID });
851
+ logDebug("memoryInjectionHook cache miss, first message in session", { sessionId: input.sessionID });
892
852
  }
893
853
 
894
- injectedSessions.add(input.sessionID);
895
-
896
- if (isSaveKeyword) {
897
- saveKeywordDetectedSessions.delete(input.sessionID);
898
- }
899
-
900
- const newIds = newResults.map((r) => r.memory.id);
901
- injectedMemoryIds.set(input.sessionID, new Set([...existingIds, ...newIds]));
902
- logDebug("memoryInjectionHook injection complete", { newIds: newIds.length, clustered: !!clustered, sessionId: input.sessionID });
903
-
904
- await createEventAndReturn(newResults.length, storedMemoryIds.length, storedDiscardedIds.length, block || undefined);
854
+ // ========== Phase B: fire-and-forget async fetch for NEXT round ==========
855
+ const bgSessionId = input.sessionID;
856
+ const bgQueryText = query_text;
857
+ const bgLastQueryText = last_query_text;
858
+ const bgConversationContext = conversationContext;
859
+ const bgProjectTags = projectTags.length > 0 ? projectTags : undefined;
860
+ const bgDirectory = directory || process.env.OMEM_PROJECT_DIR;
861
+
862
+ Promise.allSettled([
863
+ client.getProfile(),
864
+ client.shouldRecall(
865
+ bgQueryText, bgLastQueryText, bgSessionId,
866
+ similarityThreshold, maxRecallResults,
867
+ bgProjectTags,
868
+ bgConversationContext && bgConversationContext.length > 0 ? bgConversationContext : undefined,
869
+ {
870
+ fetch_multiplier: fetchMultiplier,
871
+ topk_cap_multiplier: topkCapMultiplier,
872
+ mmr_jaccard_threshold: mmrJaccardThreshold,
873
+ mmr_penalty_factor: mmrPenaltyFactor,
874
+ phase2_multiplier: phase2Multiplier,
875
+ llm_max_eval: llmMaxEval,
876
+ refine_strategy: refineStrategy,
877
+ refine_medium_chars: refineMediumChars,
878
+ },
879
+ bgDirectory,
880
+ ),
881
+ ])
882
+ .then(([profileRes, recallRes]) => {
883
+ if (recallRes.status === 'rejected') {
884
+ logErr("memoryInjectionHook shouldRecall failed", { error: String(recallRes.reason) });
885
+ return;
886
+ }
887
+ const profile = profileRes.status === 'fulfilled' ? profileRes.value : null;
888
+ const shouldRecallRes = recallRes.value;
889
+ if (!shouldRecallRes) {
890
+ showToast(tui, "🧠 Cerebro Service Unavailable", "Unable to reach memory API · check connection", "error", toastDelayMs);
891
+ return;
892
+ }
893
+ logDebug("memoryInjectionHook background fetch complete", {
894
+ sessionId: bgSessionId,
895
+ shouldRecall: shouldRecallRes.should_recall,
896
+ confidence: shouldRecallRes.confidence,
897
+ memCount: shouldRecallRes.memories?.length ?? 0,
898
+ });
905
899
 
906
- const memDynamic = newResults.filter((r) => r.memory.memory_type === "fact" || r.memory.memory_type === "event").length;
907
- const memStatic = newResults.filter((r) => r.memory.memory_type === "pinned" || r.memory.memory_type === "preference").length;
908
- const memOther = newResults.length - memDynamic - memStatic;
900
+ if (shouldRecallRes.should_recall && !Array.isArray(shouldRecallRes.memories)) {
901
+ logErr("memoryInjectionHook shouldRecall returned incomplete data", {
902
+ shouldRecall: shouldRecallRes.should_recall,
903
+ hasMemories: !!shouldRecallRes.memories,
904
+ });
905
+ return;
906
+ }
909
907
 
910
- let memCountMsg = "";
911
- if (memDynamic > 0) memCountMsg += `Dynamic(${memDynamic}) `;
912
- if (memStatic > 0) memCountMsg += `Static(${memStatic}) `;
913
- if (memOther > 0) memCountMsg += `Other(${memOther}) `;
908
+ let bgProfileBlock = "";
909
+ let bgProfileCountText = "";
910
+ let bgProfileInjected = false;
911
+
912
+ if (profile) {
913
+ const lastInjected = profileInjectedSessions.get(bgSessionId);
914
+ const ttlExpired = !lastInjected || (Date.now() - lastInjected > 30 * 60 * 1000);
915
+ if (ttlExpired) {
916
+ const built = buildProfileBlock(profile);
917
+ if (built) {
918
+ bgProfileBlock = built.block;
919
+ bgProfileCountText = built.countText;
920
+ bgProfileInjected = true;
921
+ }
922
+ }
923
+ }
914
924
 
915
- const categories = categorize(newResults);
916
- const catSummary = Array.from(categories.entries())
917
- .map(([label, items]) => `${label}(${items.length})`)
918
- .join(" · ");
925
+ recallCache.set(bgSessionId, {
926
+ profileBlock: bgProfileBlock,
927
+ recallResult: shouldRecallRes,
928
+ profileData: { countText: bgProfileCountText },
929
+ timestamp: Date.now(),
930
+ });
919
931
 
920
- let toastTitle: string;
921
- let toastMessage: string;
932
+ if (recallCache.size > 50) {
933
+ let oldestKey: string | null = null;
934
+ let oldestTime = Infinity;
935
+ for (const [k, v] of recallCache) {
936
+ if (v.timestamp < oldestTime) {
937
+ oldestTime = v.timestamp;
938
+ oldestKey = k;
939
+ }
940
+ }
941
+ if (oldestKey) recallCache.delete(oldestKey);
942
+ }
922
943
 
923
- if (clustered) {
924
- const clusterCount = clustered.cluster_summaries.length;
925
- const standaloneCount = clustered.standalone_memories.length;
926
- toastTitle = `🧠 Context Injected · ${clusterCount} 主题簇${standaloneCount > 0 ? ` · ${standaloneCount} 补充` : ""}`;
927
- toastMessage = profileInjected
928
- ? `Profile: ${profileCountText} · 聚合记忆展示`
929
- : `聚合记忆展示`;
930
- } else {
931
- toastTitle = `🧠 Context Injected · ${newResults.length} fragments`;
932
- toastMessage = profileInjected
933
- ? `Profile: ${profileCountText} · Memories: ${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`
934
- : `${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`;
935
- }
944
+ if (shouldRecallRes.should_recall) {
945
+ const results = shouldRecallRes.memories ?? [];
946
+ const existingIds = injectedMemoryIds.get(bgSessionId) ?? new Set<string>();
947
+ const newResults = results.filter((r) => !existingIds.has(r.memory.id));
948
+ if (newResults.length > 0) {
949
+ const newIds = newResults.map((r) => r.memory.id);
950
+ injectedMemoryIds.set(bgSessionId, new Set([...existingIds, ...newIds]));
951
+ }
936
952
 
937
- showToast(tui, toastTitle, toastMessage, "success", toastDelayMs);
953
+ const storedMemoryIds = shouldRecallRes.memories?.map((r) => r.memory.id) ?? [];
954
+ const storedDiscardedIds = shouldRecallRes.discarded?.map((d) => d.memory_id) ?? [];
955
+ const maxScore = storedMemoryIds.length > 0
956
+ ? Math.max(...(shouldRecallRes.memories?.map((r) => r.score) ?? [0]))
957
+ : 0;
958
+
959
+ const bgBlock = shouldRecallRes.clustered
960
+ ? buildClusteredContextBlock(shouldRecallRes.clustered, maxContentLength)
961
+ : buildContextBlock(newResults, maxContentLength);
962
+ const bgInjectedContent = bgBlock ?? undefined;
963
+
964
+ const items = [
965
+ ...(shouldRecallRes.memories?.map((r) => ({
966
+ memory_id: r.memory.id,
967
+ score: r.score,
968
+ refine_relevance: r.refine_relevance,
969
+ refine_reasoning: r.refine_reasoning,
970
+ is_kept: true,
971
+ })) ?? []),
972
+ ...(shouldRecallRes.discarded?.map((d) => ({
973
+ memory_id: d.memory_id,
974
+ score: d.score,
975
+ refine_relevance: d.refine_relevance,
976
+ refine_reasoning: d.refine_reasoning,
977
+ is_kept: false,
978
+ })) ?? []),
979
+ ];
980
+
981
+ client.createRecallEvent({
982
+ session_id: bgSessionId,
983
+ recall_type: "auto",
984
+ query_text: bgQueryText,
985
+ max_score: maxScore,
986
+ llm_confidence: shouldRecallRes.confidence ?? 0,
987
+ profile_injected: bgProfileInjected,
988
+ kept_count: storedMemoryIds.length,
989
+ discarded_count: storedDiscardedIds.length,
990
+ injected_count: newResults.length,
991
+ profile_content: bgProfileInjected && bgProfileBlock ? bgProfileBlock : undefined,
992
+ injected_content: bgInjectedContent,
993
+ items: items.length > 0 ? items : undefined,
994
+ }).catch((e: unknown) => {
995
+ logErr("memoryInjectionHook background createRecallEvent failed", { error: String(e) });
996
+ });
997
+ }
998
+ })
999
+ .catch((err: unknown) => {
1000
+ const errMsg = err instanceof Error ? err.message : String(err);
1001
+ logErr("memoryInjectionHook background fetch failed", { error: errMsg });
1002
+ if (errMsg.includes("[cerebro]")) {
1003
+ const cleanMsg = errMsg.replace(/^\[cerebro\]\s*/, "");
1004
+ if (cleanMsg.startsWith("500")) {
1005
+ showToast(tui, "🧠 Cerebro Server Error", cleanMsg.substring(0, 200), "error");
1006
+ } else if (cleanMsg.includes("timed out")) {
1007
+ showToast(tui, "🧠 Cerebro Service Timeout", cleanMsg.substring(0, 100), "error");
1008
+ }
1009
+ } else if (errMsg.includes("fetch") || errMsg.includes("network")) {
1010
+ showToast(tui, "🧠 Cerebro Service Unavailable", "Network error · check API connection", "error");
1011
+ }
1012
+ });
938
1013
  } catch (err) {
939
1014
  const errMsg = err instanceof Error ? err.message : String(err);
940
1015
  if (errMsg.includes("[cerebro]")) {
@@ -1098,6 +1173,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1098
1173
  if (input.sessionID) {
1099
1174
  sessionMessages.delete(input.sessionID);
1100
1175
  profileInjectedSessions.delete(input.sessionID);
1176
+ recallCache.delete(input.sessionID);
1101
1177
  firstMessages.delete(input.sessionID);
1102
1178
  }
1103
1179
  return;
@@ -1129,6 +1205,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1129
1205
  if (isAutoStoreEnabled && !isAutoStoreEnabled(input.sessionID)) {
1130
1206
  sessionMessages.delete(input.sessionID);
1131
1207
  profileInjectedSessions.delete(input.sessionID);
1208
+ recallCache.delete(input.sessionID);
1132
1209
  firstMessages.delete(input.sessionID);
1133
1210
  } else {
1134
1211
  const messages = sessionMessages.get(input.sessionID)!;
@@ -1159,6 +1236,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1159
1236
  sessionMessages.delete(input.sessionID);
1160
1237
  injectedSessions.delete(input.sessionID);
1161
1238
  profileInjectedSessions.delete(input.sessionID);
1239
+ recallCache.delete(input.sessionID);
1162
1240
  firstMessages.delete(input.sessionID);
1163
1241
  if (input.sessionID) {
1164
1242
  const deleted = pendingToolCalls.delete(input.sessionID);