@mingxy/cerebro 1.15.4 → 1.15.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hooks.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { Model, UserMessage, Part } from "@opencode-ai/sdk";
2
- import type { CerebroClient, SearchResult } from "./client.js";
2
+ import type { CerebroClient, SearchResult, ShouldRecallResponse } from "./client.js";
3
3
  import { type OmemPluginConfig, resolveAgentPolicy } from "./config.js";
4
4
  import { detectSaveKeyword, KEYWORD_NUDGE } from "./keywords.js";
5
5
  import { logDebug, logInfo, logError as logErr } from "./logger.js";
@@ -175,6 +175,14 @@ const profileInjectedSessions = new Map<string, number>();
175
175
  const injectedSessions = new Set<string>();
176
176
  const compactingSummaryCooldown = new Map<string, number>();
177
177
 
178
+ // Per-session async cache for fire-and-forget recall results
179
+ const recallCache = new Map<string, {
180
+ profileBlock: string;
181
+ recallResult: ShouldRecallResponse;
182
+ profileData: { countText: string };
183
+ timestamp: number;
184
+ }>();
185
+
178
186
  function hashString(str: string): string {
179
187
  let hash = 0;
180
188
  for (let i = 0; i < str.length; i++) {
@@ -615,6 +623,29 @@ export function autoRecallHook(client: CerebroClient, containerTags: string[], t
615
623
  };
616
624
  }
617
625
 
626
+ function buildProfileBlock(profile: any): { block: string; countText: string } | null {
627
+ const prefs = ((profile as any)?.static_facts ?? [])
628
+ .filter((sf: any) => {
629
+ const t: string[] = sf.tags ?? [];
630
+ return t.includes("preferences");
631
+ })
632
+ .map((sf: any) => sf.l2_content ?? sf.content ?? "")
633
+ .filter(Boolean);
634
+ const profileLines = prefs.length > 0
635
+ ? prefs.map((c: string) => ` · ${c}`).join("\n")
636
+ : " · (preferences queuing, will populate on next refresh)";
637
+ const block = [
638
+ "<cerebro-profile>",
639
+ profileLines,
640
+ "</cerebro-profile>",
641
+ ].join("\n");
642
+ const p = profile as any;
643
+ const dynamicCount = p?.dynamic_context?.length ?? 0;
644
+ const staticCount = p?.static_facts?.length ?? 0;
645
+ const countText = `Dynamic(${dynamicCount}) · Static(${staticCount})`;
646
+ return { block, countText };
647
+ }
648
+
618
649
  export function memoryInjectionHook(
619
650
  client: CerebroClient,
620
651
  containerTags: string[],
@@ -654,43 +685,6 @@ export function memoryInjectionHook(
654
685
  const messages = sessionMessages.get(input.sessionID) ?? [];
655
686
  const userMessages = messages.filter((m) => m.role === "user");
656
687
 
657
- // --- Profile Fetch ---
658
- const profile = await client.getProfile();
659
- let profileInjected = false;
660
- let profileCountText = "";
661
- let profileBlock = "";
662
- const lastInjected = profileInjectedSessions.get(input.sessionID);
663
- const ttlExpired = !lastInjected || (Date.now() - lastInjected > 30 * 60 * 1000);
664
- const profileIsFirstInjection = !lastInjected;
665
- if (profile && ttlExpired) {
666
- const prefs = ((profile as any)?.static_facts ?? [])
667
- .filter((sf: any) => {
668
- const t: string[] = sf.tags ?? [];
669
- return t.includes("preferences");
670
- })
671
- .map((sf: any) => sf.l2_content ?? sf.content ?? "")
672
- .filter(Boolean);
673
- const profileLines = prefs.length > 0
674
- ? prefs.map((c: string) => ` · ${c}`).join("\n")
675
- : " · (preferences queuing, will populate on next refresh)";
676
- profileBlock = [
677
- "<cerebro-profile>",
678
- profileLines,
679
- "</cerebro-profile>",
680
- ].join("\n");
681
- profileInjected = true;
682
- profileInjectedSessions.set(input.sessionID, Date.now());
683
- const p = profile as any;
684
- const dynamicCount = p?.dynamic_context?.length ?? 0;
685
- const staticCount = p?.static_facts?.length ?? 0;
686
- profileCountText = `Dynamic(${dynamicCount}) · Static(${staticCount})`;
687
- if (profileIsFirstInjection) {
688
- logDebug("memoryInjectionHook profile ready (first)", { dynamicCount, staticCount });
689
- } else {
690
- logDebug("memoryInjectionHook profile ready (TTL)", { dynamicCount, staticCount });
691
- }
692
- }
693
-
694
688
  if (userMessages.length === 0) {
695
689
  logDebug("memoryInjectionHook skipped: no user messages in session (post-compacting?)", { sessionId: input.sessionID });
696
690
  return;
@@ -699,7 +693,7 @@ export function memoryInjectionHook(
699
693
  const rawQuery = userMessages[userMessages.length - 1]?.content || firstMessages.get(input.sessionID) || "";
700
694
  const query_text = extractUserRequest(rawQuery);
701
695
  if (!query_text) {
702
- logDebug("memoryInjectionHook filtered system injection (profile already injected above)", { rawQueryPrefix: rawQuery.slice(0, 60) });
696
+ logDebug("memoryInjectionHook filtered system injection", { rawQueryPrefix: rawQuery.slice(0, 60) });
703
697
  return;
704
698
  }
705
699
  const last_query_text = userMessages.length >= 2 ? userMessages[userMessages.length - 2].content : undefined;
@@ -713,114 +707,92 @@ export function memoryInjectionHook(
713
707
  })
714
708
  : undefined;
715
709
 
716
- const shouldRecallRes = await client.shouldRecall(
717
- query_text, last_query_text, input.sessionID,
718
- similarityThreshold, maxRecallResults,
719
- projectTags.length > 0 ? projectTags : undefined,
720
- conversationContext && conversationContext.length > 0 ? conversationContext : undefined,
721
- {
722
- fetch_multiplier: fetchMultiplier,
723
- topk_cap_multiplier: topkCapMultiplier,
724
- mmr_jaccard_threshold: mmrJaccardThreshold,
725
- mmr_penalty_factor: mmrPenaltyFactor,
726
- phase2_multiplier: phase2Multiplier,
727
- llm_max_eval: llmMaxEval,
728
- refine_strategy: refineStrategy,
729
- refine_medium_chars: refineMediumChars,
730
- },
731
- directory || process.env.OMEM_PROJECT_DIR,
732
- );
733
-
734
- if (!shouldRecallRes) {
735
- showToast(tui, "🧠 Cerebro Service Unavailable", "Unable to reach memory API · check connection", "error", toastDelayMs);
736
- return;
737
- }
738
- logDebug("memoryInjectionHook shouldRecall result", { shouldRecall: shouldRecallRes.should_recall, confidence: shouldRecallRes.confidence, memCount: shouldRecallRes.memories?.length ?? 0, discardedCount: shouldRecallRes.discarded?.length ?? 0, clustered: !!shouldRecallRes.clustered });
710
+ // ========== Phase A: unified data fetch + injection ==========
711
+ let shouldRecallRes: ShouldRecallResponse;
712
+ let profileBlock = "";
713
+ let profileInjected = false;
714
+ let profileCountText = "";
715
+ let isCacheHit = false;
739
716
 
740
- const storedMemoryIds = shouldRecallRes.memories?.map((r) => r.memory.id) ?? [];
741
- const storedDiscardedIds = shouldRecallRes.discarded?.map((d) => d.memory_id) ?? [];
742
- const maxScore = storedMemoryIds.length > 0
743
- ? Math.max(...(shouldRecallRes.memories?.map((r) => r.score) ?? [0]))
744
- : 0;
717
+ const cached = recallCache.get(input.sessionID);
745
718
 
746
- const createEventAndReturn = async (
747
- injectedCount: number,
748
- keptCount: number,
749
- discardedCount: number,
750
- injectedContent?: string,
751
- ): Promise<string | undefined> => {
752
- try {
753
- const items = [
754
- ...(shouldRecallRes.memories?.map((r) => ({
755
- memory_id: r.memory.id,
756
- score: r.score,
757
- refine_relevance: r.refine_relevance,
758
- refine_reasoning: r.refine_reasoning,
759
- is_kept: true,
760
- })) ?? []),
761
- ...(shouldRecallRes.discarded?.map((d) => ({
762
- memory_id: d.memory_id,
763
- score: d.score,
764
- refine_relevance: d.refine_relevance,
765
- refine_reasoning: d.refine_reasoning,
766
- is_kept: false,
767
- })) ?? []),
768
- ];
769
- const result = await client.createRecallEvent({
770
- session_id: input.sessionID!,
771
- recall_type: "auto",
772
- query_text,
773
- max_score: maxScore,
774
- llm_confidence: shouldRecallRes.confidence ?? 0,
775
- profile_injected: profileInjected,
776
- kept_count: keptCount,
777
- discarded_count: discardedCount,
778
- injected_count: injectedCount,
779
- profile_content: profileInjected && profileBlock ? profileBlock : undefined,
780
- injected_content: injectedContent,
781
- items: items.length > 0 ? items : undefined,
782
- });
783
- return result?.event_id;
784
- } catch (e) {
785
- logErr("memoryInjectionHook createRecallEvent failed", { error: String(e) });
786
- return undefined;
719
+ if (cached) {
720
+ isCacheHit = true;
721
+ shouldRecallRes = cached.recallResult;
722
+ if (cached.profileBlock) {
723
+ profileBlock = cached.profileBlock;
724
+ profileInjected = true;
725
+ profileCountText = cached.profileData?.countText ?? "";
787
726
  }
788
- };
789
-
790
- // --- no-recall path: inject profile only ---
791
- if (!shouldRecallRes.should_recall) {
792
- const partsToInject: string[] = [];
793
- if (profileBlock) partsToInject.push(profileBlock);
794
- if (partsToInject.length > 0) {
795
- const injectText = partsToInject.join("\n\n");
796
- const contextPart: Part = {
797
- id: `prt_cerebro-context-${Date.now()}`,
798
- sessionID: input.sessionID,
799
- messageID: output.message.id,
800
- type: "text",
801
- text: injectText,
802
- synthetic: true,
803
- };
804
- output.parts.unshift(contextPart);
805
- logDebug("memoryInjectionHook profile injected (no-recall path)", { sessionId: input.sessionID });
727
+ } else {
728
+ // cache miss: synchronous await (first message takes 5-8s, but gets injection)
729
+ const [profile, recallRes] = await Promise.all([
730
+ client.getProfile(),
731
+ client.shouldRecall(
732
+ query_text, last_query_text, input.sessionID,
733
+ similarityThreshold, maxRecallResults,
734
+ projectTags.length > 0 ? projectTags : undefined,
735
+ conversationContext && conversationContext.length > 0 ? conversationContext : undefined,
736
+ {
737
+ fetch_multiplier: fetchMultiplier,
738
+ topk_cap_multiplier: topkCapMultiplier,
739
+ mmr_jaccard_threshold: mmrJaccardThreshold,
740
+ mmr_penalty_factor: mmrPenaltyFactor,
741
+ phase2_multiplier: phase2Multiplier,
742
+ llm_max_eval: llmMaxEval,
743
+ refine_strategy: refineStrategy,
744
+ refine_medium_chars: refineMediumChars,
745
+ },
746
+ directory || process.env.OMEM_PROJECT_DIR,
747
+ ),
748
+ ]);
749
+ if (!recallRes) {
750
+ showToast(tui, "🧠 Cerebro Service Unavailable", "Unable to reach memory API", "error", toastDelayMs);
751
+ return;
806
752
  }
807
- injectedSessions.add(input.sessionID);
808
- if (profileInjected && profileIsFirstInjection) {
809
- await createEventAndReturn(0, 0, 0);
810
- showToast(tui, "👨 Profile Injected", `${profileCountText} · no memory recall needed`, "success", toastDelayMs);
753
+ shouldRecallRes = recallRes;
754
+
755
+ // build profile block
756
+ if (profile) {
757
+ const built = buildProfileBlock(profile);
758
+ if (built) {
759
+ profileBlock = built.block;
760
+ profileCountText = built.countText;
761
+ profileInjected = true;
762
+ profileInjectedSessions.set(input.sessionID, Date.now());
763
+ }
811
764
  }
812
- return;
813
- }
814
765
 
815
- const results = shouldRecallRes.memories ?? [];
816
- const clustered = shouldRecallRes.clustered;
766
+ // write cache for next round
767
+ recallCache.set(input.sessionID, {
768
+ profileBlock,
769
+ recallResult: shouldRecallRes,
770
+ profileData: { countText: profileCountText },
771
+ timestamp: Date.now(),
772
+ });
817
773
 
818
- const existingIds = injectedMemoryIds.get(input.sessionID) ?? new Set<string>();
819
- const newResults = results.filter((r) => !existingIds.has(r.memory.id));
820
- logDebug("memoryInjectionHook dedup", { totalResults: results.length, existingCount: existingIds.size, newCount: newResults.length });
774
+ // LRU eviction
775
+ if (recallCache.size > 50) {
776
+ let oldestKey: string | null = null;
777
+ let oldestTime = Infinity;
778
+ for (const [k, v] of recallCache) {
779
+ if (v.timestamp < oldestTime) { oldestTime = v.timestamp; oldestKey = k; }
780
+ }
781
+ if (oldestKey) recallCache.delete(oldestKey);
782
+ }
821
783
 
822
- // --- dedup path: inject profile only ---
823
- if (newResults.length === 0) {
784
+ // defensive check
785
+ if (shouldRecallRes.should_recall && !Array.isArray(shouldRecallRes.memories)) {
786
+ logErr("memoryInjectionHook shouldRecall returned incomplete data", { shouldRecall: shouldRecallRes.should_recall, hasMemories: !!shouldRecallRes.memories });
787
+ return;
788
+ }
789
+
790
+ logDebug("memoryInjectionHook cache miss, fetched synchronously", { sessionId: input.sessionID, shouldRecall: shouldRecallRes.should_recall, memCount: shouldRecallRes.memories?.length ?? 0 });
791
+ }
792
+
793
+ // ========== unified injection logic (cache hit + cache miss share this) ==========
794
+ if (!shouldRecallRes.should_recall) {
795
+ // no-recall path: inject profile only
824
796
  const partsToInject: string[] = [];
825
797
  if (profileBlock) partsToInject.push(profileBlock);
826
798
  if (partsToInject.length > 0) {
@@ -834,107 +806,279 @@ export function memoryInjectionHook(
834
806
  synthetic: true,
835
807
  };
836
808
  output.parts.unshift(contextPart);
837
- logDebug("memoryInjectionHook profile injected (dedup path)", { sessionId: input.sessionID });
809
+ logDebug("memoryInjectionHook profile injected (no-recall)", { sessionId: input.sessionID });
838
810
  }
839
811
  injectedSessions.add(input.sessionID);
840
- if (profileInjected && profileIsFirstInjection) {
841
- showToast(tui, "👨 Profile Injected", `${profileCountText} · all memories already injected`, "success", toastDelayMs);
842
- }
843
- return;
844
- }
845
-
846
- // --- Token Budget Calculation ---
847
- const profileChars = profileInjected ? profileBlock.length : 0;
848
- const budgetRemaining = maxContentChars - profileChars;
849
- if (budgetRemaining < 0) {
850
- logDebug("memoryInjectionHook budget overflow", { profileChars, maxContentChars, deficit: -budgetRemaining });
851
- }
852
- const itemCount = clustered
853
- ? (clustered.cluster_summaries.length + clustered.standalone_memories.length)
854
- : newResults.length;
855
- const dynamicMaxContentLength = itemCount > 0
856
- ? Math.min(maxContentLength, Math.max(MIN_ITEM_CONTENT_CHARS, Math.floor(budgetRemaining / itemCount)))
857
- : maxContentLength;
858
- logDebug("memoryInjectionHook budget", {
859
- maxContentChars, profileChars, budgetRemaining, itemCount,
860
- configuredMax: maxContentLength, dynamicMax: dynamicMaxContentLength,
861
- });
812
+ showToast(tui, "🧠 Profile Injected", profileCountText ? `Profile: ${profileCountText} · no recall needed` : "No memory recall needed", "success", toastDelayMs);
813
+ } else {
814
+ const results = shouldRecallRes.memories ?? [];
815
+ const clustered = shouldRecallRes.clustered;
816
+ const existingIds = injectedMemoryIds.get(input.sessionID) ?? new Set<string>();
817
+ const newResults = results.filter((r) => !existingIds.has(r.memory.id));
818
+ logDebug("memoryInjectionHook dedup", { totalResults: results.length, existingCount: existingIds.size, newCount: newResults.length });
819
+
820
+ if (newResults.length === 0) {
821
+ const partsToInject: string[] = [];
822
+ if (profileBlock) partsToInject.push(profileBlock);
823
+ if (partsToInject.length > 0) {
824
+ const injectText = partsToInject.join("\n\n");
825
+ const contextPart: Part = {
826
+ id: `prt_cerebro-context-${Date.now()}`,
827
+ sessionID: input.sessionID,
828
+ messageID: output.message.id,
829
+ type: "text",
830
+ text: injectText,
831
+ synthetic: true,
832
+ };
833
+ output.parts.unshift(contextPart);
834
+ logDebug("memoryInjectionHook profile injected (dedup)", { sessionId: input.sessionID });
835
+ }
836
+ injectedSessions.add(input.sessionID);
837
+ } else {
838
+ const profileChars = profileInjected ? profileBlock.length : 0;
839
+ const budgetRemaining = maxContentChars - profileChars;
840
+ const itemCount = clustered
841
+ ? (clustered.cluster_summaries.length + clustered.standalone_memories.length)
842
+ : newResults.length;
843
+ const dynamicMaxContentLength = itemCount > 0
844
+ ? Math.min(maxContentLength, Math.max(MIN_ITEM_CONTENT_CHARS, Math.floor(budgetRemaining / itemCount)))
845
+ : maxContentLength;
846
+
847
+ const block = clustered
848
+ ? buildClusteredContextBlock(clustered, dynamicMaxContentLength)
849
+ : buildContextBlock(newResults, dynamicMaxContentLength);
850
+
851
+ const partsToInject: string[] = [];
852
+ if (block) partsToInject.push(block);
853
+ if (block) partsToInject.push(FETCH_POLICY);
854
+ if (profileBlock) partsToInject.push(profileBlock);
855
+ if (isSaveKeyword) partsToInject.push(KEYWORD_NUDGE);
856
+
857
+ if (partsToInject.length > 0) {
858
+ const injectText = partsToInject.join("\n\n");
859
+ const contextPart: Part = {
860
+ id: `prt_cerebro-context-${Date.now()}`,
861
+ sessionID: input.sessionID,
862
+ messageID: output.message.id,
863
+ type: "text",
864
+ text: injectText,
865
+ synthetic: true,
866
+ };
867
+ output.parts.unshift(contextPart);
868
+ logDebug("memoryInjectionHook block injected", {
869
+ sessionId: input.sessionID,
870
+ injectTextLen: injectText.length,
871
+ blockPreview: block?.slice(0, 200),
872
+ });
873
+ }
862
874
 
863
- const block = clustered
864
- ? buildClusteredContextBlock(clustered, dynamicMaxContentLength)
865
- : buildContextBlock(newResults, dynamicMaxContentLength);
875
+ injectedSessions.add(input.sessionID);
866
876
 
867
- // ★★★ Core change: inject via output.parts.unshift + synthetic:true ★★★
868
- const partsToInject: string[] = [];
869
- if (profileBlock) partsToInject.push(profileBlock);
870
- if (block) partsToInject.push(block);
871
- if (block) partsToInject.push(FETCH_POLICY);
872
- if (isSaveKeyword) partsToInject.push(KEYWORD_NUDGE);
873
-
874
- if (partsToInject.length > 0) {
875
- const injectText = partsToInject.join("\n\n");
876
- const contextPart: Part = {
877
- id: `prt_cerebro-context-${Date.now()}`,
878
- sessionID: input.sessionID,
879
- messageID: output.message.id,
880
- type: "text",
881
- text: injectText,
882
- synthetic: true,
883
- };
884
- output.parts.unshift(contextPart);
885
- logDebug("memoryInjectionHook block injected to output.parts", {
886
- sessionId: input.sessionID,
887
- injectTextLen: injectText.length,
888
- blockPreview: block?.slice(0, 200),
889
- });
890
- } else {
891
- logDebug("memoryInjectionHook no content to inject", { sessionId: input.sessionID });
892
- }
877
+ if (isSaveKeyword) {
878
+ saveKeywordDetectedSessions.delete(input.sessionID);
879
+ }
893
880
 
894
- injectedSessions.add(input.sessionID);
881
+ const newIds = newResults.map((r) => r.memory.id);
882
+ injectedMemoryIds.set(input.sessionID, new Set([...existingIds, ...newIds]));
883
+
884
+ const memDynamic = newResults.filter((r) => r.memory.memory_type === "fact" || r.memory.memory_type === "event").length;
885
+ const memStatic = newResults.filter((r) => r.memory.memory_type === "pinned" || r.memory.memory_type === "preference").length;
886
+ const memOther = newResults.length - memDynamic - memStatic;
887
+
888
+ let memCountMsg = "";
889
+ if (memDynamic > 0) memCountMsg += `Dynamic(${memDynamic}) `;
890
+ if (memStatic > 0) memCountMsg += `Static(${memStatic}) `;
891
+ if (memOther > 0) memCountMsg += `Other(${memOther}) `;
892
+
893
+ const categories = categorize(newResults);
894
+ const catSummary = Array.from(categories.entries())
895
+ .map(([label, items]) => `${label}(${items.length})`)
896
+ .join(" · ");
897
+
898
+ let toastTitle: string;
899
+ let toastMessage: string;
900
+
901
+ if (clustered) {
902
+ const clusterCount = clustered.cluster_summaries.length;
903
+ const standaloneCount = clustered.standalone_memories.length;
904
+ toastTitle = `🧠 Context Injected · ${clusterCount} clusters${standaloneCount > 0 ? ` · ${standaloneCount} standalone` : ""}`;
905
+ toastMessage = profileInjected
906
+ ? `Profile: ${profileCountText} · Clustered memory display`
907
+ : `Clustered memory display`;
908
+ } else {
909
+ toastTitle = `🧠 Context Injected · ${newResults.length} fragments`;
910
+ toastMessage = profileInjected
911
+ ? `Profile: ${profileCountText} · Memories: ${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`
912
+ : `${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`;
913
+ }
895
914
 
896
- if (isSaveKeyword) {
897
- saveKeywordDetectedSessions.delete(input.sessionID);
915
+ showToast(tui, toastTitle, toastMessage, "success", toastDelayMs);
916
+ }
898
917
  }
899
918
 
900
- const newIds = newResults.map((r) => r.memory.id);
901
- injectedMemoryIds.set(input.sessionID, new Set([...existingIds, ...newIds]));
902
- logDebug("memoryInjectionHook injection complete", { newIds: newIds.length, clustered: !!clustered, sessionId: input.sessionID });
919
+ logDebug("memoryInjectionHook injection complete", { sessionId: input.sessionID, isCacheHit });
920
+
921
+ // ========== Phase B: fire-and-forget async fetch for NEXT round (cache hit only) ==========
922
+ if (isCacheHit) {
923
+ const bgSessionId = input.sessionID;
924
+ const bgQueryText = query_text;
925
+ const bgLastQueryText = last_query_text;
926
+ const bgConversationContext = conversationContext;
927
+ const bgProjectTags = projectTags.length > 0 ? projectTags : undefined;
928
+ const bgDirectory = directory || process.env.OMEM_PROJECT_DIR;
929
+
930
+ Promise.allSettled([
931
+ client.getProfile(),
932
+ client.shouldRecall(
933
+ bgQueryText, bgLastQueryText, bgSessionId,
934
+ similarityThreshold, maxRecallResults,
935
+ bgProjectTags,
936
+ bgConversationContext && bgConversationContext.length > 0 ? bgConversationContext : undefined,
937
+ {
938
+ fetch_multiplier: fetchMultiplier,
939
+ topk_cap_multiplier: topkCapMultiplier,
940
+ mmr_jaccard_threshold: mmrJaccardThreshold,
941
+ mmr_penalty_factor: mmrPenaltyFactor,
942
+ phase2_multiplier: phase2Multiplier,
943
+ llm_max_eval: llmMaxEval,
944
+ refine_strategy: refineStrategy,
945
+ refine_medium_chars: refineMediumChars,
946
+ },
947
+ bgDirectory,
948
+ ),
949
+ ])
950
+ .then(([profileRes, recallRes]) => {
951
+ if (recallRes.status === 'rejected') {
952
+ logErr("memoryInjectionHook shouldRecall failed", { error: String(recallRes.reason) });
953
+ return;
954
+ }
955
+ const profile = profileRes.status === 'fulfilled' ? profileRes.value : null;
956
+ const shouldRecallRes = recallRes.value;
957
+ if (!shouldRecallRes) {
958
+ showToast(tui, "🧠 Cerebro Service Unavailable", "Unable to reach memory API · check connection", "error", toastDelayMs);
959
+ return;
960
+ }
961
+ logDebug("memoryInjectionHook background fetch complete", {
962
+ sessionId: bgSessionId,
963
+ shouldRecall: shouldRecallRes.should_recall,
964
+ confidence: shouldRecallRes.confidence,
965
+ memCount: shouldRecallRes.memories?.length ?? 0,
966
+ });
903
967
 
904
- await createEventAndReturn(newResults.length, storedMemoryIds.length, storedDiscardedIds.length, block || undefined);
968
+ if (shouldRecallRes.should_recall && !Array.isArray(shouldRecallRes.memories)) {
969
+ logErr("memoryInjectionHook shouldRecall returned incomplete data", {
970
+ shouldRecall: shouldRecallRes.should_recall,
971
+ hasMemories: !!shouldRecallRes.memories,
972
+ });
973
+ return;
974
+ }
905
975
 
906
- const memDynamic = newResults.filter((r) => r.memory.memory_type === "fact" || r.memory.memory_type === "event").length;
907
- const memStatic = newResults.filter((r) => r.memory.memory_type === "pinned" || r.memory.memory_type === "preference").length;
908
- const memOther = newResults.length - memDynamic - memStatic;
976
+ let bgProfileBlock = "";
977
+ let bgProfileCountText = "";
978
+ let bgProfileInjected = false;
979
+
980
+ if (profile) {
981
+ const lastInjected = profileInjectedSessions.get(bgSessionId);
982
+ const ttlExpired = !lastInjected || (Date.now() - lastInjected > 30 * 60 * 1000);
983
+ if (ttlExpired) {
984
+ const built = buildProfileBlock(profile);
985
+ if (built) {
986
+ bgProfileBlock = built.block;
987
+ bgProfileCountText = built.countText;
988
+ bgProfileInjected = true;
989
+ }
990
+ }
991
+ }
909
992
 
910
- let memCountMsg = "";
911
- if (memDynamic > 0) memCountMsg += `Dynamic(${memDynamic}) `;
912
- if (memStatic > 0) memCountMsg += `Static(${memStatic}) `;
913
- if (memOther > 0) memCountMsg += `Other(${memOther}) `;
993
+ recallCache.set(bgSessionId, {
994
+ profileBlock: bgProfileBlock,
995
+ recallResult: shouldRecallRes,
996
+ profileData: { countText: bgProfileCountText },
997
+ timestamp: Date.now(),
998
+ });
914
999
 
915
- const categories = categorize(newResults);
916
- const catSummary = Array.from(categories.entries())
917
- .map(([label, items]) => `${label}(${items.length})`)
918
- .join(" · ");
1000
+ if (recallCache.size > 50) {
1001
+ let oldestKey: string | null = null;
1002
+ let oldestTime = Infinity;
1003
+ for (const [k, v] of recallCache) {
1004
+ if (v.timestamp < oldestTime) {
1005
+ oldestTime = v.timestamp;
1006
+ oldestKey = k;
1007
+ }
1008
+ }
1009
+ if (oldestKey) recallCache.delete(oldestKey);
1010
+ }
919
1011
 
920
- let toastTitle: string;
921
- let toastMessage: string;
1012
+ if (shouldRecallRes.should_recall) {
1013
+ const results = shouldRecallRes.memories ?? [];
1014
+ const existingIds = injectedMemoryIds.get(bgSessionId) ?? new Set<string>();
1015
+ const newResults = results.filter((r) => !existingIds.has(r.memory.id));
1016
+ if (newResults.length > 0) {
1017
+ const newIds = newResults.map((r) => r.memory.id);
1018
+ injectedMemoryIds.set(bgSessionId, new Set([...existingIds, ...newIds]));
1019
+ }
922
1020
 
923
- if (clustered) {
924
- const clusterCount = clustered.cluster_summaries.length;
925
- const standaloneCount = clustered.standalone_memories.length;
926
- toastTitle = `🧠 Context Injected · ${clusterCount} 主题簇${standaloneCount > 0 ? ` · ${standaloneCount} 补充` : ""}`;
927
- toastMessage = profileInjected
928
- ? `Profile: ${profileCountText} · 聚合记忆展示`
929
- : `聚合记忆展示`;
930
- } else {
931
- toastTitle = `🧠 Context Injected · ${newResults.length} fragments`;
932
- toastMessage = profileInjected
933
- ? `Profile: ${profileCountText} · Memories: ${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`
934
- : `${memCountMsg.trim()}${catSummary ? ` · ${catSummary}` : ""}`;
1021
+ const storedMemoryIds = shouldRecallRes.memories?.map((r) => r.memory.id) ?? [];
1022
+ const storedDiscardedIds = shouldRecallRes.discarded?.map((d) => d.memory_id) ?? [];
1023
+ const maxScore = storedMemoryIds.length > 0
1024
+ ? Math.max(...(shouldRecallRes.memories?.map((r) => r.score) ?? [0]))
1025
+ : 0;
1026
+
1027
+ const bgBlock = shouldRecallRes.clustered
1028
+ ? buildClusteredContextBlock(shouldRecallRes.clustered, maxContentLength)
1029
+ : buildContextBlock(newResults, maxContentLength);
1030
+ const bgInjectedContent = bgBlock ?? undefined;
1031
+
1032
+ const items = [
1033
+ ...(shouldRecallRes.memories?.map((r) => ({
1034
+ memory_id: r.memory.id,
1035
+ score: r.score,
1036
+ refine_relevance: r.refine_relevance,
1037
+ refine_reasoning: r.refine_reasoning,
1038
+ is_kept: true,
1039
+ })) ?? []),
1040
+ ...(shouldRecallRes.discarded?.map((d) => ({
1041
+ memory_id: d.memory_id,
1042
+ score: d.score,
1043
+ refine_relevance: d.refine_relevance,
1044
+ refine_reasoning: d.refine_reasoning,
1045
+ is_kept: false,
1046
+ })) ?? []),
1047
+ ];
1048
+
1049
+ client.createRecallEvent({
1050
+ session_id: bgSessionId,
1051
+ recall_type: "auto",
1052
+ query_text: bgQueryText,
1053
+ max_score: maxScore,
1054
+ llm_confidence: shouldRecallRes.confidence ?? 0,
1055
+ profile_injected: bgProfileInjected,
1056
+ kept_count: storedMemoryIds.length,
1057
+ discarded_count: storedDiscardedIds.length,
1058
+ injected_count: newResults.length,
1059
+ profile_content: bgProfileInjected && bgProfileBlock ? bgProfileBlock : undefined,
1060
+ injected_content: bgInjectedContent,
1061
+ items: items.length > 0 ? items : undefined,
1062
+ }).catch((e: unknown) => {
1063
+ logErr("memoryInjectionHook background createRecallEvent failed", { error: String(e) });
1064
+ });
1065
+ }
1066
+ })
1067
+ .catch((err: unknown) => {
1068
+ const errMsg = err instanceof Error ? err.message : String(err);
1069
+ logErr("memoryInjectionHook background fetch failed", { error: errMsg });
1070
+ if (errMsg.includes("[cerebro]")) {
1071
+ const cleanMsg = errMsg.replace(/^\[cerebro\]\s*/, "");
1072
+ if (cleanMsg.startsWith("500")) {
1073
+ showToast(tui, "🧠 Cerebro Server Error", cleanMsg.substring(0, 200), "error");
1074
+ } else if (cleanMsg.includes("timed out")) {
1075
+ showToast(tui, "🧠 Cerebro Service Timeout", cleanMsg.substring(0, 100), "error");
1076
+ }
1077
+ } else if (errMsg.includes("fetch") || errMsg.includes("network")) {
1078
+ showToast(tui, "🧠 Cerebro Service Unavailable", "Network error · check API connection", "error");
1079
+ }
1080
+ });
935
1081
  }
936
-
937
- showToast(tui, toastTitle, toastMessage, "success", toastDelayMs);
938
1082
  } catch (err) {
939
1083
  const errMsg = err instanceof Error ? err.message : String(err);
940
1084
  if (errMsg.includes("[cerebro]")) {
@@ -1098,6 +1242,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1098
1242
  if (input.sessionID) {
1099
1243
  sessionMessages.delete(input.sessionID);
1100
1244
  profileInjectedSessions.delete(input.sessionID);
1245
+ recallCache.delete(input.sessionID);
1101
1246
  firstMessages.delete(input.sessionID);
1102
1247
  }
1103
1248
  return;
@@ -1129,6 +1274,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1129
1274
  if (isAutoStoreEnabled && !isAutoStoreEnabled(input.sessionID)) {
1130
1275
  sessionMessages.delete(input.sessionID);
1131
1276
  profileInjectedSessions.delete(input.sessionID);
1277
+ recallCache.delete(input.sessionID);
1132
1278
  firstMessages.delete(input.sessionID);
1133
1279
  } else {
1134
1280
  const messages = sessionMessages.get(input.sessionID)!;
@@ -1159,6 +1305,7 @@ export function compactingHook(client: CerebroClient, containerTags: string[], t
1159
1305
  sessionMessages.delete(input.sessionID);
1160
1306
  injectedSessions.delete(input.sessionID);
1161
1307
  profileInjectedSessions.delete(input.sessionID);
1308
+ recallCache.delete(input.sessionID);
1162
1309
  firstMessages.delete(input.sessionID);
1163
1310
  if (input.sessionID) {
1164
1311
  const deleted = pendingToolCalls.delete(input.sessionID);