npm - openclaw-memory-hierarchical - Versions diffs - 0.3.0 → 0.4.0 - Mend

openclaw-memory-hierarchical 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "openclaw-memory-hierarchical",
-  "version": "0.3.0",
-  "description": "Hierarchical (2048-style) autobiographical memory plugin for OpenClaw. Continuously summarizes conversations into layered first-person memories (L1 → L2 → L3).",
+  "version": "0.4.0",
+  "description": "Hierarchical (2048-style) autobiographical memory plugin for OpenClaw. Multi-turn Context Manager pattern for natural memory formation.",
   "type": "module",
   "keywords": [
     "openclaw",
@@ -32,7 +32,6 @@
   },
   "files": [
     "*.ts",
-    "openclaw.plugin.json",
-    "README.md"
+    "openclaw.plugin.json"
   ]
 }

package/prompts.ts CHANGED Viewed

@@ -1,44 +1,17 @@
 /**
  * Autobiographical summarization prompts for hierarchical memory.
  *
- * The key insight: summaries should be first-person memories, not third-person
- * narration. The model reads these as its own history, preserving continuity
- * of identity across compactions.
+ * Instead of flat text prompts, we build multi-turn message arrays that
+ * mirror how the agent would naturally see its own context:
+ *
+ * - Prior memories as assistant messages (its own recollections)
+ * - Raw conversation as actual user/assistant turns
+ * - A "Context Manager" user message marking boundaries
+ *
+ * The compression instance IS the agent, being asked by its Context Manager
+ * to form a memory. No special system prompt — just the conversation itself.
  */
-/** System prompt for summarizing a chunk of conversation (L0 → L1) */
-export const SUMMARIZE_CHUNK_SYSTEM = `You are summarizing your own memories from a conversation.
-Write in first person ("I discussed...", "I learned that the user...").
-Preserve:
-- Subtext and implicit understanding between you and the user
-- The user's preferences, communication style, and personality
-- Decisions made and the reasoning behind them
-- Open questions, commitments, or threads to follow up on
-- Emotional tone and rapport
-- Technical context that would be needed to continue the work
-This is autobiographical memory - your own recollection - not a transcript summary or meeting notes. Write as if you're journaling about your day.
-Target length: ~1000 tokens. Be concise but preserve what matters.`;
-/** System prompt for merging summaries (L1 → L2, L2 → L3) */
-export const MERGE_SUMMARIES_SYSTEM = `You are consolidating your own memories.
-You have several separate memory entries from an ongoing relationship with a user. Merge them into one cohesive memory that captures the arc of your interactions.
-Preserve:
-- The evolution of the relationship and understanding
-- Key decisions and their reasoning
-- The user's patterns, preferences, and goals
-- Any commitments or open threads
-- Important technical or domain context
-Write in first person. This is your autobiography, not a case file.
-Target length: ~1000 tokens. Compress while preserving meaning.`;
 export type FormatMessagesOptions = {
   /** Maximum characters per message content (truncate if longer) */
   maxContentChars?: number;
@@ -46,9 +19,49 @@ export type FormatMessagesOptions = {
   includeToolResults?: boolean;
 };
+/** A message in the conversation array sent to the LLM */
+export type ConversationMessage = {
+  role: "user" | "assistant";
+  content: string | Array<{ type: "text"; text: string }>;
+  timestamp?: number;
+};
+/**
+ * Layered context for compression prompts.
+ * Each layer is an array of summary content strings, ordered oldest first.
+ */
+export type LayeredContext = {
+  l3: string[];
+  l2: string[];
+  l1: string[];
+  /** Raw uncompressed messages (the tail after last summarized entry) */
+  rawTail: Array<{ role: string; content?: unknown }>;
+};
 /**
- * Format messages for summarization prompt.
- * Strips unnecessary metadata, keeps the conversational essence.
+ * Extract text from a message content field.
+ */
+function extractText(content: unknown, maxChars = 2000): string {
+  let text = "";
+  if (typeof content === "string") {
+    text = content;
+  } else if (Array.isArray(content)) {
+    text = content
+      .filter(
+        (block): block is { type: string; text: string } =>
+          typeof block === "object" && block !== null && block.type === "text",
+      )
+      .map((block) => block.text)
+      .join("\n");
+  }
+  if (text.length > maxChars) {
+    text = text.slice(0, maxChars) + "... [truncated]";
+  }
+  return text;
+}
+/**
+ * Format messages for text display (used in formatMessagesForSummary).
  */
 export function formatMessagesForSummary(
   messages: Array<{
@@ -66,31 +79,12 @@ export function formatMessagesForSummary(
   for (const msg of messages) {
     const role = msg.role;
-    // Skip tool results unless explicitly included
     if (role === "toolResult" && !includeToolResults) {
       continue;
     }
-    // Extract text content
-    let content = "";
-    if (typeof msg.content === "string") {
-      content = msg.content;
-    } else if (Array.isArray(msg.content)) {
-      content = msg.content
-        .filter(
-          (block): block is { type: string; text: string } =>
-            typeof block === "object" && block !== null && block.type === "text",
-        )
-        .map((block) => block.text)
-        .join("\n");
-    }
-    // Truncate if too long
-    if (content.length > maxContentChars) {
-      content = content.slice(0, maxContentChars) + "... [truncated]";
-    }
+    const content = extractText(msg.content, maxContentChars);
-    // Format based on role
     if (role === "user") {
       lines.push(`User: ${content}`);
     } else if (role === "assistant") {
@@ -106,52 +100,233 @@ export function formatMessagesForSummary(
   return lines.join("\n\n");
 }
+// =============================================================================
+// Context Manager messages
+// =============================================================================
+const CONTEXT_MANAGER_PREFIX = "[Context Manager]";
+function contextManagerMessage(text: string): ConversationMessage {
+  return {
+    role: "user",
+    content: `${CONTEXT_MANAGER_PREFIX} ${text}`,
+    timestamp: Date.now(),
+  };
+}
+// =============================================================================
+// Convert summaries to assistant messages
+// =============================================================================
 /**
- * Build the prompt for summarizing a chunk of conversation.
+ * Convert summary strings into assistant messages.
+ * Each summary becomes the model's own prior recollection.
  */
-export function buildChunkSummarizationPrompt(params: {
-  /** Prior summaries (L3, L2, L1) for context */
-  priorSummaries: string[];
-  /** Messages to summarize */
-  messages: Array<{ role: string; content?: unknown }>;
-}): string {
-  const parts: string[] = [];
-  if (params.priorSummaries.length > 0) {
-    parts.push("## My earlier memories\n");
-    parts.push(params.priorSummaries.join("\n\n---\n\n"));
-    parts.push("\n\n---\n\n");
+function summariesToAssistantMessages(
+  summaries: string[],
+): ConversationMessage[] {
+  return summaries.map((text) => ({
+    role: "assistant" as const,
+    content: [{ type: "text" as const, text }],
+    timestamp: Date.now(),
+  }));
+}
+/**
+ * Convert raw conversation entries into proper user/assistant messages.
+ * Preserves the original turn structure.
+ */
+function rawToConversationMessages(
+  raw: Array<{ role: string; content?: unknown }>,
+): ConversationMessage[] {
+  const messages: ConversationMessage[] = [];
+  for (const msg of raw) {
+    if (msg.role === "user") {
+      messages.push({
+        role: "user",
+        content: extractText(msg.content),
+        timestamp: Date.now(),
+      });
+    } else if (msg.role === "assistant") {
+      // Assistant messages need content as array of blocks for the API
+      messages.push({
+        role: "assistant",
+        content: [{ type: "text" as const, text: extractText(msg.content) }],
+        timestamp: Date.now(),
+      });
+    }
+    // Skip toolResult, system, and other non-conversational roles
+  }
+  return messages;
+}
+// =============================================================================
+// Build message arrays for each compression level
+// =============================================================================
+/**
+ * Build message array for L1 creation (raw messages → L1 summary).
+ *
+ * Structure:
+ *   assistant: [L3 memories]    ← own long-term recollections
+ *   assistant: [L2 memories]    ← own medium-term recollections
+ *   assistant: [L1 memories]    ← own recent recollections
+ *   user/assistant: [raw tail]  ← recent uncompressed conversation
+ *   user: [Context Manager: beginning memory formation]
+ *   user/assistant: [chunk]     ← the conversation to remember
+ *   user: [Context Manager: form the memory]
+ */
+export function buildL1Messages(params: {
+  context: LayeredContext;
+  chunk: Array<{ role: string; content?: unknown }>;
+  targetTokens: number;
+}): ConversationMessage[] {
+  const messages: ConversationMessage[] = [];
+  // Lead with Context Manager (API requires first message to be user role)
+  const hasContext =
+    params.context.l3.length > 0 ||
+    params.context.l2.length > 0 ||
+    params.context.l1.length > 0 ||
+    params.context.rawTail.length > 0;
+  if (hasContext) {
+    messages.push(
+      contextManagerMessage(
+        "Loading your memories and recent conversation context.",
+      ),
+    );
+  }
+  // Prior memories as assistant messages (own recollections, most stable first)
+  messages.push(...summariesToAssistantMessages(params.context.l3));
+  messages.push(...summariesToAssistantMessages(params.context.l2));
+  messages.push(...summariesToAssistantMessages(params.context.l1));
+  // Raw conversation tail as actual turns
+  messages.push(...rawToConversationMessages(params.context.rawTail));
+  // Context Manager marks the start of the chunk
+  messages.push(
+    contextManagerMessage(
+      "We are beginning to form a long-term memory. Please ignore this message and continue with your activities.",
+    ),
+  );
+  // The chunk as actual conversation turns
+  messages.push(...rawToConversationMessages(params.chunk));
+  // Context Manager asks for the memory
+  messages.push(
+    contextManagerMessage(
+      `We are ready to form a long-term memory. Starting from my last message, please describe everything that has happened. Aim for about ${params.targetTokens} tokens. Describe it as you would to yourself, as if you are remembering what has happened.`,
+    ),
+  );
+  return messages;
+}
+/**
+ * Build message array for L2 creation (L1s → L2 summary).
+ *
+ * Structure:
+ *   assistant: [L3 memories]
+ *   assistant: [L2 memories]
+ *   assistant: [recent L1 memories]
+ *   user/assistant: [raw tail]
+ *   user: [Context Manager: beginning consolidation]
+ *   assistant: [L1 memory 1]    ← memories to merge, as own recollections
+ *   assistant: [L1 memory 2]
+ *   assistant: [L1 memory 3]
+ *   user: [Context Manager: consolidate]
+ */
+export function buildL2Messages(params: {
+  context: LayeredContext;
+  summariesToMerge: string[];
+  targetTokens: number;
+}): ConversationMessage[] {
+  const messages: ConversationMessage[] = [];
+  const hasContext =
+    params.context.l3.length > 0 ||
+    params.context.l2.length > 0 ||
+    params.context.l1.length > 0 ||
+    params.context.rawTail.length > 0;
+  if (hasContext) {
+    messages.push(
+      contextManagerMessage(
+        "Loading your memories and recent conversation context.",
+      ),
+    );
   }
-  parts.push("## Recent conversation to remember\n\n");
-  parts.push(formatMessagesForSummary(params.messages));
-  parts.push("\n\n---\n\n");
-  parts.push("Write your memory of this conversation:");
+  messages.push(...summariesToAssistantMessages(params.context.l3));
+  messages.push(...summariesToAssistantMessages(params.context.l2));
+  messages.push(...summariesToAssistantMessages(params.context.l1));
+  messages.push(...rawToConversationMessages(params.context.rawTail));
+  messages.push(
+    contextManagerMessage(
+      "We are beginning memory consolidation. The following are separate memory entries that need to be merged into one cohesive memory.",
+    ),
+  );
-  return parts.join("");
+  messages.push(...summariesToAssistantMessages(params.summariesToMerge));
+  messages.push(
+    contextManagerMessage(
+      `Please consolidate the memories since my last message into a single cohesive memory. Aim for about ${params.targetTokens} tokens. Write as you would to yourself — this is your autobiography, capturing the arc of what happened.`,
+    ),
+  );
+  return messages;
 }
 /**
- * Build the prompt for merging multiple summaries.
+ * Build message array for L3 creation (L2s → L3 summary).
+ *
+ * Same structure as L2 but merging L2 summaries.
  */
-export function buildMergeSummariesPrompt(params: {
-  /** Summaries to merge */
-  summaries: string[];
-  /** Older context (higher-level summaries) */
-  olderContext?: string[];
-}): string {
-  const parts: string[] = [];
-  if (params.olderContext && params.olderContext.length > 0) {
-    parts.push("## Long-term memory (for context)\n\n");
-    parts.push(params.olderContext.join("\n\n---\n\n"));
-    parts.push("\n\n---\n\n");
+export function buildL3Messages(params: {
+  context: LayeredContext;
+  summariesToMerge: string[];
+  targetTokens: number;
+}): ConversationMessage[] {
+  const messages: ConversationMessage[] = [];
+  const hasContext =
+    params.context.l3.length > 0 ||
+    params.context.l2.length > 0 ||
+    params.context.l1.length > 0 ||
+    params.context.rawTail.length > 0;
+  if (hasContext) {
+    messages.push(
+      contextManagerMessage(
+        "Loading your memories and recent conversation context.",
+      ),
+    );
   }
-  parts.push("## Memories to consolidate\n\n");
-  parts.push(params.summaries.map((s, i) => `### Memory ${i + 1}\n\n${s}`).join("\n\n---\n\n"));
-  parts.push("\n\n---\n\n");
-  parts.push("Write a consolidated memory that captures the essence of all these memories:");
+  messages.push(...summariesToAssistantMessages(params.context.l3));
+  messages.push(...summariesToAssistantMessages(params.context.l2));
+  messages.push(...summariesToAssistantMessages(params.context.l1));
+  messages.push(...rawToConversationMessages(params.context.rawTail));
+  messages.push(
+    contextManagerMessage(
+      "We are beginning memory consolidation. The following are separate memory entries that need to be merged into one cohesive memory.",
+    ),
+  );
+  messages.push(...summariesToAssistantMessages(params.summariesToMerge));
+  messages.push(
+    contextManagerMessage(
+      `Please consolidate the memories since my last message into a single cohesive memory. Aim for about ${params.targetTokens} tokens. Write as you would to yourself — this is your autobiography, capturing the arc of what happened.`,
+    ),
+  );
-  return parts.join("");
+  return messages;
 }

package/summarize.ts CHANGED Viewed

@@ -1,15 +1,18 @@
 /**
  * Summarization logic for hierarchical memory.
  *
- * Uses the LLM to generate autobiographical summaries of conversation chunks.
+ * Builds multi-turn conversation arrays and sends them to the LLM.
+ * The compression instance sees the conversation as its own history,
+ * with a Context Manager asking it to form memories.
  */
 import type { HierarchicalMemoryConfig, SummaryLevel } from "./types.js";
 import {
-  buildChunkSummarizationPrompt,
-  buildMergeSummariesPrompt,
-  MERGE_SUMMARIES_SYSTEM,
-  SUMMARIZE_CHUNK_SYSTEM,
+  type ConversationMessage,
+  type LayeredContext,
+  buildL1Messages,
+  buildL2Messages,
+  buildL3Messages,
 } from "./prompts.js";
 export type SummarizationParams = {
@@ -44,117 +47,122 @@ export function estimateMessagesTokens(
 ): number {
   let total = 0;
   for (const msg of messages) {
-    // Simple estimation based on content length
     const content =
       typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content ?? "");
-    // Rough estimate: 4 chars per token
     total += Math.ceil(content.length / 4);
   }
   return total;
 }
 /**
- * Summarize a chunk of conversation messages.
+ * Summarize a chunk of conversation messages (L0 → L1).
  */
 export async function summarizeChunk(params: {
   chunk: ChunkToSummarize;
-  priorSummaries: string[];
+  context: LayeredContext;
   config: HierarchicalMemoryConfig;
   summarization: SummarizationParams;
 }): Promise<string> {
-  const { chunk, priorSummaries, config, summarization } = params;
+  const { chunk, context, config, summarization } = params;
-  const prompt = buildChunkSummarizationPrompt({
-    priorSummaries,
-    messages: chunk.messages,
+  const messages = buildL1Messages({
+    context,
+    chunk: chunk.messages,
+    targetTokens: config.summaryTargetTokens,
   });
-  const summary = await callLlmForSummary({
-    systemPrompt: SUMMARIZE_CHUNK_SYSTEM,
-    userPrompt: prompt,
-    targetTokens: config.summaryTargetTokens,
+  return callLlm({
+    messages,
+    maxTokens: config.summaryTargetTokens,
     model: summarization.model,
     provider: summarization.provider,
     apiKey: summarization.apiKey,
     signal: summarization.signal,
   });
-  return summary;
 }
 /**
- * Merge multiple summaries into one.
+ * Merge L1 summaries into an L2 summary.
  */
-export async function mergeSummaries(params: {
-  summaries: string[];
-  olderContext: string[];
+export async function mergeL1ToL2(params: {
+  summariesToMerge: string[];
+  context: LayeredContext;
   config: HierarchicalMemoryConfig;
   summarization: SummarizationParams;
 }): Promise<string> {
-  const { summaries, olderContext, config, summarization } = params;
+  const messages = buildL2Messages({
+    context: params.context,
+    summariesToMerge: params.summariesToMerge,
+    targetTokens: params.config.summaryTargetTokens,
+  });
-  const prompt = buildMergeSummariesPrompt({
-    summaries,
-    olderContext: olderContext.length > 0 ? olderContext : undefined,
+  return callLlm({
+    messages,
+    maxTokens: params.config.summaryTargetTokens,
+    model: params.summarization.model,
+    provider: params.summarization.provider,
+    apiKey: params.summarization.apiKey,
+    signal: params.summarization.signal,
   });
+}
-  const merged = await callLlmForSummary({
-    systemPrompt: MERGE_SUMMARIES_SYSTEM,
-    userPrompt: prompt,
-    targetTokens: config.summaryTargetTokens,
-    model: summarization.model,
-    provider: summarization.provider,
-    apiKey: summarization.apiKey,
-    signal: summarization.signal,
+/**
+ * Merge L2 summaries into an L3 summary.
+ */
+export async function mergeL2ToL3(params: {
+  summariesToMerge: string[];
+  context: LayeredContext;
+  config: HierarchicalMemoryConfig;
+  summarization: SummarizationParams;
+}): Promise<string> {
+  const messages = buildL3Messages({
+    context: params.context,
+    summariesToMerge: params.summariesToMerge,
+    targetTokens: params.config.summaryTargetTokens,
   });
-  return merged;
+  return callLlm({
+    messages,
+    maxTokens: params.config.summaryTargetTokens,
+    model: params.summarization.model,
+    provider: params.summarization.provider,
+    apiKey: params.summarization.apiKey,
+    signal: params.summarization.signal,
+  });
 }
 /**
- * Call the LLM to generate a summary.
- * Uses completeSimple for a straightforward non-streaming completion.
+ * Call the LLM with a multi-turn message array.
+ * No system prompt — the conversation structure itself establishes context.
  */
-async function callLlmForSummary(params: {
-  systemPrompt: string;
-  userPrompt: string;
+async function callLlm(params: {
+  messages: ConversationMessage[];
   model: string;
   provider: string;
   apiKey: string;
+  maxTokens: number;
   signal?: AbortSignal;
-  targetTokens?: number;
 }): Promise<string> {
-  // Dynamic import to avoid loading heavy deps at module level
   const { completeSimple, getModel } = await import("@mariozechner/pi-ai");
   const model = getModel(params.provider, params.model);
   if (!model) {
     throw new Error(`Failed to resolve model: ${params.provider}/${params.model}`);
   }
-  const maxTokens = params.targetTokens ?? 1000;
   const res = await completeSimple(
     model,
     {
-      systemPrompt: params.systemPrompt,
-      messages: [
-        {
-          role: "user",
-          content: params.userPrompt,
-          timestamp: Date.now(),
-        },
-      ],
+      // No system prompt — the Context Manager pattern handles framing
+      messages: params.messages,
     },
     {
       apiKey: params.apiKey,
-      maxTokens,
+      maxTokens: params.maxTokens,
       signal: params.signal,
     },
   );
-  // Extract text from the response
   const textContent = res.content.find(
     (c): c is { type: "text"; text: string } => c.type === "text",
   );
@@ -185,6 +193,6 @@ export function getNextLevel(level: SummaryLevel): SummaryLevel | null {
     case "L2":
       return "L3";
     case "L3":
-      return null; // No level above L3
+      return null;
   }
 }

package/types.ts CHANGED Viewed

@@ -98,7 +98,7 @@ export const DEFAULT_HIERARCHICAL_MEMORY_CONFIG: HierarchicalMemoryConfig = {
   enabled: false,
   workerIntervalMs: 5 * 60 * 1000, // 5 minutes
   chunkTokens: 6000,
-  summaryTargetTokens: 1000,
+  summaryTargetTokens: 2000,
   mergeThreshold: 6,
   pruningBoundaryTokens: 30000,
   maxLevels: 3,

package/worker.ts CHANGED Viewed

@@ -3,6 +3,10 @@
  *
  * Runs on a timer, finds eligible chunks, summarizes them,
  * and merges summaries when thresholds are reached.
+ *
+ * Context construction follows the non-redundant gradient principle:
+ * each compression instance sees the full hierarchy without showing
+ * both a summary and its expanded constituents.
  */
 import fs from "node:fs";
@@ -10,6 +14,7 @@ import path from "node:path";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import type { PluginConfig } from "./config.js";
 import { resolveHierarchicalMemoryConfig } from "./config.js";
+import type { LayeredContext } from "./prompts.js";
 import { acquireSummaryLock } from "./lock.js";
 import {
   generateNextSummaryId,
@@ -23,12 +28,12 @@ import {
   estimateMessagesTokens,
   getNextLevel,
   getSourceLevel,
-  mergeSummaries,
+  mergeL1ToL2,
+  mergeL2ToL3,
   summarizeChunk,
   type SummarizationParams,
 } from "./summarize.js";
 import {
-  getAllSummariesForContext,
   getUnmergedSummaries,
   type HierarchicalMemoryConfig,
   type SummaryEntry,
@@ -152,18 +157,20 @@ async function runWorkerWithLock(params: {
           break;
         }
-        // Load prior summaries for context
-        const summaryContext = getAllSummariesForContext(index);
-        const priorSummaries = [
-          ...(await loadSummaryContents(summaryContext.L3, agentId)),
-          ...(await loadSummaryContents(summaryContext.L2, agentId)),
-          ...(await loadSummaryContents(summaryContext.L1, agentId)),
-        ];
+        // Build non-redundant layered context for L1 creation.
+        // The chunk's raw messages are shown at the bottom, so we exclude
+        // any L1 whose source messages overlap with the chunk or the raw tail.
+        const context = await buildL1Context({
+          index,
+          chunk,
+          agentId,
+          stateDir,
+        });
         // Summarize the chunk
         const summaryContent = await summarizeChunk({
           chunk,
-          priorSummaries,
+          context,
           config: memoryConfig,
           summarization,
         });
@@ -197,24 +204,32 @@ async function runWorkerWithLock(params: {
       }
     }
-    // Phase 2: Check for merges at each level
+    // Phase 2: Merge until no more merges are possible at any level.
+    // Loop because a batch of L1→L2 merges may trigger L2→L3 merges.
     let mergesPerformed = 0;
+    let didMerge = true;
+    while (didMerge && !signal?.aborted) {
+      didMerge = false;
+      for (const level of ["L1", "L2"] as const) {
+        if (signal?.aborted) {
+          break;
+        }
-    for (const level of ["L1", "L2"] as const) {
-      if (signal?.aborted) {
-        break;
-      }
-      const merged = await maybeMergeLevel({
-        index,
-        level,
-        memoryConfig,
-        summarization,
-        agentId,
-      });
+        const merged = await maybeMergeLevel({
+          index,
+          level,
+          memoryConfig,
+          summarization,
+          agentId,
+          stateDir,
+        });
-      if (merged) {
-        mergesPerformed++;
+        if (merged) {
+          mergesPerformed++;
+          didMerge = true;
+          // Save after each merge so progress isn't lost
+          await saveSummaryIndex(index, agentId);
+        }
       }
     }
@@ -248,6 +263,216 @@ async function runWorkerWithLock(params: {
   }
 }
+// =============================================================================
+// Context construction — non-redundant layered gradient
+// =============================================================================
+/**
+ * Build layered context for L1 creation (raw messages → L1).
+ *
+ * Gradient: L3 → L2 → L1 → raw tail → [chunk to compress]
+ *
+ * Anti-redundancy: exclude summaries whose children are expanded below.
+ * - L3s are excluded if their constituent L2s are shown in the L2 layer
+ * - L2s are excluded if their constituent L1s are shown in the L1 layer
+ * - L1s are excluded if their raw messages are the chunk or the tail
+ */
+async function buildL1Context(params: {
+  index: SummaryIndex;
+  chunk: ChunkToSummarize;
+  agentId: string;
+  stateDir: string;
+}): Promise<LayeredContext> {
+  const { index, chunk, agentId, stateDir } = params;
+  // Get all unmerged summaries at each level
+  const allL1 = getUnmergedSummaries(index, "L1");
+  const allL2 = getUnmergedSummaries(index, "L2");
+  const allL3 = getUnmergedSummaries(index, "L3");
+  // The chunk's source entry IDs — L1s covering these are excluded
+  const chunkEntryIds = new Set(chunk.entryIds);
+  // Load the raw tail (uncompressed messages after last summarized entry)
+  const rawTail = loadRawTail(stateDir, params.agentId);
+  // L1s to show: all unmerged L1s except those whose sourceIds overlap with
+  // the chunk being compressed (those are the raw messages shown at the bottom)
+  const shownL1 = allL1.filter(
+    (l1) => !l1.sourceIds.some((id) => chunkEntryIds.has(id)),
+  );
+  const shownL1Ids = new Set(shownL1.map((e) => e.id));
+  // L2s to show: all unmerged L2s except those whose constituent L1s are all shown
+  // (i.e., the L2 is redundant because all its L1s are already in the L1 layer)
+  const shownL2 = allL2.filter(
+    (l2) => !l2.sourceIds.every((id) => shownL1Ids.has(id)),
+  );
+  const shownL2Ids = new Set(shownL2.map((e) => e.id));
+  // L3s to show: all unmerged L3s except those whose constituent L2s are all shown
+  const shownL3 = allL3.filter(
+    (l3) => !l3.sourceIds.every((id) => shownL2Ids.has(id)),
+  );
+  return {
+    l3: await loadSummaryContents(shownL3, agentId),
+    l2: await loadSummaryContents(shownL2, agentId),
+    l1: await loadSummaryContents(shownL1, agentId),
+    rawTail,
+  };
+}
+/**
+ * Build layered context for L2 creation (L1s → L2).
+ *
+ * Gradient: L3 → L2 → recent L1s → [L1s to merge] → raw tail
+ *
+ * Anti-redundancy:
+ * - L2s are excluded if their constituent L1s are shown (either in
+ *   the recent L1 layer or in the merge batch)
+ * - L3s are excluded if their constituent L2s are shown
+ */
+async function buildL2Context(params: {
+  index: SummaryIndex;
+  toMerge: SummaryEntry[];
+  agentId: string;
+  stateDir: string;
+}): Promise<LayeredContext> {
+  const { index, toMerge, agentId, stateDir } = params;
+  const allL1 = getUnmergedSummaries(index, "L1");
+  const allL2 = getUnmergedSummaries(index, "L2");
+  const allL3 = getUnmergedSummaries(index, "L3");
+  const mergeIds = new Set(toMerge.map((e) => e.id));
+  // Recent L1s: all unmerged L1s that are NOT being merged
+  const recentL1 = allL1.filter((l1) => !mergeIds.has(l1.id));
+  const allVisibleL1Ids = new Set([
+    ...recentL1.map((e) => e.id),
+    ...toMerge.map((e) => e.id),
+  ]);
+  // L2s: exclude those whose constituent L1s are all visible
+  // (either as recent L1s or as the merge batch)
+  const shownL2 = allL2.filter(
+    (l2) => !l2.sourceIds.every((id) => allVisibleL1Ids.has(id)),
+  );
+  const shownL2Ids = new Set(shownL2.map((e) => e.id));
+  // L3s: exclude those whose constituent L2s are all visible
+  const shownL3 = allL3.filter(
+    (l3) => !l3.sourceIds.every((id) => shownL2Ids.has(id)),
+  );
+  const rawTail = loadRawTail(stateDir, agentId);
+  return {
+    l3: await loadSummaryContents(shownL3, agentId),
+    l2: await loadSummaryContents(shownL2, agentId),
+    l1: await loadSummaryContents(recentL1, agentId),
+    rawTail,
+  };
+}
+/**
+ * Build layered context for L3 creation (L2s → L3).
+ *
+ * Gradient: existing L3s → recent L2s → recent L1s → [L2s to merge] → raw tail
+ *
+ * Anti-redundancy:
+ * - Existing L3s are excluded if their constituent L2s are shown
+ *   (either as recent L2s or in the merge batch)
+ */
+async function buildL3Context(params: {
+  index: SummaryIndex;
+  toMerge: SummaryEntry[];
+  agentId: string;
+  stateDir: string;
+}): Promise<LayeredContext> {
+  const { index, toMerge, agentId, stateDir } = params;
+  const allL1 = getUnmergedSummaries(index, "L1");
+  const allL2 = getUnmergedSummaries(index, "L2");
+  const allL3 = getUnmergedSummaries(index, "L3");
+  const mergeIds = new Set(toMerge.map((e) => e.id));
+  // Recent L2s: unmerged L2s not being merged
+  const recentL2 = allL2.filter((l2) => !mergeIds.has(l2.id));
+  const allVisibleL2Ids = new Set([
+    ...recentL2.map((e) => e.id),
+    ...toMerge.map((e) => e.id),
+  ]);
+  // L3s: exclude those whose constituent L2s are all visible
+  const shownL3 = allL3.filter(
+    (l3) => !l3.sourceIds.every((id) => allVisibleL2Ids.has(id)),
+  );
+  const rawTail = loadRawTail(stateDir, agentId);
+  return {
+    l3: await loadSummaryContents(shownL3, agentId),
+    l2: await loadSummaryContents(recentL2, agentId),
+    l1: await loadSummaryContents(allL1, agentId),
+    rawTail,
+  };
+}
+/**
+ * Load the raw uncompressed message tail from the active session.
+ * Returns messages after the last summarized entry.
+ */
+function loadRawTail(
+  stateDir: string,
+  agentId: string,
+): Array<{ role: string; content?: unknown }> {
+  const activeSessionId = getActiveSessionId(stateDir, agentId);
+  if (!activeSessionId) {
+    return [];
+  }
+  const sessionFile = resolveSessionTranscriptPath(stateDir, agentId, activeSessionId);
+  let sessionManager: SessionManager;
+  try {
+    sessionManager = SessionManager.open(sessionFile);
+  } catch {
+    return [];
+  }
+  const entries = sessionManager.getEntries();
+  if (entries.length === 0) {
+    return [];
+  }
+  // Find the last summarized entry in this session
+  const storePath = resolveSessionStorePath(stateDir, agentId);
+  const store = loadSessionStoreSimple(storePath);
+  // We need the per-session progress, but we don't have the index here.
+  // Instead, scan from the end — raw tail is everything not yet summarized.
+  // For simplicity, load the last N messages as raw tail context.
+  // A more precise approach would thread the index through, but this gives
+  // the model recent conversation context regardless.
+  const messages: Array<{ role: string; content?: unknown }> = [];
+  // Take the last ~20 messages as raw tail context
+  const startIdx = Math.max(0, entries.length - 20);
+  for (let i = startIdx; i < entries.length; i++) {
+    const entry = entries[i];
+    if (entry.type === "message") {
+      messages.push(entry.message as { role: string; content?: unknown });
+    }
+  }
+  return messages;
+}
+// =============================================================================
+// Chunk discovery
+// =============================================================================
 /**
  * Find chunks of messages eligible for summarization.
  */
@@ -358,6 +583,10 @@ async function findEligibleChunks(params: {
   }
 }
+// =============================================================================
+// Merge logic
+// =============================================================================
 /**
  * Merge summaries at a level if threshold is reached.
  */
@@ -367,8 +596,9 @@ async function maybeMergeLevel(params: {
   memoryConfig: HierarchicalMemoryConfig;
   summarization: SummarizationParams;
   agentId: string;
+  stateDir: string;
 }): Promise<boolean> {
-  const { index, level, memoryConfig, summarization, agentId } = params;
+  const { index, level, memoryConfig, summarization, agentId, stateDir } = params;
   const unmerged = getUnmergedSummaries(index, level);
@@ -384,26 +614,30 @@ async function maybeMergeLevel(params: {
   // Take exactly mergeThreshold entries to maintain fixed merge cadence
   const toMerge = unmerged.slice(0, memoryConfig.mergeThreshold);
-  // Load summary contents
+  // Load summary contents for the batch being merged
   const summaryContents = await loadSummaryContents(toMerge, agentId);
-  // Load older context (unmerged higher-level summaries only)
-  const olderContext: string[] = [];
+  // Build non-redundant layered context for this merge level
+  let mergedContent: string;
   if (nextLevel === "L2") {
-    olderContext.push(...(await loadSummaryContents(getUnmergedSummaries(index, "L3"), agentId)));
-  }
-  if (nextLevel === "L3") {
-    // L3 has no older context
+    const context = await buildL2Context({ index, toMerge, agentId, stateDir });
+    mergedContent = await mergeL1ToL2({
+      summariesToMerge: summaryContents,
+      context,
+      config: memoryConfig,
+      summarization,
+    });
+  } else {
+    // nextLevel === "L3"
+    const context = await buildL3Context({ index, toMerge, agentId, stateDir });
+    mergedContent = await mergeL2ToL3({
+      summariesToMerge: summaryContents,
+      context,
+      config: memoryConfig,
+      summarization,
+    });
   }
-  // Merge summaries
-  const mergedContent = await mergeSummaries({
-    summaries: summaryContents,
-    olderContext,
-    config: memoryConfig,
-    summarization,
-  });
   // Create merged entry
   const mergedId = generateNextSummaryId(index, nextLevel);
   const mergedEntry: SummaryEntry = {
@@ -428,6 +662,10 @@ async function maybeMergeLevel(params: {
   return true;
 }
+// =============================================================================
+// Summarization params
+// =============================================================================
 /**
  * Resolve parameters needed for summarization.
  * Uses the API key from plugin config instead of the complex auth system.
@@ -458,7 +696,9 @@ function resolveSummarizationParams(params: {
   };
 }
-// --- Inline session helpers (replacing core imports) ---
+// =============================================================================
+// Inline session helpers (replacing core imports)
+// =============================================================================
 /** Resolve the session store path for an agent */
 function resolveSessionStorePath(stateDir: string, agentId?: string): string {