npm - @mastra/memory - Version diff - 1.9.0-alpha.1 → 1.9.0 - Mend

@mastra/memory 1.9.0-alpha.1 → 1.9.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (51)

package/CHANGELOG.md CHANGED

@@ -1,5 +1,66 @@
 # @mastra/memory
+## 1.9.0
+### Minor Changes
+- Added experimental retrieval-mode recall tooling for observational memory. ([#14437](https://github.com/mastra-ai/mastra/pull/14437))
+  When `observationalMemory.retrieval` is enabled with `scope: 'thread'`, observation groups store colon-delimited message ranges (`startId:endId`) pointing back to the raw messages they were derived from. A `recall` tool is registered that lets agents retrieve those source messages via cursor-based pagination.
+  The recall tool supports:
+  - **Detail levels**: `detail: 'low'` (default) returns truncated text with part indices; `detail: 'high'` returns full content clamped to one part per call with continuation hints
+  - **Part-level fetch**: `partIndex` targets a single message part at full detail
+  - **Pagination flags**: `hasNextPage` and `hasPrevPage` in results
+  - **Token limiting**: results are capped at a token budget with `truncated` and `tokenOffset` reporting
+  - **Smart range detection**: passing a range as a cursor returns a helpful hint explaining how to extract individual IDs
+- Added opt-in Observational Memory thread titles. ([#14436](https://github.com/mastra-ai/mastra/pull/14436))
+  When enabled, the Observer suggests a short thread title and updates it as the conversation topic changes. Harness consumers can detect these updates via the new `om_thread_title_updated` event.
+  **Example**
+  ```ts
+  const memory = new Memory({
+    options: {
+      observationalMemory: {
+        observation: {
+          threadTitle: true,
+        },
+      },
+    },
+  });
+  ```
+### Patch Changes
+- Improved observational memory so completed tasks and answered questions are explicitly tracked and retained, reducing repeated follow-up on resolved topics. ([#14419](https://github.com/mastra-ai/mastra/pull/14419))
+- Updated dependencies [[`cb611a1`](https://github.com/mastra-ai/mastra/commit/cb611a1e89a4f4cf74c97b57e0c27bb56f2eceb5), [`da93115`](https://github.com/mastra-ai/mastra/commit/da931155c1a9bc63d455d3d86b4ec984db5991fe), [`b71bce1`](https://github.com/mastra-ai/mastra/commit/b71bce144912ed33f76c52a94e594988a649c3e1), [`62d1d3c`](https://github.com/mastra-ai/mastra/commit/62d1d3cc08fe8182e7080237fd975de862ec8c91), [`9e1a3ed`](https://github.com/mastra-ai/mastra/commit/9e1a3ed07cfafb5e8e19a796ce0bee817002d7c0), [`8681ecb`](https://github.com/mastra-ai/mastra/commit/8681ecb86184d5907267000e4576cc442a9a83fc), [`28d0249`](https://github.com/mastra-ai/mastra/commit/28d0249295782277040ad1e0d243e695b7ab1ce4), [`cd7b568`](https://github.com/mastra-ai/mastra/commit/cd7b568fe427b1b4838abe744fa5367a47539db3), [`681ee1c`](https://github.com/mastra-ai/mastra/commit/681ee1c811359efd1b8bebc4bce35b9bb7b14bec), [`bb0f09d`](https://github.com/mastra-ai/mastra/commit/bb0f09dbac58401b36069f483acf5673202db5b5), [`a579f7a`](https://github.com/mastra-ai/mastra/commit/a579f7a31e582674862b5679bc79af7ccf7429b8), [`5f7e9d0`](https://github.com/mastra-ai/mastra/commit/5f7e9d0db664020e1f3d97d7d18c6b0b9d4843d0), [`d7f14c3`](https://github.com/mastra-ai/mastra/commit/d7f14c3285cd253ecdd5f58139b7b6cbdf3678b5), [`0efe12a`](https://github.com/mastra-ai/mastra/commit/0efe12a5f008a939a1aac71699486ba40138054e)]:
+  - @mastra/core@1.15.0
+  - @mastra/schema-compat@1.2.6
+## 1.9.0-alpha.2
+### Minor Changes
+- Added experimental retrieval-mode recall tooling for observational memory. ([#14437](https://github.com/mastra-ai/mastra/pull/14437))
+  When `observationalMemory.retrieval` is enabled with `scope: 'thread'`, observation groups store colon-delimited message ranges (`startId:endId`) pointing back to the raw messages they were derived from. A `recall` tool is registered that lets agents retrieve those source messages via cursor-based pagination.
+  The recall tool supports:
+  - **Detail levels**: `detail: 'low'` (default) returns truncated text with part indices; `detail: 'high'` returns full content clamped to one part per call with continuation hints
+  - **Part-level fetch**: `partIndex` targets a single message part at full detail
+  - **Pagination flags**: `hasNextPage` and `hasPrevPage` in results
+  - **Token limiting**: results are capped at a token budget with `truncated` and `tokenOffset` reporting
+  - **Smart range detection**: passing a range as a cursor returns a helpful hint explaining how to extract individual IDs
+### Patch Changes
+- Updated dependencies [[`da93115`](https://github.com/mastra-ai/mastra/commit/da931155c1a9bc63d455d3d86b4ec984db5991fe), [`0efe12a`](https://github.com/mastra-ai/mastra/commit/0efe12a5f008a939a1aac71699486ba40138054e)]:
+  - @mastra/core@1.15.0-alpha.4
 ## 1.9.0-alpha.1
 ### Minor Changes

package/dist/{chunk-5SMKVGJP.js → chunk-JJBSFPC5.js} RENAMED

@@ -6,8 +6,8 @@ import { resolveModelConfig } from '@mastra/core/llm';
 import { setThreadOMMetadata, getThreadOMMetadata, parseMemoryRequestContext } from '@mastra/core/memory';
 import { MessageHistory } from '@mastra/core/processors';
 import xxhash from 'xxhash-wasm';
+import { randomBytes, createHash, randomUUID } from 'crypto';
 import { estimateTokenCount } from 'tokenx';
-import { createHash, randomUUID } from 'crypto';
 import { AsyncLocalStorage } from 'async_hooks';
 import imageSize from 'image-size';
@@ -312,6 +312,222 @@ function createThreadUpdateMarker(params) {
     }
   };
 }
+var OBSERVATION_GROUP_PATTERN = /<observation-group\s([^>]*)>([\s\S]*?)<\/observation-group>/g;
+var ATTRIBUTE_PATTERN = /([\w][\w-]*)="([^"]*)"/g;
+var REFLECTION_GROUP_SPLIT_PATTERN = /^##\s+Group\s+/m;
+function parseObservationGroupAttributes(attributeString) {
+  const attributes = {};
+  for (const match of attributeString.matchAll(ATTRIBUTE_PATTERN)) {
+    const [, key, value] = match;
+    if (key && value !== void 0) {
+      attributes[key] = value;
+    }
+  }
+  return attributes;
+}
+function parseReflectionObservationGroupSections(content) {
+  const normalizedContent = content.trim();
+  if (!normalizedContent || !REFLECTION_GROUP_SPLIT_PATTERN.test(normalizedContent)) {
+    return [];
+  }
+  return normalizedContent.split(REFLECTION_GROUP_SPLIT_PATTERN).map((section) => section.trim()).filter(Boolean).map((section) => {
+    const newlineIndex = section.indexOf("\n");
+    const heading = (newlineIndex >= 0 ? section.slice(0, newlineIndex) : section).trim();
+    const body = (newlineIndex >= 0 ? section.slice(newlineIndex + 1) : "").trim();
+    return {
+      heading,
+      body: stripReflectionGroupMetadata(body)
+    };
+  });
+}
+function stripReflectionGroupMetadata(body) {
+  return body.replace(/^_range:\s*`[^`]*`_\s*\n?/m, "").trim();
+}
+function generateAnchorId() {
+  return randomBytes(8).toString("hex");
+}
+function wrapInObservationGroup(observations, range, id = generateAnchorId(), sourceGroupIds) {
+  const content = observations.trim();
+  const sourceGroupIdsAttr = sourceGroupIds?.length ? ` source-group-ids="${sourceGroupIds.join(",")}"` : "";
+  return `<observation-group id="${id}" range="${range}"${sourceGroupIdsAttr}>
+${content}
+</observation-group>`;
+}
+function parseObservationGroups(observations) {
+  if (!observations) {
+    return [];
+  }
+  const groups = [];
+  let match;
+  while ((match = OBSERVATION_GROUP_PATTERN.exec(observations)) !== null) {
+    const attributes = parseObservationGroupAttributes(match[1] ?? "");
+    const id = attributes.id;
+    const range = attributes.range;
+    if (!id || !range) {
+      continue;
+    }
+    groups.push({
+      id,
+      range,
+      content: match[2].trim(),
+      sourceGroupIds: attributes["source-group-ids"]?.split(",").map((part) => part.trim()).filter(Boolean)
+    });
+  }
+  return groups;
+}
+function stripObservationGroups(observations) {
+  if (!observations) {
+    return observations;
+  }
+  return observations.replace(OBSERVATION_GROUP_PATTERN, (_match, _attributes, content) => content.trim()).replace(/\n{3,}/g, "\n\n").trim();
+}
+function combineObservationGroupRanges(groups) {
+  return Array.from(
+    new Set(
+      groups.flatMap((group) => group.range.split(",")).map((range) => range.trim()).filter(Boolean)
+    )
+  ).join(",");
+}
+function renderObservationGroupsForReflection(observations) {
+  const groups = parseObservationGroups(observations);
+  if (groups.length === 0) {
+    return null;
+  }
+  const groupsByContent = new Map(groups.map((g) => [g.content.trim(), g]));
+  const result = observations.replace(OBSERVATION_GROUP_PATTERN, (_match, _attrs, content) => {
+    const group = groupsByContent.get(content.trim());
+    if (!group) return content.trim();
+    return `## Group \`${group.id}\`
+_range: \`${group.range}\`_
+${group.content}`;
+  });
+  return result.replace(/\n{3,}/g, "\n\n").trim();
+}
+function getCanonicalGroupId(sectionHeading, fallbackIndex) {
+  const match = sectionHeading.match(/`([^`]+)`/);
+  return match?.[1]?.trim() || `derived-group-${fallbackIndex + 1}`;
+}
+function deriveObservationGroupProvenance(content, groups) {
+  const sections = parseReflectionObservationGroupSections(content);
+  if (sections.length === 0 || groups.length === 0) {
+    return [];
+  }
+  return sections.map((section, index) => {
+    const bodyLines = new Set(
+      section.body.split("\n").map((line) => line.trim()).filter(Boolean)
+    );
+    const matchingGroups = groups.filter((group) => {
+      const groupLines = group.content.split("\n").map((line) => line.trim()).filter(Boolean);
+      return groupLines.some((line) => bodyLines.has(line));
+    });
+    const fallbackGroup = groups[Math.min(index, groups.length - 1)];
+    const resolvedGroups = matchingGroups.length > 0 ? matchingGroups : fallbackGroup ? [fallbackGroup] : [];
+    const sourceGroupIds = Array.from(
+      new Set(resolvedGroups.flatMap((group) => [group.id, ...group.sourceGroupIds ?? []]))
+    );
+    const canonicalGroupId = getCanonicalGroupId(section.heading, index);
+    return {
+      id: canonicalGroupId,
+      range: combineObservationGroupRanges(resolvedGroups),
+      content: section.body,
+      sourceGroupIds
+    };
+  });
+}
+function reconcileObservationGroupsFromReflection(content, sourceObservations) {
+  const sourceGroups = parseObservationGroups(sourceObservations);
+  if (sourceGroups.length === 0) {
+    return null;
+  }
+  const normalizedContent = content.trim();
+  if (!normalizedContent) {
+    return "";
+  }
+  const derivedGroups = deriveObservationGroupProvenance(normalizedContent, sourceGroups);
+  if (derivedGroups.length > 0) {
+    return derivedGroups.map((group) => wrapInObservationGroup(group.content, group.range, group.id, group.sourceGroupIds)).join("\n\n");
+  }
+  return wrapInObservationGroup(
+    normalizedContent,
+    combineObservationGroupRanges(sourceGroups),
+    generateAnchorId(),
+    Array.from(new Set(sourceGroups.flatMap((group) => [group.id, ...group.sourceGroupIds ?? []])))
+  );
+}
+// src/processors/observational-memory/anchor-ids.ts
+var ANCHOR_ID_PATTERN = /^\[(O\d+(?:-N\d+)?)\]\s*/;
+var OBSERVATION_DATE_HEADER_PATTERN = /^\s*Date:\s+/;
+var XML_TAG_PATTERN = /^\s*<\/?[a-z][^>]*>\s*$/i;
+var MARKDOWN_GROUP_HEADING_PATTERN = /^\s*##\s+Group\s+`[^`]+`\s*$/;
+var MARKDOWN_GROUP_METADATA_PATTERN = /^\s*_range:\s*`[^`]*`_\s*$/;
+function buildEphemeralAnchorId(topLevelCounter, nestedCounter) {
+  return nestedCounter === 0 ? `O${topLevelCounter}` : `O${topLevelCounter}-N${nestedCounter}`;
+}
+function parseAnchorId(line) {
+  const match = line.match(ANCHOR_ID_PATTERN);
+  return match?.[1] ?? null;
+}
+function shouldAnchorLine(line) {
+  const trimmed = line.trim();
+  if (!trimmed) {
+    return false;
+  }
+  if (parseAnchorId(trimmed)) {
+    return false;
+  }
+  if (OBSERVATION_DATE_HEADER_PATTERN.test(trimmed)) {
+    return false;
+  }
+  if (XML_TAG_PATTERN.test(trimmed)) {
+    return false;
+  }
+  if (MARKDOWN_GROUP_HEADING_PATTERN.test(trimmed) || MARKDOWN_GROUP_METADATA_PATTERN.test(trimmed)) {
+    return false;
+  }
+  return true;
+}
+function getIndentationDepth(line) {
+  const leadingWhitespace = line.match(/^\s*/)?.[0] ?? "";
+  return Math.floor(leadingWhitespace.replace(/\t/g, "  ").length / 2);
+}
+function injectAnchorIds(observations) {
+  if (!observations) {
+    return observations;
+  }
+  const lines = observations.split("\n");
+  let topLevelCounter = 0;
+  let nestedCounter = 0;
+  let changed = false;
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!shouldAnchorLine(line)) {
+      continue;
+    }
+    const indentationDepth = getIndentationDepth(line);
+    if (indentationDepth === 0) {
+      topLevelCounter += 1;
+      nestedCounter = 0;
+    } else {
+      if (topLevelCounter === 0) {
+        topLevelCounter = 1;
+      }
+      nestedCounter += 1;
+    }
+    const anchorId = buildEphemeralAnchorId(topLevelCounter, nestedCounter);
+    const leadingWhitespace = line.match(/^\s*/)?.[0] ?? "";
+    lines[i] = `${leadingWhitespace}[${anchorId}] ${line.slice(leadingWhitespace.length)}`;
+    changed = true;
+  }
+  return changed ? lines.join("\n") : observations;
+}
+function stripEphemeralAnchorIds(observations) {
+  if (!observations) {
+    return observations;
+  }
+  return observations.replace(/(^|\n)([^\S\n]*)\[(O\d+(?:-N\d+)?)\][^\S\n]*/g, "$1$2");
+}
 var ENCRYPTED_CONTENT_KEY = "encryptedContent";
 var ENCRYPTED_CONTENT_REDACTION_THRESHOLD = 256;
 var DEFAULT_OBSERVER_TOOL_RESULT_MAX_TOKENS = 1e4;
@@ -378,9 +594,8 @@ function truncateStringByTokens(text, maxTokens) {
   }
   const buildCandidate = (sliceEnd) => {
     const visible = text.slice(0, sliceEnd);
-    const omittedChars = text.length - sliceEnd;
     return `${visible}
-... [truncated ~${totalTokens - estimateTokenCount(visible)} tokens / ${omittedChars} characters]`;
+... [truncated ~${totalTokens - estimateTokenCount(visible)} tokens]`;
   };
   let low = 0;
   let high = text.length;
@@ -1379,7 +1594,7 @@ function extractCurrentTask(observations) {
   return content || null;
 }
 function optimizeObservationsForContext(observations) {
-  let optimized = observations;
+  let optimized = stripEphemeralAnchorIds(observations);
   optimized = optimized.replace(/🟡\s*/g, "");
   optimized = optimized.replace(/🟢\s*/g, "");
   optimized = optimized.replace(/\[(?![\d\s]*items collapsed)[^\]]+\]/g, "");
@@ -1569,9 +1784,11 @@ Your current detail level was a 10/10, lets aim for a 4/10 detail level.
 };
 function buildReflectorPrompt(observations, manualPrompt, compressionLevel, skipContinuationHints) {
   const level = typeof compressionLevel === "number" ? compressionLevel : compressionLevel ? 1 : 0;
+  const reflectionView = renderObservationGroupsForReflection(observations) ?? observations;
+  const anchoredObservations = injectAnchorIds(reflectionView);
   let prompt = `## OBSERVATIONS TO REFLECT ON
-${observations}
+${anchoredObservations}
 ---
@@ -1596,7 +1813,7 @@ IMPORTANT: Do NOT include <current-task> or <suggested-response> sections in you
   }
   return prompt;
 }
-function parseReflectorOutput(output) {
+function parseReflectorOutput(output, sourceObservations) {
   if (detectDegenerateRepetition(output)) {
     return {
       observations: "",
@@ -1604,9 +1821,10 @@ function parseReflectorOutput(output) {
     };
   }
   const parsed = parseReflectorSectionXml(output);
-  const observations = sanitizeObservationLines(parsed.observations || "");
+  const sanitizedObservations = sanitizeObservationLines(stripEphemeralAnchorIds(parsed.observations || ""));
+  const reconciledObservations = sourceObservations ? reconcileObservationGroupsFromReflection(sanitizedObservations, sourceObservations) : null;
   return {
-    observations,
+    observations: reconciledObservations ?? sanitizedObservations,
     suggestedContinuation: parsed.suggestedResponse || void 0
     // Note: Reflector's currentTask is not used - thread metadata preserves per-thread tasks
   };
@@ -3102,7 +3320,24 @@ if (OM_DEBUG_LOG) {
     _origConsoleError.apply(console, args);
   };
 }
+function messageHasVisibleContent(msg) {
+  const content = msg.content;
+  if (content?.parts && Array.isArray(content.parts)) {
+    return content.parts.some((p) => {
+      const t = p?.type;
+      return t && !t.startsWith("data-") && t !== "step-start";
+    });
+  }
+  if (content?.content) return true;
+  return false;
+}
+function buildMessageRange(messages) {
+  const first = messages.find(messageHasVisibleContent) ?? messages[0];
+  const last = [...messages].reverse().find(messageHasVisibleContent) ?? messages[messages.length - 1];
+  return `${first.id}:${last.id}`;
+}
 var OBSERVATIONAL_MEMORY_DEFAULTS = {
+  retrieval: false,
   observation: {
     model: "google/gemini-2.5-flash",
     messageTokens: 3e4,
@@ -3157,12 +3392,54 @@ KNOWLEDGE UPDATES: When asked about current state (e.g., "where do I currently..
 PLANNED ACTIONS: If the user stated they planned to do something (e.g., "I'm going to...", "I'm looking forward to...", "I will...") and the date they planned to do it is now in the past (check the relative time like "3 weeks ago"), assume they completed the action unless there's evidence they didn't. For example, if someone said "I'll start my new diet on Monday" and that was 2 weeks ago, assume they started the diet.
 MOST RECENT USER INPUT: Treat the most recent user message as the highest-priority signal for what to do next. Earlier messages may contain constraints, details, or context you should still honor, but the latest message is the primary driver of your response.`;
+var OBSERVATION_RETRIEVAL_INSTRUCTIONS = `## Recall \u2014 looking up source messages
+Your memory is comprised of observations which are sometimes wrapped in <observation-group> xml tags containing ranges like <observation-group range="startId:endId">. These ranges point back to the raw messages that each observation group was derived from. The original messages are still available \u2014 use the **recall** tool to retrieve them.
+### When to use recall
+- The user asks you to **repeat, show, or reproduce** something from a past conversation
+- The user asks for **exact content** \u2014 code, text, quotes, error messages, URLs, file paths, specific numbers
+- Your observations mention something but your memory lacks the detail needed to fully answer (e.g. you know a blog post was shared but only have a summary of it)
+- You want to **verify or expand on** an observation before responding
+**Default to using recall when the user references specific past content.** Your observations capture the gist, not the details. If there's any doubt whether your memory is complete enough, use recall.
+### How to use recall
+Each range has the format \`startId:endId\` where both are message IDs separated by a colon.
+1. Find the observation group relevant to the user's question and extract the start or end ID from its range.
+2. Call \`recall\` with that ID as the \`cursor\`.
+3. Use \`page: 1\` (or omit) to read forward from the cursor, \`page: -1\` to read backward.
+4. If the first page doesn't have what you need, increment the page number to keep paginating.
+5. Check \`hasNextPage\`/\`hasPrevPage\` in the result to know if more pages exist in each direction.
+### Detail levels
+By default recall returns **low** detail: truncated text and tool names only. Each message shows its ID and each part has a positional index like \`[p0]\`, \`[p1]\`, etc.
+- Use \`detail: "high"\` to get full message content including tool arguments and results. This will only return the high detail version of a single message part at a time.
+- Use \`partIndex\` with a cursor to fetch a single part at full detail \u2014 for example, to read one specific tool result or code block without loading every part.
+If the result says \`truncated: true\`, the output was cut to fit the token budget. You can paginate or use \`partIndex\` to target specific content.
+### Following up on truncated parts
+Low-detail results may include truncation hints like:
+\`[truncated \u2014 call recall cursor="..." partIndex=N detail="high" for full content]\`
+**When you see these hints and need the full content, make the exact call described in the hint.** This is the normal workflow: first recall at low detail to scan, then drill into specific parts at high detail. Do not stop at the low-detail result if the user asked for exact content.
+### When recall is NOT needed
+- The user is asking for a high-level summary and your observations already cover it
+- The question is about general preferences or facts that don't require source text
+- There is no relevant range in your observations for the topic
+Observation groups with range IDs and your recall tool allows you to think back and remember details you're fuzzy on.`;
 var ObservationalMemory = class _ObservationalMemory {
   id = "observational-memory";
   name = "Observational Memory";
   storage;
   tokenCounter;
   scope;
+  retrieval = false;
   observationConfig;
   reflectionConfig;
   onDebugEvent;
@@ -3475,6 +3752,7 @@ var ObservationalMemory = class _ObservationalMemory {
     this.shouldObscureThreadIds = config.obscureThreadIds || false;
     this.storage = config.storage;
     this.scope = config.scope ?? "thread";
+    this.retrieval = this.scope === "thread" && (config.retrieval ?? OBSERVATIONAL_MEMORY_DEFAULTS.retrieval);
     const resolveModel = (m) => m === "default" ? OBSERVATIONAL_MEMORY_DEFAULTS.observation.model : m;
     const observationModel = resolveModel(config.model) ?? resolveModel(config.observation?.model) ?? resolveModel(config.reflection?.model);
     const reflectionModel = resolveModel(config.model) ?? resolveModel(config.reflection?.model) ?? resolveModel(config.observation?.model);
@@ -3578,6 +3856,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
   get config() {
     return {
       scope: this.scope,
+      retrieval: this.retrieval,
       observation: {
         messageTokens: this.observationConfig.messageTokens,
         previousObserverTokens: this.observationConfig.previousObserverTokens
@@ -3834,7 +4113,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
         config: {
           observation: this.observationConfig,
           reflection: this.reflectionConfig,
-          scope: this.scope
+          scope: this.scope,
+          retrieval: this.retrieval
         },
         observedTimezone
       });
@@ -4475,7 +4755,7 @@ ${unreflectedContent}` : bufferedReflection;
         totalUsage.outputTokens += usage.outputTokens ?? 0;
         totalUsage.totalTokens += usage.totalTokens ?? 0;
       }
-      parsed = parseReflectorOutput(result.text);
+      parsed = parseReflectorOutput(result.text, observations);
       if (parsed.degenerate) {
         omDebug(
           `[OM:callReflector] attempt #${attemptNumber}: degenerate repetition detected, treating as compression failure`
@@ -4542,14 +4822,18 @@ ${unreflectedContent}` : bufferedReflection;
    * @param suggestedResponse - Thread-specific suggested response (from thread metadata)
    * @param unobservedContextBlocks - Formatted <unobserved-context> blocks from other threads
    */
-  formatObservationsForContext(observations, currentTask, suggestedResponse, unobservedContextBlocks, currentDate) {
-    let optimized = optimizeObservationsForContext(observations);
+  formatObservationsForContext(observations, currentTask, suggestedResponse, unobservedContextBlocks, currentDate, retrieval = false) {
+    let optimized = retrieval ? renderObservationGroupsForReflection(observations) ?? observations : optimizeObservationsForContext(observations);
     if (currentDate) {
       optimized = addRelativeTimeToObservations(optimized, currentDate);
     }
-    const messages = [`${OBSERVATION_CONTEXT_PROMPT}
+    const messages = [
+      `${OBSERVATION_CONTEXT_PROMPT}
-${OBSERVATION_CONTEXT_INSTRUCTIONS}`];
+${OBSERVATION_CONTEXT_INSTRUCTIONS}${retrieval ? `
+${OBSERVATION_RETRIEVAL_INSTRUCTIONS}` : ""}`
+    ];
     if (unobservedContextBlocks) {
       messages.push(
         `The following content is from OTHER conversations different from the current conversation, they're here for reference,  but they're not necessarily your focus:
@@ -5034,7 +5318,8 @@ ${suggestedResponse}
       currentTask,
       suggestedResponse,
       unobservedContextBlocks,
-      currentDate
+      currentDate,
+      this.retrieval
     );
     messageList.clearSystemMessages("observational-memory");
     messageList.addSystem(observationSystemMessages, "observational-memory");
@@ -5702,11 +5987,12 @@ ${formattedMessages}
    * Wrap observations in a thread attribution tag.
    * Used in resource scope to track which thread observations came from.
    */
-  async wrapWithThreadTag(threadId, observations) {
+  async wrapWithThreadTag(threadId, observations, messageRange) {
     const cleanObservations = this.stripThreadTags(observations);
+    const groupedObservations = this.retrieval && messageRange ? wrapInObservationGroup(cleanObservations, messageRange) : cleanObservations;
     const obscuredId = await this.representThreadIDInContext(threadId);
     return `<thread id="${obscuredId}">
-${cleanObservations}
+${groupedObservations}
 </thread>`;
   }
   /**
@@ -5843,9 +6129,10 @@ ${threadClose}`;
       });
       const lastObservedAt = this.getMaxMessageTimestamp(messagesToObserve);
       const existingObservations = freshRecord?.activeObservations ?? record.activeObservations ?? "";
+      const messageRange = this.retrieval ? buildMessageRange(messagesToObserve) : void 0;
       let newObservations;
       if (this.scope === "resource") {
-        const threadSection = await this.wrapWithThreadTag(threadId, result.observations);
+        const threadSection = await this.wrapWithThreadTag(threadId, result.observations, messageRange);
         newObservations = this.replaceOrAppendThreadSection(
           existingObservations,
           threadId,
@@ -5853,7 +6140,8 @@ ${threadClose}`;
           lastObservedAt
         );
       } else {
-        newObservations = existingObservations ? `${existingObservations}${_ObservationalMemory.createMessageBoundary(lastObservedAt)}${result.observations}` : result.observations;
+        const groupedObservations = this.retrieval && messageRange ? wrapInObservationGroup(result.observations, messageRange) : result.observations;
+        newObservations = existingObservations ? `${existingObservations}${_ObservationalMemory.createMessageBoundary(lastObservedAt)}${groupedObservations}` : groupedObservations;
       }
       let totalTokenCount = this.tokenCounter.countObservations(newObservations);
       const cycleObservationTokens = this.tokenCounter.countObservations(result.observations);
@@ -6161,11 +6449,12 @@ ${threadClose}`;
         }
       }
     }
+    const messageRange = this.retrieval ? buildMessageRange(messagesToBuffer) : void 0;
     let newObservations;
     if (this.scope === "resource") {
-      newObservations = await this.wrapWithThreadTag(threadId, result.observations);
+      newObservations = await this.wrapWithThreadTag(threadId, result.observations, messageRange);
     } else {
-      newObservations = result.observations;
+      newObservations = this.retrieval && messageRange ? wrapInObservationGroup(result.observations, messageRange) : result.observations;
     }
     const newTokenCount = this.tokenCounter.countObservations(newObservations);
     const newMessageIds = messagesToBuffer.map((m) => m.id);
@@ -6791,7 +7080,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
         const { threadId, threadMessages, result } = obsResult;
         cycleObservationTokens += this.tokenCounter.countObservations(result.observations);
         const threadLastObservedAt = this.getMaxMessageTimestamp(threadMessages);
-        const threadSection = await this.wrapWithThreadTag(threadId, result.observations);
+        const messageRange = this.retrieval ? buildMessageRange(threadMessages) : void 0;
+        const threadSection = await this.wrapWithThreadTag(threadId, result.observations, messageRange);
         currentObservations = this.replaceOrAppendThreadSection(
           currentObservations,
           threadId,
@@ -7273,6 +7563,6 @@ function getObservationsAsOf(activeObservations, asOf) {
   return chunks.join("\n\n");
 }
-export { OBSERVATIONAL_MEMORY_DEFAULTS, OBSERVATION_CONTEXT_INSTRUCTIONS, OBSERVATION_CONTEXT_PROMPT, OBSERVATION_CONTINUATION_HINT, OBSERVER_SYSTEM_PROMPT, ObservationalMemory, TokenCounter, buildObserverPrompt, buildObserverSystemPrompt, extractCurrentTask, formatMessagesForObserver, getObservationsAsOf, hasCurrentTaskSection, optimizeObservationsForContext, parseObserverOutput };
-//# sourceMappingURL=chunk-5SMKVGJP.js.map
-//# sourceMappingURL=chunk-5SMKVGJP.js.map
+export { OBSERVATIONAL_MEMORY_DEFAULTS, OBSERVATION_CONTEXT_INSTRUCTIONS, OBSERVATION_CONTEXT_PROMPT, OBSERVATION_CONTINUATION_HINT, OBSERVER_SYSTEM_PROMPT, ObservationalMemory, TokenCounter, buildObserverPrompt, buildObserverSystemPrompt, combineObservationGroupRanges, deriveObservationGroupProvenance, extractCurrentTask, formatMessagesForObserver, formatToolResultForObserver, getObservationsAsOf, hasCurrentTaskSection, injectAnchorIds, optimizeObservationsForContext, parseAnchorId, parseObservationGroups, parseObserverOutput, reconcileObservationGroupsFromReflection, renderObservationGroupsForReflection, resolveToolResultValue, stripEphemeralAnchorIds, stripObservationGroups, truncateStringByTokens, wrapInObservationGroup };
+//# sourceMappingURL=chunk-JJBSFPC5.js.map
+//# sourceMappingURL=chunk-JJBSFPC5.js.map