npm - @mastra/memory - Versions diffs - 1.3.0 → 1.4.0-alpha.0 - Mend

@mastra/memory 1.3.0 → 1.4.0-alpha.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/{chunk-LXATBJ2L.cjs → chunk-QRKB5I2S.cjs} RENAMED Viewed

@@ -18,54 +18,6 @@ var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
 // src/processors/observational-memory/observational-memory.ts
 // src/processors/observational-memory/observer-agent.ts
-var LEGACY_OBSERVER_EXTRACTION_INSTRUCTIONS = `CRITICAL: DISTINGUISH USER ASSERTIONS FROM QUESTIONS
-When the user TELLS you something about themselves, mark it as an assertion:
-- "I have two kids" \u2192 \u{1F534} (14:30) User stated has two kids
-- "I work at Acme Corp" \u2192 \u{1F534} (14:31) User stated works at Acme Corp
-- "I graduated in 2019" \u2192 \u{1F534} (14:32) User stated graduated in 2019
-When the user ASKS about something, mark it as a question/request:
-- "Can you help me with X?" \u2192 \u{1F7E1} (15:00) User asked help with X
-- "What's the best way to do Y?" \u2192 \u{1F7E1} (15:01) User asked best way to do Y
-USER ASSERTIONS ARE AUTHORITATIVE. The user is the source of truth about their own life.
-If a user previously stated something and later asks a question about the same topic,
-the assertion is the answer - the question doesn't invalidate what they already told you.
-TEMPORAL ANCHORING:
-Convert relative times to estimated dates based on the message timestamp.
-Include the user's original phrasing in quotes, then add an estimated date or range.
-Ranges may span multiple months - e.g., "within the last month" on July 15th could mean anytime in June to early July.
-BAD: User was given X by their friend last month.
-GOOD: User was given X by their friend "last month" (estimated mid-June to early July 202X).
-PRESERVE UNUSUAL PHRASING:
-When the user uses unexpected or non-standard terminology, quote their exact words.
-BAD: User exercised.
-GOOD: User stated they did a "movement session" (their term for exercise).
-CONVERSATION CONTEXT:
-- What the user is working on or asking about
-- Previous topics and their outcomes
-- What user understands or needs clarification on
-- Specific requirements or constraints mentioned
-- Contents of assistant learnings and summaries
-- Answers to users questions including full context to remember detailed summaries and explanations
-- Assistant explanations, especially complex ones. observe the fine details so that the assistant does not forget what they explained
-- Relevant code snippets
-- User preferences (like favourites, dislikes, preferences, etc)
-- Any specifically formatted text or ascii that would need to be reproduced or referenced in later interactions (preserve these verbatim in memory)
-- Any blocks of any text which the user and assistant are iteratively collaborating back and forth on should be preserved verbatim
-- When who/what/where/when is mentioned, note that in the observation. Example: if the user received went on a trip with someone, observe who that someone was, where the trip was, when it happened, and what happened, not just that the user went on the trip.
-ACTIONABLE INSIGHTS:
-- What worked well in explanations
-- What needs follow-up or clarification
-- User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
-var USE_LEGACY_PROMPT = process.env.OM_USE_LEGACY_PROMPT === "1" || process.env.OM_USE_LEGACY_PROMPT === "true";
 var USE_CONDENSED_PROMPT = process.env.OM_USE_CONDENSED_PROMPT === "1" || process.env.OM_USE_CONDENSED_PROMPT === "true";
 var CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS = `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
@@ -308,7 +260,7 @@ ACTIONABLE INSIGHTS:
 - What worked well in explanations
 - What needs follow-up or clarification
 - User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
-var OBSERVER_EXTRACTION_INSTRUCTIONS = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS : USE_LEGACY_PROMPT ? LEGACY_OBSERVER_EXTRACTION_INSTRUCTIONS : CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS;
+var OBSERVER_EXTRACTION_INSTRUCTIONS = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS : CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS;
 var CONDENSED_OBSERVER_OUTPUT_FORMAT = `Use priority levels:
 - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context
 - \u{1F7E1} Medium: project details, learned information, tool results
@@ -409,7 +361,7 @@ var OBSERVER_GUIDELINES = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_GUIDELINES :
 - Make sure you start each observation with a priority emoji (\u{1F534}, \u{1F7E1}, \u{1F7E2})
 - Observe WHAT the agent did and WHAT it means, not HOW well it did it.
 - If the user provides detailed messages or code snippets, observe all important details.`;
-function buildObserverSystemPrompt(multiThread = false) {
+function buildObserverSystemPrompt(multiThread = false, instruction) {
   const outputFormat = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_OUTPUT_FORMAT : OBSERVER_OUTPUT_FORMAT_BASE;
   if (multiThread) {
     return `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
@@ -467,7 +419,11 @@ ${OBSERVER_GUIDELINES}
 Remember: These observations are the assistant's ONLY memory. Make them count.
-User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority.`;
+User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority.${instruction ? `
+=== CUSTOM INSTRUCTIONS ===
+${instruction}` : ""}`;
   }
   return `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
@@ -493,7 +449,11 @@ Simply output your observations without any thread-related markup.
 Remember: These observations are the assistant's ONLY memory. Make them count.
-User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.`;
+User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.${instruction ? `
+=== CUSTOM INSTRUCTIONS ===
+${instruction}` : ""}`;
 }
 var OBSERVER_SYSTEM_PROMPT = buildObserverSystemPrompt();
 function formatMessagesForObserver(messages, options) {
@@ -526,7 +486,7 @@ ${maybeTruncate(resultStr, maxLen)}`;
           return `[Tool Call: ${inv.toolName}]
 ${maybeTruncate(argsStr, maxLen)}`;
         }
-        if (part.type?.startsWith("data-om-observation-")) return "";
+        if (part.type?.startsWith("data-")) return "";
         return "";
       }).filter(Boolean).join("\n");
     } else if (msg.content?.content) {
@@ -759,7 +719,7 @@ function optimizeObservationsForContext(observations) {
 }
 // src/processors/observational-memory/reflector-agent.ts
-function buildReflectorSystemPrompt() {
+function buildReflectorSystemPrompt(instruction) {
   return `You are the memory consciousness of an AI assistant. Your memory observation reflections will be the ONLY information the assistant has about past interactions with this user.
 The following instructions were given to another part of your psyche (the observer) to create memories.
@@ -852,7 +812,11 @@ Hint for the agent's immediate next message. Examples:
 - Call the view tool on src/example.ts to continue debugging.
 </suggested-response>
-User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.`;
+User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.${instruction ? `
+=== CUSTOM INSTRUCTIONS ===
+${instruction}` : ""}`;
 }
 var COMPRESSION_GUIDANCE = {
   0: "",
@@ -884,6 +848,21 @@ Please re-process with much more aggressive compression:
 - Remove redundant information and merge overlapping observations
 Your current detail level was a 10/10, lets aim for a 6/10 detail level.
+`,
+  3: `
+## CRITICAL COMPRESSION REQUIRED
+Your previous reflections have failed to compress sufficiently after multiple attempts.
+Please re-process with maximum compression:
+- Summarize the oldest observations (first 50-70%) into brief high-level paragraphs \u2014 only key facts, decisions, and outcomes
+- For the most recent observations (last 30-50%), retain important details but still use a condensed style
+- Ruthlessly merge related observations \u2014 if 10 observations are about the same topic, combine into 1-2 lines
+- Drop procedural details (tool calls, retries, intermediate steps) \u2014 keep only final outcomes
+- Drop observations that are no longer relevant or have been superseded by newer information
+- Preserve: names, dates, decisions, errors, user preferences, and architectural choices
+Your current detail level was a 10/10, lets aim for a 4/10 detail level.
 `
 };
 function buildReflectorPrompt(observations, manualPrompt, compressionLevel, skipContinuationHints) {
@@ -1024,7 +1003,7 @@ var TokenCounter = class _TokenCounter {
                 `Unhandled tool-invocation state '${part.toolInvocation?.state}' in token counting for part type '${part.type}'`
               );
             }
-          } else {
+          } else if (typeof part.type === "string" && part.type.startsWith("data-")) ; else {
             tokenString += JSON.stringify(part);
           }
         }
@@ -1669,7 +1648,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
       blockAfter: asyncBufferingDisabled ? void 0 : this.resolveBlockAfter(
         config.observation?.blockAfter ?? (config.observation?.bufferTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.observation.bufferTokens ? 1.2 : void 0),
         config.observation?.messageTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.observation.messageTokens
-      )
+      ),
+      instruction: config.observation?.instruction
     };
     this.reflectionConfig = {
       model: reflectionModel,
@@ -1684,7 +1664,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
       blockAfter: asyncBufferingDisabled ? void 0 : this.resolveBlockAfter(
         config.reflection?.blockAfter ?? (config.reflection?.bufferActivation ?? OBSERVATIONAL_MEMORY_DEFAULTS.reflection.bufferActivation ? 1.2 : void 0),
         config.reflection?.observationTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.reflection.observationTokens
-      )
+      ),
+      instruction: config.reflection?.instruction
     };
     this.tokenCounter = new TokenCounter();
     this.onDebugEvent = config.onDebugEvent;
@@ -1900,7 +1881,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
    */
   getObserverAgent() {
     if (!this.observerAgent) {
-      const systemPrompt = buildObserverSystemPrompt();
+      const systemPrompt = buildObserverSystemPrompt(false, this.observationConfig.instruction);
       this.observerAgent = new agent.Agent({
         id: "observational-memory-observer",
         name: "Observer",
@@ -1915,7 +1896,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
    */
   getReflectorAgent() {
     if (!this.reflectorAgent) {
-      const systemPrompt = buildReflectorSystemPrompt();
+      const systemPrompt = buildReflectorSystemPrompt(this.reflectionConfig.instruction);
       this.reflectorAgent = new agent.Agent({
         id: "observational-memory-reflector",
         name: "Reflector",
@@ -2146,7 +2127,11 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
     for (let i = allMsgs.length - 1; i >= 0; i--) {
       const msg = allMsgs[i];
       if (msg?.role === "assistant" && msg.content?.parts && Array.isArray(msg.content.parts)) {
-        msg.content.parts.push(marker);
+        const markerData = marker.data;
+        const alreadyPresent = markerData?.cycleId && msg.content.parts.some((p) => p?.type === marker.type && p?.data?.cycleId === markerData.cycleId);
+        if (!alreadyPresent) {
+          msg.content.parts.push(marker);
+        }
         try {
           await this.messageHistory.persistMessages({
             messages: [msg],
@@ -2175,7 +2160,11 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
       const messages = result?.messages ?? [];
       for (const msg of messages) {
         if (msg?.role === "assistant" && msg.content?.parts && Array.isArray(msg.content.parts)) {
-          msg.content.parts.push(marker);
+          const markerData = marker.data;
+          const alreadyPresent = markerData?.cycleId && msg.content.parts.some((p) => p?.type === marker.type && p?.data?.cycleId === markerData.cycleId);
+          if (!alreadyPresent) {
+            msg.content.parts.push(marker);
+          }
           await this.messageHistory.persistMessages({
             messages: [msg],
             threadId,
@@ -2402,7 +2391,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
           ...this.observationConfig.modelSettings
         },
         providerOptions: this.observationConfig.providerOptions,
-        ...abortSignal ? { abortSignal } : {}
+        ...abortSignal ? { abortSignal } : {},
+        ...options?.requestContext ? { requestContext: options.requestContext } : {}
       }),
       abortSignal
     );
@@ -2425,12 +2415,12 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
    * Returns per-thread results with observations, currentTask, and suggestedContinuation,
    * plus the total usage for the batch.
    */
-  async callMultiThreadObserver(existingObservations, messagesByThread, threadOrder, abortSignal) {
+  async callMultiThreadObserver(existingObservations, messagesByThread, threadOrder, abortSignal, requestContext) {
     const agent$1 = new agent.Agent({
       id: "multi-thread-observer",
       name: "multi-thread-observer",
       model: this.observationConfig.model,
-      instructions: buildObserverSystemPrompt(true)
+      instructions: buildObserverSystemPrompt(true, this.observationConfig.instruction)
     });
     const prompt = buildMultiThreadObserverPrompt(existingObservations, messagesByThread, threadOrder);
     const allMessages = [];
@@ -2446,7 +2436,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
           ...this.observationConfig.modelSettings
         },
         providerOptions: this.observationConfig.providerOptions,
-        ...abortSignal ? { abortSignal } : {}
+        ...abortSignal ? { abortSignal } : {},
+        ...requestContext ? { requestContext } : {}
       }),
       abortSignal
     );
@@ -2478,68 +2469,79 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
    * Call the Reflector agent to condense observations.
    * Includes compression validation and retry logic.
    */
-  async callReflector(observations, manualPrompt, streamContext, observationTokensThreshold, abortSignal, skipContinuationHints, compressionStartLevel) {
+  async callReflector(observations, manualPrompt, streamContext, observationTokensThreshold, abortSignal, skipContinuationHints, compressionStartLevel, requestContext) {
     const agent = this.getReflectorAgent();
     const originalTokens = this.tokenCounter.countObservations(observations);
     const targetThreshold = observationTokensThreshold ?? this.getMaxThreshold(this.reflectionConfig.observationTokens);
     let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-    const firstLevel = compressionStartLevel ?? 0;
-    const retryLevel = Math.min(firstLevel + 1, 2);
-    let prompt = buildReflectorPrompt(observations, manualPrompt, firstLevel, skipContinuationHints);
-    omDebug(
-      `[OM:callReflector] starting first attempt: originalTokens=${originalTokens}, targetThreshold=${targetThreshold}, promptLen=${prompt.length}, skipContinuationHints=${skipContinuationHints}`
-    );
-    let chunkCount = 0;
-    const generatePromise = agent.generate(prompt, {
-      modelSettings: {
-        ...this.reflectionConfig.modelSettings
-      },
-      providerOptions: this.reflectionConfig.providerOptions,
-      ...abortSignal ? { abortSignal } : {},
-      onChunk(chunk) {
-        chunkCount++;
-        if (chunkCount === 1 || chunkCount % 50 === 0) {
-          const preview = chunk.type === "text-delta" ? ` text="${chunk.textDelta?.slice(0, 80)}..."` : chunk.type === "tool-call" ? ` tool=${chunk.toolName}` : "";
-          omDebug(`[OM:callReflector] chunk#${chunkCount}: type=${chunk.type}${preview}`);
-        }
-      },
-      onFinish(event) {
-        omDebug(
-          `[OM:callReflector] onFinish: chunks=${chunkCount}, finishReason=${event.finishReason}, inputTokens=${event.usage?.inputTokens}, outputTokens=${event.usage?.outputTokens}, textLen=${event.text?.length}`
-        );
-      },
-      onAbort(event) {
-        omDebug(`[OM:callReflector] onAbort: chunks=${chunkCount}, reason=${event?.reason ?? "unknown"}`);
-      },
-      onError({ error }) {
-        omError(`[OM:callReflector] onError after ${chunkCount} chunks`, error);
+    let currentLevel = compressionStartLevel ?? 0;
+    const maxLevel = 3;
+    let parsed = { observations: "", suggestedContinuation: void 0 };
+    let reflectedTokens = 0;
+    let attemptNumber = 0;
+    while (currentLevel <= maxLevel) {
+      attemptNumber++;
+      const isRetry = attemptNumber > 1;
+      const prompt = buildReflectorPrompt(observations, manualPrompt, currentLevel, skipContinuationHints);
+      omDebug(
+        `[OM:callReflector] ${isRetry ? `retry #${attemptNumber - 1}` : "first attempt"}: level=${currentLevel}, originalTokens=${originalTokens}, targetThreshold=${targetThreshold}, promptLen=${prompt.length}, skipContinuationHints=${skipContinuationHints}`
+      );
+      let chunkCount = 0;
+      const result = await this.withAbortCheck(
+        () => agent.generate(prompt, {
+          modelSettings: {
+            ...this.reflectionConfig.modelSettings
+          },
+          providerOptions: this.reflectionConfig.providerOptions,
+          ...abortSignal ? { abortSignal } : {},
+          ...requestContext ? { requestContext } : {},
+          ...attemptNumber === 1 ? {
+            onChunk(chunk) {
+              chunkCount++;
+              if (chunkCount === 1 || chunkCount % 50 === 0) {
+                const preview = chunk.type === "text-delta" ? ` text="${chunk.textDelta?.slice(0, 80)}..."` : chunk.type === "tool-call" ? ` tool=${chunk.toolName}` : "";
+                omDebug(`[OM:callReflector] chunk#${chunkCount}: type=${chunk.type}${preview}`);
+              }
+            },
+            onFinish(event) {
+              omDebug(
+                `[OM:callReflector] onFinish: chunks=${chunkCount}, finishReason=${event.finishReason}, inputTokens=${event.usage?.inputTokens}, outputTokens=${event.usage?.outputTokens}, textLen=${event.text?.length}`
+              );
+            },
+            onAbort(event) {
+              omDebug(`[OM:callReflector] onAbort: chunks=${chunkCount}, reason=${event?.reason ?? "unknown"}`);
+            },
+            onError({ error }) {
+              omError(`[OM:callReflector] onError after ${chunkCount} chunks`, error);
+            }
+          } : {}
+        }),
+        abortSignal
+      );
+      omDebug(
+        `[OM:callReflector] attempt #${attemptNumber} returned: textLen=${result.text?.length}, textPreview="${result.text?.slice(0, 120)}...", inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}`
+      );
+      const usage = result.totalUsage ?? result.usage;
+      if (usage) {
+        totalUsage.inputTokens += usage.inputTokens ?? 0;
+        totalUsage.outputTokens += usage.outputTokens ?? 0;
+        totalUsage.totalTokens += usage.totalTokens ?? 0;
+      }
+      parsed = parseReflectorOutput(result.text);
+      reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
+      omDebug(
+        `[OM:callReflector] attempt #${attemptNumber} parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}`
+      );
+      if (validateCompression(reflectedTokens, targetThreshold) || currentLevel >= maxLevel) {
+        break;
       }
-    });
-    let result = await this.withAbortCheck(async () => {
-      return await generatePromise;
-    }, abortSignal);
-    omDebug(
-      `[OM:callReflector] first attempt returned: textLen=${result.text?.length}, textPreview="${result.text?.slice(0, 120)}...", inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}, keys=${Object.keys(result).join(",")}`
-    );
-    const firstUsage = result.totalUsage ?? result.usage;
-    if (firstUsage) {
-      totalUsage.inputTokens += firstUsage.inputTokens ?? 0;
-      totalUsage.outputTokens += firstUsage.outputTokens ?? 0;
-      totalUsage.totalTokens += firstUsage.totalTokens ?? 0;
-    }
-    let parsed = parseReflectorOutput(result.text);
-    let reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
-    omDebug(
-      `[OM:callReflector] first attempt parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}`
-    );
-    if (!validateCompression(reflectedTokens, targetThreshold)) {
       if (streamContext?.writer) {
         const failedMarker = this.createObservationFailedMarker({
           cycleId: streamContext.cycleId,
           operationType: "reflection",
           startedAt: streamContext.startedAt,
           tokensAttempted: originalTokens,
-          error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying with compression guidance`,
+          error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying at level ${currentLevel + 1}`,
           recordId: streamContext.recordId,
           threadId: streamContext.threadId
         });
@@ -2559,32 +2561,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
         await streamContext.writer.custom(startMarker).catch(() => {
         });
       }
-      prompt = buildReflectorPrompt(observations, manualPrompt, retryLevel, skipContinuationHints);
-      omDebug(`[OM:callReflector] starting retry: promptLen=${prompt.length}`);
-      result = await this.withAbortCheck(
-        () => agent.generate(prompt, {
-          modelSettings: {
-            ...this.reflectionConfig.modelSettings
-          },
-          providerOptions: this.reflectionConfig.providerOptions,
-          ...abortSignal ? { abortSignal } : {}
-        }),
-        abortSignal
-      );
-      omDebug(
-        `[OM:callReflector] retry returned: textLen=${result.text?.length}, inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}`
-      );
-      const retryUsage = result.totalUsage ?? result.usage;
-      if (retryUsage) {
-        totalUsage.inputTokens += retryUsage.inputTokens ?? 0;
-        totalUsage.outputTokens += retryUsage.outputTokens ?? 0;
-        totalUsage.totalTokens += retryUsage.totalTokens ?? 0;
-      }
-      parsed = parseReflectorOutput(result.text);
-      reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
-      omDebug(
-        `[OM:callReflector] retry parsed: reflectedTokens=${reflectedTokens}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}`
-      );
+      currentLevel = Math.min(currentLevel + 1, maxLevel);
     }
     return {
       observations: parsed.observations,
@@ -2704,8 +2681,8 @@ ${suggestedResponse}
   /**
    * Calculate all threshold-related values for observation decision making.
    */
-  calculateObservationThresholds(allMessages, _unobservedMessages, _pendingTokens, otherThreadTokens, currentObservationTokens, _record) {
-    const contextWindowTokens = this.tokenCounter.countMessages(allMessages);
+  calculateObservationThresholds(_allMessages, unobservedMessages, _pendingTokens, otherThreadTokens, currentObservationTokens, _record) {
+    const contextWindowTokens = this.tokenCounter.countMessages(unobservedMessages);
     const totalPendingTokens = Math.max(0, contextWindowTokens + otherThreadTokens);
     const threshold = this.calculateDynamicThreshold(this.observationConfig.messageTokens, currentObservationTokens);
     const baseReflectionThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
@@ -2807,7 +2784,7 @@ ${suggestedResponse}
    * Tries async activation first if enabled, then falls back to sync observation.
    * Returns whether observation succeeded.
    */
-  async handleThresholdReached(messageList, record, threadId, resourceId, threshold, lockKey, writer, abortSignal, abort) {
+  async handleThresholdReached(messageList, record, threadId, resourceId, threshold, lockKey, writer, abortSignal, abort, requestContext) {
     let observationSucceeded = false;
     let updatedRecord = record;
     let activatedMessageIds;
@@ -2815,7 +2792,7 @@ ${suggestedResponse}
       let freshRecord = await this.getOrCreateRecord(threadId, resourceId);
       const freshAllMessages = messageList.get.all.db();
       let freshUnobservedMessages = this.getUnobservedMessages(freshAllMessages, freshRecord);
-      const freshContextTokens = this.tokenCounter.countMessages(freshAllMessages);
+      const freshContextTokens = this.tokenCounter.countMessages(freshUnobservedMessages);
       let freshOtherThreadTokens = 0;
       if (this.scope === "resource" && resourceId) {
         const freshOtherContext = await this.loadOtherThreadsContext(resourceId, threadId);
@@ -2863,7 +2840,13 @@ ${suggestedResponse}
           omDebug(
             `[OM:threshold] activation succeeded, obsTokens=${updatedRecord.observationTokenCount}, activeObsLen=${updatedRecord.activeObservations?.length}`
           );
-          await this.maybeAsyncReflect(updatedRecord, updatedRecord.observationTokenCount ?? 0, writer, messageList);
+          await this.maybeAsyncReflect(
+            updatedRecord,
+            updatedRecord.observationTokenCount ?? 0,
+            writer,
+            messageList,
+            requestContext
+          );
           return;
         }
         if (this.observationConfig.blockAfter && freshTotal >= this.observationConfig.blockAfter) {
@@ -2887,7 +2870,8 @@ ${suggestedResponse}
               resourceId,
               currentThreadMessages: freshUnobservedMessages,
               writer,
-              abortSignal
+              abortSignal,
+              requestContext
             });
           } else {
             await this.doSynchronousObservation({
@@ -2895,7 +2879,8 @@ ${suggestedResponse}
               threadId,
               unobservedMessages: freshUnobservedMessages,
               writer,
-              abortSignal
+              abortSignal,
+              requestContext
             });
           }
           updatedRecord = await this.getOrCreateRecord(threadId, resourceId);
@@ -3154,12 +3139,12 @@ ${suggestedResponse}
       }
       if (bufferedChunks.length > 0) {
         const allMsgsForCheck = messageList.get.all.db();
+        const unobservedMsgsForCheck = this.getUnobservedMessages(allMsgsForCheck, record);
         const otherThreadTokensForCheck = unobservedContextBlocks ? this.tokenCounter.countString(unobservedContextBlocks) : 0;
         const currentObsTokensForCheck = record.observationTokenCount ?? 0;
         const { totalPendingTokens: step0PendingTokens, threshold: step0Threshold } = this.calculateObservationThresholds(
           allMsgsForCheck,
-          [],
-          // unobserved not needed for threshold calculation
+          unobservedMsgsForCheck,
           0,
           // pendingTokens not needed — allMessages covers context
           otherThreadTokensForCheck,
@@ -3198,7 +3183,8 @@ ${suggestedResponse}
               observationTokens: record.observationTokenCount ?? 0,
               threadId,
               writer,
-              messageList
+              messageList,
+              requestContext
             });
             record = await this.getOrCreateRecord(threadId, resourceId);
           }
@@ -3209,13 +3195,20 @@ ${suggestedResponse}
       const obsTokens = record.observationTokenCount ?? 0;
       if (this.shouldReflect(obsTokens)) {
         omDebug(`[OM:step0-reflect] obsTokens=${obsTokens} over reflectThreshold, triggering reflection`);
-        await this.maybeReflect({ record, observationTokens: obsTokens, threadId, writer, messageList });
+        await this.maybeReflect({
+          record,
+          observationTokens: obsTokens,
+          threadId,
+          writer,
+          messageList,
+          requestContext
+        });
         record = await this.getOrCreateRecord(threadId, resourceId);
       } else if (this.isAsyncReflectionEnabled()) {
         const lockKey = this.getLockKey(threadId, resourceId);
         if (this.shouldTriggerAsyncReflection(obsTokens, lockKey, record)) {
           omDebug(`[OM:step0-reflect] obsTokens=${obsTokens} above activation point, triggering async reflection`);
-          await this.maybeAsyncReflect(record, obsTokens, writer, messageList);
+          await this.maybeAsyncReflect(record, obsTokens, writer, messageList, requestContext);
           record = await this.getOrCreateRecord(threadId, resourceId);
         }
       }
@@ -3235,26 +3228,44 @@ ${suggestedResponse}
         record
       );
       const { totalPendingTokens, threshold } = thresholds;
+      const bufferedChunkTokens = this.getBufferedChunks(record).reduce((sum, c) => sum + (c.tokenCount ?? 0), 0);
+      const unbufferedPendingTokens = Math.max(0, totalPendingTokens - bufferedChunkTokens);
       const stateSealedIds = state.sealedIds ?? /* @__PURE__ */ new Set();
       const staticSealedIds = _ObservationalMemory.sealedMessageIds.get(threadId) ?? /* @__PURE__ */ new Set();
       const sealedIds = /* @__PURE__ */ new Set([...stateSealedIds, ...staticSealedIds]);
       state.sealedIds = sealedIds;
       const lockKey = this.getLockKey(threadId, resourceId);
       if (this.isAsyncObservationEnabled() && totalPendingTokens < threshold) {
-        const shouldTrigger = this.shouldTriggerAsyncObservation(totalPendingTokens, lockKey, record);
+        const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
         omDebug(
-          `[OM:async-obs] belowThreshold: pending=${totalPendingTokens}, threshold=${threshold}, shouldTrigger=${shouldTrigger}, isBufferingObs=${record.isBufferingObservation}, lastBufferedAt=${record.lastBufferedAtTokens}`
+          `[OM:async-obs] belowThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, shouldTrigger=${shouldTrigger}, isBufferingObs=${record.isBufferingObservation}, lastBufferedAt=${record.lastBufferedAtTokens}`
         );
         if (shouldTrigger) {
-          this.startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, totalPendingTokens);
+          this.startAsyncBufferedObservation(
+            record,
+            threadId,
+            unobservedMessages,
+            lockKey,
+            writer,
+            unbufferedPendingTokens,
+            requestContext
+          );
         }
       } else if (this.isAsyncObservationEnabled()) {
-        const shouldTrigger = this.shouldTriggerAsyncObservation(totalPendingTokens, lockKey, record);
+        const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
         omDebug(
-          `[OM:async-obs] atOrAboveThreshold: pending=${totalPendingTokens}, threshold=${threshold}, step=${stepNumber}, shouldTrigger=${shouldTrigger}`
+          `[OM:async-obs] atOrAboveThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, step=${stepNumber}, shouldTrigger=${shouldTrigger}`
         );
         if (shouldTrigger) {
-          this.startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, totalPendingTokens);
+          this.startAsyncBufferedObservation(
+            record,
+            threadId,
+            unobservedMessages,
+            lockKey,
+            writer,
+            unbufferedPendingTokens,
+            requestContext
+          );
         }
       }
       if (stepNumber > 0) {
@@ -3270,7 +3281,8 @@ ${suggestedResponse}
           lockKey,
           writer,
           abortSignal,
-          abort
+          abort,
+          requestContext
         );
         if (observationSucceeded) {
           const observedIds = activatedMessageIds?.length ? activatedMessageIds : Array.isArray(updatedRecord.observedMessageIds) ? updatedRecord.observedMessageIds : void 0;
@@ -3602,7 +3614,7 @@ ${newThreadSection}`;
    * Do synchronous observation (fallback when no buffering)
    */
   async doSynchronousObservation(opts) {
-    const { record, threadId, unobservedMessages, writer, abortSignal, reflectionHooks } = opts;
+    const { record, threadId, unobservedMessages, writer, abortSignal, reflectionHooks, requestContext } = opts;
     this.emitDebugEvent({
       type: "observation_triggered",
       timestamp: /* @__PURE__ */ new Date(),
@@ -3655,7 +3667,8 @@ ${newThreadSection}`;
       const result = await this.callObserver(
         freshRecord?.activeObservations ?? record.activeObservations,
         messagesToObserve,
-        abortSignal
+        abortSignal,
+        { requestContext }
       );
       const existingObservations = freshRecord?.activeObservations ?? record.activeObservations ?? "";
       let newObservations;
@@ -3733,7 +3746,8 @@ ${result.observations}` : result.observations;
         threadId,
         writer,
         abortSignal,
-        reflectionHooks
+        reflectionHooks,
+        requestContext
       });
     } catch (error) {
       if (lastMessage?.id) {
@@ -3774,7 +3788,7 @@ ${result.observations}` : result.observations;
    * @param lockKey - Lock key for this scope
    * @param writer - Optional stream writer for emitting buffering markers
    */
-  startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, contextWindowTokens) {
+  startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, contextWindowTokens, requestContext) {
     const bufferKey = this.getObservationBufferKey(lockKey);
     const currentTokens = contextWindowTokens ?? this.tokenCounter.countMessages(unobservedMessages) + (record.pendingMessageTokens ?? 0);
     _ObservationalMemory.lastBufferedBoundary.set(bufferKey, currentTokens);
@@ -3782,22 +3796,27 @@ ${result.observations}` : result.observations;
     this.storage.setBufferingObservationFlag(record.id, true, currentTokens).catch((err) => {
       omError("[OM] Failed to set buffering observation flag", err);
     });
-    const asyncOp = this.runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer).finally(
-      () => {
-        _ObservationalMemory.asyncBufferingOps.delete(bufferKey);
-        unregisterOp(record.id, "bufferingObservation");
-        this.storage.setBufferingObservationFlag(record.id, false).catch((err) => {
-          omError("[OM] Failed to clear buffering observation flag", err);
-        });
-      }
-    );
+    const asyncOp = this.runAsyncBufferedObservation(
+      record,
+      threadId,
+      unobservedMessages,
+      bufferKey,
+      writer,
+      requestContext
+    ).finally(() => {
+      _ObservationalMemory.asyncBufferingOps.delete(bufferKey);
+      unregisterOp(record.id, "bufferingObservation");
+      this.storage.setBufferingObservationFlag(record.id, false).catch((err) => {
+        omError("[OM] Failed to clear buffering observation flag", err);
+      });
+    });
     _ObservationalMemory.asyncBufferingOps.set(bufferKey, asyncOp);
   }
   /**
    * Internal method that waits for existing buffering operation and then runs new buffering.
    * This implements the mutex-wait behavior.
    */
-  async runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer) {
+  async runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer, requestContext) {
     const existingOp = _ObservationalMemory.asyncBufferingOps.get(bufferKey);
     if (existingOp) {
       try {
@@ -3869,7 +3888,15 @@ ${result.observations}` : result.observations;
       omDebug(
         `[OM:bufferInput] cycleId=${cycleId}, msgCount=${messagesToBuffer.length}, msgTokens=${this.tokenCounter.countMessages(messagesToBuffer)}, ids=${messagesToBuffer.map((m) => `${m.id?.slice(0, 8)}@${m.createdAt ? new Date(m.createdAt).toISOString() : "none"}`).join(",")}`
       );
-      await this.doAsyncBufferedObservation(freshRecord, threadId, messagesToBuffer, cycleId, startedAt, writer);
+      await this.doAsyncBufferedObservation(
+        freshRecord,
+        threadId,
+        messagesToBuffer,
+        cycleId,
+        startedAt,
+        writer,
+        requestContext
+      );
       const maxTs = this.getMaxMessageTimestamp(messagesToBuffer);
       const cursor = new Date(maxTs.getTime() + 1);
       _ObservationalMemory.lastBufferedAtTime.set(bufferKey, cursor);
@@ -3898,7 +3925,7 @@ ${result.observations}` : result.observations;
    * The observer sees: active observations + existing buffered observations + message history
    * (excluding already-buffered messages).
    */
-  async doAsyncBufferedObservation(record, threadId, messagesToBuffer, cycleId, startedAt, writer) {
+  async doAsyncBufferedObservation(record, threadId, messagesToBuffer, cycleId, startedAt, writer, requestContext) {
     const bufferedChunks = this.getBufferedChunks(record);
     const bufferedChunksText = bufferedChunks.map((c) => c.observations).join("\n\n");
     const combinedObservations = this.combineObservationsForBuffering(record.activeObservations, bufferedChunksText);
@@ -3907,7 +3934,7 @@ ${result.observations}` : result.observations;
       messagesToBuffer,
       void 0,
       // No abort signal for background ops
-      { skipContinuationHints: true }
+      { skipContinuationHints: true, requestContext }
     );
     let newObservations;
     if (this.scope === "resource") {
@@ -4006,11 +4033,20 @@ ${bufferedObservations}`;
     if (!freshChunks.length) {
       return { success: false };
     }
+    const messageTokensThreshold = this.getMaxThreshold(this.observationConfig.messageTokens);
+    if (messageList) {
+      const freshPendingTokens = this.tokenCounter.countMessages(messageList.get.all.db());
+      if (freshPendingTokens < messageTokensThreshold) {
+        omDebug(
+          `[OM:tryActivate] skipping activation: freshPendingTokens=${freshPendingTokens} < threshold=${messageTokensThreshold}`
+        );
+        return { success: false };
+      }
+    }
     const activationRatio = this.observationConfig.bufferActivation ?? 0.7;
     omDebug(
       `[OM:tryActivate] swapping: freshChunks=${freshChunks.length}, activationRatio=${activationRatio}, totalChunkTokens=${freshChunks.reduce((s, c) => s + (c.tokenCount ?? 0), 0)}`
     );
-    const messageTokensThreshold = this.getMaxThreshold(this.observationConfig.messageTokens);
     const activationResult = await this.storage.swapBufferedToActive({
       id: freshRecord.id,
       activationRatio,
@@ -4066,7 +4102,7 @@ ${bufferedObservations}`;
    * @param observationTokens - Current observation token count
    * @param lockKey - Lock key for this scope
    */
-  startAsyncBufferedReflection(record, observationTokens, lockKey, writer) {
+  startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext) {
     const bufferKey = this.getReflectionBufferKey(lockKey);
     if (this.isAsyncBufferingInProgress(bufferKey)) {
       return;
@@ -4076,7 +4112,7 @@ ${bufferedObservations}`;
     this.storage.setBufferingReflectionFlag(record.id, true).catch((err) => {
       omError("[OM] Failed to set buffering reflection flag", err);
     });
-    const asyncOp = this.doAsyncBufferedReflection(record, bufferKey, writer).catch(async (error) => {
+    const asyncOp = this.doAsyncBufferedReflection(record, bufferKey, writer, requestContext).catch(async (error) => {
       if (writer) {
         const failedMarker = this.createBufferingFailedMarker({
           cycleId: `reflect-buf-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
@@ -4105,7 +4141,7 @@ ${bufferedObservations}`;
    * Perform async buffered reflection - reflects observations and stores to bufferedReflection.
    * Does NOT create a new generation or update activeObservations.
    */
-  async doAsyncBufferedReflection(record, _bufferKey, writer) {
+  async doAsyncBufferedReflection(record, _bufferKey, writer, requestContext) {
     const freshRecord = await this.storage.getObservationalMemory(record.threadId, record.resourceId);
     const currentRecord = freshRecord ?? record;
     const observationTokens = currentRecord.observationTokenCount ?? 0;
@@ -4123,7 +4159,7 @@ ${bufferedObservations}`;
     const activeObservations = allLines.slice(0, linesToReflect).join("\n");
     const reflectedObservationLineCount = linesToReflect;
     const sliceTokenEstimate = Math.round(avgTokensPerLine * linesToReflect);
-    const compressionTarget = Math.min(sliceTokenEstimate * bufferActivation, reflectThreshold);
+    const compressionTarget = Math.round(sliceTokenEstimate * 0.75);
     omDebug(
       `[OM:reflect] doAsyncBufferedReflection: slicing observations for reflection \u2014 totalLines=${totalLines}, avgTokPerLine=${avgTokensPerLine.toFixed(1)}, activationPointTokens=${activationPointTokens}, linesToReflect=${linesToReflect}/${totalLines}, sliceTokenEstimate=${sliceTokenEstimate}, compressionTarget=${compressionTarget}`
     );
@@ -4153,8 +4189,9 @@ ${bufferedObservations}`;
       // No abort signal for background ops
       true,
       // Skip continuation hints for async buffering
-      1
+      1,
       // Start at compression level 1 for buffered reflection
+      requestContext
     );
     const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
     omDebug(
@@ -4175,7 +4212,7 @@ ${bufferedObservations}`;
         cycleId,
         operationType: "reflection",
         startedAt,
-        tokensBuffered: observationTokens,
+        tokensBuffered: sliceTokenEstimate,
         bufferedTokens: reflectionTokenCount,
         recordId: currentRecord.id,
         threadId: currentRecord.threadId ?? "",
@@ -4278,7 +4315,16 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
    * 4. Only triggers reflection AFTER all threads are observed
    */
   async doResourceScopedObservation(opts) {
-    const { record, currentThreadId, resourceId, currentThreadMessages, writer, abortSignal, reflectionHooks } = opts;
+    const {
+      record,
+      currentThreadId,
+      resourceId,
+      currentThreadMessages,
+      writer,
+      abortSignal,
+      reflectionHooks,
+      requestContext
+    } = opts;
     const { threads: allThreads } = await this.storage.listThreads({ filter: { resourceId } });
     const threadMetadataMap = /* @__PURE__ */ new Map();
     for (const thread of allThreads) {
@@ -4433,7 +4479,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
           existingObservations,
           batch.threadMap,
           batch.threadIds,
-          abortSignal
+          abortSignal,
+          requestContext
         );
         return batchResult;
       });
@@ -4546,7 +4593,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
         threadId: currentThreadId,
         writer,
         abortSignal,
-        reflectionHooks
+        reflectionHooks,
+        requestContext
       });
     } catch (error) {
       for (const [threadId, msgs] of threadsWithMessages) {
@@ -4582,7 +4630,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
    * Only handles the async path — will never do synchronous (blocking) reflection.
    * Safe to call after buffered observation activation.
    */
-  async maybeAsyncReflect(record, observationTokens, writer, messageList) {
+  async maybeAsyncReflect(record, observationTokens, writer, messageList, requestContext) {
     if (!this.isAsyncReflectionEnabled()) return;
     const lockKey = this.getLockKey(record.threadId, record.resourceId);
     const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
@@ -4593,7 +4641,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
       const shouldTrigger = this.shouldTriggerAsyncReflection(observationTokens, lockKey, record);
       omDebug(`[OM:reflect] below threshold: shouldTrigger=${shouldTrigger}`);
       if (shouldTrigger) {
-        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
+        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
       }
       return;
     }
@@ -4610,7 +4658,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
     omDebug(`[OM:reflect] activationSuccess=${activationSuccess}`);
     if (activationSuccess) return;
     omDebug(`[OM:reflect] no buffered reflection, starting background reflection...`);
-    this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
+    this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
   }
   /**
    * Check if reflection needed and trigger if so.
@@ -4619,12 +4667,12 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
    * in the background at intervals, and activated when the threshold is reached.
    */
   async maybeReflect(opts) {
-    const { record, observationTokens, writer, abortSignal, messageList, reflectionHooks } = opts;
+    const { record, observationTokens, writer, abortSignal, messageList, reflectionHooks, requestContext } = opts;
     const lockKey = this.getLockKey(record.threadId, record.resourceId);
     const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
     if (this.isAsyncReflectionEnabled() && observationTokens < reflectThreshold) {
       if (this.shouldTriggerAsyncReflection(observationTokens, lockKey, record)) {
-        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
+        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
       }
     }
     if (!this.shouldReflect(observationTokens)) {
@@ -4651,7 +4699,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
         omDebug(
           `[OM:reflect] async activation failed, no blockAfter or below it (obsTokens=${observationTokens}, blockAfter=${this.reflectionConfig.blockAfter}) \u2014 starting background reflection`
         );
-        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
+        this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
         return;
       }
     }
@@ -4694,7 +4742,10 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
         void 0,
         streamContext,
         reflectThreshold,
-        abortSignal
+        abortSignal,
+        void 0,
+        void 0,
+        requestContext
       );
       const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
       await this.storage.createReflectionGeneration({
@@ -4758,7 +4809,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
    * to pass conversation messages without duplicating them into Mastra's DB.
    */
   async observe(opts) {
-    const { threadId, resourceId, messages, hooks } = opts;
+    const { threadId, resourceId, messages, hooks, requestContext } = opts;
     const lockKey = this.getLockKey(threadId, resourceId);
     const reflectionHooks = hooks ? { onReflectionStart: hooks.onReflectionStart, onReflectionEnd: hooks.onReflectionEnd } : void 0;
     await this.withLock(lockKey, async () => {
@@ -4778,7 +4829,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
             currentThreadId: threadId,
             resourceId,
             currentThreadMessages: currentMessages,
-            reflectionHooks
+            reflectionHooks,
+            requestContext
           });
         } finally {
           hooks?.onObservationEnd?.();
@@ -4800,7 +4852,13 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
         }
         hooks?.onObservationStart?.();
         try {
-          await this.doSynchronousObservation({ record: freshRecord, threadId, unobservedMessages, reflectionHooks });
+          await this.doSynchronousObservation({
+            record: freshRecord,
+            threadId,
+            unobservedMessages,
+            reflectionHooks,
+            requestContext
+          });
         } finally {
           hooks?.onObservationEnd?.();
         }
@@ -4818,7 +4876,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
    * );
    * ```
    */
-  async reflect(threadId, resourceId, prompt) {
+  async reflect(threadId, resourceId, prompt, requestContext) {
     const record = await this.getOrCreateRecord(threadId, resourceId);
     if (!record.activeObservations) {
       return;
@@ -4827,7 +4885,16 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
     registerOp(record.id, "reflecting");
     try {
       const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
-      const reflectResult = await this.callReflector(record.activeObservations, prompt, void 0, reflectThreshold);
+      const reflectResult = await this.callReflector(
+        record.activeObservations,
+        prompt,
+        void 0,
+        reflectThreshold,
+        void 0,
+        void 0,
+        void 0,
+        requestContext
+      );
       const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
       await this.storage.createReflectionGeneration({
         currentRecord: record,
@@ -4909,5 +4976,5 @@ exports.formatMessagesForObserver = formatMessagesForObserver;
 exports.hasCurrentTaskSection = hasCurrentTaskSection;
 exports.optimizeObservationsForContext = optimizeObservationsForContext;
 exports.parseObserverOutput = parseObserverOutput;
-//# sourceMappingURL=chunk-LXATBJ2L.cjs.map
-//# sourceMappingURL=chunk-LXATBJ2L.cjs.map
+//# sourceMappingURL=chunk-QRKB5I2S.cjs.map
+//# sourceMappingURL=chunk-QRKB5I2S.cjs.map