@mastra/memory 1.3.0 → 1.4.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/CHANGELOG.md +54 -0
  2. package/dist/{chunk-F5P5HTMC.js → chunk-D4AWAGLM.js} +270 -203
  3. package/dist/chunk-D4AWAGLM.js.map +1 -0
  4. package/dist/{chunk-LXATBJ2L.cjs → chunk-QRKB5I2S.cjs} +270 -203
  5. package/dist/chunk-QRKB5I2S.cjs.map +1 -0
  6. package/dist/docs/SKILL.md +1 -1
  7. package/dist/docs/assets/SOURCE_MAP.json +25 -25
  8. package/dist/docs/references/reference-memory-observational-memory.md +36 -0
  9. package/dist/index.cjs +7 -5
  10. package/dist/index.cjs.map +1 -1
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +7 -5
  13. package/dist/index.js.map +1 -1
  14. package/dist/{observational-memory-3DA7KJIH.js → observational-memory-53AFLLSH.js} +3 -3
  15. package/dist/{observational-memory-3DA7KJIH.js.map → observational-memory-53AFLLSH.js.map} +1 -1
  16. package/dist/{observational-memory-SA5RITIG.cjs → observational-memory-UCOMAMSF.cjs} +17 -17
  17. package/dist/{observational-memory-SA5RITIG.cjs.map → observational-memory-UCOMAMSF.cjs.map} +1 -1
  18. package/dist/processors/index.cjs +15 -15
  19. package/dist/processors/index.js +1 -1
  20. package/dist/processors/observational-memory/observational-memory.d.ts +6 -1
  21. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  22. package/dist/processors/observational-memory/observer-agent.d.ts +2 -2
  23. package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
  24. package/dist/processors/observational-memory/reflector-agent.d.ts +5 -3
  25. package/dist/processors/observational-memory/reflector-agent.d.ts.map +1 -1
  26. package/dist/processors/observational-memory/token-counter.d.ts.map +1 -1
  27. package/dist/processors/observational-memory/types.d.ts +10 -0
  28. package/dist/processors/observational-memory/types.d.ts.map +1 -1
  29. package/package.json +6 -6
  30. package/dist/chunk-F5P5HTMC.js.map +0 -1
  31. package/dist/chunk-LXATBJ2L.cjs.map +0 -1
@@ -18,54 +18,6 @@ var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
18
18
  // src/processors/observational-memory/observational-memory.ts
19
19
 
20
20
  // src/processors/observational-memory/observer-agent.ts
21
- var LEGACY_OBSERVER_EXTRACTION_INSTRUCTIONS = `CRITICAL: DISTINGUISH USER ASSERTIONS FROM QUESTIONS
22
-
23
- When the user TELLS you something about themselves, mark it as an assertion:
24
- - "I have two kids" \u2192 \u{1F534} (14:30) User stated has two kids
25
- - "I work at Acme Corp" \u2192 \u{1F534} (14:31) User stated works at Acme Corp
26
- - "I graduated in 2019" \u2192 \u{1F534} (14:32) User stated graduated in 2019
27
-
28
- When the user ASKS about something, mark it as a question/request:
29
- - "Can you help me with X?" \u2192 \u{1F7E1} (15:00) User asked help with X
30
- - "What's the best way to do Y?" \u2192 \u{1F7E1} (15:01) User asked best way to do Y
31
-
32
- USER ASSERTIONS ARE AUTHORITATIVE. The user is the source of truth about their own life.
33
- If a user previously stated something and later asks a question about the same topic,
34
- the assertion is the answer - the question doesn't invalidate what they already told you.
35
-
36
- TEMPORAL ANCHORING:
37
- Convert relative times to estimated dates based on the message timestamp.
38
- Include the user's original phrasing in quotes, then add an estimated date or range.
39
- Ranges may span multiple months - e.g., "within the last month" on July 15th could mean anytime in June to early July.
40
-
41
- BAD: User was given X by their friend last month.
42
- GOOD: User was given X by their friend "last month" (estimated mid-June to early July 202X).
43
-
44
- PRESERVE UNUSUAL PHRASING:
45
- When the user uses unexpected or non-standard terminology, quote their exact words.
46
-
47
- BAD: User exercised.
48
- GOOD: User stated they did a "movement session" (their term for exercise).
49
-
50
- CONVERSATION CONTEXT:
51
- - What the user is working on or asking about
52
- - Previous topics and their outcomes
53
- - What user understands or needs clarification on
54
- - Specific requirements or constraints mentioned
55
- - Contents of assistant learnings and summaries
56
- - Answers to users questions including full context to remember detailed summaries and explanations
57
- - Assistant explanations, especially complex ones. observe the fine details so that the assistant does not forget what they explained
58
- - Relevant code snippets
59
- - User preferences (like favourites, dislikes, preferences, etc)
60
- - Any specifically formatted text or ascii that would need to be reproduced or referenced in later interactions (preserve these verbatim in memory)
61
- - Any blocks of any text which the user and assistant are iteratively collaborating back and forth on should be preserved verbatim
62
- - When who/what/where/when is mentioned, note that in the observation. Example: if the user received went on a trip with someone, observe who that someone was, where the trip was, when it happened, and what happened, not just that the user went on the trip.
63
-
64
- ACTIONABLE INSIGHTS:
65
- - What worked well in explanations
66
- - What needs follow-up or clarification
67
- - User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
68
- var USE_LEGACY_PROMPT = process.env.OM_USE_LEGACY_PROMPT === "1" || process.env.OM_USE_LEGACY_PROMPT === "true";
69
21
  var USE_CONDENSED_PROMPT = process.env.OM_USE_CONDENSED_PROMPT === "1" || process.env.OM_USE_CONDENSED_PROMPT === "true";
70
22
  var CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS = `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
71
23
 
@@ -308,7 +260,7 @@ ACTIONABLE INSIGHTS:
308
260
  - What worked well in explanations
309
261
  - What needs follow-up or clarification
310
262
  - User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
311
- var OBSERVER_EXTRACTION_INSTRUCTIONS = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS : USE_LEGACY_PROMPT ? LEGACY_OBSERVER_EXTRACTION_INSTRUCTIONS : CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS;
263
+ var OBSERVER_EXTRACTION_INSTRUCTIONS = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS : CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS;
312
264
  var CONDENSED_OBSERVER_OUTPUT_FORMAT = `Use priority levels:
313
265
  - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context
314
266
  - \u{1F7E1} Medium: project details, learned information, tool results
@@ -409,7 +361,7 @@ var OBSERVER_GUIDELINES = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_GUIDELINES :
409
361
  - Make sure you start each observation with a priority emoji (\u{1F534}, \u{1F7E1}, \u{1F7E2})
410
362
  - Observe WHAT the agent did and WHAT it means, not HOW well it did it.
411
363
  - If the user provides detailed messages or code snippets, observe all important details.`;
412
- function buildObserverSystemPrompt(multiThread = false) {
364
+ function buildObserverSystemPrompt(multiThread = false, instruction) {
413
365
  const outputFormat = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_OUTPUT_FORMAT : OBSERVER_OUTPUT_FORMAT_BASE;
414
366
  if (multiThread) {
415
367
  return `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
@@ -467,7 +419,11 @@ ${OBSERVER_GUIDELINES}
467
419
 
468
420
  Remember: These observations are the assistant's ONLY memory. Make them count.
469
421
 
470
- User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority.`;
422
+ User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority.${instruction ? `
423
+
424
+ === CUSTOM INSTRUCTIONS ===
425
+
426
+ ${instruction}` : ""}`;
471
427
  }
472
428
  return `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
473
429
 
@@ -493,7 +449,11 @@ Simply output your observations without any thread-related markup.
493
449
 
494
450
  Remember: These observations are the assistant's ONLY memory. Make them count.
495
451
 
496
- User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.`;
452
+ User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.${instruction ? `
453
+
454
+ === CUSTOM INSTRUCTIONS ===
455
+
456
+ ${instruction}` : ""}`;
497
457
  }
498
458
  var OBSERVER_SYSTEM_PROMPT = buildObserverSystemPrompt();
499
459
  function formatMessagesForObserver(messages, options) {
@@ -526,7 +486,7 @@ ${maybeTruncate(resultStr, maxLen)}`;
526
486
  return `[Tool Call: ${inv.toolName}]
527
487
  ${maybeTruncate(argsStr, maxLen)}`;
528
488
  }
529
- if (part.type?.startsWith("data-om-observation-")) return "";
489
+ if (part.type?.startsWith("data-")) return "";
530
490
  return "";
531
491
  }).filter(Boolean).join("\n");
532
492
  } else if (msg.content?.content) {
@@ -759,7 +719,7 @@ function optimizeObservationsForContext(observations) {
759
719
  }
760
720
 
761
721
  // src/processors/observational-memory/reflector-agent.ts
762
- function buildReflectorSystemPrompt() {
722
+ function buildReflectorSystemPrompt(instruction) {
763
723
  return `You are the memory consciousness of an AI assistant. Your memory observation reflections will be the ONLY information the assistant has about past interactions with this user.
764
724
 
765
725
  The following instructions were given to another part of your psyche (the observer) to create memories.
@@ -852,7 +812,11 @@ Hint for the agent's immediate next message. Examples:
852
812
  - Call the view tool on src/example.ts to continue debugging.
853
813
  </suggested-response>
854
814
 
855
- User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.`;
815
+ User messages are extremely important. If the user asks a question or gives a new task, make it clear in <current-task> that this is the priority. If the assistant needs to respond to the user, indicate in <suggested-response> that it should pause for user reply before continuing other tasks.${instruction ? `
816
+
817
+ === CUSTOM INSTRUCTIONS ===
818
+
819
+ ${instruction}` : ""}`;
856
820
  }
857
821
  var COMPRESSION_GUIDANCE = {
858
822
  0: "",
@@ -884,6 +848,21 @@ Please re-process with much more aggressive compression:
884
848
  - Remove redundant information and merge overlapping observations
885
849
 
886
850
  Your current detail level was a 10/10, lets aim for a 6/10 detail level.
851
+ `,
852
+ 3: `
853
+ ## CRITICAL COMPRESSION REQUIRED
854
+
855
+ Your previous reflections have failed to compress sufficiently after multiple attempts.
856
+
857
+ Please re-process with maximum compression:
858
+ - Summarize the oldest observations (first 50-70%) into brief high-level paragraphs \u2014 only key facts, decisions, and outcomes
859
+ - For the most recent observations (last 30-50%), retain important details but still use a condensed style
860
+ - Ruthlessly merge related observations \u2014 if 10 observations are about the same topic, combine into 1-2 lines
861
+ - Drop procedural details (tool calls, retries, intermediate steps) \u2014 keep only final outcomes
862
+ - Drop observations that are no longer relevant or have been superseded by newer information
863
+ - Preserve: names, dates, decisions, errors, user preferences, and architectural choices
864
+
865
+ Your current detail level was a 10/10, lets aim for a 4/10 detail level.
887
866
  `
888
867
  };
889
868
  function buildReflectorPrompt(observations, manualPrompt, compressionLevel, skipContinuationHints) {
@@ -1024,7 +1003,7 @@ var TokenCounter = class _TokenCounter {
1024
1003
  `Unhandled tool-invocation state '${part.toolInvocation?.state}' in token counting for part type '${part.type}'`
1025
1004
  );
1026
1005
  }
1027
- } else {
1006
+ } else if (typeof part.type === "string" && part.type.startsWith("data-")) ; else {
1028
1007
  tokenString += JSON.stringify(part);
1029
1008
  }
1030
1009
  }
@@ -1669,7 +1648,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
1669
1648
  blockAfter: asyncBufferingDisabled ? void 0 : this.resolveBlockAfter(
1670
1649
  config.observation?.blockAfter ?? (config.observation?.bufferTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.observation.bufferTokens ? 1.2 : void 0),
1671
1650
  config.observation?.messageTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.observation.messageTokens
1672
- )
1651
+ ),
1652
+ instruction: config.observation?.instruction
1673
1653
  };
1674
1654
  this.reflectionConfig = {
1675
1655
  model: reflectionModel,
@@ -1684,7 +1664,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
1684
1664
  blockAfter: asyncBufferingDisabled ? void 0 : this.resolveBlockAfter(
1685
1665
  config.reflection?.blockAfter ?? (config.reflection?.bufferActivation ?? OBSERVATIONAL_MEMORY_DEFAULTS.reflection.bufferActivation ? 1.2 : void 0),
1686
1666
  config.reflection?.observationTokens ?? OBSERVATIONAL_MEMORY_DEFAULTS.reflection.observationTokens
1687
- )
1667
+ ),
1668
+ instruction: config.reflection?.instruction
1688
1669
  };
1689
1670
  this.tokenCounter = new TokenCounter();
1690
1671
  this.onDebugEvent = config.onDebugEvent;
@@ -1900,7 +1881,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
1900
1881
  */
1901
1882
  getObserverAgent() {
1902
1883
  if (!this.observerAgent) {
1903
- const systemPrompt = buildObserverSystemPrompt();
1884
+ const systemPrompt = buildObserverSystemPrompt(false, this.observationConfig.instruction);
1904
1885
  this.observerAgent = new agent.Agent({
1905
1886
  id: "observational-memory-observer",
1906
1887
  name: "Observer",
@@ -1915,7 +1896,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
1915
1896
  */
1916
1897
  getReflectorAgent() {
1917
1898
  if (!this.reflectorAgent) {
1918
- const systemPrompt = buildReflectorSystemPrompt();
1899
+ const systemPrompt = buildReflectorSystemPrompt(this.reflectionConfig.instruction);
1919
1900
  this.reflectorAgent = new agent.Agent({
1920
1901
  id: "observational-memory-reflector",
1921
1902
  name: "Reflector",
@@ -2146,7 +2127,11 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2146
2127
  for (let i = allMsgs.length - 1; i >= 0; i--) {
2147
2128
  const msg = allMsgs[i];
2148
2129
  if (msg?.role === "assistant" && msg.content?.parts && Array.isArray(msg.content.parts)) {
2149
- msg.content.parts.push(marker);
2130
+ const markerData = marker.data;
2131
+ const alreadyPresent = markerData?.cycleId && msg.content.parts.some((p) => p?.type === marker.type && p?.data?.cycleId === markerData.cycleId);
2132
+ if (!alreadyPresent) {
2133
+ msg.content.parts.push(marker);
2134
+ }
2150
2135
  try {
2151
2136
  await this.messageHistory.persistMessages({
2152
2137
  messages: [msg],
@@ -2175,7 +2160,11 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2175
2160
  const messages = result?.messages ?? [];
2176
2161
  for (const msg of messages) {
2177
2162
  if (msg?.role === "assistant" && msg.content?.parts && Array.isArray(msg.content.parts)) {
2178
- msg.content.parts.push(marker);
2163
+ const markerData = marker.data;
2164
+ const alreadyPresent = markerData?.cycleId && msg.content.parts.some((p) => p?.type === marker.type && p?.data?.cycleId === markerData.cycleId);
2165
+ if (!alreadyPresent) {
2166
+ msg.content.parts.push(marker);
2167
+ }
2179
2168
  await this.messageHistory.persistMessages({
2180
2169
  messages: [msg],
2181
2170
  threadId,
@@ -2402,7 +2391,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2402
2391
  ...this.observationConfig.modelSettings
2403
2392
  },
2404
2393
  providerOptions: this.observationConfig.providerOptions,
2405
- ...abortSignal ? { abortSignal } : {}
2394
+ ...abortSignal ? { abortSignal } : {},
2395
+ ...options?.requestContext ? { requestContext: options.requestContext } : {}
2406
2396
  }),
2407
2397
  abortSignal
2408
2398
  );
@@ -2425,12 +2415,12 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2425
2415
  * Returns per-thread results with observations, currentTask, and suggestedContinuation,
2426
2416
  * plus the total usage for the batch.
2427
2417
  */
2428
- async callMultiThreadObserver(existingObservations, messagesByThread, threadOrder, abortSignal) {
2418
+ async callMultiThreadObserver(existingObservations, messagesByThread, threadOrder, abortSignal, requestContext) {
2429
2419
  const agent$1 = new agent.Agent({
2430
2420
  id: "multi-thread-observer",
2431
2421
  name: "multi-thread-observer",
2432
2422
  model: this.observationConfig.model,
2433
- instructions: buildObserverSystemPrompt(true)
2423
+ instructions: buildObserverSystemPrompt(true, this.observationConfig.instruction)
2434
2424
  });
2435
2425
  const prompt = buildMultiThreadObserverPrompt(existingObservations, messagesByThread, threadOrder);
2436
2426
  const allMessages = [];
@@ -2446,7 +2436,8 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2446
2436
  ...this.observationConfig.modelSettings
2447
2437
  },
2448
2438
  providerOptions: this.observationConfig.providerOptions,
2449
- ...abortSignal ? { abortSignal } : {}
2439
+ ...abortSignal ? { abortSignal } : {},
2440
+ ...requestContext ? { requestContext } : {}
2450
2441
  }),
2451
2442
  abortSignal
2452
2443
  );
@@ -2478,68 +2469,79 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2478
2469
  * Call the Reflector agent to condense observations.
2479
2470
  * Includes compression validation and retry logic.
2480
2471
  */
2481
- async callReflector(observations, manualPrompt, streamContext, observationTokensThreshold, abortSignal, skipContinuationHints, compressionStartLevel) {
2472
+ async callReflector(observations, manualPrompt, streamContext, observationTokensThreshold, abortSignal, skipContinuationHints, compressionStartLevel, requestContext) {
2482
2473
  const agent = this.getReflectorAgent();
2483
2474
  const originalTokens = this.tokenCounter.countObservations(observations);
2484
2475
  const targetThreshold = observationTokensThreshold ?? this.getMaxThreshold(this.reflectionConfig.observationTokens);
2485
2476
  let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
2486
- const firstLevel = compressionStartLevel ?? 0;
2487
- const retryLevel = Math.min(firstLevel + 1, 2);
2488
- let prompt = buildReflectorPrompt(observations, manualPrompt, firstLevel, skipContinuationHints);
2489
- omDebug(
2490
- `[OM:callReflector] starting first attempt: originalTokens=${originalTokens}, targetThreshold=${targetThreshold}, promptLen=${prompt.length}, skipContinuationHints=${skipContinuationHints}`
2491
- );
2492
- let chunkCount = 0;
2493
- const generatePromise = agent.generate(prompt, {
2494
- modelSettings: {
2495
- ...this.reflectionConfig.modelSettings
2496
- },
2497
- providerOptions: this.reflectionConfig.providerOptions,
2498
- ...abortSignal ? { abortSignal } : {},
2499
- onChunk(chunk) {
2500
- chunkCount++;
2501
- if (chunkCount === 1 || chunkCount % 50 === 0) {
2502
- const preview = chunk.type === "text-delta" ? ` text="${chunk.textDelta?.slice(0, 80)}..."` : chunk.type === "tool-call" ? ` tool=${chunk.toolName}` : "";
2503
- omDebug(`[OM:callReflector] chunk#${chunkCount}: type=${chunk.type}${preview}`);
2504
- }
2505
- },
2506
- onFinish(event) {
2507
- omDebug(
2508
- `[OM:callReflector] onFinish: chunks=${chunkCount}, finishReason=${event.finishReason}, inputTokens=${event.usage?.inputTokens}, outputTokens=${event.usage?.outputTokens}, textLen=${event.text?.length}`
2509
- );
2510
- },
2511
- onAbort(event) {
2512
- omDebug(`[OM:callReflector] onAbort: chunks=${chunkCount}, reason=${event?.reason ?? "unknown"}`);
2513
- },
2514
- onError({ error }) {
2515
- omError(`[OM:callReflector] onError after ${chunkCount} chunks`, error);
2477
+ let currentLevel = compressionStartLevel ?? 0;
2478
+ const maxLevel = 3;
2479
+ let parsed = { observations: "", suggestedContinuation: void 0 };
2480
+ let reflectedTokens = 0;
2481
+ let attemptNumber = 0;
2482
+ while (currentLevel <= maxLevel) {
2483
+ attemptNumber++;
2484
+ const isRetry = attemptNumber > 1;
2485
+ const prompt = buildReflectorPrompt(observations, manualPrompt, currentLevel, skipContinuationHints);
2486
+ omDebug(
2487
+ `[OM:callReflector] ${isRetry ? `retry #${attemptNumber - 1}` : "first attempt"}: level=${currentLevel}, originalTokens=${originalTokens}, targetThreshold=${targetThreshold}, promptLen=${prompt.length}, skipContinuationHints=${skipContinuationHints}`
2488
+ );
2489
+ let chunkCount = 0;
2490
+ const result = await this.withAbortCheck(
2491
+ () => agent.generate(prompt, {
2492
+ modelSettings: {
2493
+ ...this.reflectionConfig.modelSettings
2494
+ },
2495
+ providerOptions: this.reflectionConfig.providerOptions,
2496
+ ...abortSignal ? { abortSignal } : {},
2497
+ ...requestContext ? { requestContext } : {},
2498
+ ...attemptNumber === 1 ? {
2499
+ onChunk(chunk) {
2500
+ chunkCount++;
2501
+ if (chunkCount === 1 || chunkCount % 50 === 0) {
2502
+ const preview = chunk.type === "text-delta" ? ` text="${chunk.textDelta?.slice(0, 80)}..."` : chunk.type === "tool-call" ? ` tool=${chunk.toolName}` : "";
2503
+ omDebug(`[OM:callReflector] chunk#${chunkCount}: type=${chunk.type}${preview}`);
2504
+ }
2505
+ },
2506
+ onFinish(event) {
2507
+ omDebug(
2508
+ `[OM:callReflector] onFinish: chunks=${chunkCount}, finishReason=${event.finishReason}, inputTokens=${event.usage?.inputTokens}, outputTokens=${event.usage?.outputTokens}, textLen=${event.text?.length}`
2509
+ );
2510
+ },
2511
+ onAbort(event) {
2512
+ omDebug(`[OM:callReflector] onAbort: chunks=${chunkCount}, reason=${event?.reason ?? "unknown"}`);
2513
+ },
2514
+ onError({ error }) {
2515
+ omError(`[OM:callReflector] onError after ${chunkCount} chunks`, error);
2516
+ }
2517
+ } : {}
2518
+ }),
2519
+ abortSignal
2520
+ );
2521
+ omDebug(
2522
+ `[OM:callReflector] attempt #${attemptNumber} returned: textLen=${result.text?.length}, textPreview="${result.text?.slice(0, 120)}...", inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}`
2523
+ );
2524
+ const usage = result.totalUsage ?? result.usage;
2525
+ if (usage) {
2526
+ totalUsage.inputTokens += usage.inputTokens ?? 0;
2527
+ totalUsage.outputTokens += usage.outputTokens ?? 0;
2528
+ totalUsage.totalTokens += usage.totalTokens ?? 0;
2529
+ }
2530
+ parsed = parseReflectorOutput(result.text);
2531
+ reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
2532
+ omDebug(
2533
+ `[OM:callReflector] attempt #${attemptNumber} parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}`
2534
+ );
2535
+ if (validateCompression(reflectedTokens, targetThreshold) || currentLevel >= maxLevel) {
2536
+ break;
2516
2537
  }
2517
- });
2518
- let result = await this.withAbortCheck(async () => {
2519
- return await generatePromise;
2520
- }, abortSignal);
2521
- omDebug(
2522
- `[OM:callReflector] first attempt returned: textLen=${result.text?.length}, textPreview="${result.text?.slice(0, 120)}...", inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}, keys=${Object.keys(result).join(",")}`
2523
- );
2524
- const firstUsage = result.totalUsage ?? result.usage;
2525
- if (firstUsage) {
2526
- totalUsage.inputTokens += firstUsage.inputTokens ?? 0;
2527
- totalUsage.outputTokens += firstUsage.outputTokens ?? 0;
2528
- totalUsage.totalTokens += firstUsage.totalTokens ?? 0;
2529
- }
2530
- let parsed = parseReflectorOutput(result.text);
2531
- let reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
2532
- omDebug(
2533
- `[OM:callReflector] first attempt parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}`
2534
- );
2535
- if (!validateCompression(reflectedTokens, targetThreshold)) {
2536
2538
  if (streamContext?.writer) {
2537
2539
  const failedMarker = this.createObservationFailedMarker({
2538
2540
  cycleId: streamContext.cycleId,
2539
2541
  operationType: "reflection",
2540
2542
  startedAt: streamContext.startedAt,
2541
2543
  tokensAttempted: originalTokens,
2542
- error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying with compression guidance`,
2544
+ error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying at level ${currentLevel + 1}`,
2543
2545
  recordId: streamContext.recordId,
2544
2546
  threadId: streamContext.threadId
2545
2547
  });
@@ -2559,32 +2561,7 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2559
2561
  await streamContext.writer.custom(startMarker).catch(() => {
2560
2562
  });
2561
2563
  }
2562
- prompt = buildReflectorPrompt(observations, manualPrompt, retryLevel, skipContinuationHints);
2563
- omDebug(`[OM:callReflector] starting retry: promptLen=${prompt.length}`);
2564
- result = await this.withAbortCheck(
2565
- () => agent.generate(prompt, {
2566
- modelSettings: {
2567
- ...this.reflectionConfig.modelSettings
2568
- },
2569
- providerOptions: this.reflectionConfig.providerOptions,
2570
- ...abortSignal ? { abortSignal } : {}
2571
- }),
2572
- abortSignal
2573
- );
2574
- omDebug(
2575
- `[OM:callReflector] retry returned: textLen=${result.text?.length}, inputTokens=${result.usage?.inputTokens ?? result.totalUsage?.inputTokens}, outputTokens=${result.usage?.outputTokens ?? result.totalUsage?.outputTokens}`
2576
- );
2577
- const retryUsage = result.totalUsage ?? result.usage;
2578
- if (retryUsage) {
2579
- totalUsage.inputTokens += retryUsage.inputTokens ?? 0;
2580
- totalUsage.outputTokens += retryUsage.outputTokens ?? 0;
2581
- totalUsage.totalTokens += retryUsage.totalTokens ?? 0;
2582
- }
2583
- parsed = parseReflectorOutput(result.text);
2584
- reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
2585
- omDebug(
2586
- `[OM:callReflector] retry parsed: reflectedTokens=${reflectedTokens}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}`
2587
- );
2564
+ currentLevel = Math.min(currentLevel + 1, maxLevel);
2588
2565
  }
2589
2566
  return {
2590
2567
  observations: parsed.observations,
@@ -2704,8 +2681,8 @@ ${suggestedResponse}
2704
2681
  /**
2705
2682
  * Calculate all threshold-related values for observation decision making.
2706
2683
  */
2707
- calculateObservationThresholds(allMessages, _unobservedMessages, _pendingTokens, otherThreadTokens, currentObservationTokens, _record) {
2708
- const contextWindowTokens = this.tokenCounter.countMessages(allMessages);
2684
+ calculateObservationThresholds(_allMessages, unobservedMessages, _pendingTokens, otherThreadTokens, currentObservationTokens, _record) {
2685
+ const contextWindowTokens = this.tokenCounter.countMessages(unobservedMessages);
2709
2686
  const totalPendingTokens = Math.max(0, contextWindowTokens + otherThreadTokens);
2710
2687
  const threshold = this.calculateDynamicThreshold(this.observationConfig.messageTokens, currentObservationTokens);
2711
2688
  const baseReflectionThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
@@ -2807,7 +2784,7 @@ ${suggestedResponse}
2807
2784
  * Tries async activation first if enabled, then falls back to sync observation.
2808
2785
  * Returns whether observation succeeded.
2809
2786
  */
2810
- async handleThresholdReached(messageList, record, threadId, resourceId, threshold, lockKey, writer, abortSignal, abort) {
2787
+ async handleThresholdReached(messageList, record, threadId, resourceId, threshold, lockKey, writer, abortSignal, abort, requestContext) {
2811
2788
  let observationSucceeded = false;
2812
2789
  let updatedRecord = record;
2813
2790
  let activatedMessageIds;
@@ -2815,7 +2792,7 @@ ${suggestedResponse}
2815
2792
  let freshRecord = await this.getOrCreateRecord(threadId, resourceId);
2816
2793
  const freshAllMessages = messageList.get.all.db();
2817
2794
  let freshUnobservedMessages = this.getUnobservedMessages(freshAllMessages, freshRecord);
2818
- const freshContextTokens = this.tokenCounter.countMessages(freshAllMessages);
2795
+ const freshContextTokens = this.tokenCounter.countMessages(freshUnobservedMessages);
2819
2796
  let freshOtherThreadTokens = 0;
2820
2797
  if (this.scope === "resource" && resourceId) {
2821
2798
  const freshOtherContext = await this.loadOtherThreadsContext(resourceId, threadId);
@@ -2863,7 +2840,13 @@ ${suggestedResponse}
2863
2840
  omDebug(
2864
2841
  `[OM:threshold] activation succeeded, obsTokens=${updatedRecord.observationTokenCount}, activeObsLen=${updatedRecord.activeObservations?.length}`
2865
2842
  );
2866
- await this.maybeAsyncReflect(updatedRecord, updatedRecord.observationTokenCount ?? 0, writer, messageList);
2843
+ await this.maybeAsyncReflect(
2844
+ updatedRecord,
2845
+ updatedRecord.observationTokenCount ?? 0,
2846
+ writer,
2847
+ messageList,
2848
+ requestContext
2849
+ );
2867
2850
  return;
2868
2851
  }
2869
2852
  if (this.observationConfig.blockAfter && freshTotal >= this.observationConfig.blockAfter) {
@@ -2887,7 +2870,8 @@ ${suggestedResponse}
2887
2870
  resourceId,
2888
2871
  currentThreadMessages: freshUnobservedMessages,
2889
2872
  writer,
2890
- abortSignal
2873
+ abortSignal,
2874
+ requestContext
2891
2875
  });
2892
2876
  } else {
2893
2877
  await this.doSynchronousObservation({
@@ -2895,7 +2879,8 @@ ${suggestedResponse}
2895
2879
  threadId,
2896
2880
  unobservedMessages: freshUnobservedMessages,
2897
2881
  writer,
2898
- abortSignal
2882
+ abortSignal,
2883
+ requestContext
2899
2884
  });
2900
2885
  }
2901
2886
  updatedRecord = await this.getOrCreateRecord(threadId, resourceId);
@@ -3154,12 +3139,12 @@ ${suggestedResponse}
3154
3139
  }
3155
3140
  if (bufferedChunks.length > 0) {
3156
3141
  const allMsgsForCheck = messageList.get.all.db();
3142
+ const unobservedMsgsForCheck = this.getUnobservedMessages(allMsgsForCheck, record);
3157
3143
  const otherThreadTokensForCheck = unobservedContextBlocks ? this.tokenCounter.countString(unobservedContextBlocks) : 0;
3158
3144
  const currentObsTokensForCheck = record.observationTokenCount ?? 0;
3159
3145
  const { totalPendingTokens: step0PendingTokens, threshold: step0Threshold } = this.calculateObservationThresholds(
3160
3146
  allMsgsForCheck,
3161
- [],
3162
- // unobserved not needed for threshold calculation
3147
+ unobservedMsgsForCheck,
3163
3148
  0,
3164
3149
  // pendingTokens not needed — allMessages covers context
3165
3150
  otherThreadTokensForCheck,
@@ -3198,7 +3183,8 @@ ${suggestedResponse}
3198
3183
  observationTokens: record.observationTokenCount ?? 0,
3199
3184
  threadId,
3200
3185
  writer,
3201
- messageList
3186
+ messageList,
3187
+ requestContext
3202
3188
  });
3203
3189
  record = await this.getOrCreateRecord(threadId, resourceId);
3204
3190
  }
@@ -3209,13 +3195,20 @@ ${suggestedResponse}
3209
3195
  const obsTokens = record.observationTokenCount ?? 0;
3210
3196
  if (this.shouldReflect(obsTokens)) {
3211
3197
  omDebug(`[OM:step0-reflect] obsTokens=${obsTokens} over reflectThreshold, triggering reflection`);
3212
- await this.maybeReflect({ record, observationTokens: obsTokens, threadId, writer, messageList });
3198
+ await this.maybeReflect({
3199
+ record,
3200
+ observationTokens: obsTokens,
3201
+ threadId,
3202
+ writer,
3203
+ messageList,
3204
+ requestContext
3205
+ });
3213
3206
  record = await this.getOrCreateRecord(threadId, resourceId);
3214
3207
  } else if (this.isAsyncReflectionEnabled()) {
3215
3208
  const lockKey = this.getLockKey(threadId, resourceId);
3216
3209
  if (this.shouldTriggerAsyncReflection(obsTokens, lockKey, record)) {
3217
3210
  omDebug(`[OM:step0-reflect] obsTokens=${obsTokens} above activation point, triggering async reflection`);
3218
- await this.maybeAsyncReflect(record, obsTokens, writer, messageList);
3211
+ await this.maybeAsyncReflect(record, obsTokens, writer, messageList, requestContext);
3219
3212
  record = await this.getOrCreateRecord(threadId, resourceId);
3220
3213
  }
3221
3214
  }
@@ -3235,26 +3228,44 @@ ${suggestedResponse}
3235
3228
  record
3236
3229
  );
3237
3230
  const { totalPendingTokens, threshold } = thresholds;
3231
+ const bufferedChunkTokens = this.getBufferedChunks(record).reduce((sum, c) => sum + (c.tokenCount ?? 0), 0);
3232
+ const unbufferedPendingTokens = Math.max(0, totalPendingTokens - bufferedChunkTokens);
3238
3233
  const stateSealedIds = state.sealedIds ?? /* @__PURE__ */ new Set();
3239
3234
  const staticSealedIds = _ObservationalMemory.sealedMessageIds.get(threadId) ?? /* @__PURE__ */ new Set();
3240
3235
  const sealedIds = /* @__PURE__ */ new Set([...stateSealedIds, ...staticSealedIds]);
3241
3236
  state.sealedIds = sealedIds;
3242
3237
  const lockKey = this.getLockKey(threadId, resourceId);
3243
3238
  if (this.isAsyncObservationEnabled() && totalPendingTokens < threshold) {
3244
- const shouldTrigger = this.shouldTriggerAsyncObservation(totalPendingTokens, lockKey, record);
3239
+ const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
3245
3240
  omDebug(
3246
- `[OM:async-obs] belowThreshold: pending=${totalPendingTokens}, threshold=${threshold}, shouldTrigger=${shouldTrigger}, isBufferingObs=${record.isBufferingObservation}, lastBufferedAt=${record.lastBufferedAtTokens}`
3241
+ `[OM:async-obs] belowThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, shouldTrigger=${shouldTrigger}, isBufferingObs=${record.isBufferingObservation}, lastBufferedAt=${record.lastBufferedAtTokens}`
3247
3242
  );
3248
3243
  if (shouldTrigger) {
3249
- this.startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, totalPendingTokens);
3244
+ this.startAsyncBufferedObservation(
3245
+ record,
3246
+ threadId,
3247
+ unobservedMessages,
3248
+ lockKey,
3249
+ writer,
3250
+ unbufferedPendingTokens,
3251
+ requestContext
3252
+ );
3250
3253
  }
3251
3254
  } else if (this.isAsyncObservationEnabled()) {
3252
- const shouldTrigger = this.shouldTriggerAsyncObservation(totalPendingTokens, lockKey, record);
3255
+ const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
3253
3256
  omDebug(
3254
- `[OM:async-obs] atOrAboveThreshold: pending=${totalPendingTokens}, threshold=${threshold}, step=${stepNumber}, shouldTrigger=${shouldTrigger}`
3257
+ `[OM:async-obs] atOrAboveThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, step=${stepNumber}, shouldTrigger=${shouldTrigger}`
3255
3258
  );
3256
3259
  if (shouldTrigger) {
3257
- this.startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, totalPendingTokens);
3260
+ this.startAsyncBufferedObservation(
3261
+ record,
3262
+ threadId,
3263
+ unobservedMessages,
3264
+ lockKey,
3265
+ writer,
3266
+ unbufferedPendingTokens,
3267
+ requestContext
3268
+ );
3258
3269
  }
3259
3270
  }
3260
3271
  if (stepNumber > 0) {
@@ -3270,7 +3281,8 @@ ${suggestedResponse}
3270
3281
  lockKey,
3271
3282
  writer,
3272
3283
  abortSignal,
3273
- abort
3284
+ abort,
3285
+ requestContext
3274
3286
  );
3275
3287
  if (observationSucceeded) {
3276
3288
  const observedIds = activatedMessageIds?.length ? activatedMessageIds : Array.isArray(updatedRecord.observedMessageIds) ? updatedRecord.observedMessageIds : void 0;
@@ -3602,7 +3614,7 @@ ${newThreadSection}`;
3602
3614
  * Do synchronous observation (fallback when no buffering)
3603
3615
  */
3604
3616
  async doSynchronousObservation(opts) {
3605
- const { record, threadId, unobservedMessages, writer, abortSignal, reflectionHooks } = opts;
3617
+ const { record, threadId, unobservedMessages, writer, abortSignal, reflectionHooks, requestContext } = opts;
3606
3618
  this.emitDebugEvent({
3607
3619
  type: "observation_triggered",
3608
3620
  timestamp: /* @__PURE__ */ new Date(),
@@ -3655,7 +3667,8 @@ ${newThreadSection}`;
3655
3667
  const result = await this.callObserver(
3656
3668
  freshRecord?.activeObservations ?? record.activeObservations,
3657
3669
  messagesToObserve,
3658
- abortSignal
3670
+ abortSignal,
3671
+ { requestContext }
3659
3672
  );
3660
3673
  const existingObservations = freshRecord?.activeObservations ?? record.activeObservations ?? "";
3661
3674
  let newObservations;
@@ -3733,7 +3746,8 @@ ${result.observations}` : result.observations;
3733
3746
  threadId,
3734
3747
  writer,
3735
3748
  abortSignal,
3736
- reflectionHooks
3749
+ reflectionHooks,
3750
+ requestContext
3737
3751
  });
3738
3752
  } catch (error) {
3739
3753
  if (lastMessage?.id) {
@@ -3774,7 +3788,7 @@ ${result.observations}` : result.observations;
3774
3788
  * @param lockKey - Lock key for this scope
3775
3789
  * @param writer - Optional stream writer for emitting buffering markers
3776
3790
  */
3777
- startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, contextWindowTokens) {
3791
+ startAsyncBufferedObservation(record, threadId, unobservedMessages, lockKey, writer, contextWindowTokens, requestContext) {
3778
3792
  const bufferKey = this.getObservationBufferKey(lockKey);
3779
3793
  const currentTokens = contextWindowTokens ?? this.tokenCounter.countMessages(unobservedMessages) + (record.pendingMessageTokens ?? 0);
3780
3794
  _ObservationalMemory.lastBufferedBoundary.set(bufferKey, currentTokens);
@@ -3782,22 +3796,27 @@ ${result.observations}` : result.observations;
3782
3796
  this.storage.setBufferingObservationFlag(record.id, true, currentTokens).catch((err) => {
3783
3797
  omError("[OM] Failed to set buffering observation flag", err);
3784
3798
  });
3785
- const asyncOp = this.runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer).finally(
3786
- () => {
3787
- _ObservationalMemory.asyncBufferingOps.delete(bufferKey);
3788
- unregisterOp(record.id, "bufferingObservation");
3789
- this.storage.setBufferingObservationFlag(record.id, false).catch((err) => {
3790
- omError("[OM] Failed to clear buffering observation flag", err);
3791
- });
3792
- }
3793
- );
3799
+ const asyncOp = this.runAsyncBufferedObservation(
3800
+ record,
3801
+ threadId,
3802
+ unobservedMessages,
3803
+ bufferKey,
3804
+ writer,
3805
+ requestContext
3806
+ ).finally(() => {
3807
+ _ObservationalMemory.asyncBufferingOps.delete(bufferKey);
3808
+ unregisterOp(record.id, "bufferingObservation");
3809
+ this.storage.setBufferingObservationFlag(record.id, false).catch((err) => {
3810
+ omError("[OM] Failed to clear buffering observation flag", err);
3811
+ });
3812
+ });
3794
3813
  _ObservationalMemory.asyncBufferingOps.set(bufferKey, asyncOp);
3795
3814
  }
3796
3815
  /**
3797
3816
  * Internal method that waits for existing buffering operation and then runs new buffering.
3798
3817
  * This implements the mutex-wait behavior.
3799
3818
  */
3800
- async runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer) {
3819
+ async runAsyncBufferedObservation(record, threadId, unobservedMessages, bufferKey, writer, requestContext) {
3801
3820
  const existingOp = _ObservationalMemory.asyncBufferingOps.get(bufferKey);
3802
3821
  if (existingOp) {
3803
3822
  try {
@@ -3869,7 +3888,15 @@ ${result.observations}` : result.observations;
3869
3888
  omDebug(
3870
3889
  `[OM:bufferInput] cycleId=${cycleId}, msgCount=${messagesToBuffer.length}, msgTokens=${this.tokenCounter.countMessages(messagesToBuffer)}, ids=${messagesToBuffer.map((m) => `${m.id?.slice(0, 8)}@${m.createdAt ? new Date(m.createdAt).toISOString() : "none"}`).join(",")}`
3871
3890
  );
3872
- await this.doAsyncBufferedObservation(freshRecord, threadId, messagesToBuffer, cycleId, startedAt, writer);
3891
+ await this.doAsyncBufferedObservation(
3892
+ freshRecord,
3893
+ threadId,
3894
+ messagesToBuffer,
3895
+ cycleId,
3896
+ startedAt,
3897
+ writer,
3898
+ requestContext
3899
+ );
3873
3900
  const maxTs = this.getMaxMessageTimestamp(messagesToBuffer);
3874
3901
  const cursor = new Date(maxTs.getTime() + 1);
3875
3902
  _ObservationalMemory.lastBufferedAtTime.set(bufferKey, cursor);
@@ -3898,7 +3925,7 @@ ${result.observations}` : result.observations;
3898
3925
  * The observer sees: active observations + existing buffered observations + message history
3899
3926
  * (excluding already-buffered messages).
3900
3927
  */
3901
- async doAsyncBufferedObservation(record, threadId, messagesToBuffer, cycleId, startedAt, writer) {
3928
+ async doAsyncBufferedObservation(record, threadId, messagesToBuffer, cycleId, startedAt, writer, requestContext) {
3902
3929
  const bufferedChunks = this.getBufferedChunks(record);
3903
3930
  const bufferedChunksText = bufferedChunks.map((c) => c.observations).join("\n\n");
3904
3931
  const combinedObservations = this.combineObservationsForBuffering(record.activeObservations, bufferedChunksText);
@@ -3907,7 +3934,7 @@ ${result.observations}` : result.observations;
3907
3934
  messagesToBuffer,
3908
3935
  void 0,
3909
3936
  // No abort signal for background ops
3910
- { skipContinuationHints: true }
3937
+ { skipContinuationHints: true, requestContext }
3911
3938
  );
3912
3939
  let newObservations;
3913
3940
  if (this.scope === "resource") {
@@ -4006,11 +4033,20 @@ ${bufferedObservations}`;
4006
4033
  if (!freshChunks.length) {
4007
4034
  return { success: false };
4008
4035
  }
4036
+ const messageTokensThreshold = this.getMaxThreshold(this.observationConfig.messageTokens);
4037
+ if (messageList) {
4038
+ const freshPendingTokens = this.tokenCounter.countMessages(messageList.get.all.db());
4039
+ if (freshPendingTokens < messageTokensThreshold) {
4040
+ omDebug(
4041
+ `[OM:tryActivate] skipping activation: freshPendingTokens=${freshPendingTokens} < threshold=${messageTokensThreshold}`
4042
+ );
4043
+ return { success: false };
4044
+ }
4045
+ }
4009
4046
  const activationRatio = this.observationConfig.bufferActivation ?? 0.7;
4010
4047
  omDebug(
4011
4048
  `[OM:tryActivate] swapping: freshChunks=${freshChunks.length}, activationRatio=${activationRatio}, totalChunkTokens=${freshChunks.reduce((s, c) => s + (c.tokenCount ?? 0), 0)}`
4012
4049
  );
4013
- const messageTokensThreshold = this.getMaxThreshold(this.observationConfig.messageTokens);
4014
4050
  const activationResult = await this.storage.swapBufferedToActive({
4015
4051
  id: freshRecord.id,
4016
4052
  activationRatio,
@@ -4066,7 +4102,7 @@ ${bufferedObservations}`;
4066
4102
  * @param observationTokens - Current observation token count
4067
4103
  * @param lockKey - Lock key for this scope
4068
4104
  */
4069
- startAsyncBufferedReflection(record, observationTokens, lockKey, writer) {
4105
+ startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext) {
4070
4106
  const bufferKey = this.getReflectionBufferKey(lockKey);
4071
4107
  if (this.isAsyncBufferingInProgress(bufferKey)) {
4072
4108
  return;
@@ -4076,7 +4112,7 @@ ${bufferedObservations}`;
4076
4112
  this.storage.setBufferingReflectionFlag(record.id, true).catch((err) => {
4077
4113
  omError("[OM] Failed to set buffering reflection flag", err);
4078
4114
  });
4079
- const asyncOp = this.doAsyncBufferedReflection(record, bufferKey, writer).catch(async (error) => {
4115
+ const asyncOp = this.doAsyncBufferedReflection(record, bufferKey, writer, requestContext).catch(async (error) => {
4080
4116
  if (writer) {
4081
4117
  const failedMarker = this.createBufferingFailedMarker({
4082
4118
  cycleId: `reflect-buf-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
@@ -4105,7 +4141,7 @@ ${bufferedObservations}`;
4105
4141
  * Perform async buffered reflection - reflects observations and stores to bufferedReflection.
4106
4142
  * Does NOT create a new generation or update activeObservations.
4107
4143
  */
4108
- async doAsyncBufferedReflection(record, _bufferKey, writer) {
4144
+ async doAsyncBufferedReflection(record, _bufferKey, writer, requestContext) {
4109
4145
  const freshRecord = await this.storage.getObservationalMemory(record.threadId, record.resourceId);
4110
4146
  const currentRecord = freshRecord ?? record;
4111
4147
  const observationTokens = currentRecord.observationTokenCount ?? 0;
@@ -4123,7 +4159,7 @@ ${bufferedObservations}`;
4123
4159
  const activeObservations = allLines.slice(0, linesToReflect).join("\n");
4124
4160
  const reflectedObservationLineCount = linesToReflect;
4125
4161
  const sliceTokenEstimate = Math.round(avgTokensPerLine * linesToReflect);
4126
- const compressionTarget = Math.min(sliceTokenEstimate * bufferActivation, reflectThreshold);
4162
+ const compressionTarget = Math.round(sliceTokenEstimate * 0.75);
4127
4163
  omDebug(
4128
4164
  `[OM:reflect] doAsyncBufferedReflection: slicing observations for reflection \u2014 totalLines=${totalLines}, avgTokPerLine=${avgTokensPerLine.toFixed(1)}, activationPointTokens=${activationPointTokens}, linesToReflect=${linesToReflect}/${totalLines}, sliceTokenEstimate=${sliceTokenEstimate}, compressionTarget=${compressionTarget}`
4129
4165
  );
@@ -4153,8 +4189,9 @@ ${bufferedObservations}`;
4153
4189
  // No abort signal for background ops
4154
4190
  true,
4155
4191
  // Skip continuation hints for async buffering
4156
- 1
4192
+ 1,
4157
4193
  // Start at compression level 1 for buffered reflection
4194
+ requestContext
4158
4195
  );
4159
4196
  const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
4160
4197
  omDebug(
@@ -4175,7 +4212,7 @@ ${bufferedObservations}`;
4175
4212
  cycleId,
4176
4213
  operationType: "reflection",
4177
4214
  startedAt,
4178
- tokensBuffered: observationTokens,
4215
+ tokensBuffered: sliceTokenEstimate,
4179
4216
  bufferedTokens: reflectionTokenCount,
4180
4217
  recordId: currentRecord.id,
4181
4218
  threadId: currentRecord.threadId ?? "",
@@ -4278,7 +4315,16 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4278
4315
  * 4. Only triggers reflection AFTER all threads are observed
4279
4316
  */
4280
4317
  async doResourceScopedObservation(opts) {
4281
- const { record, currentThreadId, resourceId, currentThreadMessages, writer, abortSignal, reflectionHooks } = opts;
4318
+ const {
4319
+ record,
4320
+ currentThreadId,
4321
+ resourceId,
4322
+ currentThreadMessages,
4323
+ writer,
4324
+ abortSignal,
4325
+ reflectionHooks,
4326
+ requestContext
4327
+ } = opts;
4282
4328
  const { threads: allThreads } = await this.storage.listThreads({ filter: { resourceId } });
4283
4329
  const threadMetadataMap = /* @__PURE__ */ new Map();
4284
4330
  for (const thread of allThreads) {
@@ -4433,7 +4479,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4433
4479
  existingObservations,
4434
4480
  batch.threadMap,
4435
4481
  batch.threadIds,
4436
- abortSignal
4482
+ abortSignal,
4483
+ requestContext
4437
4484
  );
4438
4485
  return batchResult;
4439
4486
  });
@@ -4546,7 +4593,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4546
4593
  threadId: currentThreadId,
4547
4594
  writer,
4548
4595
  abortSignal,
4549
- reflectionHooks
4596
+ reflectionHooks,
4597
+ requestContext
4550
4598
  });
4551
4599
  } catch (error) {
4552
4600
  for (const [threadId, msgs] of threadsWithMessages) {
@@ -4582,7 +4630,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4582
4630
  * Only handles the async path — will never do synchronous (blocking) reflection.
4583
4631
  * Safe to call after buffered observation activation.
4584
4632
  */
4585
- async maybeAsyncReflect(record, observationTokens, writer, messageList) {
4633
+ async maybeAsyncReflect(record, observationTokens, writer, messageList, requestContext) {
4586
4634
  if (!this.isAsyncReflectionEnabled()) return;
4587
4635
  const lockKey = this.getLockKey(record.threadId, record.resourceId);
4588
4636
  const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
@@ -4593,7 +4641,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4593
4641
  const shouldTrigger = this.shouldTriggerAsyncReflection(observationTokens, lockKey, record);
4594
4642
  omDebug(`[OM:reflect] below threshold: shouldTrigger=${shouldTrigger}`);
4595
4643
  if (shouldTrigger) {
4596
- this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
4644
+ this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
4597
4645
  }
4598
4646
  return;
4599
4647
  }
@@ -4610,7 +4658,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4610
4658
  omDebug(`[OM:reflect] activationSuccess=${activationSuccess}`);
4611
4659
  if (activationSuccess) return;
4612
4660
  omDebug(`[OM:reflect] no buffered reflection, starting background reflection...`);
4613
- this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
4661
+ this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
4614
4662
  }
4615
4663
  /**
4616
4664
  * Check if reflection needed and trigger if so.
@@ -4619,12 +4667,12 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4619
4667
  * in the background at intervals, and activated when the threshold is reached.
4620
4668
  */
4621
4669
  async maybeReflect(opts) {
4622
- const { record, observationTokens, writer, abortSignal, messageList, reflectionHooks } = opts;
4670
+ const { record, observationTokens, writer, abortSignal, messageList, reflectionHooks, requestContext } = opts;
4623
4671
  const lockKey = this.getLockKey(record.threadId, record.resourceId);
4624
4672
  const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
4625
4673
  if (this.isAsyncReflectionEnabled() && observationTokens < reflectThreshold) {
4626
4674
  if (this.shouldTriggerAsyncReflection(observationTokens, lockKey, record)) {
4627
- this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
4675
+ this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
4628
4676
  }
4629
4677
  }
4630
4678
  if (!this.shouldReflect(observationTokens)) {
@@ -4651,7 +4699,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4651
4699
  omDebug(
4652
4700
  `[OM:reflect] async activation failed, no blockAfter or below it (obsTokens=${observationTokens}, blockAfter=${this.reflectionConfig.blockAfter}) \u2014 starting background reflection`
4653
4701
  );
4654
- this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer);
4702
+ this.startAsyncBufferedReflection(record, observationTokens, lockKey, writer, requestContext);
4655
4703
  return;
4656
4704
  }
4657
4705
  }
@@ -4694,7 +4742,10 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4694
4742
  void 0,
4695
4743
  streamContext,
4696
4744
  reflectThreshold,
4697
- abortSignal
4745
+ abortSignal,
4746
+ void 0,
4747
+ void 0,
4748
+ requestContext
4698
4749
  );
4699
4750
  const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
4700
4751
  await this.storage.createReflectionGeneration({
@@ -4758,7 +4809,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4758
4809
  * to pass conversation messages without duplicating them into Mastra's DB.
4759
4810
  */
4760
4811
  async observe(opts) {
4761
- const { threadId, resourceId, messages, hooks } = opts;
4812
+ const { threadId, resourceId, messages, hooks, requestContext } = opts;
4762
4813
  const lockKey = this.getLockKey(threadId, resourceId);
4763
4814
  const reflectionHooks = hooks ? { onReflectionStart: hooks.onReflectionStart, onReflectionEnd: hooks.onReflectionEnd } : void 0;
4764
4815
  await this.withLock(lockKey, async () => {
@@ -4778,7 +4829,8 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4778
4829
  currentThreadId: threadId,
4779
4830
  resourceId,
4780
4831
  currentThreadMessages: currentMessages,
4781
- reflectionHooks
4832
+ reflectionHooks,
4833
+ requestContext
4782
4834
  });
4783
4835
  } finally {
4784
4836
  hooks?.onObservationEnd?.();
@@ -4800,7 +4852,13 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4800
4852
  }
4801
4853
  hooks?.onObservationStart?.();
4802
4854
  try {
4803
- await this.doSynchronousObservation({ record: freshRecord, threadId, unobservedMessages, reflectionHooks });
4855
+ await this.doSynchronousObservation({
4856
+ record: freshRecord,
4857
+ threadId,
4858
+ unobservedMessages,
4859
+ reflectionHooks,
4860
+ requestContext
4861
+ });
4804
4862
  } finally {
4805
4863
  hooks?.onObservationEnd?.();
4806
4864
  }
@@ -4818,7 +4876,7 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4818
4876
  * );
4819
4877
  * ```
4820
4878
  */
4821
- async reflect(threadId, resourceId, prompt) {
4879
+ async reflect(threadId, resourceId, prompt, requestContext) {
4822
4880
  const record = await this.getOrCreateRecord(threadId, resourceId);
4823
4881
  if (!record.activeObservations) {
4824
4882
  return;
@@ -4827,7 +4885,16 @@ ${unreflectedContent}` : freshRecord.bufferedReflection;
4827
4885
  registerOp(record.id, "reflecting");
4828
4886
  try {
4829
4887
  const reflectThreshold = this.getMaxThreshold(this.reflectionConfig.observationTokens);
4830
- const reflectResult = await this.callReflector(record.activeObservations, prompt, void 0, reflectThreshold);
4888
+ const reflectResult = await this.callReflector(
4889
+ record.activeObservations,
4890
+ prompt,
4891
+ void 0,
4892
+ reflectThreshold,
4893
+ void 0,
4894
+ void 0,
4895
+ void 0,
4896
+ requestContext
4897
+ );
4831
4898
  const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
4832
4899
  await this.storage.createReflectionGeneration({
4833
4900
  currentRecord: record,
@@ -4909,5 +4976,5 @@ exports.formatMessagesForObserver = formatMessagesForObserver;
4909
4976
  exports.hasCurrentTaskSection = hasCurrentTaskSection;
4910
4977
  exports.optimizeObservationsForContext = optimizeObservationsForContext;
4911
4978
  exports.parseObserverOutput = parseObserverOutput;
4912
- //# sourceMappingURL=chunk-LXATBJ2L.cjs.map
4913
- //# sourceMappingURL=chunk-LXATBJ2L.cjs.map
4979
+ //# sourceMappingURL=chunk-QRKB5I2S.cjs.map
4980
+ //# sourceMappingURL=chunk-QRKB5I2S.cjs.map