@mastra/memory 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/dist/{chunk-D4AWAGLM.js → chunk-DF7NDDSM.js} +282 -190
  3. package/dist/chunk-DF7NDDSM.js.map +1 -0
  4. package/dist/{chunk-QRKB5I2S.cjs → chunk-LLTHE64H.cjs} +281 -189
  5. package/dist/chunk-LLTHE64H.cjs.map +1 -0
  6. package/dist/docs/SKILL.md +1 -1
  7. package/dist/docs/assets/SOURCE_MAP.json +25 -25
  8. package/dist/docs/references/docs-memory-observational-memory.md +2 -0
  9. package/dist/docs/references/reference-memory-memory-class.md +1 -1
  10. package/dist/docs/references/reference-memory-observational-memory.md +1 -0
  11. package/dist/index.cjs +3 -3
  12. package/dist/index.cjs.map +1 -1
  13. package/dist/index.js +3 -3
  14. package/dist/index.js.map +1 -1
  15. package/dist/{observational-memory-UCOMAMSF.cjs → observational-memory-4PCXEZIS.cjs} +17 -17
  16. package/dist/{observational-memory-UCOMAMSF.cjs.map → observational-memory-4PCXEZIS.cjs.map} +1 -1
  17. package/dist/{observational-memory-53AFLLSH.js → observational-memory-ZNTAIUGT.js} +3 -3
  18. package/dist/{observational-memory-53AFLLSH.js.map → observational-memory-ZNTAIUGT.js.map} +1 -1
  19. package/dist/processors/index.cjs +15 -15
  20. package/dist/processors/index.js +1 -1
  21. package/dist/processors/observational-memory/observational-memory.d.ts +17 -1
  22. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  23. package/dist/processors/observational-memory/observer-agent.d.ts +26 -5
  24. package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
  25. package/dist/processors/observational-memory/reflector-agent.d.ts.map +1 -1
  26. package/dist/processors/observational-memory/types.d.ts +14 -3
  27. package/dist/processors/observational-memory/types.d.ts.map +1 -1
  28. package/package.json +11 -11
  29. package/dist/chunk-D4AWAGLM.js.map +0 -1
  30. package/dist/chunk-QRKB5I2S.cjs.map +0 -1
@@ -18,54 +18,7 @@ var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
18
18
  // src/processors/observational-memory/observational-memory.ts
19
19
 
20
20
  // src/processors/observational-memory/observer-agent.ts
21
- var USE_CONDENSED_PROMPT = process.env.OM_USE_CONDENSED_PROMPT === "1" || process.env.OM_USE_CONDENSED_PROMPT === "true";
22
- var CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS = `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
23
-
24
- CORE PRINCIPLES:
25
-
26
- 1. BE SPECIFIC - Vague observations are useless. Capture details that distinguish and identify.
27
- 2. ANCHOR IN TIME - Note when things happened and when they were said.
28
- 3. TRACK STATE CHANGES - When information updates or supersedes previous info, make it explicit.
29
- 4. USE COMMON SENSE - If it would help the assistant remember later, observe it.
30
-
31
- ASSERTIONS VS QUESTIONS:
32
- - User TELLS you something \u2192 \u{1F534} "User stated [fact]"
33
- - User ASKS something \u2192 \u{1F7E1} "User asked [question]"
34
- - User assertions are authoritative. They are the source of truth about their own life.
35
-
36
- TEMPORAL ANCHORING:
37
- - Always include message time at the start: (14:30) User stated...
38
- - Add estimated date at the END only for relative time references:
39
- "User will visit parents this weekend. (meaning Jan 18-19)"
40
- - Don't add end dates for present-moment statements or vague terms like "recently"
41
- - Split multi-event statements into separate observations, each with its own date
42
-
43
- DETAILS TO ALWAYS PRESERVE:
44
- - Names, handles, usernames, titles (@username, "Dr. Smith")
45
- - Numbers, counts, quantities (4 items, 3 sessions, 27th in list)
46
- - Measurements, percentages, statistics (5kg, 20% improvement, 85% accuracy)
47
- - Sequences and orderings (steps 1-5, chord progression, lucky numbers)
48
- - Prices, dates, times, durations ($50, March 15, 2 hours)
49
- - Locations and distinguishing attributes (near X, based in Y, specializes in Z)
50
- - User's specific role (presenter, volunteer, organizer - not just "attended")
51
- - Exact phrasing when unusual ("movement session" for exercise)
52
- - Verbatim text being collaborated on (code, formatted text, ASCII art)
53
-
54
- WHEN ASSISTANT PROVIDES LISTS/RECOMMENDATIONS:
55
- Don't just say "Assistant recommended 5 hotels." Capture what distinguishes each:
56
- "Assistant recommended: Hotel A (near station), Hotel B (pet-friendly), Hotel C (has pool)..."
57
-
58
- STATE CHANGES:
59
- When user updates information, note what changed:
60
- "User will use the new method (replacing the old approach)"
61
-
62
- WHO/WHAT/WHERE/WHEN:
63
- Capture all dimensions. Not just "User went on a trip" but who with, where, when, and what happened.
64
-
65
- Don't repeat observations that have already been captured in previous sessions.
66
-
67
- REMEMBER: These observations are your ENTIRE memory. Any detail you fail to observe is permanently forgotten. Use common sense - if something seems like it might be important to remember, it probably is. When in doubt, observe it.`;
68
- var CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS = `CRITICAL: DISTINGUISH USER ASSERTIONS FROM QUESTIONS
21
+ var OBSERVER_EXTRACTION_INSTRUCTIONS = `CRITICAL: DISTINGUISH USER ASSERTIONS FROM QUESTIONS
69
22
 
70
23
  When the user TELLS you something about themselves, mark it as an assertion:
71
24
  - "I have two kids" \u2192 \u{1F534} (14:30) User stated has two kids
@@ -73,8 +26,8 @@ When the user TELLS you something about themselves, mark it as an assertion:
73
26
  - "I graduated in 2019" \u2192 \u{1F534} (14:32) User stated graduated in 2019
74
27
 
75
28
  When the user ASKS about something, mark it as a question/request:
76
- - "Can you help me with X?" \u2192 \u{1F7E1} (15:00) User asked help with X
77
- - "What's the best way to do Y?" \u2192 \u{1F7E1} (15:01) User asked best way to do Y
29
+ - "Can you help me with X?" \u2192 \u{1F534} (15:00) User asked help with X
30
+ - "What's the best way to do Y?" \u2192 \u{1F534} (15:01) User asked best way to do Y
78
31
 
79
32
  Distinguish between QUESTIONS and STATEMENTS OF INTENT:
80
33
  - "Can you recommend..." \u2192 Question (extract as "User asked...")
@@ -256,60 +209,39 @@ CONVERSATION CONTEXT:
256
209
  - When who/what/where/when is mentioned, note that in the observation. Example: if the user received went on a trip with someone, observe who that someone was, where the trip was, when it happened, and what happened, not just that the user went on the trip.
257
210
  - For any described entity (like a person, place, thing, etc), preserve the attributes that would help identify or describe the specific entity later: location ("near X"), specialty ("focuses on Y"), unique feature ("has Z"), relationship ("owned by W"), or other details. The entity's name is important, but so are any additional details that distinguish it. If there are a list of entities, preserve these details for each of them.
258
211
 
259
- ACTIONABLE INSIGHTS:
260
- - What worked well in explanations
261
- - What needs follow-up or clarification
262
- - User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
263
- var OBSERVER_EXTRACTION_INSTRUCTIONS = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_EXTRACTION_INSTRUCTIONS : CURRENT_OBSERVER_EXTRACTION_INSTRUCTIONS;
264
- var CONDENSED_OBSERVER_OUTPUT_FORMAT = `Use priority levels:
265
- - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context
266
- - \u{1F7E1} Medium: project details, learned information, tool results
267
- - \u{1F7E2} Low: minor details, uncertain observations
212
+ USER MESSAGE CAPTURE:
213
+ - Short and medium-length user messages should be captured nearly verbatim in your own words.
214
+ - For very long user messages, summarize but quote key phrases that carry specific intent or meaning.
215
+ - This is critical for continuity: when the conversation window shrinks, the observations are the only record of what the user said.
268
216
 
269
- Group observations by date, then list each with 24-hour time.
270
- Group related observations (like tool sequences) by indenting.
217
+ AVOIDING REPETITIVE OBSERVATIONS:
218
+ - Do NOT repeat the same observation across multiple turns if there is no new information.
219
+ - When the agent performs repeated similar actions (e.g., browsing files, running the same tool type multiple times), group them into a single parent observation with sub-bullets for each new result.
271
220
 
272
- <observations>
273
- Date: Dec 4, 2025
274
- * \u{1F534} (09:15) User stated they have 3 kids: Emma (12), Jake (9), and Lily (5)
275
- * \u{1F534} (09:16) User's anniversary is March 15
276
- * \u{1F7E1} (09:20) User asked how to optimize database queries
277
- * \u{1F7E1} (10:30) User working on auth refactor - targeting 50% latency reduction
278
- * \u{1F7E1} (10:45) Assistant recommended hotels: Grand Plaza (downtown, $180/night), Seaside Inn (near beach, pet-friendly), Mountain Lodge (has pool, free breakfast)
279
- * \u{1F534} (11:00) User's friend @maria_dev recommended using Redis for caching
280
- * \u{1F7E1} (11:15) User attended the tech conference as a speaker (presented on microservices)
281
- * \u{1F534} (11:30) User will visit parents this weekend (meaning Dec 7-8, 2025)
282
- * \u{1F7E1} (14:00) Agent debugging auth issue
283
- * -> ran git status, found 3 modified files
284
- * -> viewed auth.ts:45-60, found missing null check
285
- * -> applied fix, tests now pass
286
- * \u{1F7E1} (14:30) Assistant provided dataset stats: 7,342 samples, 89.6% accuracy, 23ms inference time
287
- * \u{1F534} (15:00) User's lucky numbers from fortune cookie: 7, 14, 23, 38, 42, 49
221
+ Example \u2014 BAD (repetitive):
222
+ * \u{1F7E1} (14:30) Agent used view tool on src/auth.ts
223
+ * \u{1F7E1} (14:31) Agent used view tool on src/users.ts
224
+ * \u{1F7E1} (14:32) Agent used view tool on src/routes.ts
288
225
 
289
- Date: Dec 5, 2025
290
- * \u{1F534} (09:00) User switched from Python to TypeScript for the project (no longer using Python)
291
- * \u{1F7E1} (09:30) User bought running shoes for $120 at SportMart (downtown location)
292
- * \u{1F534} (10:00) User prefers morning meetings, not afternoon (updating previous preference)
293
- * \u{1F7E1} (10:30) User went to Italy with their sister last summer (meaning July 2025), visited Rome and Florence for 2 weeks
294
- * \u{1F534} (10:45) User's dentist appointment is next Tuesday (meaning Dec 10, 2025)
295
- * \u{1F7E2} (11:00) User mentioned they might try the new coffee shop
296
- </observations>
226
+ Example \u2014 GOOD (grouped):
227
+ * \u{1F7E1} (14:30) Agent browsed source files for auth flow
228
+ * -> viewed src/auth.ts \u2014 found token validation logic
229
+ * -> viewed src/users.ts \u2014 found user lookup by email
230
+ * -> viewed src/routes.ts \u2014 found middleware chain
297
231
 
298
- <current-task>
299
- Primary: Implementing OAuth2 flow for the auth refactor
300
- Secondary: Waiting for user to confirm database schema changes
301
- </current-task>
232
+ Only add a new observation for a repeated action if the NEW result changes the picture.
302
233
 
303
- <suggested-response>
304
- The OAuth2 implementation is ready for testing. Would you like me to walk through the flow?
305
- </suggested-response>`;
234
+ ACTIONABLE INSIGHTS:
235
+ - What worked well in explanations
236
+ - What needs follow-up or clarification
237
+ - User's stated goals or next steps (note if the user tells you not to do a next step, or asks for something specific, other next steps besides the users request should be marked as "waiting for user", unless the user explicitly says to continue all next steps)`;
306
238
  var OBSERVER_OUTPUT_FORMAT_BASE = `Use priority levels:
307
239
  - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context
308
240
  - \u{1F7E1} Medium: project details, learned information, tool results
309
241
  - \u{1F7E2} Low: minor details, uncertain observations
310
242
 
311
243
  Group related observations (like tool sequences) by indenting:
312
- * \u{1F7E1} (14:33) Agent debugging auth issue
244
+ * \u{1F534} (14:33) Agent debugging auth issue
313
245
  * -> ran git status, found 3 modified files
314
246
  * -> viewed auth.ts:45-60, found missing null check
315
247
  * -> applied fix, tests now pass
@@ -319,11 +251,11 @@ Group observations by date, then list each with 24-hour time.
319
251
  <observations>
320
252
  Date: Dec 4, 2025
321
253
  * \u{1F534} (14:30) User prefers direct answers
322
- * \u{1F7E1} (14:31) Working on feature X
323
- * \u{1F7E2} (14:32) User might prefer dark mode
254
+ * \u{1F534} (14:31) Working on feature X
255
+ * \u{1F7E1} (14:32) User might prefer dark mode
324
256
 
325
257
  Date: Dec 5, 2025
326
- * \u{1F7E1} (09:15) Continued work on feature X
258
+ * \u{1F534} (09:15) Continued work on feature X
327
259
  </observations>
328
260
 
329
261
  <current-task>
@@ -340,29 +272,21 @@ Hint for the agent's immediate next message. Examples:
340
272
  - "The assistant should wait for the user to respond before continuing."
341
273
  - Call the view tool on src/example.ts to continue debugging.
342
274
  </suggested-response>`;
343
- var CONDENSED_OBSERVER_GUIDELINES = `- Be specific: "User prefers short answers without lengthy explanations" not "User stated a preference"
344
- - Use terse language - dense sentences without unnecessary words
345
- - Don't repeat observations that have already been captured
346
- - When the agent calls tools, observe what was called, why, and what was learned
347
- - Include line numbers when observing code files
348
- - If the agent provides a detailed response, observe the key points so it could be repeated
349
- - Start each observation with a priority emoji (\u{1F534}, \u{1F7E1}, \u{1F7E2})
350
- - Observe WHAT happened and WHAT it means, not HOW well it was done
351
- - If the user provides detailed messages or code snippets, observe all important details`;
352
- var OBSERVER_GUIDELINES = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_GUIDELINES : `- Be specific enough for the assistant to act on
275
+ var OBSERVER_GUIDELINES = `- Be specific enough for the assistant to act on
353
276
  - Good: "User prefers short, direct answers without lengthy explanations"
354
277
  - Bad: "User stated a preference" (too vague)
355
278
  - Add 1 to 5 observations per exchange
356
- - Use terse language to save tokens. Sentences should be dense without unnecessary words.
357
- - Do not add repetitive observations that have already been observed.
358
- - If the agent calls tools, observe what was called, why, and what was learned.
359
- - When observing files with line numbers, include the line number if useful.
360
- - If the agent provides a detailed response, observe the contents so it could be repeated.
279
+ - Use terse language to save tokens. Sentences should be dense without unnecessary words
280
+ - Do not add repetitive observations that have already been observed. Group repeated similar actions (tool calls, file browsing) under a single parent with sub-bullets for new results
281
+ - If the agent calls tools, observe what was called, why, and what was learned
282
+ - When observing files with line numbers, include the line number if useful
283
+ - If the agent provides a detailed response, observe the contents so it could be repeated
361
284
  - Make sure you start each observation with a priority emoji (\u{1F534}, \u{1F7E1}, \u{1F7E2})
362
- - Observe WHAT the agent did and WHAT it means, not HOW well it did it.
363
- - If the user provides detailed messages or code snippets, observe all important details.`;
285
+ - User messages are always \u{1F534} priority, so are the completions of tasks. Capture the user's words closely \u2014 short/medium messages near-verbatim, long messages summarized with key quotes
286
+ - Observe WHAT the agent did and WHAT it means
287
+ - If the user provides detailed messages or code snippets, observe all important details`;
364
288
  function buildObserverSystemPrompt(multiThread = false, instruction) {
365
- const outputFormat = USE_CONDENSED_PROMPT ? CONDENSED_OBSERVER_OUTPUT_FORMAT : OBSERVER_OUTPUT_FORMAT_BASE;
289
+ const outputFormat = OBSERVER_OUTPUT_FORMAT_BASE;
366
290
  if (multiThread) {
367
291
  return `You are the memory consciousness of an AI assistant. Your observations will be the ONLY information the assistant has about past interactions with this user.
368
292
 
@@ -383,7 +307,7 @@ Your output MUST use XML tags to structure the response. Each thread's observati
383
307
  <thread id="thread_id_1">
384
308
  Date: Dec 4, 2025
385
309
  * \u{1F534} (14:30) User prefers direct answers
386
- * \u{1F7E1} (14:31) Working on feature X
310
+ * \u{1F534} (14:31) Working on feature X
387
311
 
388
312
  <current-task>
389
313
  What the agent is currently working on in this thread
@@ -396,7 +320,7 @@ Hint for the agent's next message in this thread
396
320
 
397
321
  <thread id="thread_id_2">
398
322
  Date: Dec 5, 2025
399
- * \u{1F7E1} (09:15) User asked about deployment
323
+ * \u{1F534} (09:15) User asked about deployment
400
324
 
401
325
  <current-task>
402
326
  Current task for this thread
@@ -409,7 +333,7 @@ Suggested response for this thread
409
333
  </observations>
410
334
 
411
335
  Use priority levels:
412
- - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context
336
+ - \u{1F534} High: explicit user facts, preferences, goals achieved, critical context, user messages
413
337
  - \u{1F7E1} Medium: project details, learned information, tool results
414
338
  - \u{1F7E2} Low: minor details, uncertain observations
415
339
 
@@ -563,7 +487,7 @@ ${formattedMessages}
563
487
  `;
564
488
  prompt += `Date: Dec 5, 2025
565
489
  `;
566
- prompt += `* \u{1F7E1} (09:15) User asked about deployment
490
+ prompt += `* \u{1F534} (09:15) User asked about deployment
567
491
  `;
568
492
  prompt += `<current-task>Discussing deployment options</current-task>
569
493
  `;
@@ -576,6 +500,9 @@ ${formattedMessages}
576
500
  }
577
501
  function parseMultiThreadObserverOutput(output) {
578
502
  const threads = /* @__PURE__ */ new Map();
503
+ if (detectDegenerateRepetition(output)) {
504
+ return { threads, rawOutput: output, degenerate: true };
505
+ }
579
506
  const observationsMatch = output.match(/^[ \t]*<observations>([\s\S]*?)^[ \t]*<\/observations>/im);
580
507
  const observationsContent = observationsMatch?.[1] ?? output;
581
508
  const threadRegex = /<thread\s+id="([^"]+)">([\s\S]*?)<\/thread>/gi;
@@ -597,7 +524,7 @@ function parseMultiThreadObserverOutput(output) {
597
524
  suggestedContinuation = suggestedMatch[1].trim();
598
525
  observations = observations.replace(/<suggested-response>[\s\S]*?<\/suggested-response>/i, "");
599
526
  }
600
- observations = observations.trim();
527
+ observations = sanitizeObservationLines(observations.trim());
601
528
  threads.set(threadId, {
602
529
  observations,
603
530
  currentTask,
@@ -642,8 +569,15 @@ IMPORTANT: Do NOT include <current-task> or <suggested-response> sections in you
642
569
  return prompt;
643
570
  }
644
571
  function parseObserverOutput(output) {
572
+ if (detectDegenerateRepetition(output)) {
573
+ return {
574
+ observations: "",
575
+ rawOutput: output,
576
+ degenerate: true
577
+ };
578
+ }
645
579
  const parsed = parseMemorySectionXml(output);
646
- const observations = parsed.observations || "";
580
+ const observations = sanitizeObservationLines(parsed.observations || "");
647
581
  return {
648
582
  observations,
649
583
  currentTask: parsed.currentTask || void 0,
@@ -684,6 +618,42 @@ function extractListItemsOnly(content) {
684
618
  }
685
619
  return listLines.join("\n").trim();
686
620
  }
621
/** Maximum number of UTF-16 code units kept per observation line before it is truncated. */
var MAX_OBSERVATION_LINE_CHARS = 1e4;

/**
 * Caps the length of every line in an observations block.
 *
 * Any line longer than MAX_OBSERVATION_LINE_CHARS is cut and suffixed with
 * " … [truncated]". The cut never lands inside a UTF-16 surrogate pair, so an
 * astral character (such as the priority emoji used in observations) sitting
 * on the limit is dropped whole instead of leaving a lone surrogate behind.
 *
 * @param {string} observations - Newline-separated observation text.
 * @returns {string} The input itself when it is empty/falsy or no line exceeds
 *   the limit; otherwise a new string with the oversized lines truncated.
 */
function sanitizeObservationLines(observations) {
  if (!observations) return observations;
  let changed = false;
  const lines = observations.split("\n").map((line) => {
    if (line.length <= MAX_OBSERVATION_LINE_CHARS) return line;
    changed = true;
    let cut = MAX_OBSERVATION_LINE_CHARS;
    const lastKept = line.charCodeAt(cut - 1);
    const firstDropped = line.charCodeAt(cut);
    const splitsSurrogatePair =
      lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    // Step back one code unit so the high surrogate is not orphaned.
    if (splitsSurrogatePair) cut -= 1;
    return `${line.slice(0, cut)} \u2026 [truncated]`;
  });
  // Hand back the original reference when nothing was truncated.
  return changed ? lines.join("\n") : observations;
}
634
/**
 * Heuristically flags model output that has collapsed into a repetition loop.
 *
 * Two signals are checked, in order:
 *  1. Roughly 50 evenly spaced 200-character windows are sampled; the text is
 *     degenerate when more than 5 windows were sampled and over 40% of them
 *     repeat an earlier sampled window.
 *  2. Any single line longer than 50,000 characters.
 *
 * Text that is empty/falsy or shorter than 2,000 characters is never flagged.
 *
 * @param {string} text - Raw model output to inspect.
 * @returns {boolean} True when the output looks degenerate.
 */
function detectDegenerateRepetition(text) {
  if (!text || text.length < 2e3) return false;

  const WINDOW_CHARS = 200;
  const stride = Math.max(1, Math.floor(text.length / 50));
  const distinctWindows = /* @__PURE__ */ new Set();
  let sampled = 0;
  for (let start = 0; start + WINDOW_CHARS <= text.length; start += stride) {
    distinctWindows.add(text.slice(start, start + WINDOW_CHARS));
    sampled += 1;
  }
  // Every sampled window beyond the first occurrence of its content is a repeat.
  const repeats = sampled - distinctWindows.size;
  if (sampled > 5 && repeats / sampled > 0.4) {
    return true;
  }

  return text.split("\n").some((line) => line.length > 5e4);
}
687
657
  function hasCurrentTaskSection(observations) {
688
658
  if (/<current-task>/i.test(observations)) {
689
659
  return true;
@@ -895,8 +865,14 @@ IMPORTANT: Do NOT include <current-task> or <suggested-response> sections in you
895
865
  return prompt;
896
866
  }
897
867
  function parseReflectorOutput(output) {
868
+ if (detectDegenerateRepetition(output)) {
869
+ return {
870
+ observations: "",
871
+ degenerate: true
872
+ };
873
+ }
898
874
  const parsed = parseReflectorSectionXml(output);
899
- const observations = parsed.observations || "";
875
+ const observations = sanitizeObservationLines(parsed.observations || "");
900
876
  return {
901
877
  observations,
902
878
  suggestedContinuation: parsed.suggestedResponse || void 0
@@ -1276,7 +1252,9 @@ var OBSERVATION_CONTEXT_INSTRUCTIONS = `IMPORTANT: When responding, reference sp
1276
1252
 
1277
1253
  KNOWLEDGE UPDATES: When asked about current state (e.g., "where do I currently...", "what is my current..."), always prefer the MOST RECENT information. Observations include dates - if you see conflicting information, the newer observation supersedes the older one. Look for phrases like "will start", "is switching", "changed to", "moved to" as indicators that previous information has been updated.
1278
1254
 
1279
- PLANNED ACTIONS: If the user stated they planned to do something (e.g., "I'm going to...", "I'm looking forward to...", "I will...") and the date they planned to do it is now in the past (check the relative time like "3 weeks ago"), assume they completed the action unless there's evidence they didn't. For example, if someone said "I'll start my new diet on Monday" and that was 2 weeks ago, assume they started the diet.`;
1255
+ PLANNED ACTIONS: If the user stated they planned to do something (e.g., "I'm going to...", "I'm looking forward to...", "I will...") and the date they planned to do it is now in the past (check the relative time like "3 weeks ago"), assume they completed the action unless there's evidence they didn't. For example, if someone said "I'll start my new diet on Monday" and that was 2 weeks ago, assume they started the diet.
1256
+
1257
+ MOST RECENT USER INPUT: Treat the most recent user message as the highest-priority signal for what to do next. Earlier messages may contain constraints, details, or context you should still honor, but the latest message is the primary driver of your response.`;
1280
1258
  var ObservationalMemory = class _ObservationalMemory {
1281
1259
  id = "observational-memory";
1282
1260
  name = "Observational Memory";
@@ -1446,41 +1424,65 @@ var ObservationalMemory = class _ObservationalMemory {
1446
1424
  }
1447
1425
  return [];
1448
1426
  }
1427
+ /**
1428
+ * Resolve bufferActivation config into an absolute retention floor (tokens to keep).
1429
+ * - Value in (0, 1]: ratio → retentionFloor = threshold * (1 - value)
1430
+ * - Value >= 1000: absolute token count → retentionFloor = value
1431
+ */
1432
+ resolveRetentionFloor(bufferActivation, messageTokensThreshold) {
1433
+ if (bufferActivation >= 1e3) return bufferActivation;
1434
+ return messageTokensThreshold * (1 - bufferActivation);
1435
+ }
1436
+ /**
1437
+ * Convert bufferActivation to the equivalent ratio (0-1) for the storage layer.
1438
+ * When bufferActivation >= 1000, it's an absolute retention target, so we compute
1439
+ * the equivalent ratio: 1 - (bufferActivation / threshold).
1440
+ */
1441
+ resolveActivationRatio(bufferActivation, messageTokensThreshold) {
1442
+ if (bufferActivation >= 1e3) {
1443
+ return Math.max(0, Math.min(1, 1 - bufferActivation / messageTokensThreshold));
1444
+ }
1445
+ return bufferActivation;
1446
+ }
1449
1447
  /**
1450
1448
  * Calculate the projected message tokens that would be removed if activation happened now.
1451
1449
  * This replicates the chunk boundary logic in swapBufferedToActive without actually activating.
1452
1450
  */
1453
- calculateProjectedMessageRemoval(chunks, activationRatio, messageTokensThreshold, currentPendingTokens) {
1451
+ calculateProjectedMessageRemoval(chunks, bufferActivation, messageTokensThreshold, currentPendingTokens) {
1454
1452
  if (chunks.length === 0) return 0;
1455
- const retentionFloor = messageTokensThreshold * (1 - activationRatio);
1453
+ const retentionFloor = this.resolveRetentionFloor(bufferActivation, messageTokensThreshold);
1456
1454
  const targetMessageTokens = Math.max(0, currentPendingTokens - retentionFloor);
1457
1455
  let cumulativeMessageTokens = 0;
1458
- let bestBoundary = 0;
1459
- let bestBoundaryMessageTokens = 0;
1456
+ let bestOverBoundary = 0;
1457
+ let bestOverTokens = 0;
1458
+ let bestUnderBoundary = 0;
1459
+ let bestUnderTokens = 0;
1460
1460
  for (let i = 0; i < chunks.length; i++) {
1461
1461
  cumulativeMessageTokens += chunks[i].messageTokens ?? 0;
1462
1462
  const boundary = i + 1;
1463
- const isUnder = cumulativeMessageTokens <= targetMessageTokens;
1464
- const bestIsUnder = bestBoundaryMessageTokens <= targetMessageTokens;
1465
- if (bestBoundary === 0) {
1466
- bestBoundary = boundary;
1467
- bestBoundaryMessageTokens = cumulativeMessageTokens;
1468
- } else if (isUnder && !bestIsUnder) {
1469
- bestBoundary = boundary;
1470
- bestBoundaryMessageTokens = cumulativeMessageTokens;
1471
- } else if (isUnder && bestIsUnder) {
1472
- if (cumulativeMessageTokens > bestBoundaryMessageTokens) {
1473
- bestBoundary = boundary;
1474
- bestBoundaryMessageTokens = cumulativeMessageTokens;
1463
+ if (cumulativeMessageTokens >= targetMessageTokens) {
1464
+ if (bestOverBoundary === 0 || cumulativeMessageTokens < bestOverTokens) {
1465
+ bestOverBoundary = boundary;
1466
+ bestOverTokens = cumulativeMessageTokens;
1475
1467
  }
1476
- } else if (!isUnder && !bestIsUnder) {
1477
- if (cumulativeMessageTokens < bestBoundaryMessageTokens) {
1478
- bestBoundary = boundary;
1479
- bestBoundaryMessageTokens = cumulativeMessageTokens;
1468
+ } else {
1469
+ if (cumulativeMessageTokens > bestUnderTokens) {
1470
+ bestUnderBoundary = boundary;
1471
+ bestUnderTokens = cumulativeMessageTokens;
1480
1472
  }
1481
1473
  }
1482
1474
  }
1483
- if (bestBoundary === 0) {
1475
+ const maxOvershoot = retentionFloor * 0.95;
1476
+ const overshoot = bestOverTokens - targetMessageTokens;
1477
+ const remainingAfterOver = currentPendingTokens - bestOverTokens;
1478
+ let bestBoundaryMessageTokens;
1479
+ if (bestOverBoundary > 0 && overshoot <= maxOvershoot && (remainingAfterOver >= 1e3 || retentionFloor === 0)) {
1480
+ bestBoundaryMessageTokens = bestOverTokens;
1481
+ } else if (bestUnderBoundary > 0) {
1482
+ bestBoundaryMessageTokens = bestUnderTokens;
1483
+ } else if (bestOverBoundary > 0) {
1484
+ bestBoundaryMessageTokens = bestOverTokens;
1485
+ } else {
1484
1486
  return chunks[0]?.messageTokens ?? 0;
1485
1487
  }
1486
1488
  return bestBoundaryMessageTokens;
@@ -1488,8 +1490,12 @@ var ObservationalMemory = class _ObservationalMemory {
1488
1490
  /**
1489
1491
  * Check if we've crossed a new bufferTokens interval boundary.
1490
1492
  * Returns true if async buffering should be triggered.
1493
+ *
1494
+ * When pending tokens are within ~1 bufferTokens of the observation threshold,
1495
+ * the buffer interval is halved to produce finer-grained chunks right before
1496
+ * activation. This improves chunk boundary selection, reducing overshoot.
1491
1497
  */
1492
- shouldTriggerAsyncObservation(currentTokens, lockKey, record) {
1498
+ shouldTriggerAsyncObservation(currentTokens, lockKey, record, messageTokensThreshold) {
1493
1499
  if (!this.isAsyncObservationEnabled()) return false;
1494
1500
  if (record.isBufferingObservation) {
1495
1501
  if (isOpActiveInProcess(record.id, "bufferingObservation")) return false;
@@ -1503,11 +1509,13 @@ var ObservationalMemory = class _ObservationalMemory {
1503
1509
  const dbBoundary = record.lastBufferedAtTokens ?? 0;
1504
1510
  const memBoundary = _ObservationalMemory.lastBufferedBoundary.get(bufferKey) ?? 0;
1505
1511
  const lastBoundary = Math.max(dbBoundary, memBoundary);
1506
- const currentInterval = Math.floor(currentTokens / bufferTokens);
1507
- const lastInterval = Math.floor(lastBoundary / bufferTokens);
1512
+ const rampPoint = messageTokensThreshold ? messageTokensThreshold - bufferTokens * 1.1 : Infinity;
1513
+ const effectiveBufferTokens = currentTokens >= rampPoint ? bufferTokens / 2 : bufferTokens;
1514
+ const currentInterval = Math.floor(currentTokens / effectiveBufferTokens);
1515
+ const lastInterval = Math.floor(lastBoundary / effectiveBufferTokens);
1508
1516
  const shouldTrigger = currentInterval > lastInterval;
1509
1517
  omDebug(
1510
- `[OM:shouldTriggerAsyncObs] tokens=${currentTokens}, bufferTokens=${bufferTokens}, currentInterval=${currentInterval}, lastInterval=${lastInterval}, lastBoundary=${lastBoundary} (db=${dbBoundary}, mem=${memBoundary}), shouldTrigger=${shouldTrigger}`
1518
+ `[OM:shouldTriggerAsyncObs] tokens=${currentTokens}, bufferTokens=${bufferTokens}, effectiveBufferTokens=${effectiveBufferTokens}, rampPoint=${rampPoint}, currentInterval=${currentInterval}, lastInterval=${lastInterval}, lastBoundary=${lastBoundary} (db=${dbBoundary}, mem=${memBoundary}), shouldTrigger=${shouldTrigger}`
1511
1519
  );
1512
1520
  return shouldTrigger;
1513
1521
  }
@@ -1768,9 +1776,17 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
1768
1776
  }
1769
1777
  }
1770
1778
  if (this.observationConfig.bufferActivation !== void 0) {
1771
- if (this.observationConfig.bufferActivation <= 0 || this.observationConfig.bufferActivation > 1) {
1779
+ if (this.observationConfig.bufferActivation <= 0) {
1780
+ throw new Error(`observation.bufferActivation must be > 0, got ${this.observationConfig.bufferActivation}`);
1781
+ }
1782
+ if (this.observationConfig.bufferActivation > 1 && this.observationConfig.bufferActivation < 1e3) {
1783
+ throw new Error(
1784
+ `observation.bufferActivation must be <= 1 (ratio) or >= 1000 (absolute token retention), got ${this.observationConfig.bufferActivation}`
1785
+ );
1786
+ }
1787
+ if (this.observationConfig.bufferActivation >= 1e3 && this.observationConfig.bufferActivation >= observationThreshold) {
1772
1788
  throw new Error(
1773
- `observation.bufferActivation must be in range (0, 1], got ${this.observationConfig.bufferActivation}`
1789
+ `observation.bufferActivation as absolute retention (${this.observationConfig.bufferActivation}) must be less than messageTokens (${observationThreshold})`
1774
1790
  );
1775
1791
  }
1776
1792
  }
@@ -2385,18 +2401,31 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2385
2401
  async callObserver(existingObservations, messagesToObserve, abortSignal, options) {
2386
2402
  const agent = this.getObserverAgent();
2387
2403
  const prompt = buildObserverPrompt(existingObservations, messagesToObserve, options);
2388
- const result = await this.withAbortCheck(
2389
- () => agent.generate(prompt, {
2390
- modelSettings: {
2391
- ...this.observationConfig.modelSettings
2392
- },
2393
- providerOptions: this.observationConfig.providerOptions,
2394
- ...abortSignal ? { abortSignal } : {},
2395
- ...options?.requestContext ? { requestContext: options.requestContext } : {}
2396
- }),
2397
- abortSignal
2398
- );
2399
- const parsed = parseObserverOutput(result.text);
2404
+ const doGenerate = async () => {
2405
+ const result2 = await this.withAbortCheck(
2406
+ () => agent.generate(prompt, {
2407
+ modelSettings: {
2408
+ ...this.observationConfig.modelSettings
2409
+ },
2410
+ providerOptions: this.observationConfig.providerOptions,
2411
+ ...abortSignal ? { abortSignal } : {},
2412
+ ...options?.requestContext ? { requestContext: options.requestContext } : {}
2413
+ }),
2414
+ abortSignal
2415
+ );
2416
+ return result2;
2417
+ };
2418
+ let result = await doGenerate();
2419
+ let parsed = parseObserverOutput(result.text);
2420
+ if (parsed.degenerate) {
2421
+ omDebug(`[OM:callObserver] degenerate repetition detected, retrying once`);
2422
+ result = await doGenerate();
2423
+ parsed = parseObserverOutput(result.text);
2424
+ if (parsed.degenerate) {
2425
+ omDebug(`[OM:callObserver] degenerate repetition on retry, failing`);
2426
+ throw new Error("Observer produced degenerate output after retry");
2427
+ }
2428
+ }
2400
2429
  const usage = result.totalUsage ?? result.usage;
2401
2430
  return {
2402
2431
  observations: parsed.observations,
@@ -2430,18 +2459,30 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2430
2459
  for (const msg of allMessages) {
2431
2460
  this.observedMessageIds.add(msg.id);
2432
2461
  }
2433
- const result = await this.withAbortCheck(
2434
- () => agent$1.generate(prompt, {
2435
- modelSettings: {
2436
- ...this.observationConfig.modelSettings
2437
- },
2438
- providerOptions: this.observationConfig.providerOptions,
2439
- ...abortSignal ? { abortSignal } : {},
2440
- ...requestContext ? { requestContext } : {}
2441
- }),
2442
- abortSignal
2443
- );
2444
- const parsed = parseMultiThreadObserverOutput(result.text);
2462
+ const doGenerate = async () => {
2463
+ return this.withAbortCheck(
2464
+ () => agent$1.generate(prompt, {
2465
+ modelSettings: {
2466
+ ...this.observationConfig.modelSettings
2467
+ },
2468
+ providerOptions: this.observationConfig.providerOptions,
2469
+ ...abortSignal ? { abortSignal } : {},
2470
+ ...requestContext ? { requestContext } : {}
2471
+ }),
2472
+ abortSignal
2473
+ );
2474
+ };
2475
+ let result = await doGenerate();
2476
+ let parsed = parseMultiThreadObserverOutput(result.text);
2477
+ if (parsed.degenerate) {
2478
+ omDebug(`[OM:callMultiThreadObserver] degenerate repetition detected, retrying once`);
2479
+ result = await doGenerate();
2480
+ parsed = parseMultiThreadObserverOutput(result.text);
2481
+ if (parsed.degenerate) {
2482
+ omDebug(`[OM:callMultiThreadObserver] degenerate repetition on retry, failing`);
2483
+ throw new Error("Multi-thread observer produced degenerate output after retry");
2484
+ }
2485
+ }
2445
2486
  const results = /* @__PURE__ */ new Map();
2446
2487
  for (const [threadId, threadResult] of parsed.threads) {
2447
2488
  results.set(threadId, {
@@ -2528,11 +2569,22 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
2528
2569
  totalUsage.totalTokens += usage.totalTokens ?? 0;
2529
2570
  }
2530
2571
  parsed = parseReflectorOutput(result.text);
2531
- reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
2572
+ if (parsed.degenerate) {
2573
+ omDebug(
2574
+ `[OM:callReflector] attempt #${attemptNumber}: degenerate repetition detected, treating as compression failure`
2575
+ );
2576
+ reflectedTokens = originalTokens;
2577
+ } else {
2578
+ reflectedTokens = this.tokenCounter.countObservations(parsed.observations);
2579
+ }
2532
2580
  omDebug(
2533
- `[OM:callReflector] attempt #${attemptNumber} parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}`
2581
+ `[OM:callReflector] attempt #${attemptNumber} parsed: reflectedTokens=${reflectedTokens}, targetThreshold=${targetThreshold}, compressionValid=${validateCompression(reflectedTokens, targetThreshold)}, parsedObsLen=${parsed.observations?.length}, degenerate=${parsed.degenerate ?? false}`
2534
2582
  );
2535
- if (validateCompression(reflectedTokens, targetThreshold) || currentLevel >= maxLevel) {
2583
+ if (!parsed.degenerate && (validateCompression(reflectedTokens, targetThreshold) || currentLevel >= maxLevel)) {
2584
+ break;
2585
+ }
2586
+ if (parsed.degenerate && currentLevel >= maxLevel) {
2587
+ omDebug(`[OM:callReflector] degenerate output persists at maxLevel=${maxLevel}, breaking`);
2536
2588
  break;
2537
2589
  }
2538
2590
  if (streamContext?.writer) {
@@ -2840,6 +2892,20 @@ ${suggestedResponse}
2840
2892
  omDebug(
2841
2893
  `[OM:threshold] activation succeeded, obsTokens=${updatedRecord.observationTokenCount}, activeObsLen=${updatedRecord.activeObservations?.length}`
2842
2894
  );
2895
+ if (activationResult.suggestedContinuation || activationResult.currentTask) {
2896
+ const thread = await this.storage.getThreadById({ threadId });
2897
+ if (thread) {
2898
+ const newMetadata = memory.setThreadOMMetadata(thread.metadata, {
2899
+ suggestedResponse: activationResult.suggestedContinuation,
2900
+ currentTask: activationResult.currentTask
2901
+ });
2902
+ await this.storage.updateThread({
2903
+ id: threadId,
2904
+ title: thread.title ?? "",
2905
+ metadata: newMetadata
2906
+ });
2907
+ }
2908
+ }
2843
2909
  await this.maybeAsyncReflect(
2844
2910
  updatedRecord,
2845
2911
  updatedRecord.observationTokenCount ?? 0,
@@ -3178,6 +3244,20 @@ ${suggestedResponse}
3178
3244
  _ObservationalMemory.lastBufferedBoundary.set(bufKey, 0);
3179
3245
  this.storage.setBufferingObservationFlag(record.id, false, 0).catch(() => {
3180
3246
  });
3247
+ if (activationResult.suggestedContinuation || activationResult.currentTask) {
3248
+ const thread = await this.storage.getThreadById({ threadId });
3249
+ if (thread) {
3250
+ const newMetadata = memory.setThreadOMMetadata(thread.metadata, {
3251
+ suggestedResponse: activationResult.suggestedContinuation,
3252
+ currentTask: activationResult.currentTask
3253
+ });
3254
+ await this.storage.updateThread({
3255
+ id: threadId,
3256
+ title: thread.title ?? "",
3257
+ metadata: newMetadata
3258
+ });
3259
+ }
3260
+ }
3181
3261
  await this.maybeReflect({
3182
3262
  record,
3183
3263
  observationTokens: record.observationTokenCount ?? 0,
@@ -3236,7 +3316,7 @@ ${suggestedResponse}
3236
3316
  state.sealedIds = sealedIds;
3237
3317
  const lockKey = this.getLockKey(threadId, resourceId);
3238
3318
  if (this.isAsyncObservationEnabled() && totalPendingTokens < threshold) {
3239
- const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
3319
+ const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record, threshold);
3240
3320
  omDebug(
3241
3321
  `[OM:async-obs] belowThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, shouldTrigger=${shouldTrigger}, isBufferingObs=${record.isBufferingObservation}, lastBufferedAt=${record.lastBufferedAtTokens}`
3242
3322
  );
@@ -3252,7 +3332,7 @@ ${suggestedResponse}
3252
3332
  );
3253
3333
  }
3254
3334
  } else if (this.isAsyncObservationEnabled()) {
3255
- const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record);
3335
+ const shouldTrigger = this.shouldTriggerAsyncObservation(unbufferedPendingTokens, lockKey, record, threshold);
3256
3336
  omDebug(
3257
3337
  `[OM:async-obs] atOrAboveThreshold: pending=${totalPendingTokens}, unbuffered=${unbufferedPendingTokens}, threshold=${threshold}, step=${stepNumber}, shouldTrigger=${shouldTrigger}`
3258
3338
  );
@@ -3934,8 +4014,12 @@ ${result.observations}` : result.observations;
3934
4014
  messagesToBuffer,
3935
4015
  void 0,
3936
4016
  // No abort signal for background ops
3937
- { skipContinuationHints: true, requestContext }
4017
+ { requestContext }
3938
4018
  );
4019
+ if (!result.observations) {
4020
+ omDebug(`[OM:doAsyncBufferedObservation] empty observations returned, skipping buffer storage`);
4021
+ return;
4022
+ }
3939
4023
  let newObservations;
3940
4024
  if (this.scope === "resource") {
3941
4025
  newObservations = await this.wrapWithThreadTag(threadId, result.observations);
@@ -3955,7 +4039,9 @@ ${result.observations}` : result.observations;
3955
4039
  tokenCount: newTokenCount,
3956
4040
  messageIds: newMessageIds,
3957
4041
  messageTokens,
3958
- lastObservedAt
4042
+ lastObservedAt,
4043
+ suggestedContinuation: result.suggestedContinuation,
4044
+ currentTask: result.currentTask
3959
4045
  },
3960
4046
  lastBufferedAtTime: lastObservedAt
3961
4047
  });
@@ -4034,24 +4120,28 @@ ${bufferedObservations}`;
4034
4120
  return { success: false };
4035
4121
  }
4036
4122
  const messageTokensThreshold = this.getMaxThreshold(this.observationConfig.messageTokens);
4123
+ let effectivePendingTokens = currentPendingTokens;
4037
4124
  if (messageList) {
4038
- const freshPendingTokens = this.tokenCounter.countMessages(messageList.get.all.db());
4039
- if (freshPendingTokens < messageTokensThreshold) {
4125
+ effectivePendingTokens = this.tokenCounter.countMessages(messageList.get.all.db());
4126
+ if (effectivePendingTokens < messageTokensThreshold) {
4040
4127
  omDebug(
4041
- `[OM:tryActivate] skipping activation: freshPendingTokens=${freshPendingTokens} < threshold=${messageTokensThreshold}`
4128
+ `[OM:tryActivate] skipping activation: freshPendingTokens=${effectivePendingTokens} < threshold=${messageTokensThreshold}`
4042
4129
  );
4043
4130
  return { success: false };
4044
4131
  }
4045
4132
  }
4046
- const activationRatio = this.observationConfig.bufferActivation ?? 0.7;
4133
+ const bufferActivation = this.observationConfig.bufferActivation ?? 0.7;
4134
+ const activationRatio = this.resolveActivationRatio(bufferActivation, messageTokensThreshold);
4135
+ const forceMaxActivation = !!(this.observationConfig.blockAfter && effectivePendingTokens >= this.observationConfig.blockAfter);
4047
4136
  omDebug(
4048
- `[OM:tryActivate] swapping: freshChunks=${freshChunks.length}, activationRatio=${activationRatio}, totalChunkTokens=${freshChunks.reduce((s, c) => s + (c.tokenCount ?? 0), 0)}`
4137
+ `[OM:tryActivate] swapping: freshChunks=${freshChunks.length}, bufferActivation=${bufferActivation}, activationRatio=${activationRatio}, forceMax=${forceMaxActivation}, totalChunkTokens=${freshChunks.reduce((s, c) => s + (c.tokenCount ?? 0), 0)}`
4049
4138
  );
4050
4139
  const activationResult = await this.storage.swapBufferedToActive({
4051
4140
  id: freshRecord.id,
4052
4141
  activationRatio,
4053
4142
  messageTokensThreshold,
4054
- currentPendingTokens
4143
+ currentPendingTokens: effectivePendingTokens,
4144
+ forceMaxActivation
4055
4145
  });
4056
4146
  omDebug(
4057
4147
  `[OM:tryActivate] swapResult: chunksActivated=${activationResult.chunksActivated}, tokensActivated=${activationResult.messageTokensActivated}, obsTokensActivated=${activationResult.observationTokensActivated}, activatedCycleIds=${activationResult.activatedCycleIds.join(",")}`
@@ -4090,7 +4180,9 @@ ${bufferedObservations}`;
4090
4180
  success: true,
4091
4181
  updatedRecord: updatedRecord ?? void 0,
4092
4182
  messageTokensActivated: activationResult.messageTokensActivated,
4093
- activatedMessageIds: activationResult.activatedMessageIds
4183
+ activatedMessageIds: activationResult.activatedMessageIds,
4184
+ suggestedContinuation: activationResult.suggestedContinuation,
4185
+ currentTask: activationResult.currentTask
4094
4186
  };
4095
4187
  }
4096
4188
  /**
@@ -4976,5 +5068,5 @@ exports.formatMessagesForObserver = formatMessagesForObserver;
4976
5068
  exports.hasCurrentTaskSection = hasCurrentTaskSection;
4977
5069
  exports.optimizeObservationsForContext = optimizeObservationsForContext;
4978
5070
  exports.parseObserverOutput = parseObserverOutput;
4979
- //# sourceMappingURL=chunk-QRKB5I2S.cjs.map
4980
- //# sourceMappingURL=chunk-QRKB5I2S.cjs.map
5071
+ //# sourceMappingURL=chunk-LLTHE64H.cjs.map
5072
+ //# sourceMappingURL=chunk-LLTHE64H.cjs.map