@mastra/memory 1.9.0 → 1.9.1-alpha.1

This diff shows the changes between publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @mastra/memory
2
2
 
3
+ ## 1.9.1-alpha.1
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [[`423aa6f`](https://github.com/mastra-ai/mastra/commit/423aa6fd12406de6a1cc6b68e463d30af1d790fb), [`47358d9`](https://github.com/mastra-ai/mastra/commit/47358d960bb2b931321de7e798f341ab0df81f44), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124), [`4bb5adc`](https://github.com/mastra-ai/mastra/commit/4bb5adc05c88e3a83fe1ea5ecb9eae6e17313124)]:
8
+ - @mastra/core@1.16.0-alpha.3
9
+ - @mastra/schema-compat@1.2.7-alpha.1
10
+
11
+ ## 1.9.1-alpha.0
12
+
13
+ ### Patch Changes
14
+
15
+ - Fixed observational memory reflection compression for `google/gemini-2.5-flash` by using stronger compression guidance and starting it at a higher compression level during reflection. `google/gemini-2.5-flash` is unusually good at generating long, faithful outputs. That made reflection retries more likely to preserve too much detail and miss the compression target, wasting tokens in the process. ([#14612](https://github.com/mastra-ai/mastra/pull/14612))
16
+
17
+ - Updated dependencies [[`be37de4`](https://github.com/mastra-ai/mastra/commit/be37de4391bd1d5486ce38efacbf00ca51637262), [`f3ce603`](https://github.com/mastra-ai/mastra/commit/f3ce603fd76180f4a5be90b6dc786d389b6b3e98), [`2871451`](https://github.com/mastra-ai/mastra/commit/2871451703829aefa06c4a5d6eca7fd3731222ef), [`d3930ea`](https://github.com/mastra-ai/mastra/commit/d3930eac51c30b0ecf7eaa54bb9430758b399777)]:
18
+ - @mastra/core@1.16.0-alpha.2
19
+ - @mastra/schema-compat@1.2.7-alpha.0
20
+
3
21
  ## 1.9.0
4
22
 
5
23
  ### Minor Changes
@@ -1736,6 +1736,7 @@ User messages are extremely important. If the user asks a question or gives a ne
1736
1736
 
1737
1737
  ${instruction}` : ""}`;
1738
1738
  }
1739
+ var MAX_COMPRESSION_LEVEL = 4;
1739
1740
  var COMPRESSION_GUIDANCE = {
1740
1741
  0: "",
1741
1742
  1: `
@@ -1748,11 +1749,11 @@ Please re-process with slightly more compression:
1748
1749
  - Closer to the end, retain more fine details (recent context matters more)
1749
1750
  - Memory is getting long - use a more condensed style throughout
1750
1751
  - Combine related items more aggressively but do not lose important specific details of names, places, events, and people
1752
+ - Combine repeated similar tool calls (e.g. multiple file views, searches, or edits in the same area) into a single summary line describing what was explored/changed and the outcome
1751
1753
  - Preserve \u2705 completion markers \u2014 they are memory signals that tell the assistant what is already resolved and help prevent repeated work
1752
1754
  - Preserve the concrete resolved outcome captured by \u2705 markers so the assistant knows what exactly is done
1753
- - For example if there is a long nested observation list about repeated tool calls, you can combine those into a single line and observe that the tool was called multiple times for x reason, and finally y outcome happened.
1754
1755
 
1755
- Your current detail level was a 10/10, lets aim for a 8/10 detail level.
1756
+ Aim for a 8/10 detail level.
1756
1757
  `,
1757
1758
  2: `
1758
1759
  ## AGGRESSIVE COMPRESSION REQUIRED
@@ -1764,12 +1765,13 @@ Please re-process with much more aggressive compression:
1764
1765
  - Closer to the end, retain fine details (recent context matters more)
1765
1766
  - Memory is getting very long - use a significantly more condensed style throughout
1766
1767
  - Combine related items aggressively but do not lose important specific details of names, places, events, and people
1768
+ - Combine repeated similar tool calls (e.g. multiple file views, searches, or edits in the same area) into a single summary line describing what was explored/changed and the outcome
1769
+ - If the same file or module is mentioned across many observations, merge into one entry covering the full arc
1767
1770
  - Preserve \u2705 completion markers \u2014 they are memory signals that tell the assistant what is already resolved and help prevent repeated work
1768
1771
  - Preserve the concrete resolved outcome captured by \u2705 markers so the assistant knows what exactly is done
1769
- - For example if there is a long nested observation list about repeated tool calls, you can combine those into a single line and observe that the tool was called multiple times for x reason, and finally y outcome happened.
1770
1772
  - Remove redundant information and merge overlapping observations
1771
1773
 
1772
- Your current detail level was a 10/10, lets aim for a 6/10 detail level.
1774
+ Aim for a 6/10 detail level.
1773
1775
  `,
1774
1776
  3: `
1775
1777
  ## CRITICAL COMPRESSION REQUIRED
@@ -1780,13 +1782,32 @@ Please re-process with maximum compression:
1780
1782
  - Summarize the oldest observations (first 50-70%) into brief high-level paragraphs \u2014 only key facts, decisions, and outcomes
1781
1783
  - For the most recent observations (last 30-50%), retain important details but still use a condensed style
1782
1784
  - Ruthlessly merge related observations \u2014 if 10 observations are about the same topic, combine into 1-2 lines
1785
+ - Combine all tool call sequences (file views, searches, edits, builds) into outcome-only summaries \u2014 drop individual steps entirely
1783
1786
  - Drop procedural details (tool calls, retries, intermediate steps) \u2014 keep only final outcomes
1784
1787
  - Drop observations that are no longer relevant or have been superseded by newer information
1785
1788
  - Preserve \u2705 completion markers \u2014 they are memory signals that tell the assistant what is already resolved and help prevent repeated work
1786
1789
  - Preserve the concrete resolved outcome captured by \u2705 markers so the assistant knows what exactly is done
1787
1790
  - Preserve: names, dates, decisions, errors, user preferences, and architectural choices
1788
1791
 
1789
- Your current detail level was a 10/10, lets aim for a 4/10 detail level.
1792
+ Aim for a 4/10 detail level.
1793
+ `,
1794
+ 4: `
1795
+ ## EXTREME COMPRESSION REQUIRED
1796
+
1797
+ Multiple compression attempts have failed. The content may already be dense from a prior reflection.
1798
+
1799
+ You MUST dramatically reduce the number of observations while keeping the standard observation format (date groups with bullet points and priority emojis):
1800
+ - Tool call observations are the biggest source of bloat. Collapse ALL tool call sequences into outcome-only observations \u2014 e.g. 10 observations about viewing/searching/editing files become 1 observation about what was actually learned or achieved (e.g. "Investigated auth module and found token validation was skipping expiry check")
1801
+ - Never preserve individual tool calls (viewed file X, searched for Y, ran build) \u2014 only preserve what was discovered or accomplished
1802
+ - Consolidate many related observations into single, more generic observations
1803
+ - Merge all same-day date groups into at most 2-3 date groups per day
1804
+ - For older content, each topic or task should be at most 1-2 observations capturing the key outcome
1805
+ - For recent content, retain more detail but still merge related items aggressively
1806
+ - If multiple observations describe incremental progress on the same task, keep only the final state
1807
+ - Preserve \u2705 completion markers and their outcomes but merge related completions into fewer lines
1808
+ - Preserve: user preferences, key decisions, architectural choices, and unresolved issues
1809
+
1810
+ Aim for a 2/10 detail level. Fewer, more generic observations are better than many specific ones that exceed the budget.
1790
1811
  `
1791
1812
  };
1792
1813
  function buildReflectorPrompt(observations, manualPrompt, compressionLevel, skipContinuationHints) {
@@ -3912,6 +3933,22 @@ Async buffering is enabled by default \u2014 this opt-out is only needed when us
3912
3933
  modelId: resolved.modelId
3913
3934
  };
3914
3935
  }
3936
+ /**
3937
+ * Get the default compression start level based on model behavior.
3938
+ * gemini-2.5-flash is a faithful transcriber that needs explicit pressure to compress effectively.
3939
+ */
3940
+ async getCompressionStartLevel(requestContext) {
3941
+ try {
3942
+ const resolved = await this.resolveModelContext(this.reflectionConfig.model, requestContext);
3943
+ const modelId = resolved?.modelId ?? "";
3944
+ if (modelId.includes("gemini-2.5-flash")) {
3945
+ return 2;
3946
+ }
3947
+ return 1;
3948
+ } catch {
3949
+ return 1;
3950
+ }
3951
+ }
3915
3952
  getRuntimeModelContext(model) {
3916
3953
  if (!model?.modelId) {
3917
3954
  return void 0;
@@ -4709,8 +4746,9 @@ ${unreflectedContent}` : bufferedReflection;
4709
4746
  const originalTokens = this.tokenCounter.countObservations(observations);
4710
4747
  const targetThreshold = observationTokensThreshold ?? getMaxThreshold(this.reflectionConfig.observationTokens);
4711
4748
  let totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
4712
- let currentLevel = compressionStartLevel ?? 0;
4713
- const maxLevel = 3;
4749
+ const startLevel = compressionStartLevel ?? 0;
4750
+ let currentLevel = startLevel;
4751
+ const maxLevel = Math.min(MAX_COMPRESSION_LEVEL, startLevel + 3);
4714
4752
  let parsed = { observations: "", suggestedContinuation: void 0 };
4715
4753
  let reflectedTokens = 0;
4716
4754
  let attemptNumber = 0;
@@ -4781,13 +4819,17 @@ ${unreflectedContent}` : bufferedReflection;
4781
4819
  omDebug(`[OM:callReflector] degenerate output persists at maxLevel=${maxLevel}, breaking`);
4782
4820
  break;
4783
4821
  }
4822
+ if (currentLevel >= maxLevel) {
4823
+ break;
4824
+ }
4825
+ const nextLevel = currentLevel + 1;
4784
4826
  if (streamContext?.writer) {
4785
4827
  const failedMarker = createObservationFailedMarker({
4786
4828
  cycleId: streamContext.cycleId,
4787
4829
  operationType: "reflection",
4788
4830
  startedAt: streamContext.startedAt,
4789
4831
  tokensAttempted: originalTokens,
4790
- error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying at level ${currentLevel + 1}`,
4832
+ error: `Did not compress below threshold (${originalTokens} \u2192 ${reflectedTokens}, target: ${targetThreshold}), retrying at level ${nextLevel}`,
4791
4833
  recordId: streamContext.recordId,
4792
4834
  threadId: streamContext.threadId
4793
4835
  });
@@ -4808,7 +4850,7 @@ ${unreflectedContent}` : bufferedReflection;
4808
4850
  await streamContext.writer.custom(startMarker).catch(() => {
4809
4851
  });
4810
4852
  }
4811
- currentLevel = Math.min(currentLevel + 1, maxLevel);
4853
+ currentLevel = nextLevel;
4812
4854
  }
4813
4855
  return {
4814
4856
  observations: parsed.observations,
@@ -6709,9 +6751,6 @@ ${bufferedObservations}`;
6709
6751
  omDebug(
6710
6752
  `[OM:reflect] doAsyncBufferedReflection: slicing observations for reflection \u2014 totalLines=${totalLines}, avgTokPerLine=${avgTokensPerLine.toFixed(1)}, activationPointTokens=${activationPointTokens}, linesToReflect=${linesToReflect}/${totalLines}, sliceTokenEstimate=${sliceTokenEstimate}, compressionTarget=${compressionTarget}`
6711
6753
  );
6712
- omDebug(
6713
- `[OM:reflect] doAsyncBufferedReflection: starting reflector call, recordId=${currentRecord.id}, observationTokens=${sliceTokenEstimate}, compressionTarget=${compressionTarget} (inputTokens), activeObsLength=${activeObservations.length}, reflectedLineCount=${reflectedObservationLineCount}`
6714
- );
6715
6754
  if (writer) {
6716
6755
  const startMarker = createBufferingStartMarker({
6717
6756
  cycleId,
@@ -6725,6 +6764,9 @@ ${bufferedObservations}`;
6725
6764
  void writer.custom(startMarker).catch(() => {
6726
6765
  });
6727
6766
  }
6767
+ omDebug(
6768
+ `[OM:reflect] doAsyncBufferedReflection: starting reflector call, recordId=${currentRecord.id}, observationTokens=${sliceTokenEstimate}, compressionTarget=${compressionTarget} (inputTokens), activeObsLength=${activeObservations.length}, reflectedLineCount=${reflectedObservationLineCount}`
6769
+ );
6728
6770
  const reflectResult = await this.callReflector(
6729
6771
  activeObservations,
6730
6772
  void 0,
@@ -6736,8 +6778,7 @@ ${bufferedObservations}`;
6736
6778
  // No abort signal for background ops
6737
6779
  true,
6738
6780
  // Skip continuation hints for async buffering
6739
- 1,
6740
- // Start at compression level 1 for buffered reflection
6781
+ await this.getCompressionStartLevel(requestContext),
6741
6782
  requestContext
6742
6783
  );
6743
6784
  const reflectionTokenCount = this.tokenCounter.countObservations(reflectResult.observations);
@@ -7598,5 +7639,5 @@ exports.stripEphemeralAnchorIds = stripEphemeralAnchorIds;
7598
7639
  exports.stripObservationGroups = stripObservationGroups;
7599
7640
  exports.truncateStringByTokens = truncateStringByTokens;
7600
7641
  exports.wrapInObservationGroup = wrapInObservationGroup;
7601
- //# sourceMappingURL=chunk-LVV2RT42.cjs.map
7602
- //# sourceMappingURL=chunk-LVV2RT42.cjs.map
7642
+ //# sourceMappingURL=chunk-CNOHXG5O.cjs.map
7643
+ //# sourceMappingURL=chunk-CNOHXG5O.cjs.map