@mastra/memory 1.17.6-alpha.1 → 1.18.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +23 -0
  2. package/dist/{chunk-WNLFJKTX.cjs → chunk-ET2TVAT3.cjs} +84 -33
  3. package/dist/chunk-ET2TVAT3.cjs.map +1 -0
  4. package/dist/{chunk-QZGJY67D.js → chunk-XVVCS6R6.js} +84 -33
  5. package/dist/chunk-XVVCS6R6.js.map +1 -0
  6. package/dist/docs/SKILL.md +1 -1
  7. package/dist/docs/assets/SOURCE_MAP.json +39 -39
  8. package/dist/docs/references/docs-memory-observational-memory.md +56 -12
  9. package/dist/docs/references/docs-memory-semantic-recall.md +68 -6
  10. package/dist/docs/references/reference-memory-observational-memory.md +11 -1
  11. package/dist/index.cjs +49 -20
  12. package/dist/index.cjs.map +1 -1
  13. package/dist/index.d.ts +2 -1
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +40 -11
  16. package/dist/index.js.map +1 -1
  17. package/dist/{observational-memory-HAJ3K5JJ.js → observational-memory-2PRVG6BF.js} +3 -3
  18. package/dist/{observational-memory-HAJ3K5JJ.js.map → observational-memory-2PRVG6BF.js.map} +1 -1
  19. package/dist/{observational-memory-BJF72NKJ.cjs → observational-memory-UJUAQKJB.cjs} +26 -26
  20. package/dist/{observational-memory-BJF72NKJ.cjs.map → observational-memory-UJUAQKJB.cjs.map} +1 -1
  21. package/dist/processors/index.cjs +24 -24
  22. package/dist/processors/index.js +1 -1
  23. package/dist/processors/observational-memory/observation-turn/load-memory-context.d.ts +9 -0
  24. package/dist/processors/observational-memory/observation-turn/load-memory-context.d.ts.map +1 -0
  25. package/dist/processors/observational-memory/observation-turn/turn.d.ts.map +1 -1
  26. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  27. package/dist/processors/observational-memory/processor.d.ts.map +1 -1
  28. package/dist/processors/observational-memory/types.d.ts +35 -4
  29. package/dist/processors/observational-memory/types.d.ts.map +1 -1
  30. package/package.json +5 -5
  31. package/dist/chunk-QZGJY67D.js.map +0 -1
  32. package/dist/chunk-WNLFJKTX.cjs.map +0 -1
@@ -3,7 +3,7 @@ name: mastra-memory
3
3
  description: Documentation for @mastra/memory. Use when working with @mastra/memory APIs, configuration, or implementation.
4
4
  metadata:
5
5
  package: "@mastra/memory"
6
- version: "1.17.6-alpha.1"
6
+ version: "1.18.0-alpha.3"
7
7
  ---
8
8
 
9
9
  ## When to use
@@ -1,119 +1,119 @@
1
1
  {
2
- "version": "1.17.6-alpha.1",
2
+ "version": "1.18.0-alpha.3",
3
3
  "package": "@mastra/memory",
4
4
  "exports": {
5
5
  "ModelByInputTokens": {
6
6
  "types": "dist/processors/index.d.ts",
7
- "implementation": "dist/chunk-QZGJY67D.js",
7
+ "implementation": "dist/chunk-XVVCS6R6.js",
8
8
  "line": 745
9
9
  },
10
10
  "OBSERVER_SYSTEM_PROMPT": {
11
11
  "types": "dist/processors/index.d.ts",
12
- "implementation": "dist/chunk-QZGJY67D.js"
12
+ "implementation": "dist/chunk-XVVCS6R6.js"
13
13
  },
14
14
  "ObservationalMemory": {
15
15
  "types": "dist/processors/index.d.ts",
16
- "implementation": "dist/chunk-QZGJY67D.js",
17
- "line": 6674
16
+ "implementation": "dist/chunk-XVVCS6R6.js",
17
+ "line": 6690
18
18
  },
19
19
  "ObservationalMemoryProcessor": {
20
20
  "types": "dist/processors/index.d.ts",
21
- "implementation": "dist/chunk-QZGJY67D.js",
22
- "line": 9219
21
+ "implementation": "dist/chunk-XVVCS6R6.js",
22
+ "line": 9264
23
23
  },
24
24
  "TokenCounter": {
25
25
  "types": "dist/processors/index.d.ts",
26
- "implementation": "dist/chunk-QZGJY67D.js",
27
- "line": 6144
26
+ "implementation": "dist/chunk-XVVCS6R6.js",
27
+ "line": 6160
28
28
  },
29
29
  "buildObserverPrompt": {
30
30
  "types": "dist/processors/index.d.ts",
31
- "implementation": "dist/chunk-QZGJY67D.js",
32
- "line": 3643
31
+ "implementation": "dist/chunk-XVVCS6R6.js",
32
+ "line": 3659
33
33
  },
34
34
  "buildObserverSystemPrompt": {
35
35
  "types": "dist/processors/index.d.ts",
36
- "implementation": "dist/chunk-QZGJY67D.js",
37
- "line": 2951
36
+ "implementation": "dist/chunk-XVVCS6R6.js",
37
+ "line": 2967
38
38
  },
39
39
  "combineObservationGroupRanges": {
40
40
  "types": "dist/processors/index.d.ts",
41
- "implementation": "dist/chunk-QZGJY67D.js",
41
+ "implementation": "dist/chunk-XVVCS6R6.js",
42
42
  "line": 837
43
43
  },
44
44
  "deriveObservationGroupProvenance": {
45
45
  "types": "dist/processors/index.d.ts",
46
- "implementation": "dist/chunk-QZGJY67D.js",
46
+ "implementation": "dist/chunk-XVVCS6R6.js",
47
47
  "line": 871
48
48
  },
49
49
  "extractCurrentTask": {
50
50
  "types": "dist/processors/index.d.ts",
51
- "implementation": "dist/chunk-QZGJY67D.js",
52
- "line": 3757
51
+ "implementation": "dist/chunk-XVVCS6R6.js",
52
+ "line": 3773
53
53
  },
54
54
  "formatMessagesForObserver": {
55
55
  "types": "dist/processors/index.d.ts",
56
- "implementation": "dist/chunk-QZGJY67D.js",
57
- "line": 3369
56
+ "implementation": "dist/chunk-XVVCS6R6.js",
57
+ "line": 3385
58
58
  },
59
59
  "getObservationsAsOf": {
60
60
  "types": "dist/processors/index.d.ts",
61
- "implementation": "dist/chunk-QZGJY67D.js",
62
- "line": 9425
61
+ "implementation": "dist/chunk-XVVCS6R6.js",
62
+ "line": 9476
63
63
  },
64
64
  "hasCurrentTaskSection": {
65
65
  "types": "dist/processors/index.d.ts",
66
- "implementation": "dist/chunk-QZGJY67D.js",
67
- "line": 3745
66
+ "implementation": "dist/chunk-XVVCS6R6.js",
67
+ "line": 3761
68
68
  },
69
69
  "injectAnchorIds": {
70
70
  "types": "dist/processors/index.d.ts",
71
- "implementation": "dist/chunk-QZGJY67D.js",
72
- "line": 2499
71
+ "implementation": "dist/chunk-XVVCS6R6.js",
72
+ "line": 2515
73
73
  },
74
74
  "optimizeObservationsForContext": {
75
75
  "types": "dist/processors/index.d.ts",
76
- "implementation": "dist/chunk-QZGJY67D.js",
77
- "line": 3768
76
+ "implementation": "dist/chunk-XVVCS6R6.js",
77
+ "line": 3784
78
78
  },
79
79
  "parseAnchorId": {
80
80
  "types": "dist/processors/index.d.ts",
81
- "implementation": "dist/chunk-QZGJY67D.js",
82
- "line": 2472
81
+ "implementation": "dist/chunk-XVVCS6R6.js",
82
+ "line": 2488
83
83
  },
84
84
  "parseObservationGroups": {
85
85
  "types": "dist/processors/index.d.ts",
86
- "implementation": "dist/chunk-QZGJY67D.js",
86
+ "implementation": "dist/chunk-XVVCS6R6.js",
87
87
  "line": 806
88
88
  },
89
89
  "parseObserverOutput": {
90
90
  "types": "dist/processors/index.d.ts",
91
- "implementation": "dist/chunk-QZGJY67D.js",
92
- "line": 3653
91
+ "implementation": "dist/chunk-XVVCS6R6.js",
92
+ "line": 3669
93
93
  },
94
94
  "reconcileObservationGroupsFromReflection": {
95
95
  "types": "dist/processors/index.d.ts",
96
- "implementation": "dist/chunk-QZGJY67D.js",
96
+ "implementation": "dist/chunk-XVVCS6R6.js",
97
97
  "line": 895
98
98
  },
99
99
  "renderObservationGroupsForReflection": {
100
100
  "types": "dist/processors/index.d.ts",
101
- "implementation": "dist/chunk-QZGJY67D.js",
101
+ "implementation": "dist/chunk-XVVCS6R6.js",
102
102
  "line": 851
103
103
  },
104
104
  "stripEphemeralAnchorIds": {
105
105
  "types": "dist/processors/index.d.ts",
106
- "implementation": "dist/chunk-QZGJY67D.js",
107
- "line": 2529
106
+ "implementation": "dist/chunk-XVVCS6R6.js",
107
+ "line": 2545
108
108
  },
109
109
  "stripObservationGroups": {
110
110
  "types": "dist/processors/index.d.ts",
111
- "implementation": "dist/chunk-QZGJY67D.js",
111
+ "implementation": "dist/chunk-XVVCS6R6.js",
112
112
  "line": 828
113
113
  },
114
114
  "wrapInObservationGroup": {
115
115
  "types": "dist/processors/index.d.ts",
116
- "implementation": "dist/chunk-QZGJY67D.js",
116
+ "implementation": "dist/chunk-XVVCS6R6.js",
117
117
  "line": 799
118
118
  },
119
119
  "OBSERVATIONAL_MEMORY_DEFAULTS": {
@@ -149,7 +149,7 @@
149
149
  "processors": {
150
150
  "index": "dist/processors/index.js",
151
151
  "chunks": [
152
- "chunk-QZGJY67D.js",
152
+ "chunk-XVVCS6R6.js",
153
153
  "chunk-LSJJAJAF.js"
154
154
  ]
155
155
  }
@@ -77,6 +77,48 @@ The observer also sees these markers when it processes the thread, so the observ
77
77
 
78
78
  See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
79
79
 
80
+ ## Early activation
81
+
82
+ OM can activate buffered observations before the token threshold is reached. This is useful when a prompt cache is likely to expire, or when the agent changes model providers.
83
+
84
+ Top-level early activation settings apply to observations by default:
85
+
86
+ ```typescript
87
+ const memory = new Memory({
88
+ options: {
89
+ observationalMemory: {
90
+ model: 'google/gemini-2.5-flash',
91
+ activateAfterIdle: '5m',
92
+ activateOnProviderChange: true,
93
+ },
94
+ },
95
+ })
96
+ ```
97
+
98
+ Use nested `observation` and `reflection` settings for per-phase control. Reflection early activation is opt-in, so top-level settings affect only observations.
99
+
100
+ ```typescript
101
+ const memory = new Memory({
102
+ options: {
103
+ observationalMemory: {
104
+ model: 'google/gemini-2.5-flash',
105
+ activateAfterIdle: '5m',
106
+ observation: {
107
+ activateAfterIdle: false,
108
+ },
109
+ reflection: {
110
+ activateAfterIdle: '10m',
111
+ activateOnProviderChange: true,
112
+ },
113
+ },
114
+ },
115
+ })
116
+ ```
117
+
118
+ In this example, the top-level idle setting is disabled for observations, while reflections opt into idle and provider-change activation.
119
+
120
+ See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
121
+
80
122
  ## Benefits
81
123
 
82
124
  - **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
@@ -368,17 +410,19 @@ Reflection works similarly — the Reflector runs in the background when observa
368
410
 
369
411
  ### Settings
370
412
 
371
- | Setting | Default | What it controls |
372
- | ------------------------------ | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
373
- | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
374
- | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
375
- | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
376
- | `activateAfterIdle` | none | Forces buffered observations and buffered reflections to activate after a period of inactivity, even if their token thresholds have not been reached yet. Accepts milliseconds or duration strings like `300_000`, `"5m"`, or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
377
- | `activateOnProviderChange` | `false` | Forces buffered observations and reflections to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
378
- | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
379
- | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
413
+ | Setting | Default | What it controls |
414
+ | ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
415
+ | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
416
+ | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
417
+ | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
418
+ | `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
419
+ | `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
420
+ | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
421
+ | `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
422
+ | `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
423
+ | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
380
424
 
381
- If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations or reflections first and send a smaller compressed context window.
425
+ If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
382
426
 
383
427
  ```typescript
384
428
  const memory = new Memory({
@@ -392,9 +436,9 @@ const memory = new Memory({
392
436
  })
393
437
  ```
394
438
 
395
- With a 5-minute prompt cache TTL, this activates buffered context after 5 minutes of inactivity so the next uncached prompt uses observations and reflections instead of a larger raw message window. If you prefer, `300_000` works the same way.
439
+ With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
396
440
 
397
- Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered context to activate before the new provider runs. That avoids sending a large raw window to a provider that cannot reuse the previous prompt cache.
441
+ Changing models or providers mid-thread invalidates the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
398
442
 
399
443
  ### Disabling
400
444
 
@@ -121,26 +121,88 @@ Each vector store page below includes installation instructions, configuration p
121
121
 
122
122
  ## Recall configuration
123
123
 
124
- The three main parameters that control semantic recall behavior are:
124
+ The following options control semantic recall behavior:
125
125
 
126
- 1. **topK**: How many semantically similar messages to retrieve
127
- 2. **messageRange**: How much surrounding context to include with each match
128
- 3. **scope**: Whether to search within the current thread or across all threads owned by a resource (the default is resource scope).
126
+ 1. **topK**: The number of similar messages to retrieve
127
+ 2. **messageRange**: The surrounding messages to include with each match
128
+ 3. **scope**: Whether to search the current thread or all threads for a resource
129
+ 4. **filter**: Metadata criteria that restrict search results
129
130
 
130
131
  ```typescript
131
132
  const agent = new Agent({
132
133
  memory: new Memory({
133
134
  options: {
134
135
  semanticRecall: {
135
- topK: 3, // Retrieve 3 most similar messages
136
+ topK: 3, // Retrieve 3 similar messages
136
137
  messageRange: 2, // Include 2 messages before and after each match
137
- scope: 'resource', // Search across all threads for this user (default setting if omitted)
138
+ scope: 'resource', // Search all threads for this resource
139
+ filter: { projectId: { $eq: 'project-a' } },
138
140
  },
139
141
  },
140
142
  }),
141
143
  })
142
144
  ```
143
145
 
146
+ > **Note:** `scope: 'resource'` is supported by the LibSQL, PostgreSQL, and Upstash storage adapters.
147
+
148
+ ### Metadata filtering
149
+
150
+ The `filter` option restricts semantic recall results to messages with matching thread metadata.
151
+
152
+ ```typescript
153
+ const agent = new Agent({
154
+ memory: new Memory({
155
+ options: {
156
+ semanticRecall: {
157
+ scope: 'resource',
158
+ filter: {
159
+ projectId: { $eq: 'project-a' },
160
+ category: { $in: ['work', 'personal'] },
161
+ },
162
+ },
163
+ },
164
+ }),
165
+ })
166
+ ```
167
+
168
+ Filters match against the thread metadata that was stored on each message embedding at the time the message was saved. If thread metadata changes later, existing embeddings keep their previous metadata until those messages are saved or indexed again.
169
+
170
+ Supported filter operators:
171
+
172
+ - `$and`: Logical AND
173
+ - `$eq`: Equal to
174
+ - `$gt`: Greater than
175
+ - `$gte`: Greater than or equal
176
+ - `$in`: In array
177
+ - `$lt`: Less than
178
+ - `$lte`: Less than or equal
179
+ - `$ne`: Not equal to
180
+ - `$nin`: Not in array
181
+ - `$or`: Logical OR
182
+
183
+ The following examples demonstrate metadata filters for common use cases. Each snippet is an independent alternative, not part of a single script:
184
+
185
+ ```typescript
186
+ // Filter by project
187
+ const options = {
188
+ semanticRecall: { filter: { projectId: { $eq: 'my-project' } } },
189
+ }
190
+
191
+ // Filter by multiple categories
192
+ const options = {
193
+ semanticRecall: { filter: { category: { $in: ['work', 'research'] } } },
194
+ }
195
+
196
+ // Filter by project and priority
197
+ const options = {
198
+ semanticRecall: {
199
+ filter: {
200
+ $and: [{ projectId: { $eq: 'project-a' } }, { priority: { $gte: 3 } }],
201
+ },
202
+ },
203
+ }
204
+ ```
205
+
144
206
  ## Embedder configuration
145
207
 
146
208
  Semantic recall relies on an [embedding model](https://mastra.ai/reference/memory/memory-class) to convert messages into embeddings. Mastra supports embedding models through the model router using `provider/model` strings, or you can use any [embedding model](https://sdk.vercel.ai/docs/ai-sdk-core/embeddings) compatible with the AI SDK.
@@ -36,7 +36,9 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
36
36
 
37
37
  **scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
38
38
 
39
- **activateAfterIdle** (`number | string`): Time before buffered observations or buffered reflections are forced to activate after inactivity, even if their token thresholds have not been reached yet. Accepts milliseconds or duration strings like \`300\_000\`, \`"5m"\`, or \`"1hr"\`. When the gap between the current time and the last assistant message part timestamp exceeds this value, buffered observational memory activates before the next prompt. Useful for aligning with prompt cache TTLs.
39
+ **activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity, even before \`observation.messageTokens\` is reached. Accepts a numeric millisecond value such as \`300\_000\`, duration strings like \`"5m"\` or \`"1hr"\`, or \`false\` to disable inherited observation idle activation. Reflections do not inherit this setting. Use \`reflection.activateAfterIdle\` to opt reflections into idle activation.
40
+
41
+ **activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. Reflections do not inherit this setting. Use \`reflection.activateOnProviderChange\` to opt reflections into provider-change activation. (Default: `false`)
40
42
 
41
43
  **shareTokenBudget** (`boolean`): Share the token budget between messages and observations. When enabled, the total budget is \`observation.messageTokens + reflection.observationTokens\`. Messages can use more space when observations are small, and vice versa. This maximizes context usage through flexible allocation. \`shareTokenBudget\` is not yet compatible with async buffering. You must set \`observation: { bufferTokens: false }\` when using this option (this is a temporary limitation). (Default: `false`)
42
44
 
@@ -66,6 +68,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
66
68
 
67
69
  **observation.bufferActivation** (`number`): Controls how much of the message window to retain after activation. Accepts a ratio (0-1) or an absolute token count (≥ 1000). For example, \`0.8\` means: activate enough buffers to remove 80% of \`messageTokens\` and leave 20% as active message history. An absolute token count like \`4000\` targets a goal of keeping \~4k message tokens remaining after activation. Higher values remove more message history per activation when using a ratio. Higher values keep more message history when using a token count.
68
70
 
71
+ **observation.activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. If unset, the top-level \`activateAfterIdle\` value is used for observations. Set \`false\` to disable the top-level idle setting for observations.
72
+
73
+ **observation.activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. If unset, the top-level \`activateOnProviderChange\` value is used for observations.
74
+
69
75
  **observation.blockAfter** (`number`): Token threshold above which synchronous (blocking) observation is forced. Between \`messageTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous observation runs as a last resort, while buffered activation still preserves a minimum remaining context (min(1000, retention floor)). Accepts a multiplier (1 < value < 2, multiplied by \`messageTokens\`) or an absolute token count (≥ 2, must be greater than \`messageTokens\`). Only relevant when \`bufferTokens\` is set. Defaults to \`1.2\` when async buffering is enabled.
70
76
 
71
77
  **observation.previousObserverTokens** (`number | false`): Optional token budget for the observer's previous-observations context. When set to a number, the observations passed to the Observer agent are tail-truncated to fit within this budget while keeping the newest observations and preserving highlighted 🔴 items when possible. When a buffered reflection is pending, the already-reflected observation lines are automatically replaced with the reflection summary before truncation. Set to \`0\` to omit previous observations entirely, or \`false\` to disable truncation explicitly.
@@ -86,6 +92,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
86
92
 
87
93
  **reflection.bufferActivation** (`number`): Ratio (0-1) controlling when async reflection buffering starts. When observation tokens reach \`observationTokens \* bufferActivation\`, reflection runs in the background. On activation at the full threshold, the buffered reflection replaces the observations it covers, preserving any new observations appended after that range.
88
94
 
95
+ **reflection.activateAfterIdle** (`number | string | false`): Time before buffered reflections are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. Reflections do not inherit top-level \`activateAfterIdle\`; set this explicitly to opt reflections into idle activation.
96
+
97
+ **reflection.activateOnProviderChange** (`boolean`): Force buffered reflections to activate when the actor provider or model changes. Reflections do not inherit top-level \`activateOnProviderChange\`; set this explicitly to opt reflections into provider-change activation.
98
+
89
99
  **reflection.blockAfter** (`number`): Token threshold above which synchronous (blocking) reflection is forced. Between \`observationTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous reflection runs as a last resort. Accepts a multiplier (1 < value < 2, multiplied by \`observationTokens\`) or an absolute token count (≥ 2, must be greater than \`observationTokens\`). Only relevant when \`bufferActivation\` is set. Defaults to \`1.2\` when async reflection is enabled.
90
100
 
91
101
  ### Token estimate metadata cache
package/dist/index.cjs CHANGED
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var chunkWNLFJKTX_cjs = require('./chunk-WNLFJKTX.cjs');
3
+ var chunkET2TVAT3_cjs = require('./chunk-ET2TVAT3.cjs');
4
4
  var v3 = require('zod/v3');
5
5
  var zod = require('zod');
6
6
  var z4 = require('zod/v4');
@@ -16110,7 +16110,7 @@ function formatTimestamp(date) {
16110
16110
  }
16111
16111
  function truncateByTokens(text4, maxTokens, hint) {
16112
16112
  if (tokenx.estimateTokenCount(text4) <= maxTokens) return { text: text4, wasTruncated: false };
16113
- const truncated = chunkWNLFJKTX_cjs.truncateStringByTokens(text4, maxTokens);
16113
+ const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, maxTokens);
16114
16114
  const suffix = hint ? ` [${hint} for more]` : "";
16115
16115
  return { text: truncated + suffix, wasTruncated: true };
16116
16116
  }
@@ -16162,11 +16162,11 @@ ${JSON.stringify(inv.args, null, 2)}`;
16162
16162
  });
16163
16163
  }
16164
16164
  if (inv.state === "result") {
16165
- const { value: resultValue } = chunkWNLFJKTX_cjs.resolveToolResultValue(
16165
+ const { value: resultValue } = chunkET2TVAT3_cjs.resolveToolResultValue(
16166
16166
  part,
16167
16167
  inv.result
16168
16168
  );
16169
- const resultStr = chunkWNLFJKTX_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16169
+ const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16170
16170
  const fullText = `[Tool Result: ${inv.toolName}]
16171
16171
  ${resultStr}`;
16172
16172
  parts.push(makePart(msg, i, "tool-result", fullText, detail, inv.toolName));
@@ -16193,7 +16193,7 @@ ${typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs, null, 2)}`;
16193
16193
  const toolName = part.toolName;
16194
16194
  if (toolName) {
16195
16195
  const rawResult = part.output ?? part.result;
16196
- const resultStr = chunkWNLFJKTX_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16196
+ const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16197
16197
  const fullText = `[Tool Result: ${toolName}]
16198
16198
  ${resultStr}`;
16199
16199
  parts.push(makePart(msg, i, "tool-result", fullText, detail, toolName));
@@ -16272,7 +16272,7 @@ function renderFormattedParts(parts, timestamps, options) {
16272
16272
  const text4 = buildRenderedText(parts, timestamps);
16273
16273
  let totalTokens = tokenx.estimateTokenCount(text4);
16274
16274
  if (totalTokens > options.maxTokens) {
16275
- const truncated = chunkWNLFJKTX_cjs.truncateStringByTokens(text4, options.maxTokens);
16275
+ const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, options.maxTokens);
16276
16276
  return { text: truncated, truncated: true, tokenOffset: totalTokens - options.maxTokens };
16277
16277
  }
16278
16278
  const truncatedIndices = parts.map((p, i) => ({ part: p, index: i })).filter(({ part }) => part.text !== part.fullText).sort((a, b) => expandPriority(a.part) - expandPriority(b.part));
@@ -16305,7 +16305,7 @@ function renderFormattedParts(parts, timestamps, options) {
16305
16305
  if (expandedTokens <= options.maxTokens) {
16306
16306
  return { text: expanded, truncated: false, tokenOffset: 0 };
16307
16307
  }
16308
- const hardTruncated = chunkWNLFJKTX_cjs.truncateStringByTokens(expanded, options.maxTokens);
16308
+ const hardTruncated = chunkET2TVAT3_cjs.truncateStringByTokens(expanded, options.maxTokens);
16309
16309
  return { text: hardTruncated, truncated: true, tokenOffset: expandedTokens - options.maxTokens };
16310
16310
  }
16311
16311
  async function recallPart({
@@ -16356,7 +16356,7 @@ async function recallPart({
16356
16356
 
16357
16357
  `;
16358
16358
  const fallbackText = `${fallbackNote}${firstNextPart.text}`;
16359
- const truncatedText2 = chunkWNLFJKTX_cjs.truncateStringByTokens(fallbackText, maxTokens);
16359
+ const truncatedText2 = chunkET2TVAT3_cjs.truncateStringByTokens(fallbackText, maxTokens);
16360
16360
  const wasTruncated2 = truncatedText2 !== fallbackText;
16361
16361
  return {
16362
16362
  text: truncatedText2,
@@ -16371,7 +16371,7 @@ async function recallPart({
16371
16371
  }
16372
16372
  throw new Error(`Part index ${partIndex} not found in message ${cursor}. Available indices: ${availableIndices}`);
16373
16373
  }
16374
- const truncatedText = chunkWNLFJKTX_cjs.truncateStringByTokens(target.text, maxTokens);
16374
+ const truncatedText = chunkET2TVAT3_cjs.truncateStringByTokens(target.text, maxTokens);
16375
16375
  const wasTruncated = truncatedText !== target.text;
16376
16376
  return {
16377
16377
  text: truncatedText,
@@ -17351,16 +17351,15 @@ var Memory = class extends memory.MastraMemory {
17351
17351
  `Tried to query vector index ${indexName} but this Memory instance doesn't have an attached vector db.`
17352
17352
  );
17353
17353
  }
17354
+ const scopeFilter = resourceScope ? { resource_id: resourceId } : { thread_id: threadId };
17355
+ const userFilter = typeof config.semanticRecall === "object" ? config.semanticRecall.filter : void 0;
17356
+ const combinedFilter = userFilter ? { $and: [scopeFilter, userFilter] } : scopeFilter;
17354
17357
  vectorResults.push(
17355
17358
  ...await this.vector.query({
17356
17359
  indexName,
17357
17360
  queryVector: embedding,
17358
17361
  topK: vectorConfig.topK,
17359
- filter: resourceScope ? {
17360
- resource_id: resourceId
17361
- } : {
17362
- thread_id: threadId
17363
- }
17362
+ filter: combinedFilter
17364
17363
  })
17365
17364
  );
17366
17365
  })
@@ -17403,9 +17402,12 @@ var Memory = class extends memory.MastraMemory {
17403
17402
  throw error;
17404
17403
  }
17405
17404
  }
17406
- async getThreadById({ threadId }) {
17405
+ async getThreadById({
17406
+ threadId,
17407
+ resourceId
17408
+ }) {
17407
17409
  const memoryStore = await this.getMemoryStore();
17408
- return memoryStore.getThreadById({ threadId });
17410
+ return memoryStore.getThreadById({ threadId, resourceId });
17409
17411
  }
17410
17412
  async listThreads(args) {
17411
17413
  const memoryStore = await this.getMemoryStore();
@@ -17762,6 +17764,31 @@ ${workingMemory}`;
17762
17764
  });
17763
17765
  let totalTokens = 0;
17764
17766
  if (this.vector && config.semanticRecall) {
17767
+ const messagesByThread = /* @__PURE__ */ new Map();
17768
+ updatedMessages.forEach((message) => {
17769
+ if (message.threadId) {
17770
+ if (!messagesByThread.has(message.threadId)) {
17771
+ messagesByThread.set(message.threadId, []);
17772
+ }
17773
+ messagesByThread.get(message.threadId).push(message);
17774
+ }
17775
+ });
17776
+ const threadMetadataMap = /* @__PURE__ */ new Map();
17777
+ await Promise.all(
17778
+ Array.from(messagesByThread.keys()).map(async (threadId) => {
17779
+ try {
17780
+ const thread = await memoryStore.getThreadById({ threadId });
17781
+ if (thread?.metadata) {
17782
+ threadMetadataMap.set(threadId, thread.metadata);
17783
+ }
17784
+ } catch (error) {
17785
+ const message = error instanceof Error ? error.message : String(error);
17786
+ throw new Error(
17787
+ `Could not fetch metadata for thread ${threadId} while saving semantic recall embeddings: ${message}`
17788
+ );
17789
+ }
17790
+ })
17791
+ );
17765
17792
  const embeddingData = [];
17766
17793
  let dimension;
17767
17794
  await Promise.all(
@@ -17779,9 +17806,11 @@ ${workingMemory}`;
17779
17806
  if (result2.usage?.tokens) {
17780
17807
  totalTokens += result2.usage.tokens;
17781
17808
  }
17809
+ const threadMetadata = message.threadId ? threadMetadataMap.get(message.threadId) || {} : {};
17782
17810
  embeddingData.push({
17783
17811
  embeddings: result2.embeddings,
17784
17812
  metadata: result2.chunks.map(() => ({
17813
+ ...threadMetadata,
17785
17814
  message_id: message.id,
17786
17815
  thread_id: message.threadId,
17787
17816
  resource_id: message.resourceId
@@ -18094,7 +18123,7 @@ ${workingMemory}`;
18094
18123
  "Observational memory requires @mastra/core support for request-response-id-rotation. Please bump @mastra/core to a newer version."
18095
18124
  );
18096
18125
  }
18097
- const { ObservationalMemory: OMClass } = await import('./observational-memory-BJF72NKJ.cjs');
18126
+ const { ObservationalMemory: OMClass } = await import('./observational-memory-UJUAQKJB.cjs');
18098
18127
  const onIndexObservations = this.hasRetrievalSearch(omConfig.retrieval) ? async (observation) => {
18099
18128
  await this.indexObservation(observation);
18100
18129
  } : void 0;
@@ -19020,7 +19049,7 @@ Notes:
19020
19049
  if (!effectiveConfig) return null;
19021
19050
  const engine = await this.omEngine;
19022
19051
  if (!engine) return null;
19023
- const { ObservationalMemoryProcessor } = await import('./observational-memory-BJF72NKJ.cjs');
19052
+ const { ObservationalMemoryProcessor } = await import('./observational-memory-UJUAQKJB.cjs');
19024
19053
  return new ObservationalMemoryProcessor(engine, this, {
19025
19054
  temporalMarkers: effectiveConfig.temporalMarkers
19026
19055
  });
@@ -19029,11 +19058,11 @@ Notes:
19029
19058
 
19030
19059
  Object.defineProperty(exports, "ModelByInputTokens", {
19031
19060
  enumerable: true,
19032
- get: function () { return chunkWNLFJKTX_cjs.ModelByInputTokens; }
19061
+ get: function () { return chunkET2TVAT3_cjs.ModelByInputTokens; }
19033
19062
  });
19034
19063
  Object.defineProperty(exports, "getObservationsAsOf", {
19035
19064
  enumerable: true,
19036
- get: function () { return chunkWNLFJKTX_cjs.getObservationsAsOf; }
19065
+ get: function () { return chunkET2TVAT3_cjs.getObservationsAsOf; }
19037
19066
  });
19038
19067
  Object.defineProperty(exports, "MessageHistory", {
19039
19068
  enumerable: true,