@mastra/memory 1.17.6-alpha.1 → 1.18.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/dist/{chunk-WNLFJKTX.cjs → chunk-ET2TVAT3.cjs} +84 -33
- package/dist/chunk-ET2TVAT3.cjs.map +1 -0
- package/dist/{chunk-QZGJY67D.js → chunk-XVVCS6R6.js} +84 -33
- package/dist/chunk-XVVCS6R6.js.map +1 -0
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/assets/SOURCE_MAP.json +39 -39
- package/dist/docs/references/docs-memory-observational-memory.md +56 -12
- package/dist/docs/references/docs-memory-semantic-recall.md +68 -6
- package/dist/docs/references/reference-memory-observational-memory.md +11 -1
- package/dist/index.cjs +49 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +40 -11
- package/dist/index.js.map +1 -1
- package/dist/{observational-memory-HAJ3K5JJ.js → observational-memory-2PRVG6BF.js} +3 -3
- package/dist/{observational-memory-HAJ3K5JJ.js.map → observational-memory-2PRVG6BF.js.map} +1 -1
- package/dist/{observational-memory-BJF72NKJ.cjs → observational-memory-UJUAQKJB.cjs} +26 -26
- package/dist/{observational-memory-BJF72NKJ.cjs.map → observational-memory-UJUAQKJB.cjs.map} +1 -1
- package/dist/processors/index.cjs +24 -24
- package/dist/processors/index.js +1 -1
- package/dist/processors/observational-memory/observation-turn/load-memory-context.d.ts +9 -0
- package/dist/processors/observational-memory/observation-turn/load-memory-context.d.ts.map +1 -0
- package/dist/processors/observational-memory/observation-turn/turn.d.ts.map +1 -1
- package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
- package/dist/processors/observational-memory/processor.d.ts.map +1 -1
- package/dist/processors/observational-memory/types.d.ts +35 -4
- package/dist/processors/observational-memory/types.d.ts.map +1 -1
- package/package.json +5 -5
- package/dist/chunk-QZGJY67D.js.map +0 -1
- package/dist/chunk-WNLFJKTX.cjs.map +0 -1
package/dist/docs/assets/SOURCE_MAP.json
CHANGED
|
@@ -1,119 +1,119 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "1.17.6-alpha.1",
|
|
2
|
+
"version": "1.18.0-alpha.3",
|
|
3
3
|
"package": "@mastra/memory",
|
|
4
4
|
"exports": {
|
|
5
5
|
"ModelByInputTokens": {
|
|
6
6
|
"types": "dist/processors/index.d.ts",
|
|
7
|
-
"implementation": "dist/chunk-
|
|
7
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
8
8
|
"line": 745
|
|
9
9
|
},
|
|
10
10
|
"OBSERVER_SYSTEM_PROMPT": {
|
|
11
11
|
"types": "dist/processors/index.d.ts",
|
|
12
|
-
"implementation": "dist/chunk-
|
|
12
|
+
"implementation": "dist/chunk-XVVCS6R6.js"
|
|
13
13
|
},
|
|
14
14
|
"ObservationalMemory": {
|
|
15
15
|
"types": "dist/processors/index.d.ts",
|
|
16
|
-
"implementation": "dist/chunk-
|
|
17
|
-
"line":
|
|
16
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
17
|
+
"line": 6690
|
|
18
18
|
},
|
|
19
19
|
"ObservationalMemoryProcessor": {
|
|
20
20
|
"types": "dist/processors/index.d.ts",
|
|
21
|
-
"implementation": "dist/chunk-
|
|
22
|
-
"line":
|
|
21
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
22
|
+
"line": 9264
|
|
23
23
|
},
|
|
24
24
|
"TokenCounter": {
|
|
25
25
|
"types": "dist/processors/index.d.ts",
|
|
26
|
-
"implementation": "dist/chunk-
|
|
27
|
-
"line":
|
|
26
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
27
|
+
"line": 6160
|
|
28
28
|
},
|
|
29
29
|
"buildObserverPrompt": {
|
|
30
30
|
"types": "dist/processors/index.d.ts",
|
|
31
|
-
"implementation": "dist/chunk-
|
|
32
|
-
"line":
|
|
31
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
32
|
+
"line": 3659
|
|
33
33
|
},
|
|
34
34
|
"buildObserverSystemPrompt": {
|
|
35
35
|
"types": "dist/processors/index.d.ts",
|
|
36
|
-
"implementation": "dist/chunk-
|
|
37
|
-
"line":
|
|
36
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
37
|
+
"line": 2967
|
|
38
38
|
},
|
|
39
39
|
"combineObservationGroupRanges": {
|
|
40
40
|
"types": "dist/processors/index.d.ts",
|
|
41
|
-
"implementation": "dist/chunk-
|
|
41
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
42
42
|
"line": 837
|
|
43
43
|
},
|
|
44
44
|
"deriveObservationGroupProvenance": {
|
|
45
45
|
"types": "dist/processors/index.d.ts",
|
|
46
|
-
"implementation": "dist/chunk-
|
|
46
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
47
47
|
"line": 871
|
|
48
48
|
},
|
|
49
49
|
"extractCurrentTask": {
|
|
50
50
|
"types": "dist/processors/index.d.ts",
|
|
51
|
-
"implementation": "dist/chunk-
|
|
52
|
-
"line":
|
|
51
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
52
|
+
"line": 3773
|
|
53
53
|
},
|
|
54
54
|
"formatMessagesForObserver": {
|
|
55
55
|
"types": "dist/processors/index.d.ts",
|
|
56
|
-
"implementation": "dist/chunk-
|
|
57
|
-
"line":
|
|
56
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
57
|
+
"line": 3385
|
|
58
58
|
},
|
|
59
59
|
"getObservationsAsOf": {
|
|
60
60
|
"types": "dist/processors/index.d.ts",
|
|
61
|
-
"implementation": "dist/chunk-
|
|
62
|
-
"line":
|
|
61
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
62
|
+
"line": 9476
|
|
63
63
|
},
|
|
64
64
|
"hasCurrentTaskSection": {
|
|
65
65
|
"types": "dist/processors/index.d.ts",
|
|
66
|
-
"implementation": "dist/chunk-
|
|
67
|
-
"line":
|
|
66
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
67
|
+
"line": 3761
|
|
68
68
|
},
|
|
69
69
|
"injectAnchorIds": {
|
|
70
70
|
"types": "dist/processors/index.d.ts",
|
|
71
|
-
"implementation": "dist/chunk-
|
|
72
|
-
"line":
|
|
71
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
72
|
+
"line": 2515
|
|
73
73
|
},
|
|
74
74
|
"optimizeObservationsForContext": {
|
|
75
75
|
"types": "dist/processors/index.d.ts",
|
|
76
|
-
"implementation": "dist/chunk-
|
|
77
|
-
"line":
|
|
76
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
77
|
+
"line": 3784
|
|
78
78
|
},
|
|
79
79
|
"parseAnchorId": {
|
|
80
80
|
"types": "dist/processors/index.d.ts",
|
|
81
|
-
"implementation": "dist/chunk-
|
|
82
|
-
"line":
|
|
81
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
82
|
+
"line": 2488
|
|
83
83
|
},
|
|
84
84
|
"parseObservationGroups": {
|
|
85
85
|
"types": "dist/processors/index.d.ts",
|
|
86
|
-
"implementation": "dist/chunk-
|
|
86
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
87
87
|
"line": 806
|
|
88
88
|
},
|
|
89
89
|
"parseObserverOutput": {
|
|
90
90
|
"types": "dist/processors/index.d.ts",
|
|
91
|
-
"implementation": "dist/chunk-
|
|
92
|
-
"line":
|
|
91
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
92
|
+
"line": 3669
|
|
93
93
|
},
|
|
94
94
|
"reconcileObservationGroupsFromReflection": {
|
|
95
95
|
"types": "dist/processors/index.d.ts",
|
|
96
|
-
"implementation": "dist/chunk-
|
|
96
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
97
97
|
"line": 895
|
|
98
98
|
},
|
|
99
99
|
"renderObservationGroupsForReflection": {
|
|
100
100
|
"types": "dist/processors/index.d.ts",
|
|
101
|
-
"implementation": "dist/chunk-
|
|
101
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
102
102
|
"line": 851
|
|
103
103
|
},
|
|
104
104
|
"stripEphemeralAnchorIds": {
|
|
105
105
|
"types": "dist/processors/index.d.ts",
|
|
106
|
-
"implementation": "dist/chunk-
|
|
107
|
-
"line":
|
|
106
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
107
|
+
"line": 2545
|
|
108
108
|
},
|
|
109
109
|
"stripObservationGroups": {
|
|
110
110
|
"types": "dist/processors/index.d.ts",
|
|
111
|
-
"implementation": "dist/chunk-
|
|
111
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
112
112
|
"line": 828
|
|
113
113
|
},
|
|
114
114
|
"wrapInObservationGroup": {
|
|
115
115
|
"types": "dist/processors/index.d.ts",
|
|
116
|
-
"implementation": "dist/chunk-
|
|
116
|
+
"implementation": "dist/chunk-XVVCS6R6.js",
|
|
117
117
|
"line": 799
|
|
118
118
|
},
|
|
119
119
|
"OBSERVATIONAL_MEMORY_DEFAULTS": {
|
|
@@ -149,7 +149,7 @@
|
|
|
149
149
|
"processors": {
|
|
150
150
|
"index": "dist/processors/index.js",
|
|
151
151
|
"chunks": [
|
|
152
|
-
"chunk-QZGJY67D.js",
|
|
152
|
+
"chunk-XVVCS6R6.js",
|
|
153
153
|
"chunk-LSJJAJAF.js"
|
|
154
154
|
]
|
|
155
155
|
}
|
|
package/dist/docs/references/docs-memory-observational-memory.md
CHANGED
|
@@ -77,6 +77,48 @@ The observer also sees these markers when it processes the thread, so the observ
|
|
|
77
77
|
|
|
78
78
|
See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
|
|
79
79
|
|
|
80
|
+
## Early activation
|
|
81
|
+
|
|
82
|
+
OM can activate buffered observations before the token threshold is reached. This is useful when a prompt cache is likely to expire, or when the agent changes model providers.
|
|
83
|
+
|
|
84
|
+
Top-level early activation settings apply to observations by default:
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
const memory = new Memory({
|
|
88
|
+
options: {
|
|
89
|
+
observationalMemory: {
|
|
90
|
+
model: 'google/gemini-2.5-flash',
|
|
91
|
+
activateAfterIdle: '5m',
|
|
92
|
+
activateOnProviderChange: true,
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
})
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Use nested `observation` and `reflection` settings for per-phase control. Reflection early activation is opt-in, so top-level settings affect only observations.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
const memory = new Memory({
|
|
102
|
+
options: {
|
|
103
|
+
observationalMemory: {
|
|
104
|
+
model: 'google/gemini-2.5-flash',
|
|
105
|
+
activateAfterIdle: '5m',
|
|
106
|
+
observation: {
|
|
107
|
+
activateAfterIdle: false,
|
|
108
|
+
},
|
|
109
|
+
reflection: {
|
|
110
|
+
activateAfterIdle: '10m',
|
|
111
|
+
activateOnProviderChange: true,
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
})
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
In this example, the top-level idle setting is disabled for observations, while reflections opt into idle and provider-change activation.
|
|
119
|
+
|
|
120
|
+
See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
|
|
121
|
+
|
|
80
122
|
## Benefits
|
|
81
123
|
|
|
82
124
|
- **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
|
|
@@ -368,17 +410,19 @@ Reflection works similarly — the Reflector runs in the background when observa
|
|
|
368
410
|
|
|
369
411
|
### Settings
|
|
370
412
|
|
|
371
|
-
| Setting
|
|
372
|
-
|
|
|
373
|
-
| `observation.bufferTokens`
|
|
374
|
-
| `observation.bufferActivation`
|
|
375
|
-
| `observation.blockAfter`
|
|
376
|
-
| `activateAfterIdle`
|
|
377
|
-
| `activateOnProviderChange`
|
|
378
|
-
| `reflection.bufferActivation`
|
|
379
|
-
| `reflection.
|
|
413
|
+
| Setting | Default | What it controls |
|
|
414
|
+
| ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
415
|
+
| `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
|
|
416
|
+
| `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
|
|
417
|
+
| `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
|
|
418
|
+
| `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
|
|
419
|
+
| `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
|
|
420
|
+
| `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
|
|
421
|
+
| `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
|
|
422
|
+
| `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
|
|
423
|
+
| `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
|
|
380
424
|
|
|
381
|
-
If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations
|
|
425
|
+
If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
|
|
382
426
|
|
|
383
427
|
```typescript
|
|
384
428
|
const memory = new Memory({
|
|
@@ -392,9 +436,9 @@ const memory = new Memory({
|
|
|
392
436
|
})
|
|
393
437
|
```
|
|
394
438
|
|
|
395
|
-
With a 5-minute prompt cache TTL, this activates buffered
|
|
439
|
+
With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
|
|
396
440
|
|
|
397
|
-
Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered
|
|
441
|
+
Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
|
|
398
442
|
|
|
399
443
|
### Disabling
|
|
400
444
|
|
|
package/dist/docs/references/docs-memory-semantic-recall.md
CHANGED
|
@@ -121,26 +121,88 @@ Each vector store page below includes installation instructions, configuration p
|
|
|
121
121
|
|
|
122
122
|
## Recall configuration
|
|
123
123
|
|
|
124
|
-
The
|
|
124
|
+
The following options control semantic recall behavior:
|
|
125
125
|
|
|
126
|
-
1. **topK**:
|
|
127
|
-
2. **messageRange**:
|
|
128
|
-
3. **scope**: Whether to search
|
|
126
|
+
1. **topK**: The number of similar messages to retrieve
|
|
127
|
+
2. **messageRange**: The surrounding messages to include with each match
|
|
128
|
+
3. **scope**: Whether to search the current thread or all threads for a resource
|
|
129
|
+
4. **filter**: Metadata criteria that restrict search results
|
|
129
130
|
|
|
130
131
|
```typescript
|
|
131
132
|
const agent = new Agent({
|
|
132
133
|
memory: new Memory({
|
|
133
134
|
options: {
|
|
134
135
|
semanticRecall: {
|
|
135
|
-
topK: 3, // Retrieve 3
|
|
136
|
+
topK: 3, // Retrieve 3 similar messages
|
|
136
137
|
messageRange: 2, // Include 2 messages before and after each match
|
|
137
|
-
scope: 'resource', // Search
|
|
138
|
+
scope: 'resource', // Search all threads for this resource
|
|
139
|
+
filter: { projectId: { $eq: 'project-a' } },
|
|
138
140
|
},
|
|
139
141
|
},
|
|
140
142
|
}),
|
|
141
143
|
})
|
|
142
144
|
```
|
|
143
145
|
|
|
146
|
+
> **Note:** `scope: 'resource'` is supported by the LibSQL, PostgreSQL, and Upstash storage adapters.
|
|
147
|
+
|
|
148
|
+
### Metadata filtering
|
|
149
|
+
|
|
150
|
+
The `filter` option restricts semantic recall results to messages with matching thread metadata.
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
const agent = new Agent({
|
|
154
|
+
memory: new Memory({
|
|
155
|
+
options: {
|
|
156
|
+
semanticRecall: {
|
|
157
|
+
scope: 'resource',
|
|
158
|
+
filter: {
|
|
159
|
+
projectId: { $eq: 'project-a' },
|
|
160
|
+
category: { $in: ['work', 'personal'] },
|
|
161
|
+
},
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
}),
|
|
165
|
+
})
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Filters match metadata stored on message embeddings when messages are saved. If thread metadata changes later, existing embeddings keep their previous metadata until those messages are saved or indexed again.
|
|
169
|
+
|
|
170
|
+
Supported filter operators:
|
|
171
|
+
|
|
172
|
+
- `$and`: Logical AND
|
|
173
|
+
- `$eq`: Equal to
|
|
174
|
+
- `$gt`: Greater than
|
|
175
|
+
- `$gte`: Greater than or equal
|
|
176
|
+
- `$in`: In array
|
|
177
|
+
- `$lt`: Less than
|
|
178
|
+
- `$lte`: Less than or equal
|
|
179
|
+
- `$ne`: Not equal to
|
|
180
|
+
- `$nin`: Not in array
|
|
181
|
+
- `$or`: Logical OR
|
|
182
|
+
|
|
183
|
+
The following example demonstrates metadata filters for common use cases:
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
// Filter by project
|
|
187
|
+
const options = {
|
|
188
|
+
semanticRecall: { filter: { projectId: { $eq: 'my-project' } } },
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Filter by multiple categories
|
|
192
|
+
const options = {
|
|
193
|
+
semanticRecall: { filter: { category: { $in: ['work', 'research'] } } },
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Filter by project and priority
|
|
197
|
+
const options = {
|
|
198
|
+
semanticRecall: {
|
|
199
|
+
filter: {
|
|
200
|
+
$and: [{ projectId: { $eq: 'project-a' } }, { priority: { $gte: 3 } }],
|
|
201
|
+
},
|
|
202
|
+
},
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
144
206
|
## Embedder configuration
|
|
145
207
|
|
|
146
208
|
Semantic recall relies on an [embedding model](https://mastra.ai/reference/memory/memory-class) to convert messages into embeddings. Mastra supports embedding models through the model router using `provider/model` strings, or you can use any [embedding model](https://sdk.vercel.ai/docs/ai-sdk-core/embeddings) compatible with the AI SDK.
|
|
package/dist/docs/references/reference-memory-observational-memory.md
CHANGED
|
@@ -36,7 +36,9 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
|
|
|
36
36
|
|
|
37
37
|
**scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
|
|
38
38
|
|
|
39
|
-
**activateAfterIdle** (`number | string`): Time before buffered observations
|
|
39
|
+
**activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity, even before \`observation.messageTokens\` is reached. Accepts a numeric millisecond value such as \`300\_000\`, duration strings like \`"5m"\` or \`"1hr"\`, or \`false\` to disable inherited observation idle activation. Reflections do not inherit this setting. Use \`reflection.activateAfterIdle\` to opt reflections into idle activation.
|
|
40
|
+
|
|
41
|
+
**activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. Reflections do not inherit this setting. Use \`reflection.activateOnProviderChange\` to opt reflections into provider-change activation. (Default: `false`)
|
|
40
42
|
|
|
41
43
|
**shareTokenBudget** (`boolean`): Share the token budget between messages and observations. When enabled, the total budget is \`observation.messageTokens + reflection.observationTokens\`. Messages can use more space when observations are small, and vice versa. This maximizes context usage through flexible allocation. \`shareTokenBudget\` is not yet compatible with async buffering. You must set \`observation: { bufferTokens: false }\` when using this option (this is a temporary limitation). (Default: `false`)
|
|
42
44
|
|
|
@@ -66,6 +68,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
|
|
|
66
68
|
|
|
67
69
|
**observation.bufferActivation** (`number`): Controls how much of the message window to retain after activation. Accepts a ratio (0-1) or an absolute token count (≥ 1000). For example, \`0.8\` means: activate enough buffers to remove 80% of \`messageTokens\` and leave 20% as active message history. An absolute token count like \`4000\` targets a goal of keeping \~4k message tokens remaining after activation. Higher values remove more message history per activation when using a ratio. Higher values keep more message history when using a token count.
|
|
68
70
|
|
|
71
|
+
**observation.activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. If unset, the top-level \`activateAfterIdle\` value is used for observations. Set \`false\` to disable the top-level idle setting for observations.
|
|
72
|
+
|
|
73
|
+
**observation.activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. If unset, the top-level \`activateOnProviderChange\` value is used for observations.
|
|
74
|
+
|
|
69
75
|
**observation.blockAfter** (`number`): Token threshold above which synchronous (blocking) observation is forced. Between \`messageTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous observation runs as a last resort, while buffered activation still preserves a minimum remaining context (min(1000, retention floor)). Accepts a multiplier (1 < value < 2, multiplied by \`messageTokens\`) or an absolute token count (≥ 2, must be greater than \`messageTokens\`). Only relevant when \`bufferTokens\` is set. Defaults to \`1.2\` when async buffering is enabled.
|
|
70
76
|
|
|
71
77
|
**observation.previousObserverTokens** (`number | false`): Optional token budget for the observer's previous-observations context. When set to a number, the observations passed to the Observer agent are tail-truncated to fit within this budget while keeping the newest observations and preserving highlighted 🔴 items when possible. When a buffered reflection is pending, the already-reflected observation lines are automatically replaced with the reflection summary before truncation. Set to \`0\` to omit previous observations entirely, or \`false\` to disable truncation explicitly.
|
|
@@ -86,6 +92,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
|
|
|
86
92
|
|
|
87
93
|
**reflection.bufferActivation** (`number`): Ratio (0-1) controlling when async reflection buffering starts. When observation tokens reach \`observationTokens \* bufferActivation\`, reflection runs in the background. On activation at the full threshold, the buffered reflection replaces the observations it covers, preserving any new observations appended after that range.
|
|
88
94
|
|
|
95
|
+
**reflection.activateAfterIdle** (`number | string | false`): Time before buffered reflections are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. Reflections do not inherit top-level \`activateAfterIdle\`; set this explicitly to opt reflections into idle activation.
|
|
96
|
+
|
|
97
|
+
**reflection.activateOnProviderChange** (`boolean`): Force buffered reflections to activate when the actor provider or model changes. Reflections do not inherit top-level \`activateOnProviderChange\`; set this explicitly to opt reflections into provider-change activation.
|
|
98
|
+
|
|
89
99
|
**reflection.blockAfter** (`number`): Token threshold above which synchronous (blocking) reflection is forced. Between \`observationTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous reflection runs as a last resort. Accepts a multiplier (1 < value < 2, multiplied by \`observationTokens\`) or an absolute token count (≥ 2, must be greater than \`observationTokens\`). Only relevant when \`bufferActivation\` is set. Defaults to \`1.2\` when async reflection is enabled.
|
|
90
100
|
|
|
91
101
|
### Token estimate metadata cache
|
package/dist/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var chunkWNLFJKTX_cjs = require('./chunk-WNLFJKTX.cjs');
|
|
3
|
+
var chunkET2TVAT3_cjs = require('./chunk-ET2TVAT3.cjs');
|
|
4
4
|
var v3 = require('zod/v3');
|
|
5
5
|
var zod = require('zod');
|
|
6
6
|
var z4 = require('zod/v4');
|
|
@@ -16110,7 +16110,7 @@ function formatTimestamp(date) {
|
|
|
16110
16110
|
}
|
|
16111
16111
|
function truncateByTokens(text4, maxTokens, hint) {
|
|
16112
16112
|
if (tokenx.estimateTokenCount(text4) <= maxTokens) return { text: text4, wasTruncated: false };
|
|
16113
|
-
const truncated =
|
|
16113
|
+
const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, maxTokens);
|
|
16114
16114
|
const suffix = hint ? ` [${hint} for more]` : "";
|
|
16115
16115
|
return { text: truncated + suffix, wasTruncated: true };
|
|
16116
16116
|
}
|
|
@@ -16162,11 +16162,11 @@ ${JSON.stringify(inv.args, null, 2)}`;
|
|
|
16162
16162
|
});
|
|
16163
16163
|
}
|
|
16164
16164
|
if (inv.state === "result") {
|
|
16165
|
-
const { value: resultValue } =
|
|
16165
|
+
const { value: resultValue } = chunkET2TVAT3_cjs.resolveToolResultValue(
|
|
16166
16166
|
part,
|
|
16167
16167
|
inv.result
|
|
16168
16168
|
);
|
|
16169
|
-
const resultStr =
|
|
16169
|
+
const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
|
|
16170
16170
|
const fullText = `[Tool Result: ${inv.toolName}]
|
|
16171
16171
|
${resultStr}`;
|
|
16172
16172
|
parts.push(makePart(msg, i, "tool-result", fullText, detail, inv.toolName));
|
|
@@ -16193,7 +16193,7 @@ ${typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs, null, 2)}`;
|
|
|
16193
16193
|
const toolName = part.toolName;
|
|
16194
16194
|
if (toolName) {
|
|
16195
16195
|
const rawResult = part.output ?? part.result;
|
|
16196
|
-
const resultStr =
|
|
16196
|
+
const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
|
|
16197
16197
|
const fullText = `[Tool Result: ${toolName}]
|
|
16198
16198
|
${resultStr}`;
|
|
16199
16199
|
parts.push(makePart(msg, i, "tool-result", fullText, detail, toolName));
|
|
@@ -16272,7 +16272,7 @@ function renderFormattedParts(parts, timestamps, options) {
|
|
|
16272
16272
|
const text4 = buildRenderedText(parts, timestamps);
|
|
16273
16273
|
let totalTokens = tokenx.estimateTokenCount(text4);
|
|
16274
16274
|
if (totalTokens > options.maxTokens) {
|
|
16275
|
-
const truncated =
|
|
16275
|
+
const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, options.maxTokens);
|
|
16276
16276
|
return { text: truncated, truncated: true, tokenOffset: totalTokens - options.maxTokens };
|
|
16277
16277
|
}
|
|
16278
16278
|
const truncatedIndices = parts.map((p, i) => ({ part: p, index: i })).filter(({ part }) => part.text !== part.fullText).sort((a, b) => expandPriority(a.part) - expandPriority(b.part));
|
|
@@ -16305,7 +16305,7 @@ function renderFormattedParts(parts, timestamps, options) {
|
|
|
16305
16305
|
if (expandedTokens <= options.maxTokens) {
|
|
16306
16306
|
return { text: expanded, truncated: false, tokenOffset: 0 };
|
|
16307
16307
|
}
|
|
16308
|
-
const hardTruncated =
|
|
16308
|
+
const hardTruncated = chunkET2TVAT3_cjs.truncateStringByTokens(expanded, options.maxTokens);
|
|
16309
16309
|
return { text: hardTruncated, truncated: true, tokenOffset: expandedTokens - options.maxTokens };
|
|
16310
16310
|
}
|
|
16311
16311
|
async function recallPart({
|
|
@@ -16356,7 +16356,7 @@ async function recallPart({
|
|
|
16356
16356
|
|
|
16357
16357
|
`;
|
|
16358
16358
|
const fallbackText = `${fallbackNote}${firstNextPart.text}`;
|
|
16359
|
-
const truncatedText2 =
|
|
16359
|
+
const truncatedText2 = chunkET2TVAT3_cjs.truncateStringByTokens(fallbackText, maxTokens);
|
|
16360
16360
|
const wasTruncated2 = truncatedText2 !== fallbackText;
|
|
16361
16361
|
return {
|
|
16362
16362
|
text: truncatedText2,
|
|
@@ -16371,7 +16371,7 @@ async function recallPart({
|
|
|
16371
16371
|
}
|
|
16372
16372
|
throw new Error(`Part index ${partIndex} not found in message ${cursor}. Available indices: ${availableIndices}`);
|
|
16373
16373
|
}
|
|
16374
|
-
const truncatedText =
|
|
16374
|
+
const truncatedText = chunkET2TVAT3_cjs.truncateStringByTokens(target.text, maxTokens);
|
|
16375
16375
|
const wasTruncated = truncatedText !== target.text;
|
|
16376
16376
|
return {
|
|
16377
16377
|
text: truncatedText,
|
|
@@ -17351,16 +17351,15 @@ var Memory = class extends memory.MastraMemory {
|
|
|
17351
17351
|
`Tried to query vector index ${indexName} but this Memory instance doesn't have an attached vector db.`
|
|
17352
17352
|
);
|
|
17353
17353
|
}
|
|
17354
|
+
const scopeFilter = resourceScope ? { resource_id: resourceId } : { thread_id: threadId };
|
|
17355
|
+
const userFilter = typeof config.semanticRecall === "object" ? config.semanticRecall.filter : void 0;
|
|
17356
|
+
const combinedFilter = userFilter ? { $and: [scopeFilter, userFilter] } : scopeFilter;
|
|
17354
17357
|
vectorResults.push(
|
|
17355
17358
|
...await this.vector.query({
|
|
17356
17359
|
indexName,
|
|
17357
17360
|
queryVector: embedding,
|
|
17358
17361
|
topK: vectorConfig.topK,
|
|
17359
|
-
filter:
|
|
17360
|
-
resource_id: resourceId
|
|
17361
|
-
} : {
|
|
17362
|
-
thread_id: threadId
|
|
17363
|
-
}
|
|
17362
|
+
filter: combinedFilter
|
|
17364
17363
|
})
|
|
17365
17364
|
);
|
|
17366
17365
|
})
|
|
@@ -17403,9 +17402,12 @@ var Memory = class extends memory.MastraMemory {
|
|
|
17403
17402
|
throw error;
|
|
17404
17403
|
}
|
|
17405
17404
|
}
|
|
17406
|
-
async getThreadById({
|
|
17405
|
+
async getThreadById({
|
|
17406
|
+
threadId,
|
|
17407
|
+
resourceId
|
|
17408
|
+
}) {
|
|
17407
17409
|
const memoryStore = await this.getMemoryStore();
|
|
17408
|
-
return memoryStore.getThreadById({ threadId });
|
|
17410
|
+
return memoryStore.getThreadById({ threadId, resourceId });
|
|
17409
17411
|
}
|
|
17410
17412
|
async listThreads(args) {
|
|
17411
17413
|
const memoryStore = await this.getMemoryStore();
|
|
@@ -17762,6 +17764,31 @@ ${workingMemory}`;
|
|
|
17762
17764
|
});
|
|
17763
17765
|
let totalTokens = 0;
|
|
17764
17766
|
if (this.vector && config.semanticRecall) {
|
|
17767
|
+
const messagesByThread = /* @__PURE__ */ new Map();
|
|
17768
|
+
updatedMessages.forEach((message) => {
|
|
17769
|
+
if (message.threadId) {
|
|
17770
|
+
if (!messagesByThread.has(message.threadId)) {
|
|
17771
|
+
messagesByThread.set(message.threadId, []);
|
|
17772
|
+
}
|
|
17773
|
+
messagesByThread.get(message.threadId).push(message);
|
|
17774
|
+
}
|
|
17775
|
+
});
|
|
17776
|
+
const threadMetadataMap = /* @__PURE__ */ new Map();
|
|
17777
|
+
await Promise.all(
|
|
17778
|
+
Array.from(messagesByThread.keys()).map(async (threadId) => {
|
|
17779
|
+
try {
|
|
17780
|
+
const thread = await memoryStore.getThreadById({ threadId });
|
|
17781
|
+
if (thread?.metadata) {
|
|
17782
|
+
threadMetadataMap.set(threadId, thread.metadata);
|
|
17783
|
+
}
|
|
17784
|
+
} catch (error) {
|
|
17785
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
17786
|
+
throw new Error(
|
|
17787
|
+
`Could not fetch metadata for thread ${threadId} while saving semantic recall embeddings: ${message}`
|
|
17788
|
+
);
|
|
17789
|
+
}
|
|
17790
|
+
})
|
|
17791
|
+
);
|
|
17765
17792
|
const embeddingData = [];
|
|
17766
17793
|
let dimension;
|
|
17767
17794
|
await Promise.all(
|
|
@@ -17779,9 +17806,11 @@ ${workingMemory}`;
|
|
|
17779
17806
|
if (result2.usage?.tokens) {
|
|
17780
17807
|
totalTokens += result2.usage.tokens;
|
|
17781
17808
|
}
|
|
17809
|
+
const threadMetadata = message.threadId ? threadMetadataMap.get(message.threadId) || {} : {};
|
|
17782
17810
|
embeddingData.push({
|
|
17783
17811
|
embeddings: result2.embeddings,
|
|
17784
17812
|
metadata: result2.chunks.map(() => ({
|
|
17813
|
+
...threadMetadata,
|
|
17785
17814
|
message_id: message.id,
|
|
17786
17815
|
thread_id: message.threadId,
|
|
17787
17816
|
resource_id: message.resourceId
|
|
@@ -18094,7 +18123,7 @@ ${workingMemory}`;
|
|
|
18094
18123
|
"Observational memory requires @mastra/core support for request-response-id-rotation. Please bump @mastra/core to a newer version."
|
|
18095
18124
|
);
|
|
18096
18125
|
}
|
|
18097
|
-
const { ObservationalMemory: OMClass } = await import('./observational-memory-BJF72NKJ.cjs');
|
|
18126
|
+
const { ObservationalMemory: OMClass } = await import('./observational-memory-UJUAQKJB.cjs');
|
|
18098
18127
|
const onIndexObservations = this.hasRetrievalSearch(omConfig.retrieval) ? async (observation) => {
|
|
18099
18128
|
await this.indexObservation(observation);
|
|
18100
18129
|
} : void 0;
|
|
@@ -19020,7 +19049,7 @@ Notes:
|
|
|
19020
19049
|
if (!effectiveConfig) return null;
|
|
19021
19050
|
const engine = await this.omEngine;
|
|
19022
19051
|
if (!engine) return null;
|
|
19023
|
-
const { ObservationalMemoryProcessor } = await import('./observational-memory-BJF72NKJ.cjs');
|
|
19052
|
+
const { ObservationalMemoryProcessor } = await import('./observational-memory-UJUAQKJB.cjs');
|
|
19024
19053
|
return new ObservationalMemoryProcessor(engine, this, {
|
|
19025
19054
|
temporalMarkers: effectiveConfig.temporalMarkers
|
|
19026
19055
|
});
|
|
@@ -19029,11 +19058,11 @@ Notes:
|
|
|
19029
19058
|
|
|
19030
19059
|
Object.defineProperty(exports, "ModelByInputTokens", {
|
|
19031
19060
|
enumerable: true,
|
|
19032
|
-
get: function () { return
|
|
19061
|
+
get: function () { return chunkET2TVAT3_cjs.ModelByInputTokens; }
|
|
19033
19062
|
});
|
|
19034
19063
|
Object.defineProperty(exports, "getObservationsAsOf", {
|
|
19035
19064
|
enumerable: true,
|
|
19036
|
-
get: function () { return
|
|
19065
|
+
get: function () { return chunkET2TVAT3_cjs.getObservationsAsOf; }
|
|
19037
19066
|
});
|
|
19038
19067
|
Object.defineProperty(exports, "MessageHistory", {
|
|
19039
19068
|
enumerable: true,
|