@mastra/memory 1.11.1-alpha.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +1 -1
- package/dist/{chunk-2NZR2XHO.js → chunk-DDQHE4NV.js} +38 -20
- package/dist/chunk-DDQHE4NV.js.map +1 -0
- package/dist/{chunk-W2RTLXNQ.cjs → chunk-HLGFIN4J.cjs} +38 -20
- package/dist/chunk-HLGFIN4J.cjs.map +1 -0
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/assets/SOURCE_MAP.json +39 -39
- package/dist/docs/references/docs-memory-message-history.md +6 -4
- package/dist/docs/references/docs-memory-observational-memory.md +20 -11
- package/dist/docs/references/docs-memory-overview.md +4 -4
- package/dist/docs/references/docs-memory-semantic-recall.md +28 -19
- package/dist/docs/references/docs-memory-storage.md +4 -4
- package/dist/docs/references/reference-memory-observational-memory.md +1 -1
- package/dist/docs/references/reference-storage-dynamodb.md +1 -1
- package/dist/docs/references/reference-storage-upstash.md +1 -1
- package/dist/index.cjs +137 -22
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +130 -15
- package/dist/index.js.map +1 -1
- package/dist/{observational-memory-JCPPBSVG.cjs → observational-memory-34W4S4I5.cjs} +26 -26
- package/dist/{observational-memory-JCPPBSVG.cjs.map → observational-memory-34W4S4I5.cjs.map} +1 -1
- package/dist/{observational-memory-SASGM6OW.js → observational-memory-B25SASRW.js} +3 -3
- package/dist/{observational-memory-SASGM6OW.js.map → observational-memory-B25SASRW.js.map} +1 -1
- package/dist/processors/index.cjs +24 -24
- package/dist/processors/index.js +1 -1
- package/dist/processors/observational-memory/observation-strategies/async-buffer.d.ts +1 -0
- package/dist/processors/observational-memory/observation-strategies/async-buffer.d.ts.map +1 -1
- package/dist/processors/observational-memory/observation-strategies/base.d.ts +7 -2
- package/dist/processors/observational-memory/observation-strategies/base.d.ts.map +1 -1
- package/dist/processors/observational-memory/observation-strategies/resource-scoped.d.ts +1 -0
- package/dist/processors/observational-memory/observation-strategies/resource-scoped.d.ts.map +1 -1
- package/dist/processors/observational-memory/observation-strategies/sync.d.ts +1 -0
- package/dist/processors/observational-memory/observation-strategies/sync.d.ts.map +1 -1
- package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
- package/dist/tools/om-tools.d.ts.map +1 -1
- package/package.json +7 -7
- package/dist/chunk-2NZR2XHO.js.map +0 -1
- package/dist/chunk-W2RTLXNQ.cjs.map +0 -1
package/dist/docs/SKILL.md
CHANGED
|
@@ -1,119 +1,119 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "1.
|
|
2
|
+
"version": "1.12.0",
|
|
3
3
|
"package": "@mastra/memory",
|
|
4
4
|
"exports": {
|
|
5
5
|
"ModelByInputTokens": {
|
|
6
6
|
"types": "dist/processors/index.d.ts",
|
|
7
|
-
"implementation": "dist/chunk-
|
|
7
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
8
8
|
"line": 666
|
|
9
9
|
},
|
|
10
10
|
"OBSERVER_SYSTEM_PROMPT": {
|
|
11
11
|
"types": "dist/processors/index.d.ts",
|
|
12
|
-
"implementation": "dist/chunk-
|
|
12
|
+
"implementation": "dist/chunk-DDQHE4NV.js"
|
|
13
13
|
},
|
|
14
14
|
"ObservationalMemory": {
|
|
15
15
|
"types": "dist/processors/index.d.ts",
|
|
16
|
-
"implementation": "dist/chunk-
|
|
17
|
-
"line":
|
|
16
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
17
|
+
"line": 5724
|
|
18
18
|
},
|
|
19
19
|
"ObservationalMemoryProcessor": {
|
|
20
20
|
"types": "dist/processors/index.d.ts",
|
|
21
|
-
"implementation": "dist/chunk-
|
|
22
|
-
"line":
|
|
21
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
22
|
+
"line": 8245
|
|
23
23
|
},
|
|
24
24
|
"TokenCounter": {
|
|
25
25
|
"types": "dist/processors/index.d.ts",
|
|
26
|
-
"implementation": "dist/chunk-
|
|
27
|
-
"line":
|
|
26
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
27
|
+
"line": 5276
|
|
28
28
|
},
|
|
29
29
|
"buildObserverPrompt": {
|
|
30
30
|
"types": "dist/processors/index.d.ts",
|
|
31
|
-
"implementation": "dist/chunk-
|
|
32
|
-
"line":
|
|
31
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
32
|
+
"line": 3304
|
|
33
33
|
},
|
|
34
34
|
"buildObserverSystemPrompt": {
|
|
35
35
|
"types": "dist/processors/index.d.ts",
|
|
36
|
-
"implementation": "dist/chunk-
|
|
37
|
-
"line":
|
|
36
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
37
|
+
"line": 2778
|
|
38
38
|
},
|
|
39
39
|
"combineObservationGroupRanges": {
|
|
40
40
|
"types": "dist/processors/index.d.ts",
|
|
41
|
-
"implementation": "dist/chunk-
|
|
41
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
42
42
|
"line": 758
|
|
43
43
|
},
|
|
44
44
|
"deriveObservationGroupProvenance": {
|
|
45
45
|
"types": "dist/processors/index.d.ts",
|
|
46
|
-
"implementation": "dist/chunk-
|
|
46
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
47
47
|
"line": 792
|
|
48
48
|
},
|
|
49
49
|
"extractCurrentTask": {
|
|
50
50
|
"types": "dist/processors/index.d.ts",
|
|
51
|
-
"implementation": "dist/chunk-
|
|
52
|
-
"line":
|
|
51
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
52
|
+
"line": 3418
|
|
53
53
|
},
|
|
54
54
|
"formatMessagesForObserver": {
|
|
55
55
|
"types": "dist/processors/index.d.ts",
|
|
56
|
-
"implementation": "dist/chunk-
|
|
57
|
-
"line":
|
|
56
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
57
|
+
"line": 3044
|
|
58
58
|
},
|
|
59
59
|
"getObservationsAsOf": {
|
|
60
60
|
"types": "dist/processors/index.d.ts",
|
|
61
|
-
"implementation": "dist/chunk-
|
|
62
|
-
"line":
|
|
61
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
62
|
+
"line": 8423
|
|
63
63
|
},
|
|
64
64
|
"hasCurrentTaskSection": {
|
|
65
65
|
"types": "dist/processors/index.d.ts",
|
|
66
|
-
"implementation": "dist/chunk-
|
|
67
|
-
"line":
|
|
66
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
67
|
+
"line": 3406
|
|
68
68
|
},
|
|
69
69
|
"injectAnchorIds": {
|
|
70
70
|
"types": "dist/processors/index.d.ts",
|
|
71
|
-
"implementation": "dist/chunk-
|
|
72
|
-
"line":
|
|
71
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
72
|
+
"line": 2335
|
|
73
73
|
},
|
|
74
74
|
"optimizeObservationsForContext": {
|
|
75
75
|
"types": "dist/processors/index.d.ts",
|
|
76
|
-
"implementation": "dist/chunk-
|
|
77
|
-
"line":
|
|
76
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
77
|
+
"line": 3429
|
|
78
78
|
},
|
|
79
79
|
"parseAnchorId": {
|
|
80
80
|
"types": "dist/processors/index.d.ts",
|
|
81
|
-
"implementation": "dist/chunk-
|
|
82
|
-
"line":
|
|
81
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
82
|
+
"line": 2308
|
|
83
83
|
},
|
|
84
84
|
"parseObservationGroups": {
|
|
85
85
|
"types": "dist/processors/index.d.ts",
|
|
86
|
-
"implementation": "dist/chunk-
|
|
86
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
87
87
|
"line": 727
|
|
88
88
|
},
|
|
89
89
|
"parseObserverOutput": {
|
|
90
90
|
"types": "dist/processors/index.d.ts",
|
|
91
|
-
"implementation": "dist/chunk-
|
|
92
|
-
"line":
|
|
91
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
92
|
+
"line": 3314
|
|
93
93
|
},
|
|
94
94
|
"reconcileObservationGroupsFromReflection": {
|
|
95
95
|
"types": "dist/processors/index.d.ts",
|
|
96
|
-
"implementation": "dist/chunk-
|
|
96
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
97
97
|
"line": 816
|
|
98
98
|
},
|
|
99
99
|
"renderObservationGroupsForReflection": {
|
|
100
100
|
"types": "dist/processors/index.d.ts",
|
|
101
|
-
"implementation": "dist/chunk-
|
|
101
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
102
102
|
"line": 772
|
|
103
103
|
},
|
|
104
104
|
"stripEphemeralAnchorIds": {
|
|
105
105
|
"types": "dist/processors/index.d.ts",
|
|
106
|
-
"implementation": "dist/chunk-
|
|
107
|
-
"line":
|
|
106
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
107
|
+
"line": 2365
|
|
108
108
|
},
|
|
109
109
|
"stripObservationGroups": {
|
|
110
110
|
"types": "dist/processors/index.d.ts",
|
|
111
|
-
"implementation": "dist/chunk-
|
|
111
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
112
112
|
"line": 749
|
|
113
113
|
},
|
|
114
114
|
"wrapInObservationGroup": {
|
|
115
115
|
"types": "dist/processors/index.d.ts",
|
|
116
|
-
"implementation": "dist/chunk-
|
|
116
|
+
"implementation": "dist/chunk-DDQHE4NV.js",
|
|
117
117
|
"line": 720
|
|
118
118
|
},
|
|
119
119
|
"OBSERVATIONAL_MEMORY_DEFAULTS": {
|
|
@@ -161,7 +161,7 @@
|
|
|
161
161
|
"processors": {
|
|
162
162
|
"index": "dist/processors/index.js",
|
|
163
163
|
"chunks": [
|
|
164
|
-
"chunk-
|
|
164
|
+
"chunk-DDQHE4NV.js",
|
|
165
165
|
"chunk-LSJJAJAF.js"
|
|
166
166
|
]
|
|
167
167
|
}
|
|
@@ -48,7 +48,7 @@ export const mastra = new Mastra({
|
|
|
48
48
|
})
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
Instantiate a [`Memory`](https://mastra.ai/reference/memory/memory-class) instance in your agent:
|
|
52
52
|
|
|
53
53
|
```typescript
|
|
54
54
|
import { Memory } from '@mastra/memory'
|
|
@@ -66,7 +66,7 @@ export const agent = new Agent({
|
|
|
66
66
|
|
|
67
67
|
When you call the agent, messages are automatically saved to the database. You can specify a `threadId`, `resourceId`, and optional `metadata`:
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
**.generate()**:
|
|
70
70
|
|
|
71
71
|
```typescript
|
|
72
72
|
await agent.generate('Hello', {
|
|
@@ -81,7 +81,7 @@ await agent.generate('Hello', {
|
|
|
81
81
|
})
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
**.stream()**:
|
|
85
85
|
|
|
86
86
|
```typescript
|
|
87
87
|
await agent.stream('Hello', {
|
|
@@ -103,12 +103,14 @@ You can use this history in two ways:
|
|
|
103
103
|
- **Automatic inclusion** - Mastra automatically fetches and includes recent messages in the context window. By default, it includes the last 10 messages, keeping agents grounded in the conversation. You can adjust this number with `lastMessages`, but in most cases you don't need to think about it.
|
|
104
104
|
- [**Manual querying**](#querying) - For more control, use the `recall()` function to query threads and messages directly. This lets you choose exactly which memories are included in the context window, or fetch messages to render conversation history in your UI.
|
|
105
105
|
|
|
106
|
+
> **Tip:** When memory is enabled, [Studio](https://mastra.ai/docs/studio/overview) uses message history to display past conversations in the chat sidebar.
|
|
107
|
+
|
|
106
108
|
## Accessing memory
|
|
107
109
|
|
|
108
110
|
To access memory functions for querying, cloning, or deleting threads and messages, call `getMemory()` on an agent:
|
|
109
111
|
|
|
110
112
|
```typescript
|
|
111
|
-
const agent = mastra.
|
|
113
|
+
const agent = mastra.getAgentById('test-agent')
|
|
112
114
|
const memory = await agent.getMemory()
|
|
113
115
|
```
|
|
114
116
|
|
|
@@ -42,7 +42,7 @@ See [configuration options](https://mastra.ai/reference/memory/observational-mem
|
|
|
42
42
|
|
|
43
43
|
## Benefits
|
|
44
44
|
|
|
45
|
-
- **Prompt caching**: OM's context is stable
|
|
45
|
+
- **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
|
|
46
46
|
- **Compression**: Raw message history and tool results get compressed into a dense observation log. Smaller context means faster responses and longer coherent conversations.
|
|
47
47
|
- **Zero context rot**: The agent sees relevant information instead of noisy tool calls and irrelevant tokens, so the agent stays on task over long sessions.
|
|
48
48
|
|
|
@@ -50,7 +50,7 @@ See [configuration options](https://mastra.ai/reference/memory/observational-mem
|
|
|
50
50
|
|
|
51
51
|
You don't remember every word of every conversation you've ever had. You observe what happened subconsciously, then your brain reflects — reorganizing, combining, and condensing into long-term memory. OM works the same way.
|
|
52
52
|
|
|
53
|
-
Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite
|
|
53
|
+
Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite; even models with large token limits perform worse when the window is full. This causes two problems:
|
|
54
54
|
|
|
55
55
|
- **Context rot**: the more raw message history an agent carries, the worse it performs.
|
|
56
56
|
- **Context waste**: most of that history contains tokens no longer needed to keep the agent on task.
|
|
@@ -59,14 +59,15 @@ OM solves both problems by compressing old context into dense observations.
|
|
|
59
59
|
|
|
60
60
|
### Observations
|
|
61
61
|
|
|
62
|
-
When message history tokens exceed a threshold (default: 30,000), the Observer creates observations
|
|
62
|
+
When message history tokens exceed a threshold (default: 30,000), the Observer creates observations, which are concise notes about what happened:
|
|
63
63
|
|
|
64
64
|
OM uses fast local token estimation for this thresholding work. Text is estimated with `tokenx`, while image parts use provider-aware heuristics so multimodal conversations still trigger observation at the right time. The same applies to image-like `file` parts when a transport normalizes an uploaded image as a file instead of an image part. For example, OpenAI image detail settings can materially change when OM decides to observe.
|
|
65
65
|
|
|
66
66
|
The Observer can also see attachments in the history it reviews. OM keeps readable placeholders like `[Image #1: reference-board.png]` or `[File #1: floorplan.pdf]` in the transcript for readability, and forwards the actual attachment parts alongside the text. Image-like `file` parts are upgraded to image inputs for the Observer when possible, while non-image attachments are forwarded as file parts with normalized token counting. This applies to both normal thread observation and batched resource-scope observation.
|
|
67
67
|
|
|
68
|
-
```
|
|
68
|
+
```md
|
|
69
69
|
Date: 2026-01-15
|
|
70
|
+
|
|
70
71
|
- 🔴 12:10 User is building a Next.js app with Supabase auth, due in 1 week (meaning January 22nd 2026)
|
|
71
72
|
- 🔴 12:10 App uses server components with client-side hydration
|
|
72
73
|
- 🟡 12:12 User asked about middleware configuration for protected routes
|
|
@@ -77,11 +78,11 @@ The compression is typically 5–40×. The Observer also tracks a **current task
|
|
|
77
78
|
|
|
78
79
|
If you enable `observation.threadTitle`, the Observer can also suggest a short thread title when the conversation topic meaningfully changes. Thread title generation is opt-in and updates the thread metadata, so apps like Mastra Code can show the latest title in thread lists and status UI.
|
|
79
80
|
|
|
80
|
-
Example:
|
|
81
|
+
Example: An agent using Playwright MCP might see 50,000+ tokens per page snapshot. With OM, the Observer watches the interaction and creates a few hundred tokens of observations about what was on the page and what actions were taken. The agent stays on task without carrying every raw snapshot.
|
|
81
82
|
|
|
82
83
|
### Reflections
|
|
83
84
|
|
|
84
|
-
When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them
|
|
85
|
+
When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them, combines related items, and reflects on patterns.
|
|
85
86
|
|
|
86
87
|
The result is a three-tier system:
|
|
87
88
|
|
|
@@ -93,7 +94,7 @@ The result is a three-tier system:
|
|
|
93
94
|
|
|
94
95
|
> **Note:** Retrieval mode is experimental. The API may change in future releases.
|
|
95
96
|
|
|
96
|
-
Normal OM compresses messages into observations, which is great for staying on task
|
|
97
|
+
Normal OM compresses messages into observations, which is great for staying on task, but the original wording is gone. Retrieval mode fixes this by keeping each observation group linked to the raw messages that produced it. When the agent needs exact wording, tool output, or chronology that the summary compressed away, it can call a `recall` tool to page through the source messages.
|
|
97
98
|
|
|
98
99
|
#### Browsing only
|
|
99
100
|
|
|
@@ -162,6 +163,16 @@ With retrieval mode enabled, OM:
|
|
|
162
163
|
|
|
163
164
|
See the [recall tool reference](https://mastra.ai/reference/memory/observational-memory) for the full API (detail levels, part indexing, pagination, cross-thread browsing, and token limiting).
|
|
164
165
|
|
|
166
|
+
## Studio
|
|
167
|
+
|
|
168
|
+
To see how it works in practice, open [Studio](https://mastra.ai/docs/studio/overview) and navigate to an agent with OM enabled. The **Memory** tab displays:
|
|
169
|
+
|
|
170
|
+
- **Token progress bars**: Current token counts for messages and observations, showing how close each is to its threshold. Hover over the info icon to see the model and threshold for the Observer and Reflector.
|
|
171
|
+
- **Active observations**: The current observation log, rendered inline. When previous observation or reflection records exist, expand "Previous observations" to browse them.
|
|
172
|
+
- **Background processing**: During a conversation, buffered observation chunks and reflection status appear as the agent processes in the background.
|
|
173
|
+
|
|
174
|
+
The progress bars update live while the agent is observing or reflecting, showing elapsed time and a status badge.
|
|
175
|
+
|
|
165
176
|
## Models
|
|
166
177
|
|
|
167
178
|
The Observer and Reflector run in the background. Any model that works with Mastra's [model routing](https://mastra.ai/models) (`provider/model`) can be used. When using `observationalMemory: true`, the default model is `google/gemini-2.5-flash`. When passing a config object, a `model` must be explicitly set.
|
|
@@ -184,6 +195,8 @@ See [model configuration](https://mastra.ai/reference/memory/observational-memor
|
|
|
184
195
|
|
|
185
196
|
### Token-tiered model selection
|
|
186
197
|
|
|
198
|
+
**Added in:** `@mastra/memory@1.10.0`
|
|
199
|
+
|
|
187
200
|
You can use `ModelByInputTokens` to specify different Observer or Reflector models based on input token count. OM selects the matching model tier at runtime from the configured `upTo` thresholds.
|
|
188
201
|
|
|
189
202
|
```typescript
|
|
@@ -373,10 +386,6 @@ No manual migration needed. OM reads existing messages and observes them lazily
|
|
|
373
386
|
- **Thread scope**: The first time a thread exceeds `observation.messageTokens`, the Observer processes the backlog.
|
|
374
387
|
- **Resource scope**: All unobserved messages across all threads for a resource are processed together. For users with many existing threads, this could take significant time.
|
|
375
388
|
|
|
376
|
-
## Viewing in Mastra Studio
|
|
377
|
-
|
|
378
|
-
Mastra Studio shows OM status in real time in the memory tab: token usage, which model is running, current observations, and reflection history.
|
|
379
|
-
|
|
380
389
|
## Comparing OM with other memory features
|
|
381
390
|
|
|
382
391
|
- **[Message history](https://mastra.ai/docs/memory/message-history)**: High-fidelity record of the current conversation
|
|
@@ -107,7 +107,7 @@ Use memory when your agent needs to maintain multi-turn conversations that refer
|
|
|
107
107
|
|
|
108
108
|
> **Note:** Visit [Memory Class](https://mastra.ai/reference/memory/memory-class) for a full list of configuration options.
|
|
109
109
|
|
|
110
|
-
5. Call your agent, for example in [
|
|
110
|
+
5. Call your agent, for example in [Studio](https://mastra.ai/docs/studio/overview). Inside Studio, start a new chat with your agent and take a look at the right sidebar. It'll now display various memory-related information.
|
|
111
111
|
|
|
112
112
|
## Message history
|
|
113
113
|
|
|
@@ -165,7 +165,7 @@ export const memoryAgent = new Agent({
|
|
|
165
165
|
|
|
166
166
|
## Memory in multi-agent systems
|
|
167
167
|
|
|
168
|
-
When a [supervisor agent](https://mastra.ai/docs/agents/supervisor-agents) delegates to a subagent, Mastra isolates subagent memory automatically.
|
|
168
|
+
When a [supervisor agent](https://mastra.ai/docs/agents/supervisor-agents) delegates to a subagent, Mastra isolates subagent memory automatically. No flag is needed to enable this; it happens on every delegation. Understanding how this scoping works lets you decide what stays private and what to share intentionally.
|
|
169
169
|
|
|
170
170
|
### How delegation scopes memory
|
|
171
171
|
|
|
@@ -175,7 +175,7 @@ Each delegation creates a fresh `threadId` and a deterministic `resourceId` for
|
|
|
175
175
|
- **Resource ID**: Derived as `{parentResourceId}-{agentName}`. Because the resource ID is stable across delegations, resource-scoped memory persists between calls. A subagent remembers facts from previous delegations by the same user.
|
|
176
176
|
- **Memory instance**: If a subagent has no memory configured, it inherits the supervisor's `Memory` instance. If the subagent defines its own, that takes precedence.
|
|
177
177
|
|
|
178
|
-
The supervisor forwards its conversation context to the subagent so it has enough background to complete the task. Only the delegation prompt and the subagent's response are saved — the full parent conversation
|
|
178
|
+
The supervisor forwards its conversation context to the subagent so it has enough background to complete the task. Only the delegation prompt and the subagent's response are saved — the full parent conversation isn't stored. You can control which messages reach the subagent with the [`messageFilter`](https://mastra.ai/docs/agents/supervisor-agents) callback.
|
|
179
179
|
|
|
180
180
|
> **Note:** Subagent resource IDs are always suffixed with the agent name (`{parentResourceId}-{agentName}`). Two different subagents under the same supervisor never share a resource ID through delegation.
|
|
181
181
|
|
|
@@ -206,7 +206,7 @@ Because both calls use `resource: 'project-42'`, the writer can access the resea
|
|
|
206
206
|
|
|
207
207
|
Enable [Tracing](https://mastra.ai/docs/observability/tracing/overview) to monitor and debug memory in action. Traces show you exactly which messages and observations the agent included in its context for each request, helping you understand agent behavior and verify that memory retrieval is working as expected.
|
|
208
208
|
|
|
209
|
-
Open [
|
|
209
|
+
Open [Studio](https://mastra.ai/docs/studio/overview) and select the **Observability** tab in the sidebar. Open the trace of a recent agent request, then look for spans of LLM calls.
|
|
210
210
|
|
|
211
211
|
## Switch memory per request
|
|
212
212
|
|
|
@@ -18,18 +18,33 @@ After getting a response from the LLM, all new messages (user, assistant, and to
|
|
|
18
18
|
|
|
19
19
|
## Quickstart
|
|
20
20
|
|
|
21
|
-
Semantic recall is
|
|
21
|
+
Semantic recall is disabled by default. To enable it, set `semanticRecall: true` in `options` and provide a `vector` store and `embedder`:
|
|
22
22
|
|
|
23
23
|
```typescript
|
|
24
24
|
import { Agent } from '@mastra/core/agent'
|
|
25
25
|
import { Memory } from '@mastra/memory'
|
|
26
|
+
import { LibSQLStore, LibSQLVector } from '@mastra/libsql'
|
|
27
|
+
import { ModelRouterEmbeddingModel } from '@mastra/core/llm'
|
|
26
28
|
|
|
27
29
|
const agent = new Agent({
|
|
28
30
|
id: 'support-agent',
|
|
29
31
|
name: 'SupportAgent',
|
|
30
32
|
instructions: 'You are a helpful support agent.',
|
|
31
33
|
model: 'openai/gpt-5.4',
|
|
32
|
-
memory: new Memory(
|
|
34
|
+
memory: new Memory({
|
|
35
|
+
storage: new LibSQLStore({
|
|
36
|
+
id: 'agent-storage',
|
|
37
|
+
url: 'file:./local.db',
|
|
38
|
+
}),
|
|
39
|
+
vector: new LibSQLVector({
|
|
40
|
+
id: 'agent-vector',
|
|
41
|
+
url: 'file:./local.db',
|
|
42
|
+
}),
|
|
43
|
+
embedder: new ModelRouterEmbeddingModel('openai/text-embedding-3-small'),
|
|
44
|
+
options: {
|
|
45
|
+
semanticRecall: true,
|
|
46
|
+
},
|
|
47
|
+
}),
|
|
33
48
|
})
|
|
34
49
|
```
|
|
35
50
|
|
|
@@ -77,6 +92,9 @@ const agent = new Agent({
|
|
|
77
92
|
id: 'agent-vector',
|
|
78
93
|
url: 'file:./local.db',
|
|
79
94
|
}),
|
|
95
|
+
options: {
|
|
96
|
+
semanticRecall: true,
|
|
97
|
+
},
|
|
80
98
|
}),
|
|
81
99
|
})
|
|
82
100
|
```
|
|
@@ -139,6 +157,9 @@ import { ModelRouterEmbeddingModel } from '@mastra/core/llm'
|
|
|
139
157
|
const agent = new Agent({
|
|
140
158
|
memory: new Memory({
|
|
141
159
|
embedder: new ModelRouterEmbeddingModel('openai/text-embedding-3-small'),
|
|
160
|
+
options: {
|
|
161
|
+
semanticRecall: true,
|
|
162
|
+
},
|
|
142
163
|
}),
|
|
143
164
|
})
|
|
144
165
|
```
|
|
@@ -262,26 +283,14 @@ const agent = new Agent({
|
|
|
262
283
|
|
|
263
284
|
For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg).
|
|
264
285
|
|
|
265
|
-
##
|
|
286
|
+
## Disable semantic recall
|
|
266
287
|
|
|
267
|
-
Semantic recall
|
|
268
|
-
|
|
269
|
-
Semantic recall is enabled by default but can be disabled when not needed:
|
|
270
|
-
|
|
271
|
-
```typescript
|
|
272
|
-
const agent = new Agent({
|
|
273
|
-
memory: new Memory({
|
|
274
|
-
options: {
|
|
275
|
-
semanticRecall: false,
|
|
276
|
-
},
|
|
277
|
-
}),
|
|
278
|
-
})
|
|
279
|
-
```
|
|
288
|
+
Semantic recall is disabled by default (`semanticRecall: false`). When enabled, each call adds latency because new messages are converted into embeddings and used to query a vector database before the LLM receives them.
|
|
280
289
|
|
|
281
|
-
|
|
290
|
+
Keep semantic recall disabled when:
|
|
282
291
|
|
|
283
|
-
-
|
|
284
|
-
-
|
|
292
|
+
- Message history provides sufficient context for the current conversation.
|
|
293
|
+
- You're building performance-sensitive applications, like realtime two-way audio, where embedding and vector query latency is noticeable.
|
|
285
294
|
|
|
286
295
|
## Viewing recalled messages
|
|
287
296
|
|
|
@@ -14,7 +14,7 @@ export const mastra = new Mastra({
|
|
|
14
14
|
})
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
-
> **Sharing the database with
|
|
17
|
+
> **Sharing the database with Studio:** When running `mastra dev` alongside your application (e.g., Next.js), use an absolute path to ensure both processes access the same database:
|
|
18
18
|
>
|
|
19
19
|
> ```typescript
|
|
20
20
|
> url: 'file:/absolute/path/to/your/project/mastra.db'
|
|
@@ -129,7 +129,7 @@ Mastra organizes conversations using two identifiers:
|
|
|
129
129
|
|
|
130
130
|
Both identifiers are required for agents to store information:
|
|
131
131
|
|
|
132
|
-
|
|
132
|
+
**.generate()**:
|
|
133
133
|
|
|
134
134
|
```typescript
|
|
135
135
|
const response = await agent.generate('hello', {
|
|
@@ -140,7 +140,7 @@ const response = await agent.generate('hello', {
|
|
|
140
140
|
})
|
|
141
141
|
```
|
|
142
142
|
|
|
143
|
-
|
|
143
|
+
**.stream()**:
|
|
144
144
|
|
|
145
145
|
```typescript
|
|
146
146
|
const stream = await agent.stream('hello', {
|
|
@@ -151,7 +151,7 @@ const stream = await agent.stream('hello', {
|
|
|
151
151
|
})
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
-
> **Note:** [Studio](https://mastra.ai/docs/
|
|
154
|
+
> **Note:** [Studio](https://mastra.ai/docs/studio/overview) automatically generates a thread and resource ID for you. When calling `stream()` or `generate()` yourself, remember to provide these identifiers explicitly.
|
|
155
155
|
|
|
156
156
|
### Thread title generation
|
|
157
157
|
|
|
@@ -666,7 +666,7 @@ const selector = new ModelByInputTokens({
|
|
|
666
666
|
|
|
667
667
|
#### Behavior
|
|
668
668
|
|
|
669
|
-
- Thresholds are sorted internally, so the order in the config object
|
|
669
|
+
- Thresholds are sorted internally, so the order in the config object doesn't matter.
|
|
670
670
|
- `inputTokens ≤ smallest threshold` → uses that threshold's model
|
|
671
671
|
- `inputTokens > largest threshold` → `resolve()` throws an error. If this happens during an OM Observer or Reflector run, OM aborts via TripWire, so callers receive an empty `text` result or streamed `tripwire` instead of a normal assistant response.
|
|
672
672
|
- OM computes the input token count for the Observer or Reflector call and resolves the matching model tier directly
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The DynamoDB storage implementation provides a scalable and performant NoSQL database solution for Mastra, leveraging a single-table design pattern with [ElectroDB](https://electrodb.dev/).
|
|
4
4
|
|
|
5
|
-
> **Observability Not Supported:** DynamoDB storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to DynamoDB, and
|
|
5
|
+
> **Observability Not Supported:** DynamoDB storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to DynamoDB, and [Studio's](https://mastra.ai/docs/studio/overview) observability features won't work with DynamoDB as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
|
|
6
6
|
|
|
7
7
|
> **Item Size Limit:** DynamoDB enforces a **400 KB maximum item size**. This limit can be exceeded when storing messages with base64-encoded attachments such as images. See [Handling large attachments](https://mastra.ai/docs/memory/storage) for workarounds including uploading attachments to external storage.
|
|
8
8
|
|
|
@@ -4,7 +4,7 @@ The Upstash storage implementation provides a serverless-friendly storage soluti
|
|
|
4
4
|
|
|
5
5
|
> **Pricing:** When using Mastra with Upstash, the pay-as-you-go model can result in unexpectedly high costs due to the high volume of Redis commands generated during agent conversations. We strongly recommend using a **fixed pricing plan** for predictable costs. See [Upstash pricing](https://upstash.com/pricing/redis) for details and [GitHub issue #5850](https://github.com/mastra-ai/mastra/issues/5850) for context.
|
|
6
6
|
|
|
7
|
-
> **Observability Not Supported:** Upstash storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to Upstash, and
|
|
7
|
+
> **Observability Not Supported:** Upstash storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to Upstash, and [Studio's](https://mastra.ai/docs/studio/overview) observability features won't work with Upstash as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
|
|
8
8
|
|
|
9
9
|
## Installation
|
|
10
10
|
|