@mastra/memory 1.1.0 → 1.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/dist/_types/@internal_ai-sdk-v4/dist/index.d.ts +30 -17
- package/dist/{chunk-FQJWVCDF.cjs → chunk-AWE2QQPI.cjs} +1884 -312
- package/dist/chunk-AWE2QQPI.cjs.map +1 -0
- package/dist/chunk-EQ4M72KU.js +439 -0
- package/dist/chunk-EQ4M72KU.js.map +1 -0
- package/dist/{chunk-O3CS4UGX.cjs → chunk-IDRQZVB4.cjs} +4 -4
- package/dist/{chunk-O3CS4UGX.cjs.map → chunk-IDRQZVB4.cjs.map} +1 -1
- package/dist/{chunk-YF4R74L2.js → chunk-RC6RZVYE.js} +4 -4
- package/dist/{chunk-YF4R74L2.js.map → chunk-RC6RZVYE.js.map} +1 -1
- package/dist/{chunk-6TXUWFIU.js → chunk-TYVPTNCP.js} +1885 -313
- package/dist/chunk-TYVPTNCP.js.map +1 -0
- package/dist/chunk-ZD3BKU5O.cjs +441 -0
- package/dist/chunk-ZD3BKU5O.cjs.map +1 -0
- package/dist/docs/SKILL.md +51 -50
- package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +22 -22
- package/dist/docs/{agents/03-agent-approval.md → references/docs-agents-agent-approval.md} +19 -19
- package/dist/docs/references/docs-agents-agent-memory.md +212 -0
- package/dist/docs/{agents/04-network-approval.md → references/docs-agents-network-approval.md} +13 -12
- package/dist/docs/{agents/02-networks.md → references/docs-agents-networks.md} +10 -12
- package/dist/docs/{memory/06-memory-processors.md → references/docs-memory-memory-processors.md} +6 -8
- package/dist/docs/{memory/03-message-history.md → references/docs-memory-message-history.md} +31 -20
- package/dist/docs/references/docs-memory-observational-memory.md +238 -0
- package/dist/docs/{memory/01-overview.md → references/docs-memory-overview.md} +8 -8
- package/dist/docs/{memory/05-semantic-recall.md → references/docs-memory-semantic-recall.md} +33 -17
- package/dist/docs/{memory/02-storage.md → references/docs-memory-storage.md} +29 -39
- package/dist/docs/{memory/04-working-memory.md → references/docs-memory-working-memory.md} +16 -27
- package/dist/docs/references/reference-core-getMemory.md +50 -0
- package/dist/docs/references/reference-core-listMemory.md +56 -0
- package/dist/docs/references/reference-memory-clone-utilities.md +199 -0
- package/dist/docs/references/reference-memory-cloneThread.md +130 -0
- package/dist/docs/references/reference-memory-createThread.md +68 -0
- package/dist/docs/references/reference-memory-getThreadById.md +24 -0
- package/dist/docs/references/reference-memory-listThreads.md +145 -0
- package/dist/docs/references/reference-memory-memory-class.md +147 -0
- package/dist/docs/references/reference-memory-observational-memory.md +528 -0
- package/dist/docs/{processors/01-reference.md → references/reference-processors-token-limiter-processor.md} +25 -12
- package/dist/docs/references/reference-storage-dynamodb.md +282 -0
- package/dist/docs/references/reference-storage-libsql.md +135 -0
- package/dist/docs/references/reference-storage-mongodb.md +262 -0
- package/dist/docs/references/reference-storage-postgresql.md +529 -0
- package/dist/docs/references/reference-storage-upstash.md +160 -0
- package/dist/docs/references/reference-vectors-libsql.md +305 -0
- package/dist/docs/references/reference-vectors-mongodb.md +295 -0
- package/dist/docs/references/reference-vectors-pg.md +408 -0
- package/dist/docs/references/reference-vectors-upstash.md +294 -0
- package/dist/index.cjs +919 -507
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +914 -502
- package/dist/index.js.map +1 -1
- package/dist/{observational-memory-3Q42SITP.cjs → observational-memory-3UO64HYD.cjs} +14 -14
- package/dist/{observational-memory-3Q42SITP.cjs.map → observational-memory-3UO64HYD.cjs.map} +1 -1
- package/dist/{observational-memory-VXLHOSDZ.js → observational-memory-TVHT3HP4.js} +3 -3
- package/dist/{observational-memory-VXLHOSDZ.js.map → observational-memory-TVHT3HP4.js.map} +1 -1
- package/dist/processors/index.cjs +12 -12
- package/dist/processors/index.js +1 -1
- package/dist/processors/observational-memory/index.d.ts +1 -1
- package/dist/processors/observational-memory/index.d.ts.map +1 -1
- package/dist/processors/observational-memory/observational-memory.d.ts +267 -1
- package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
- package/dist/processors/observational-memory/observer-agent.d.ts +3 -1
- package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
- package/dist/processors/observational-memory/reflector-agent.d.ts +10 -3
- package/dist/processors/observational-memory/reflector-agent.d.ts.map +1 -1
- package/dist/processors/observational-memory/types.d.ts +243 -19
- package/dist/processors/observational-memory/types.d.ts.map +1 -1
- package/dist/{token-6GSAFR2W-WGTMOPEU.js → token-APYSY3BW-2DN6RAUY.js} +11 -11
- package/dist/token-APYSY3BW-2DN6RAUY.js.map +1 -0
- package/dist/{token-6GSAFR2W-2B4WM6AQ.cjs → token-APYSY3BW-ZQ7TMBY7.cjs} +14 -14
- package/dist/token-APYSY3BW-ZQ7TMBY7.cjs.map +1 -0
- package/dist/token-util-RMHT2CPJ-6TGPE335.cjs +10 -0
- package/dist/token-util-RMHT2CPJ-6TGPE335.cjs.map +1 -0
- package/dist/token-util-RMHT2CPJ-RJEA3FAN.js +8 -0
- package/dist/token-util-RMHT2CPJ-RJEA3FAN.js.map +1 -0
- package/dist/tools/working-memory.d.ts.map +1 -1
- package/package.json +6 -7
- package/dist/chunk-6TXUWFIU.js.map +0 -1
- package/dist/chunk-FQJWVCDF.cjs.map +0 -1
- package/dist/chunk-WM6IIUQW.js +0 -250
- package/dist/chunk-WM6IIUQW.js.map +0 -1
- package/dist/chunk-ZSBBXHNM.cjs +0 -252
- package/dist/chunk-ZSBBXHNM.cjs.map +0 -1
- package/dist/docs/README.md +0 -36
- package/dist/docs/agents/01-agent-memory.md +0 -166
- package/dist/docs/core/01-reference.md +0 -114
- package/dist/docs/memory/07-reference.md +0 -687
- package/dist/docs/storage/01-reference.md +0 -1218
- package/dist/docs/vectors/01-reference.md +0 -942
- package/dist/token-6GSAFR2W-2B4WM6AQ.cjs.map +0 -1
- package/dist/token-6GSAFR2W-WGTMOPEU.js.map +0 -1
- package/dist/token-util-NEHG7TUY-TV2H7N56.js +0 -8
- package/dist/token-util-NEHG7TUY-TV2H7N56.js.map +0 -1
- package/dist/token-util-NEHG7TUY-WJZIPNNX.cjs +0 -10
- package/dist/token-util-NEHG7TUY-WJZIPNNX.cjs.map +0 -1
package/dist/docs/{memory/03-message-history.md → references/docs-memory-message-history.md}
RENAMED
|
@@ -1,25 +1,42 @@
|
|
|
1
|
-
> Learn how to configure message history in Mastra to store recent messages from the current conversation.
|
|
2
|
-
|
|
3
1
|
# Message History
|
|
4
2
|
|
|
5
|
-
Message history is the most basic and important form of memory. It gives the LLM a view of recent messages in the context window, enabling your agent to reference earlier exchanges and respond coherently.
|
|
3
|
+
Message history is the most basic and important form of memory. It gives the LLM a view of recent messages in the context window, enabling your agent to reference earlier exchanges and respond coherently.
|
|
6
4
|
|
|
7
5
|
You can also retrieve message history to display past conversations in your UI.
|
|
8
6
|
|
|
9
|
-
> **
|
|
10
|
-
Each message belongs to a thread (the conversation) and a resource (the user or entity it's associated with). See [Threads and resources](https://mastra.ai/docs/memory/storage#threads-and-resources) for more detail.
|
|
7
|
+
> **Info:** Each message belongs to a thread (the conversation) and a resource (the user or entity it's associated with). See [Threads and resources](https://mastra.ai/docs/memory/storage) for more detail.
|
|
11
8
|
|
|
12
9
|
## Getting started
|
|
13
10
|
|
|
14
|
-
Install the Mastra memory module along with a [storage adapter](https://mastra.ai/docs/memory/storage
|
|
11
|
+
Install the Mastra memory module along with a [storage adapter](https://mastra.ai/docs/memory/storage) for your database. The examples below use `@mastra/libsql`, which stores data locally in a `mastra.db` file.
|
|
12
|
+
|
|
13
|
+
**npm**:
|
|
15
14
|
|
|
16
|
-
```bash
|
|
15
|
+
```bash
|
|
17
16
|
npm install @mastra/memory@latest @mastra/libsql@latest
|
|
18
17
|
```
|
|
19
18
|
|
|
19
|
+
**pnpm**:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pnpm add @mastra/memory@latest @mastra/libsql@latest
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Yarn**:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
yarn add @mastra/memory@latest @mastra/libsql@latest
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Bun**:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
bun add @mastra/memory@latest @mastra/libsql@latest
|
|
35
|
+
```
|
|
36
|
+
|
|
20
37
|
Message history requires a storage adapter to persist conversations. Configure storage on your Mastra instance if you haven't already:
|
|
21
38
|
|
|
22
|
-
```typescript
|
|
39
|
+
```typescript
|
|
23
40
|
import { Mastra } from "@mastra/core";
|
|
24
41
|
import { LibSQLStore } from "@mastra/libsql";
|
|
25
42
|
|
|
@@ -33,7 +50,7 @@ export const mastra = new Mastra({
|
|
|
33
50
|
|
|
34
51
|
Give your agent a `Memory`:
|
|
35
52
|
|
|
36
|
-
```typescript
|
|
53
|
+
```typescript
|
|
37
54
|
import { Memory } from "@mastra/memory";
|
|
38
55
|
import { Agent } from "@mastra/core/agent";
|
|
39
56
|
|
|
@@ -49,7 +66,7 @@ export const agent = new Agent({
|
|
|
49
66
|
|
|
50
67
|
When you call the agent, messages are automatically saved to the database. You can specify a `threadId`, `resourceId`, and optional `metadata`:
|
|
51
68
|
|
|
52
|
-
|
|
69
|
+
**Generate**:
|
|
53
70
|
|
|
54
71
|
```typescript
|
|
55
72
|
await agent.generate("Hello", {
|
|
@@ -64,8 +81,7 @@ await agent.generate("Hello", {
|
|
|
64
81
|
});
|
|
65
82
|
```
|
|
66
83
|
|
|
67
|
-
|
|
68
|
-
**stream:**
|
|
84
|
+
**Stream**:
|
|
69
85
|
|
|
70
86
|
```typescript
|
|
71
87
|
await agent.stream("Hello", {
|
|
@@ -80,11 +96,7 @@ await agent.stream("Hello", {
|
|
|
80
96
|
});
|
|
81
97
|
```
|
|
82
98
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
> **Note:**
|
|
86
|
-
|
|
87
|
-
Threads and messages are created automatically when you call `agent.generate()` or `agent.stream()`, but you can also create them manually with [`createThread()`](https://mastra.ai/reference/memory/createThread) and [`saveMessages()`](https://mastra.ai/reference/memory/memory-class).
|
|
99
|
+
> **Info:** Threads and messages are created automatically when you call `agent.generate()` or `agent.stream()`, but you can also create them manually with [`createThread()`](https://mastra.ai/reference/memory/createThread) and [`saveMessages()`](https://mastra.ai/reference/memory/memory-class).
|
|
88
100
|
|
|
89
101
|
There are two ways to use this history:
|
|
90
102
|
|
|
@@ -106,8 +118,7 @@ The `Memory` instance gives you access to functions for listing threads, recalli
|
|
|
106
118
|
|
|
107
119
|
Use these methods to fetch threads and messages for displaying conversation history in your UI or for custom memory retrieval logic.
|
|
108
120
|
|
|
109
|
-
> **
|
|
110
|
-
The memory system does not enforce access control. Before running any query, verify in your application logic that the current user is authorized to access the `resourceId` being queried.
|
|
121
|
+
> **Warning:** The memory system does not enforce access control. Before running any query, verify in your application logic that the current user is authorized to access the `resourceId` being queried.
|
|
111
122
|
|
|
112
123
|
### Threads
|
|
113
124
|
|
|
@@ -240,7 +251,7 @@ const { thread, clonedMessages } = await memory.cloneThread({
|
|
|
240
251
|
});
|
|
241
252
|
```
|
|
242
253
|
|
|
243
|
-
You can filter which messages get cloned (by count or date range), specify custom thread IDs, and use utility methods to inspect clone relationships.
|
|
254
|
+
You can filter which messages get cloned (by count or date range), specify custom thread IDs, and use utility methods to inspect clone relationships.
|
|
244
255
|
|
|
245
256
|
See [`cloneThread()`](https://mastra.ai/reference/memory/cloneThread) and [clone utilities](https://mastra.ai/reference/memory/clone-utilities) for the full API.
|
|
246
257
|
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# Observational Memory
|
|
2
|
+
|
|
3
|
+
**Added in:** `@mastra/memory@1.1.0`
|
|
4
|
+
|
|
5
|
+
Observational Memory (OM) is Mastra's memory system for long-context agentic memory. Two background agents — an **Observer** and a **Reflector** — watch your agent's conversations and maintain a dense observation log that replaces raw message history as it grows.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
Enable `observationalMemory` in the memory options when creating your agent:
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
import { Memory } from "@mastra/memory";
|
|
13
|
+
import { Agent } from "@mastra/core/agent";
|
|
14
|
+
|
|
15
|
+
export const agent = new Agent({
|
|
16
|
+
name: "my-agent",
|
|
17
|
+
instructions: "You are a helpful assistant.",
|
|
18
|
+
model: "openai/gpt-5-mini",
|
|
19
|
+
memory: new Memory({
|
|
20
|
+
options: {
|
|
21
|
+
observationalMemory: true,
|
|
22
|
+
},
|
|
23
|
+
}),
|
|
24
|
+
});
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
That's it. The agent now has humanlike long-term memory that persists across conversations. Setting `observationalMemory: true` uses `google/gemini-2.5-flash` by default. To use a different model or customize thresholds, pass a config object instead:
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
const memory = new Memory({
|
|
31
|
+
options: {
|
|
32
|
+
observationalMemory: {
|
|
33
|
+
model: "deepseek/deepseek-reasoner",
|
|
34
|
+
},
|
|
35
|
+
},
|
|
36
|
+
});
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
See [configuration options](https://mastra.ai/reference/memory/observational-memory) for full API details.
|
|
40
|
+
|
|
41
|
+
> **Note:** OM currently only supports `@mastra/pg`, `@mastra/libsql`, and `@mastra/mongodb` storage adapters. It uses background agents for managing memory. When using `observationalMemory: true`, the default model is `google/gemini-2.5-flash`. When passing a config object, a `model` must be explicitly set.
|
|
42
|
+
|
|
43
|
+
## Benefits
|
|
44
|
+
|
|
45
|
+
- **Prompt caching**: OM's context is stable — observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
|
|
46
|
+
- **Compression**: Raw message history and tool results get compressed into a dense observation log. Smaller context means faster responses and longer coherent conversations.
|
|
47
|
+
- **Zero context rot**: The agent sees relevant information instead of noisy tool calls and irrelevant tokens, so the agent stays on task over long sessions.
|
|
48
|
+
|
|
49
|
+
## How It Works
|
|
50
|
+
|
|
51
|
+
You don't remember every word of every conversation you've ever had. You observe what happened subconsciously, then your brain reflects — reorganizing, combining, and condensing into long-term memory. OM works the same way.
|
|
52
|
+
|
|
53
|
+
Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite — even models with large token limits perform worse when the window is full. This causes two problems:
|
|
54
|
+
|
|
55
|
+
- **Context rot**: the more raw message history an agent carries, the worse it performs.
|
|
56
|
+
- **Context waste**: most of that history contains tokens no longer needed to keep the agent on task.
|
|
57
|
+
|
|
58
|
+
OM solves both problems by compressing old context into dense observations.
|
|
59
|
+
|
|
60
|
+
### Observations
|
|
61
|
+
|
|
62
|
+
When message history tokens exceed a threshold (default: 30,000), the Observer creates observations — concise notes about what happened:
|
|
63
|
+
|
|
64
|
+
```text
|
|
65
|
+
Date: 2026-01-15
|
|
66
|
+
- 🔴 12:10 User is building a Next.js app with Supabase auth, due in 1 week (meaning January 22nd 2026)
|
|
67
|
+
- 🔴 12:10 App uses server components with client-side hydration
|
|
68
|
+
- 🟡 12:12 User asked about middleware configuration for protected routes
|
|
69
|
+
- 🔴 12:15 User stated the app name is "Acme Dashboard"
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
The compression is typically 5–40×. The Observer also tracks a **current task** and **suggested response** so the agent picks up where it left off.
|
|
73
|
+
|
|
74
|
+
Example: an agent using Playwright MCP might see 50,000+ tokens per page snapshot. With OM, the Observer watches the interaction and creates a few hundred tokens of observations about what was on the page and what actions were taken. The agent stays on task without carrying every raw snapshot.
|
|
75
|
+
|
|
76
|
+
### Reflections
|
|
77
|
+
|
|
78
|
+
When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them — combining related items and reflecting on patterns.
|
|
79
|
+
|
|
80
|
+
The result is a three-tier system:
|
|
81
|
+
|
|
82
|
+
1. **Recent messages**: Exact conversation history for the current task
|
|
83
|
+
2. **Observations**: A log of what the Observer has seen
|
|
84
|
+
3. **Reflections**: Condensed observations when memory becomes too long
|
|
85
|
+
|
|
86
|
+
## Models
|
|
87
|
+
|
|
88
|
+
The Observer and Reflector run in the background. Any model that works with Mastra's model routing (e.g. `openai/...`, `google/...`, `deepseek/...`) can be used.
|
|
89
|
+
|
|
90
|
+
When using `observationalMemory: true`, the default model is `google/gemini-2.5-flash`. When passing a config object, a `model` must be explicitly set.
|
|
91
|
+
|
|
92
|
+
We recommend `google/gemini-2.5-flash` — it works well for both observation and reflection, and its 1M token context window gives the Reflector headroom.
|
|
93
|
+
|
|
94
|
+
We've also tested `deepseek`, `qwen3`, and `glm-4.7` for the Observer. For the Reflector, make sure the model's context window can fit all observations. Note that Claude 4.5 models currently don't work well as the Observer or the Reflector.
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
const memory = new Memory({
|
|
98
|
+
options: {
|
|
99
|
+
observationalMemory: {
|
|
100
|
+
model: "deepseek/deepseek-reasoner",
|
|
101
|
+
},
|
|
102
|
+
},
|
|
103
|
+
});
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
|
|
107
|
+
|
|
108
|
+
## Scopes
|
|
109
|
+
|
|
110
|
+
### Thread scope (default)
|
|
111
|
+
|
|
112
|
+
Each thread has its own observations.
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
const memory = new Memory({
|
|
116
|
+
options: {
|
|
117
|
+
observationalMemory: {
|
|
118
|
+
model: "google/gemini-2.5-flash",
|
|
119
|
+
scope: "thread",
|
|
120
|
+
},
|
|
121
|
+
},
|
|
122
|
+
});
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Resource scope
|
|
126
|
+
|
|
127
|
+
Observations are shared across all threads for a resource (typically a user), which enables cross-conversation memory.
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
const memory = new Memory({
|
|
131
|
+
options: {
|
|
132
|
+
observationalMemory: {
|
|
133
|
+
model: "google/gemini-2.5-flash",
|
|
134
|
+
scope: "resource",
|
|
135
|
+
},
|
|
136
|
+
},
|
|
137
|
+
});
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
> **Warning:** In resource scope, unobserved messages across _all_ threads are processed together. For users with many existing threads, this can be slow. Use thread scope for existing apps.
|
|
141
|
+
|
|
142
|
+
## Token Budgets
|
|
143
|
+
|
|
144
|
+
OM uses token thresholds to decide when to observe and reflect. See [token budget configuration](https://mastra.ai/reference/memory/observational-memory) for details.
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
const memory = new Memory({
|
|
148
|
+
options: {
|
|
149
|
+
observationalMemory: {
|
|
150
|
+
model: "google/gemini-2.5-flash",
|
|
151
|
+
observation: {
|
|
152
|
+
// when to run the Observer (default: 30,000)
|
|
153
|
+
messageTokens: 30_000,
|
|
154
|
+
},
|
|
155
|
+
reflection: {
|
|
156
|
+
// when to run the Reflector (default: 40,000)
|
|
157
|
+
observationTokens: 40_000,
|
|
158
|
+
},
|
|
159
|
+
// let message history borrow from observation budget
|
|
160
|
+
// requires bufferTokens: false (temporary limitation)
|
|
161
|
+
shareTokenBudget: false,
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
});
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Async Buffering
|
|
168
|
+
|
|
169
|
+
Without async buffering, the Observer runs synchronously when the message threshold is reached — the agent pauses mid-conversation while the Observer LLM call completes. With async buffering (enabled by default), observations are pre-computed in the background as the conversation grows. When the threshold is hit, buffered observations activate instantly with no pause.
|
|
170
|
+
|
|
171
|
+
### How it works
|
|
172
|
+
|
|
173
|
+
As the agent converses, message tokens accumulate. At regular intervals (`bufferTokens`), a background Observer call runs without blocking the agent. Each call produces a "chunk" of observations that's stored in a buffer.
|
|
174
|
+
|
|
175
|
+
When message tokens reach the `messageTokens` threshold, buffered chunks activate: their observations move into the active observation log, and the corresponding raw messages are removed from the context window. The agent never pauses.
|
|
176
|
+
|
|
177
|
+
If the agent produces messages faster than the Observer can process them, a `blockAfter` safety threshold forces a synchronous observation as a last resort.
|
|
178
|
+
|
|
179
|
+
Reflection works similarly — the Reflector runs in the background when observations reach a fraction of the reflection threshold.
|
|
180
|
+
|
|
181
|
+
### Settings
|
|
182
|
+
|
|
183
|
+
| Setting | Default | What it controls |
|
|
184
|
+
| ------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
185
|
+
| `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
|
|
186
|
+
| `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
|
|
187
|
+
| `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
|
|
188
|
+
| `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
|
|
189
|
+
| `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
|
|
190
|
+
|
|
191
|
+
### Disabling
|
|
192
|
+
|
|
193
|
+
To disable async buffering and use synchronous observation/reflection instead:
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
const memory = new Memory({
|
|
197
|
+
options: {
|
|
198
|
+
observationalMemory: {
|
|
199
|
+
model: "google/gemini-2.5-flash",
|
|
200
|
+
observation: {
|
|
201
|
+
bufferTokens: false,
|
|
202
|
+
},
|
|
203
|
+
},
|
|
204
|
+
},
|
|
205
|
+
});
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Setting `bufferTokens: false` disables both observation and reflection async buffering. See [async buffering configuration](https://mastra.ai/reference/memory/observational-memory) for the full API.
|
|
209
|
+
|
|
210
|
+
> **Note:** Async buffering is not supported with `scope: "resource"`. It is automatically disabled in resource scope.
|
|
211
|
+
|
|
212
|
+
## Migrating existing threads
|
|
213
|
+
|
|
214
|
+
No manual migration is needed. OM reads existing messages and observes them lazily when thresholds are exceeded.
|
|
215
|
+
|
|
216
|
+
- **Thread scope**: The first time a thread exceeds `observation.messageTokens`, the Observer processes the backlog.
|
|
217
|
+
- **Resource scope**: All unobserved messages across all threads for a resource are processed together. For users with many existing threads, this could take significant time.
|
|
218
|
+
|
|
219
|
+
## Viewing in Mastra Studio
|
|
220
|
+
|
|
221
|
+
Mastra Studio shows OM status in real time in the memory tab: token usage, which model is running, current observations, and reflection history.
|
|
222
|
+
|
|
223
|
+
## Comparing OM with other memory features
|
|
224
|
+
|
|
225
|
+
- **[Message history](https://mastra.ai/docs/memory/message-history)**: High-fidelity record of the current conversation
|
|
226
|
+
- **[Working memory](https://mastra.ai/docs/memory/working-memory)**: Small, structured state (JSON or markdown) for user preferences, names, goals
|
|
227
|
+
- **[Semantic Recall](https://mastra.ai/docs/memory/semantic-recall)**: RAG-based retrieval of relevant past messages
|
|
228
|
+
|
|
229
|
+
If you're using working memory to store conversation summaries or ongoing state that grows over time, OM is a better fit. Working memory is for small, structured data; OM is for long-running event logs. OM also manages message history automatically — the `messageTokens` setting controls how much raw history remains before observation runs.
|
|
230
|
+
|
|
231
|
+
In practical terms, OM replaces both working memory and message history, and has greater accuracy (and lower cost) than Semantic Recall.
|
|
232
|
+
|
|
233
|
+
## Related
|
|
234
|
+
|
|
235
|
+
- [Observational Memory Reference](https://mastra.ai/reference/memory/observational-memory)
|
|
236
|
+
- [Memory Overview](https://mastra.ai/docs/memory/overview)
|
|
237
|
+
- [Message History](https://mastra.ai/docs/memory/message-history)
|
|
238
|
+
- [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
> Learn how Mastra
|
|
2
|
-
|
|
3
1
|
# Memory
|
|
4
2
|
|
|
5
3
|
Memory enables your agent to remember user messages, agent replies, and tool results across interactions, giving it the context it needs to stay consistent, maintain conversation flow, and produce better answers over time.
|
|
6
4
|
|
|
7
|
-
Mastra supports
|
|
5
|
+
Mastra supports four complementary memory types:
|
|
8
6
|
|
|
9
7
|
- [**Message history**](https://mastra.ai/docs/memory/message-history) - keeps recent messages from the current conversation so they can be rendered in the UI and used to maintain short-term continuity within the exchange.
|
|
10
8
|
- [**Working memory**](https://mastra.ai/docs/memory/working-memory) - stores persistent, structured user data such as names, preferences, and goals.
|
|
11
|
-
- [**Semantic recall**](https://mastra.ai/docs/memory/semantic-recall) - retrieves relevant messages from older conversations based on semantic meaning rather than exact keywords, mirroring how humans recall information by association. Requires a [vector database](https://mastra.ai/docs/memory/semantic-recall
|
|
9
|
+
- [**Semantic recall**](https://mastra.ai/docs/memory/semantic-recall) - retrieves relevant messages from older conversations based on semantic meaning rather than exact keywords, mirroring how humans recall information by association. Requires a [vector database](https://mastra.ai/docs/memory/semantic-recall) and an [embedding model](https://mastra.ai/docs/memory/semantic-recall).
|
|
10
|
+
- [**Observational memory**](https://mastra.ai/docs/memory/observational-memory) - uses background Observer and Reflector agents to maintain a dense observation log that replaces raw message history as it grows, keeping the context window small while preserving long-term memory across conversations.
|
|
12
11
|
|
|
13
12
|
If the combined memory exceeds the model's context limit, [memory processors](https://mastra.ai/docs/memory/memory-processors) can filter, trim, or prioritize content so the most relevant information is preserved.
|
|
14
13
|
|
|
@@ -19,12 +18,13 @@ Choose a memory option to get started:
|
|
|
19
18
|
- [Message history](https://mastra.ai/docs/memory/message-history)
|
|
20
19
|
- [Working memory](https://mastra.ai/docs/memory/working-memory)
|
|
21
20
|
- [Semantic recall](https://mastra.ai/docs/memory/semantic-recall)
|
|
21
|
+
- [Observational memory](https://mastra.ai/docs/memory/observational-memory)
|
|
22
22
|
|
|
23
23
|
## Storage
|
|
24
24
|
|
|
25
|
-
Before enabling memory, you must first configure a storage adapter. Mastra supports several databases including PostgreSQL, MongoDB, libSQL, and [more](https://mastra.ai/docs/memory/storage
|
|
25
|
+
Before enabling memory, you must first configure a storage adapter. Mastra supports several databases including PostgreSQL, MongoDB, libSQL, and [more](https://mastra.ai/docs/memory/storage).
|
|
26
26
|
|
|
27
|
-
Storage can be configured at the [instance level](https://mastra.ai/docs/memory/storage
|
|
27
|
+
Storage can be configured at the [instance level](https://mastra.ai/docs/memory/storage) (shared across all agents) or at the [agent level](https://mastra.ai/docs/memory/storage) (dedicated per agent).
|
|
28
28
|
|
|
29
29
|
For semantic recall, you can use a separate vector database like Pinecone alongside your primary storage.
|
|
30
30
|
|
|
@@ -34,12 +34,12 @@ See the [Storage](https://mastra.ai/docs/memory/storage) documentation for confi
|
|
|
34
34
|
|
|
35
35
|
When [tracing](https://mastra.ai/docs/observability/tracing/overview) is enabled, you can inspect exactly which messages the agent uses for context in each request. The trace output shows all memory included in the agent's context window - both recent message history and messages recalled via semantic recall.
|
|
36
36
|
|
|
37
|
-

|
|
38
38
|
|
|
39
39
|
This visibility helps you understand why an agent made specific decisions and verify that memory retrieval is working as expected.
|
|
40
40
|
|
|
41
41
|
## Next steps
|
|
42
42
|
|
|
43
43
|
- Learn more about [Storage](https://mastra.ai/docs/memory/storage) providers and configuration options
|
|
44
|
-
- Add [Message history](https://mastra.ai/docs/memory/message-history), [Working memory](https://mastra.ai/docs/memory/working-memory),
|
|
44
|
+
- Add [Message history](https://mastra.ai/docs/memory/message-history), [Working memory](https://mastra.ai/docs/memory/working-memory), [Semantic recall](https://mastra.ai/docs/memory/semantic-recall), or [Observational memory](https://mastra.ai/docs/memory/observational-memory)
|
|
45
45
|
- Visit [Memory configuration reference](https://mastra.ai/reference/memory/memory-class) for all available options
|
package/dist/docs/{memory/05-semantic-recall.md → references/docs-memory-semantic-recall.md}
RENAMED
|
@@ -1,20 +1,16 @@
|
|
|
1
|
-
> Learn how to use semantic recall in Mastra to retrieve relevant messages from past conversations using vector search and embeddings.
|
|
2
|
-
|
|
3
1
|
# Semantic Recall
|
|
4
2
|
|
|
5
3
|
If you ask your friend what they did last weekend, they will search in their memory for events associated with "last weekend" and then tell you what they did. That's sort of like how semantic recall works in Mastra.
|
|
6
4
|
|
|
7
|
-
> **Watch
|
|
8
|
-
|
|
9
|
-
What semantic recall is, how it works, and how to configure it in Mastra → [YouTube (5 minutes)](https://youtu.be/UVZtK8cK8xQ)
|
|
5
|
+
> **Watch 📹:** What semantic recall is, how it works, and how to configure it in Mastra → [YouTube (5 minutes)](https://youtu.be/UVZtK8cK8xQ)
|
|
10
6
|
|
|
11
7
|
## How Semantic Recall Works
|
|
12
8
|
|
|
13
|
-
Semantic recall is RAG-based search that helps agents maintain context across longer interactions when messages are no longer within [recent message history](
|
|
9
|
+
Semantic recall is RAG-based search that helps agents maintain context across longer interactions when messages are no longer within [recent message history](https://mastra.ai/docs/memory/message-history).
|
|
14
10
|
|
|
15
11
|
It uses vector embeddings of messages for similarity search, integrates with various vector stores, and has configurable context windows around retrieved messages.
|
|
16
12
|
|
|
17
|
-

|
|
18
14
|
|
|
19
15
|
When it's enabled, new messages are used to query a vector DB for semantically similar messages.
|
|
20
16
|
|
|
@@ -24,7 +20,7 @@ After getting a response from the LLM, all new messages (user, assistant, and to
|
|
|
24
20
|
|
|
25
21
|
Semantic recall is enabled by default, so if you give your agent memory it will be included:
|
|
26
22
|
|
|
27
|
-
```typescript
|
|
23
|
+
```typescript
|
|
28
24
|
import { Agent } from "@mastra/core/agent";
|
|
29
25
|
import { Memory } from "@mastra/memory";
|
|
30
26
|
|
|
@@ -64,7 +60,7 @@ const { messages: relevantMessages } = await memory!.recall({
|
|
|
64
60
|
|
|
65
61
|
Semantic recall relies on a [storage and vector DB](https://mastra.ai/reference/memory/memory-class) to store messages and their embeddings.
|
|
66
62
|
|
|
67
|
-
```ts
|
|
63
|
+
```ts
|
|
68
64
|
import { Memory } from "@mastra/memory";
|
|
69
65
|
import { Agent } from "@mastra/core/agent";
|
|
70
66
|
import { LibSQLStore, LibSQLVector } from "@mastra/libsql";
|
|
@@ -113,7 +109,7 @@ The three main parameters that control semantic recall behavior are:
|
|
|
113
109
|
2. **messageRange**: How much surrounding context to include with each match
|
|
114
110
|
3. **scope**: Whether to search within the current thread or across all threads owned by a resource (the default is resource scope).
|
|
115
111
|
|
|
116
|
-
```typescript
|
|
112
|
+
```typescript
|
|
117
113
|
const agent = new Agent({
|
|
118
114
|
memory: new Memory({
|
|
119
115
|
options: {
|
|
@@ -135,7 +131,7 @@ Semantic recall relies on an [embedding model](https://mastra.ai/reference/memor
|
|
|
135
131
|
|
|
136
132
|
The simplest way is to use a `provider/model` string with autocomplete support:
|
|
137
133
|
|
|
138
|
-
```ts
|
|
134
|
+
```ts
|
|
139
135
|
import { Memory } from "@mastra/memory";
|
|
140
136
|
import { Agent } from "@mastra/core/agent";
|
|
141
137
|
import { ModelRouterEmbeddingModel } from "@mastra/core/llm";
|
|
@@ -158,7 +154,7 @@ The model router automatically handles API key detection from environment variab
|
|
|
158
154
|
|
|
159
155
|
You can also use AI SDK embedding models directly:
|
|
160
156
|
|
|
161
|
-
```ts
|
|
157
|
+
```ts
|
|
162
158
|
import { Memory } from "@mastra/memory";
|
|
163
159
|
import { Agent } from "@mastra/core/agent";
|
|
164
160
|
import { ModelRouterEmbeddingModel } from "@mastra/core/llm";
|
|
@@ -174,13 +170,33 @@ const agent = new Agent({
|
|
|
174
170
|
|
|
175
171
|
To use FastEmbed (a local embedding model), install `@mastra/fastembed`:
|
|
176
172
|
|
|
177
|
-
|
|
173
|
+
**npm**:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
178
176
|
npm install @mastra/fastembed@latest
|
|
179
177
|
```
|
|
180
178
|
|
|
179
|
+
**pnpm**:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
pnpm add @mastra/fastembed@latest
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Yarn**:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
yarn add @mastra/fastembed@latest
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Bun**:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
bun add @mastra/fastembed@latest
|
|
195
|
+
```
|
|
196
|
+
|
|
181
197
|
Then configure it in your memory:
|
|
182
198
|
|
|
183
|
-
```ts
|
|
199
|
+
```ts
|
|
184
200
|
import { Memory } from "@mastra/memory";
|
|
185
201
|
import { Agent } from "@mastra/core/agent";
|
|
186
202
|
import { fastembed } from "@mastra/fastembed";
|
|
@@ -198,7 +214,7 @@ When using PostgreSQL as your vector store, you can optimize semantic recall per
|
|
|
198
214
|
|
|
199
215
|
PostgreSQL supports both IVFFlat and HNSW indexes. By default, Mastra creates an IVFFlat index, but HNSW indexes typically provide better performance, especially with OpenAI embeddings, which use inner product distance.
|
|
200
216
|
|
|
201
|
-
```typescript
|
|
217
|
+
```typescript
|
|
202
218
|
import { Memory } from "@mastra/memory";
|
|
203
219
|
import { PgStore, PgVector } from "@mastra/pg";
|
|
204
220
|
|
|
@@ -228,7 +244,7 @@ const agent = new Agent({
|
|
|
228
244
|
});
|
|
229
245
|
```
|
|
230
246
|
|
|
231
|
-
For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg
|
|
247
|
+
For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg).
|
|
232
248
|
|
|
233
249
|
## Disabling
|
|
234
250
|
|
|
@@ -236,7 +252,7 @@ There is a performance impact to using semantic recall. New messages are convert
|
|
|
236
252
|
|
|
237
253
|
Semantic recall is enabled by default but can be disabled when not needed:
|
|
238
254
|
|
|
239
|
-
```typescript
|
|
255
|
+
```typescript
|
|
240
256
|
const agent = new Agent({
|
|
241
257
|
memory: new Memory({
|
|
242
258
|
options: {
|