@mastra/memory 1.1.0 → 1.2.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/CHANGELOG.md +70 -0
  2. package/dist/_types/@internal_ai-sdk-v4/dist/index.d.ts +30 -17
  3. package/dist/{chunk-6TXUWFIU.js → chunk-5YW6JV6Y.js} +1958 -321
  4. package/dist/chunk-5YW6JV6Y.js.map +1 -0
  5. package/dist/{chunk-FQJWVCDF.cjs → chunk-7SCXX4S7.cjs} +1957 -320
  6. package/dist/chunk-7SCXX4S7.cjs.map +1 -0
  7. package/dist/chunk-EQ4M72KU.js +439 -0
  8. package/dist/chunk-EQ4M72KU.js.map +1 -0
  9. package/dist/{chunk-O3CS4UGX.cjs → chunk-IDRQZVB4.cjs} +4 -4
  10. package/dist/{chunk-O3CS4UGX.cjs.map → chunk-IDRQZVB4.cjs.map} +1 -1
  11. package/dist/{chunk-YF4R74L2.js → chunk-RC6RZVYE.js} +4 -4
  12. package/dist/{chunk-YF4R74L2.js.map → chunk-RC6RZVYE.js.map} +1 -1
  13. package/dist/chunk-ZD3BKU5O.cjs +441 -0
  14. package/dist/chunk-ZD3BKU5O.cjs.map +1 -0
  15. package/dist/docs/SKILL.md +51 -50
  16. package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +22 -22
  17. package/dist/docs/{agents/03-agent-approval.md → references/docs-agents-agent-approval.md} +19 -19
  18. package/dist/docs/references/docs-agents-agent-memory.md +212 -0
  19. package/dist/docs/{agents/04-network-approval.md → references/docs-agents-network-approval.md} +13 -12
  20. package/dist/docs/{agents/02-networks.md → references/docs-agents-networks.md} +10 -12
  21. package/dist/docs/{memory/06-memory-processors.md → references/docs-memory-memory-processors.md} +6 -8
  22. package/dist/docs/{memory/03-message-history.md → references/docs-memory-message-history.md} +31 -20
  23. package/dist/docs/references/docs-memory-observational-memory.md +169 -0
  24. package/dist/docs/{memory/01-overview.md → references/docs-memory-overview.md} +8 -8
  25. package/dist/docs/{memory/05-semantic-recall.md → references/docs-memory-semantic-recall.md} +33 -17
  26. package/dist/docs/{memory/02-storage.md → references/docs-memory-storage.md} +29 -39
  27. package/dist/docs/{memory/04-working-memory.md → references/docs-memory-working-memory.md} +16 -27
  28. package/dist/docs/references/reference-core-getMemory.md +50 -0
  29. package/dist/docs/references/reference-core-listMemory.md +56 -0
  30. package/dist/docs/references/reference-memory-clone-utilities.md +199 -0
  31. package/dist/docs/references/reference-memory-cloneThread.md +130 -0
  32. package/dist/docs/references/reference-memory-createThread.md +68 -0
  33. package/dist/docs/references/reference-memory-getThreadById.md +24 -0
  34. package/dist/docs/references/reference-memory-listThreads.md +145 -0
  35. package/dist/docs/references/reference-memory-memory-class.md +147 -0
  36. package/dist/docs/references/reference-memory-observational-memory.md +219 -0
  37. package/dist/docs/{processors/01-reference.md → references/reference-processors-token-limiter-processor.md} +25 -12
  38. package/dist/docs/references/reference-storage-dynamodb.md +282 -0
  39. package/dist/docs/references/reference-storage-libsql.md +135 -0
  40. package/dist/docs/references/reference-storage-mongodb.md +262 -0
  41. package/dist/docs/references/reference-storage-postgresql.md +529 -0
  42. package/dist/docs/references/reference-storage-upstash.md +160 -0
  43. package/dist/docs/references/reference-vectors-libsql.md +305 -0
  44. package/dist/docs/references/reference-vectors-mongodb.md +295 -0
  45. package/dist/docs/references/reference-vectors-pg.md +408 -0
  46. package/dist/docs/references/reference-vectors-upstash.md +294 -0
  47. package/dist/index.cjs +919 -507
  48. package/dist/index.cjs.map +1 -1
  49. package/dist/index.d.ts.map +1 -1
  50. package/dist/index.js +914 -502
  51. package/dist/index.js.map +1 -1
  52. package/dist/{observational-memory-3Q42SITP.cjs → observational-memory-G3HACXHE.cjs} +14 -14
  53. package/dist/{observational-memory-3Q42SITP.cjs.map → observational-memory-G3HACXHE.cjs.map} +1 -1
  54. package/dist/{observational-memory-VXLHOSDZ.js → observational-memory-LI6QFTRE.js} +3 -3
  55. package/dist/{observational-memory-VXLHOSDZ.js.map → observational-memory-LI6QFTRE.js.map} +1 -1
  56. package/dist/processors/index.cjs +12 -12
  57. package/dist/processors/index.js +1 -1
  58. package/dist/processors/observational-memory/index.d.ts +1 -1
  59. package/dist/processors/observational-memory/index.d.ts.map +1 -1
  60. package/dist/processors/observational-memory/observational-memory.d.ts +283 -1
  61. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  62. package/dist/processors/observational-memory/observer-agent.d.ts +3 -1
  63. package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
  64. package/dist/processors/observational-memory/reflector-agent.d.ts +10 -3
  65. package/dist/processors/observational-memory/reflector-agent.d.ts.map +1 -1
  66. package/dist/processors/observational-memory/types.d.ts +243 -19
  67. package/dist/processors/observational-memory/types.d.ts.map +1 -1
  68. package/dist/{token-6GSAFR2W-WGTMOPEU.js → token-APYSY3BW-2DN6RAUY.js} +11 -11
  69. package/dist/token-APYSY3BW-2DN6RAUY.js.map +1 -0
  70. package/dist/{token-6GSAFR2W-2B4WM6AQ.cjs → token-APYSY3BW-ZQ7TMBY7.cjs} +14 -14
  71. package/dist/token-APYSY3BW-ZQ7TMBY7.cjs.map +1 -0
  72. package/dist/token-util-RMHT2CPJ-6TGPE335.cjs +10 -0
  73. package/dist/token-util-RMHT2CPJ-6TGPE335.cjs.map +1 -0
  74. package/dist/token-util-RMHT2CPJ-RJEA3FAN.js +8 -0
  75. package/dist/token-util-RMHT2CPJ-RJEA3FAN.js.map +1 -0
  76. package/dist/tools/working-memory.d.ts.map +1 -1
  77. package/package.json +5 -6
  78. package/dist/chunk-6TXUWFIU.js.map +0 -1
  79. package/dist/chunk-FQJWVCDF.cjs.map +0 -1
  80. package/dist/chunk-WM6IIUQW.js +0 -250
  81. package/dist/chunk-WM6IIUQW.js.map +0 -1
  82. package/dist/chunk-ZSBBXHNM.cjs +0 -252
  83. package/dist/chunk-ZSBBXHNM.cjs.map +0 -1
  84. package/dist/docs/README.md +0 -36
  85. package/dist/docs/agents/01-agent-memory.md +0 -166
  86. package/dist/docs/core/01-reference.md +0 -114
  87. package/dist/docs/memory/07-reference.md +0 -687
  88. package/dist/docs/storage/01-reference.md +0 -1218
  89. package/dist/docs/vectors/01-reference.md +0 -942
  90. package/dist/token-6GSAFR2W-2B4WM6AQ.cjs.map +0 -1
  91. package/dist/token-6GSAFR2W-WGTMOPEU.js.map +0 -1
  92. package/dist/token-util-NEHG7TUY-TV2H7N56.js +0 -8
  93. package/dist/token-util-NEHG7TUY-TV2H7N56.js.map +0 -1
  94. package/dist/token-util-NEHG7TUY-WJZIPNNX.cjs +0 -10
  95. package/dist/token-util-NEHG7TUY-WJZIPNNX.cjs.map +0 -1
@@ -1,25 +1,42 @@
1
- > Learn how to configure message history in Mastra to store recent messages from the current conversation.
2
-
3
1
  # Message History
4
2
 
5
- Message history is the most basic and important form of memory. It gives the LLM a view of recent messages in the context window, enabling your agent to reference earlier exchanges and respond coherently.
3
+ Message history is the most basic and important form of memory. It gives the LLM a view of recent messages in the context window, enabling your agent to reference earlier exchanges and respond coherently.
6
4
 
7
5
  You can also retrieve message history to display past conversations in your UI.
8
6
 
9
- > **Note:**
10
- Each message belongs to a thread (the conversation) and a resource (the user or entity it's associated with). See [Threads and resources](https://mastra.ai/docs/memory/storage#threads-and-resources) for more detail.
7
+ > **Info:** Each message belongs to a thread (the conversation) and a resource (the user or entity it's associated with). See [Threads and resources](https://mastra.ai/docs/memory/storage) for more detail.
11
8
 
12
9
  ## Getting started
13
10
 
14
- Install the Mastra memory module along with a [storage adapter](https://mastra.ai/docs/memory/storage#supported-providers) for your database. The examples below use `@mastra/libsql`, which stores data locally in a `mastra.db` file.
11
+ Install the Mastra memory module along with a [storage adapter](https://mastra.ai/docs/memory/storage) for your database. The examples below use `@mastra/libsql`, which stores data locally in a `mastra.db` file.
12
+
13
+ **npm**:
15
14
 
16
- ```bash npm2yarn
15
+ ```bash
17
16
  npm install @mastra/memory@latest @mastra/libsql@latest
18
17
  ```
19
18
 
19
+ **pnpm**:
20
+
21
+ ```bash
22
+ pnpm add @mastra/memory@latest @mastra/libsql@latest
23
+ ```
24
+
25
+ **Yarn**:
26
+
27
+ ```bash
28
+ yarn add @mastra/memory@latest @mastra/libsql@latest
29
+ ```
30
+
31
+ **Bun**:
32
+
33
+ ```bash
34
+ bun add @mastra/memory@latest @mastra/libsql@latest
35
+ ```
36
+
20
37
  Message history requires a storage adapter to persist conversations. Configure storage on your Mastra instance if you haven't already:
21
38
 
22
- ```typescript title="src/mastra/index.ts"
39
+ ```typescript
23
40
  import { Mastra } from "@mastra/core";
24
41
  import { LibSQLStore } from "@mastra/libsql";
25
42
 
@@ -33,7 +50,7 @@ export const mastra = new Mastra({
33
50
 
34
51
  Give your agent a `Memory`:
35
52
 
36
- ```typescript title="src/mastra/agents/your-agent.ts"
53
+ ```typescript
37
54
  import { Memory } from "@mastra/memory";
38
55
  import { Agent } from "@mastra/core/agent";
39
56
 
@@ -49,7 +66,7 @@ export const agent = new Agent({
49
66
 
50
67
  When you call the agent, messages are automatically saved to the database. You can specify a `threadId`, `resourceId`, and optional `metadata`:
51
68
 
52
- **generate:**
69
+ **Generate**:
53
70
 
54
71
  ```typescript
55
72
  await agent.generate("Hello", {
@@ -64,8 +81,7 @@ await agent.generate("Hello", {
64
81
  });
65
82
  ```
66
83
 
67
-
68
- **stream:**
84
+ **Stream**:
69
85
 
70
86
  ```typescript
71
87
  await agent.stream("Hello", {
@@ -80,11 +96,7 @@ await agent.stream("Hello", {
80
96
  });
81
97
  ```
82
98
 
83
-
84
-
85
- > **Note:**
86
-
87
- Threads and messages are created automatically when you call `agent.generate()` or `agent.stream()`, but you can also create them manually with [`createThread()`](https://mastra.ai/reference/memory/createThread) and [`saveMessages()`](https://mastra.ai/reference/memory/memory-class).
99
+ > **Info:** Threads and messages are created automatically when you call `agent.generate()` or `agent.stream()`, but you can also create them manually with [`createThread()`](https://mastra.ai/reference/memory/createThread) and [`saveMessages()`](https://mastra.ai/reference/memory/memory-class).
88
100
 
89
101
  There are two ways to use this history:
90
102
 
@@ -106,8 +118,7 @@ The `Memory` instance gives you access to functions for listing threads, recalli
106
118
 
107
119
  Use these methods to fetch threads and messages for displaying conversation history in your UI or for custom memory retrieval logic.
108
120
 
109
- > **Note:**
110
- The memory system does not enforce access control. Before running any query, verify in your application logic that the current user is authorized to access the `resourceId` being queried.
121
+ > **Warning:** The memory system does not enforce access control. Before running any query, verify in your application logic that the current user is authorized to access the `resourceId` being queried.
111
122
 
112
123
  ### Threads
113
124
 
@@ -240,7 +251,7 @@ const { thread, clonedMessages } = await memory.cloneThread({
240
251
  });
241
252
  ```
242
253
 
243
- You can filter which messages get cloned (by count or date range), specify custom thread IDs, and use utility methods to inspect clone relationships.
254
+ You can filter which messages get cloned (by count or date range), specify custom thread IDs, and use utility methods to inspect clone relationships.
244
255
 
245
256
  See [`cloneThread()`](https://mastra.ai/reference/memory/cloneThread) and [clone utilities](https://mastra.ai/reference/memory/clone-utilities) for the full API.
246
257
 
@@ -0,0 +1,169 @@
1
+ # Observational Memory
2
+
3
+ **Added in:** `@mastra/memory@1.1.0`
4
+
5
+ Observational Memory (OM) is Mastra's long-context memory system for agents. Two background agents — an **Observer** and a **Reflector** — watch your agent's conversations and maintain a dense observation log that replaces raw message history as it grows.
6
+
7
+ ## Quick Start
8
+
9
+ Enable `observationalMemory` in the memory options when creating your agent:
10
+
11
+ ```typescript
12
+ import { Memory } from "@mastra/memory";
13
+ import { Agent } from "@mastra/core/agent";
14
+
15
+ export const agent = new Agent({
16
+ name: "my-agent",
17
+ instructions: "You are a helpful assistant.",
18
+ model: "openai/gpt-5-mini",
19
+ memory: new Memory({
20
+ options: {
21
+ observationalMemory: true,
22
+ },
23
+ }),
24
+ });
25
+ ```
26
+
27
+ That's it. The agent now has humanlike long-term memory that persists across conversations.
28
+
29
+ See [configuration options](https://mastra.ai/reference/memory/observational-memory) for full API details.
30
+
31
+ > **Note:** OM currently only supports `@mastra/pg`, `@mastra/libsql`, and `@mastra/mongodb` storage adapters. It also uses background agents for managing memory. The default model (configurable) is `google/gemini-2.5-flash` as it's the one we've tested the most.
32
+
33
+ ## Benefits
34
+
35
+ - **Prompt caching**: OM's context is stable — observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
36
+ - **Compression**: Raw message history and tool results get compressed into a dense observation log. Smaller context means faster responses and longer coherent conversations.
37
+ - **Zero context rot**: The agent sees relevant information instead of noisy tool calls and irrelevant tokens, so the agent stays on task over long sessions.
38
+
39
+ ## How It Works
40
+
41
+ You don't remember every word of every conversation you've ever had. You observe what happened subconsciously, then your brain reflects — reorganizing, combining, and condensing into long-term memory. OM works the same way.
42
+
43
+ Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite — even models with large token limits perform worse when the window is full. This causes two problems:
44
+
45
+ - **Context rot**: the more raw message history an agent carries, the worse it performs.
46
+ - **Context waste**: most of that history contains tokens no longer needed to keep the agent on task.
47
+
48
+ OM solves both problems by compressing old context into dense observations.
49
+
50
+ ### Observations
51
+
52
+ When message history tokens exceed a threshold (default: 30,000), the Observer creates observations — concise notes about what happened:
53
+
54
+ ```text
55
+ Date: 2026-01-15
56
+ - 🔴 12:10 User is building a Next.js app with Supabase auth, due in 1 week (meaning January 22nd 2026)
57
+ - 🔴 12:10 App uses server components with client-side hydration
58
+ - 🟡 12:12 User asked about middleware configuration for protected routes
59
+ - 🔴 12:15 User stated the app name is "Acme Dashboard"
60
+ ```
61
+
62
+ The compression is typically 5–40×. The Observer also tracks a **current task** and **suggested response** so the agent picks up where it left off.
63
+
64
+ Example: an agent using Playwright MCP might see 50,000+ tokens per page snapshot. With OM, the Observer watches the interaction and creates a few hundred tokens of observations about what was on the page and what actions were taken. The agent stays on task without carrying every raw snapshot.
65
+
66
+ ### Reflections
67
+
68
+ When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them — combining related items and reflecting on patterns.
69
+
70
+ The result is a three-tier system:
71
+
72
+ 1. **Recent messages**: Exact conversation history for the current task
73
+ 2. **Observations**: A log of what the Observer has seen
74
+ 3. **Reflections**: Condensed observations when memory becomes too long
75
+
76
+ ## Models
77
+
78
+ The Observer and Reflector run in the background. Any model that works with Mastra's model routing (e.g. `openai/...`, `google/...`, `deepseek/...`) can be used.
79
+
80
+ The default is `google/gemini-2.5-flash` — it works well for both observation and reflection, and its 1M token context window gives the Reflector headroom.
81
+
82
+ We've also tested `deepseek`, `qwen3`, and `glm-4.7` for the Observer. For the Reflector, make sure the model's context window can fit all observations. Note that Claude 4.5 models currently don't work well as the Observer or Reflector.
83
+
84
+ ```typescript
85
+ const memory = new Memory({
86
+ options: {
87
+ observationalMemory: {
88
+ model: "deepseek/deepseek-reasoner",
89
+ },
90
+ },
91
+ });
92
+ ```
93
+
94
+ See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
95
+
96
+ ## Scopes
97
+
98
+ ### Thread scope (default)
99
+
100
+ Each thread has its own observations.
101
+
102
+ ```typescript
103
+ observationalMemory: {
104
+ scope: "thread",
105
+ }
106
+ ```
107
+
108
+ ### Resource scope
109
+
110
+ Observations are shared across all threads for a resource (typically a user). This enables cross-conversation memory.
111
+
112
+ ```typescript
113
+ observationalMemory: {
114
+ scope: "resource",
115
+ }
116
+ ```
117
+
118
+ > **Warning:** In resource scope, unobserved messages across _all_ threads are processed together. For users with many existing threads, this can be slow. Use thread scope for existing apps.
119
+
120
+ ## Token Budgets
121
+
122
+ OM uses token thresholds to decide when to observe and reflect. See [token budget configuration](https://mastra.ai/reference/memory/observational-memory) for details.
123
+
124
+ ```typescript
125
+ const memory = new Memory({
126
+ options: {
127
+ observationalMemory: {
128
+ observation: {
129
+ // when to run the Observer (default: 30,000)
130
+ messageTokens: 30_000,
131
+ },
132
+ reflection: {
133
+ // when to run the Reflector (default: 40,000)
134
+ observationTokens: 40_000,
135
+ },
136
+ // let message history borrow from observation budget
137
+ shareTokenBudget: false,
138
+ },
139
+ },
140
+ });
141
+ ```
142
+
143
+ ## Migrating existing threads
144
+
145
+ No manual migration is needed. OM reads existing messages and observes them lazily when thresholds are exceeded.
146
+
147
+ - **Thread scope**: The first time a thread exceeds `observation.messageTokens`, the Observer processes the backlog.
148
+ - **Resource scope**: All unobserved messages across all threads for a resource are processed together. For users with many existing threads, this could take significant time.
149
+
150
+ ## Viewing in Mastra Studio
151
+
152
+ Mastra Studio shows OM status in real time in the memory tab: token usage, which model is running, current observations, and reflection history.
153
+
154
+ ## Comparing OM with other memory features
155
+
156
+ - **[Message history](https://mastra.ai/docs/memory/message-history)**: High-fidelity record of the current conversation
157
+ - **[Working memory](https://mastra.ai/docs/memory/working-memory)**: Small, structured state (JSON or markdown) for user preferences, names, goals
158
+ - **[Semantic Recall](https://mastra.ai/docs/memory/semantic-recall)**: RAG-based retrieval of relevant past messages
159
+
160
+ If you're using working memory to store conversation summaries or ongoing state that grows over time, OM is a better fit. Working memory is for small, structured data; OM is for long-running event logs. OM also manages message history automatically — the `messageTokens` setting controls how much raw history remains before observation runs.
161
+
162
+ In practical terms, OM replaces both working memory and message history, and has greater accuracy (and lower cost) than Semantic Recall.
163
+
164
+ ## Related
165
+
166
+ - [Observational Memory Reference](https://mastra.ai/reference/memory/observational-memory)
167
+ - [Memory Overview](https://mastra.ai/docs/memory/overview)
168
+ - [Message History](https://mastra.ai/docs/memory/message-history)
169
+ - [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
@@ -1,14 +1,13 @@
1
- > Learn how Mastra
2
-
3
1
  # Memory
4
2
 
5
3
  Memory enables your agent to remember user messages, agent replies, and tool results across interactions, giving it the context it needs to stay consistent, maintain conversation flow, and produce better answers over time.
6
4
 
7
- Mastra supports three complementary memory types:
5
+ Mastra supports four complementary memory types:
8
6
 
9
7
  - [**Message history**](https://mastra.ai/docs/memory/message-history) - keeps recent messages from the current conversation so they can be rendered in the UI and used to maintain short-term continuity within the exchange.
10
8
  - [**Working memory**](https://mastra.ai/docs/memory/working-memory) - stores persistent, structured user data such as names, preferences, and goals.
11
- - [**Semantic recall**](https://mastra.ai/docs/memory/semantic-recall) - retrieves relevant messages from older conversations based on semantic meaning rather than exact keywords, mirroring how humans recall information by association. Requires a [vector database](https://mastra.ai/docs/memory/semantic-recall#storage-configuration) and an [embedding model](https://mastra.ai/docs/memory/semantic-recall#embedder-configuration).
9
+ - [**Semantic recall**](https://mastra.ai/docs/memory/semantic-recall) - retrieves relevant messages from older conversations based on semantic meaning rather than exact keywords, mirroring how humans recall information by association. Requires a [vector database](https://mastra.ai/docs/memory/semantic-recall) and an [embedding model](https://mastra.ai/docs/memory/semantic-recall).
10
+ - [**Observational memory**](https://mastra.ai/docs/memory/observational-memory) - uses background Observer and Reflector agents to maintain a dense observation log that replaces raw message history as it grows, keeping the context window small while preserving long-term memory across conversations.
12
11
 
13
12
  If the combined memory exceeds the model's context limit, [memory processors](https://mastra.ai/docs/memory/memory-processors) can filter, trim, or prioritize content so the most relevant information is preserved.
14
13
 
@@ -19,12 +18,13 @@ Choose a memory option to get started:
19
18
  - [Message history](https://mastra.ai/docs/memory/message-history)
20
19
  - [Working memory](https://mastra.ai/docs/memory/working-memory)
21
20
  - [Semantic recall](https://mastra.ai/docs/memory/semantic-recall)
21
+ - [Observational memory](https://mastra.ai/docs/memory/observational-memory)
22
22
 
23
23
  ## Storage
24
24
 
25
- Before enabling memory, you must first configure a storage adapter. Mastra supports several databases including PostgreSQL, MongoDB, libSQL, and [more](https://mastra.ai/docs/memory/storage#supported-providers).
25
+ Before enabling memory, you must first configure a storage adapter. Mastra supports several databases including PostgreSQL, MongoDB, libSQL, and [more](https://mastra.ai/docs/memory/storage).
26
26
 
27
- Storage can be configured at the [instance level](https://mastra.ai/docs/memory/storage#instance-level-storage) (shared across all agents) or at the [agent level](https://mastra.ai/docs/memory/storage#agent-level-storage) (dedicated per agent).
27
+ Storage can be configured at the [instance level](https://mastra.ai/docs/memory/storage) (shared across all agents) or at the [agent level](https://mastra.ai/docs/memory/storage) (dedicated per agent).
28
28
 
29
29
  For semantic recall, you can use a separate vector database like Pinecone alongside your primary storage.
30
30
 
@@ -34,12 +34,12 @@ See the [Storage](https://mastra.ai/docs/memory/storage) documentation for confi
34
34
 
35
35
  When [tracing](https://mastra.ai/docs/observability/tracing/overview) is enabled, you can inspect exactly which messages the agent uses for context in each request. The trace output shows all memory included in the agent's context window - both recent message history and messages recalled via semantic recall.
36
36
 
37
- ![Trace output showing memory context included in an agent request](https://mastra.ai/_next/image?url=%2Ftracingafter.png&w=1920&q=75)
37
+ ![Trace output showing memory context included in an agent request](https://mastra.ai/_next/image?url=%2Ftracingafter.png&w=1920&q=75)
38
38
 
39
39
  This visibility helps you understand why an agent made specific decisions and verify that memory retrieval is working as expected.
40
40
 
41
41
  ## Next steps
42
42
 
43
43
  - Learn more about [Storage](https://mastra.ai/docs/memory/storage) providers and configuration options
44
- - Add [Message history](https://mastra.ai/docs/memory/message-history), [Working memory](https://mastra.ai/docs/memory/working-memory), or [Semantic recall](https://mastra.ai/docs/memory/semantic-recall)
44
+ - Add [Message history](https://mastra.ai/docs/memory/message-history), [Working memory](https://mastra.ai/docs/memory/working-memory), [Semantic recall](https://mastra.ai/docs/memory/semantic-recall), or [Observational memory](https://mastra.ai/docs/memory/observational-memory)
45
45
  - Visit [Memory configuration reference](https://mastra.ai/reference/memory/memory-class) for all available options
@@ -1,20 +1,16 @@
1
- > Learn how to use semantic recall in Mastra to retrieve relevant messages from past conversations using vector search and embeddings.
2
-
3
1
  # Semantic Recall
4
2
 
5
3
  If you ask your friend what they did last weekend, they will search in their memory for events associated with "last weekend" and then tell you what they did. That's sort of like how semantic recall works in Mastra.
6
4
 
7
- > **Watch 📹**
8
-
9
- What semantic recall is, how it works, and how to configure it in Mastra → [YouTube (5 minutes)](https://youtu.be/UVZtK8cK8xQ)
5
+ > **Watch 📹:** What semantic recall is, how it works, and how to configure it in Mastra → [YouTube (5 minutes)](https://youtu.be/UVZtK8cK8xQ)
10
6
 
11
7
  ## How Semantic Recall Works
12
8
 
13
- Semantic recall is RAG-based search that helps agents maintain context across longer interactions when messages are no longer within [recent message history](./message-history).
9
+ Semantic recall is RAG-based search that helps agents maintain context across longer interactions when messages are no longer within [recent message history](https://mastra.ai/docs/memory/message-history).
14
10
 
15
11
  It uses vector embeddings of messages for similarity search, integrates with various vector stores, and has configurable context windows around retrieved messages.
16
12
 
17
- ![Diagram showing Mastra Memory semantic recall](/img/semantic-recall.png)
13
+ ![Diagram showing Mastra Memory semantic recall](/assets/images/semantic-recall-fd7b9336a6d0d18019216cb6d3dbe710.png)
18
14
 
19
15
  When it's enabled, new messages are used to query a vector DB for semantically similar messages.
20
16
 
@@ -24,7 +20,7 @@ After getting a response from the LLM, all new messages (user, assistant, and to
24
20
 
25
21
  Semantic recall is enabled by default, so if you give your agent memory it will be included:
26
22
 
27
- ```typescript {9}
23
+ ```typescript
28
24
  import { Agent } from "@mastra/core/agent";
29
25
  import { Memory } from "@mastra/memory";
30
26
 
@@ -64,7 +60,7 @@ const { messages: relevantMessages } = await memory!.recall({
64
60
 
65
61
  Semantic recall relies on a [storage and vector db](https://mastra.ai/reference/memory/memory-class) to store messages and their embeddings.
66
62
 
67
- ```ts {8-16}
63
+ ```ts
68
64
  import { Memory } from "@mastra/memory";
69
65
  import { Agent } from "@mastra/core/agent";
70
66
  import { LibSQLStore, LibSQLVector } from "@mastra/libsql";
@@ -113,7 +109,7 @@ The three main parameters that control semantic recall behavior are:
113
109
  2. **messageRange**: How much surrounding context to include with each match
114
110
  3. **scope**: Whether to search within the current thread or across all threads owned by a resource (the default is resource scope).
115
111
 
116
- ```typescript {5-7}
112
+ ```typescript
117
113
  const agent = new Agent({
118
114
  memory: new Memory({
119
115
  options: {
@@ -135,7 +131,7 @@ Semantic recall relies on an [embedding model](https://mastra.ai/reference/memor
135
131
 
136
132
  The simplest way is to use a `provider/model` string with autocomplete support:
137
133
 
138
- ```ts {7}
134
+ ```ts
139
135
  import { Memory } from "@mastra/memory";
140
136
  import { Agent } from "@mastra/core/agent";
141
137
  import { ModelRouterEmbeddingModel } from "@mastra/core/llm";
@@ -158,7 +154,7 @@ The model router automatically handles API key detection from environment variab
158
154
 
159
155
  You can also use AI SDK embedding models directly:
160
156
 
161
- ```ts {2,7}
157
+ ```ts
162
158
  import { Memory } from "@mastra/memory";
163
159
  import { Agent } from "@mastra/core/agent";
164
160
  import { ModelRouterEmbeddingModel } from "@mastra/core/llm";
@@ -174,13 +170,33 @@ const agent = new Agent({
174
170
 
175
171
  To use FastEmbed (a local embedding model), install `@mastra/fastembed`:
176
172
 
177
- ```bash npm2yarn
173
+ **npm**:
174
+
175
+ ```bash
178
176
  npm install @mastra/fastembed@latest
179
177
  ```
180
178
 
179
+ **pnpm**:
180
+
181
+ ```bash
182
+ pnpm add @mastra/fastembed@latest
183
+ ```
184
+
185
+ **Yarn**:
186
+
187
+ ```bash
188
+ yarn add @mastra/fastembed@latest
189
+ ```
190
+
191
+ **Bun**:
192
+
193
+ ```bash
194
+ bun add @mastra/fastembed@latest
195
+ ```
196
+
181
197
  Then configure it in your memory:
182
198
 
183
- ```ts {3,7}
199
+ ```ts
184
200
  import { Memory } from "@mastra/memory";
185
201
  import { Agent } from "@mastra/core/agent";
186
202
  import { fastembed } from "@mastra/fastembed";
@@ -198,7 +214,7 @@ When using PostgreSQL as your vector store, you can optimize semantic recall per
198
214
 
199
215
  PostgreSQL supports both IVFFlat and HNSW indexes. By default, Mastra creates an IVFFlat index, but HNSW indexes typically provide better performance, especially with OpenAI embeddings which use inner product distance.
200
216
 
201
- ```typescript {18-23}
217
+ ```typescript
202
218
  import { Memory } from "@mastra/memory";
203
219
  import { PgStore, PgVector } from "@mastra/pg";
204
220
 
@@ -228,7 +244,7 @@ const agent = new Agent({
228
244
  });
229
245
  ```
230
246
 
231
- For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg#index-configuration-guide).
247
+ For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg).
232
248
 
233
249
  ## Disabling
234
250
 
@@ -236,7 +252,7 @@ There is a performance impact to using semantic recall. New messages are convert
236
252
 
237
253
  Semantic recall is enabled by default but can be disabled when not needed:
238
254
 
239
- ```typescript {4}
255
+ ```typescript
240
256
  const agent = new Agent({
241
257
  memory: new Memory({
242
258
  options: {
@@ -1,10 +1,8 @@
1
- > Configure storage for Mastra
2
-
3
1
  # Storage
4
2
 
5
- For agents to remember previous interactions, Mastra needs a database. Use a storage adapter for one of the [supported databases](#supported-providers) and pass it to your Mastra instance.
3
+ For agents to remember previous interactions, Mastra needs a database. Use a storage adapter for one of the [supported databases](#supported-providers) and pass it to your Mastra instance.
6
4
 
7
- ```typescript title="src/mastra/index.ts"
5
+ ```typescript
8
6
  import { Mastra } from "@mastra/core";
9
7
  import { LibSQLStore } from "@mastra/libsql";
10
8
 
@@ -16,18 +14,17 @@ export const mastra = new Mastra({
16
14
  });
17
15
  ```
18
16
 
19
- > **Sharing the database with Mastra Studio**
20
- When running `mastra dev` alongside your application (e.g., Next.js), use an absolute path to ensure both processes access the same database:
21
-
22
- ```typescript
23
- url: "file:/absolute/path/to/your/project/mastra.db"
24
- ```
25
-
26
- Relative paths like `file:./mastra.db` resolve based on each process's working directory, which may differ.
17
+ > **Sharing the database with Mastra Studio:** When running `mastra dev` alongside your application (e.g., Next.js), use an absolute path to ensure both processes access the same database:
18
+ >
19
+ > ```typescript
20
+ > url: "file:/absolute/path/to/your/project/mastra.db"
21
+ > ```
22
+ >
23
+ > Relative paths like `file:./mastra.db` resolve based on each process's working directory, which may differ.
27
24
 
28
25
  This configures instance-level storage, which all agents share by default. You can also configure [agent-level storage](#agent-level-storage) for isolated data boundaries.
29
26
 
30
- Mastra automatically creates the necessary tables on first interaction. See the [core schema](https://mastra.ai/reference/storage/overview#core-schema) for details on what gets created, including tables for messages, threads, resources, workflows, traces, and evaluation datasets.
27
+ Mastra automatically creates the necessary tables on first interaction. See the [core schema](https://mastra.ai/reference/storage/overview) for details on what gets created, including tables for messages, threads, resources, workflows, traces, and evaluation datasets.
31
28
 
32
29
  ## Supported providers
33
30
 
@@ -44,8 +41,7 @@ Each provider page includes installation instructions, configuration parameters,
44
41
  - [LanceDB](https://mastra.ai/reference/storage/lance)
45
42
  - [Microsoft SQL Server](https://mastra.ai/reference/storage/mssql)
46
43
 
47
- > **Note:**
48
- libSQL is the easiest way to get started because it doesn’t require running a separate database server.
44
+ > **Tip:** libSQL is the easiest way to get started because it doesn’t require running a separate database server.
49
45
 
50
46
  ## Configuration scope
51
47
 
@@ -55,7 +51,7 @@ Storage can be configured at the instance level (shared by all agents) or at the
55
51
 
56
52
  Add storage to your Mastra instance so all agents, workflows, observability traces and scores share the same memory provider:
57
53
 
58
- ```typescript title="src/mastra/index.ts"
54
+ ```typescript
59
55
  import { Mastra } from "@mastra/core";
60
56
  import { PostgresStore } from "@mastra/pg";
61
57
 
@@ -75,9 +71,9 @@ This is useful when all primitives share the same storage backend and have simil
75
71
 
76
72
  #### Composite storage
77
73
 
78
- [Composite storage](https://mastra.ai/reference/storage/composite) is an alternative way to configure instance-level storage. Use `MastraCompositeStore` to set the `memory` domain (and any other [domains](https://mastra.ai/reference/storage/composite#storage-domains) you need) to different storage providers.
74
+ [Composite storage](https://mastra.ai/reference/storage/composite) is an alternative way to configure instance-level storage. Use `MastraCompositeStore` to set the `memory` domain (and any other [domains](https://mastra.ai/reference/storage/composite) you need) to different storage providers.
79
75
 
80
- ```typescript title="src/mastra/index.ts"
76
+ ```typescript
81
77
  import { Mastra } from "@mastra/core";
82
78
  import { MastraCompositeStore } from "@mastra/core/storage";
83
79
  import { MemoryLibSQL } from "@mastra/libsql";
@@ -88,7 +84,6 @@ export const mastra = new Mastra({
88
84
  storage: new MastraCompositeStore({
89
85
  id: "composite",
90
86
  domains: {
91
- // highlight-next-line
92
87
  memory: new MemoryLibSQL({ url: "file:./memory.db" }),
93
88
  workflows: new WorkflowsPG({ connectionString: process.env.DATABASE_URL }),
94
89
  observability: new ObservabilityStorageClickhouse({
@@ -107,7 +102,7 @@ This is useful when different types of data have different performance or operat
107
102
 
108
103
  Agent-level storage overrides storage configured at the instance level. Add storage to a specific agent when you need data boundaries or compliance requirements:
109
104
 
110
- ```typescript title="src/mastra/agents/your-agent.ts"
105
+ ```typescript
111
106
  import { Agent } from "@mastra/core/agent";
112
107
  import { Memory } from "@mastra/memory";
113
108
  import { PostgresStore } from "@mastra/pg";
@@ -123,19 +118,18 @@ export const agent = new Agent({
123
118
  });
124
119
  ```
125
120
 
126
- > **Note:**
127
- [Mastra Cloud Store](https://mastra.ai/docs/mastra-cloud/deployment#using-mastra-cloud-store) doesn't support agent-level storage.
121
+ > **Warning:** [Mastra Cloud Store](https://mastra.ai/docs/mastra-cloud/deployment) doesn't support agent-level storage.
128
122
 
129
123
  ## Threads and resources
130
124
 
131
- Mastra organizes conversations using two identifiers:
125
+ Mastra organizes conversations using two identifiers:
132
126
 
133
127
  - **Thread** - a conversation session containing a sequence of messages.
134
128
  - **Resource** - the entity that owns the thread, such as a user, organization, project, or any other domain entity in your application.
135
129
 
136
130
  Both identifiers are required for agents to store information:
137
131
 
138
- **generate:**
132
+ **Generate**:
139
133
 
140
134
  ```typescript
141
135
  const response = await agent.generate("hello", {
@@ -146,8 +140,7 @@ const response = await agent.generate("hello", {
146
140
  });
147
141
  ```
148
142
 
149
-
150
- **stream:**
143
+ **Stream**:
151
144
 
152
145
  ```typescript
153
146
  const stream = await agent.stream("hello", {
@@ -158,10 +151,7 @@ const stream = await agent.stream("hello", {
158
151
  });
159
152
  ```
160
153
 
161
-
162
-
163
- > **Note:**
164
- [Studio](https://mastra.ai/docs/getting-started/studio) automatically generates a thread and resource ID for you. When calling `stream()` or `generate()` yourself, remember to provide these identifiers explicitly.
154
+ > **Note:** [Studio](https://mastra.ai/docs/getting-started/studio) automatically generates a thread and resource ID for you. When calling `stream()` or `generate()` yourself, remember to provide these identifiers explicitly.
165
155
 
166
156
  ### Thread title generation
167
157
 
@@ -169,7 +159,7 @@ Mastra can automatically generate descriptive thread titles based on the user's
169
159
 
170
160
  Use this option when implementing a ChatGPT-style chat interface to render a title alongside each thread in the conversation list (for example, in a sidebar) derived from the thread’s initial user message.
171
161
 
172
- ```typescript title="src/mastra/agents/my-agent.ts"
162
+ ```typescript
173
163
  export const agent = new Agent({
174
164
  id: "agent",
175
165
  memory: new Memory({
@@ -182,9 +172,9 @@ export const agent = new Agent({
182
172
 
183
173
  Title generation runs asynchronously after the agent responds and does not affect response time.
184
174
 
185
- To optimize cost or behavior, provide a smaller [`model`](/models) and custom `instructions`:
175
+ To optimize cost or behavior, provide a smaller [`model`](https://mastra.ai/models) and custom `instructions`:
186
176
 
187
- ```typescript title="src/mastra/agents/my-agent.ts"
177
+ ```typescript
188
178
  export const agent = new Agent({
189
179
  id: "agent",
190
180
  memory: new Memory({
@@ -206,17 +196,17 @@ Semantic recall has different storage requirements - it needs a vector database
206
196
 
207
197
  Some storage providers enforce record size limits that base64-encoded file attachments (such as images) can exceed:
208
198
 
209
- | Provider | Record size limit |
210
- | -------- | ----------------- |
211
- | [DynamoDB](https://mastra.ai/reference/storage/dynamodb) | 400 KB |
212
- | [Convex](https://mastra.ai/reference/storage/convex) | 1 MiB |
213
- | [Cloudflare D1](https://mastra.ai/reference/storage/cloudflare-d1) | 1 MiB |
199
+ | Provider | Record size limit |
200
+ | ------------------------------------------------------------------ | ----------------- |
201
+ | [DynamoDB](https://mastra.ai/reference/storage/dynamodb) | 400 KB |
202
+ | [Convex](https://mastra.ai/reference/storage/convex) | 1 MiB |
203
+ | [Cloudflare D1](https://mastra.ai/reference/storage/cloudflare-d1) | 1 MiB |
214
204
 
215
205
  PostgreSQL, MongoDB, and libSQL have higher limits and are generally unaffected.
216
206
 
217
207
  To avoid this, use an input processor to upload attachments to external storage (S3, R2, GCS, [Convex file storage](https://docs.convex.dev/file-storage), etc.) and replace them with URL references before persistence.
218
208
 
219
- ```typescript title="src/mastra/processors/attachment-uploader.ts"
209
+ ```typescript
220
210
  import type { Processor } from "@mastra/core/processors";
221
211
  import type { MastraDBMessage } from "@mastra/core/memory";
222
212