@mastra/memory 1.11.1-alpha.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +1 -1
  3. package/dist/{chunk-2NZR2XHO.js → chunk-DDQHE4NV.js} +38 -20
  4. package/dist/chunk-DDQHE4NV.js.map +1 -0
  5. package/dist/{chunk-W2RTLXNQ.cjs → chunk-HLGFIN4J.cjs} +38 -20
  6. package/dist/chunk-HLGFIN4J.cjs.map +1 -0
  7. package/dist/docs/SKILL.md +1 -1
  8. package/dist/docs/assets/SOURCE_MAP.json +39 -39
  9. package/dist/docs/references/docs-memory-message-history.md +6 -4
  10. package/dist/docs/references/docs-memory-observational-memory.md +20 -11
  11. package/dist/docs/references/docs-memory-overview.md +4 -4
  12. package/dist/docs/references/docs-memory-semantic-recall.md +28 -19
  13. package/dist/docs/references/docs-memory-storage.md +4 -4
  14. package/dist/docs/references/reference-memory-observational-memory.md +1 -1
  15. package/dist/docs/references/reference-storage-dynamodb.md +1 -1
  16. package/dist/docs/references/reference-storage-upstash.md +1 -1
  17. package/dist/index.cjs +137 -22
  18. package/dist/index.cjs.map +1 -1
  19. package/dist/index.js +130 -15
  20. package/dist/index.js.map +1 -1
  21. package/dist/{observational-memory-JCPPBSVG.cjs → observational-memory-34W4S4I5.cjs} +26 -26
  22. package/dist/{observational-memory-JCPPBSVG.cjs.map → observational-memory-34W4S4I5.cjs.map} +1 -1
  23. package/dist/{observational-memory-SASGM6OW.js → observational-memory-B25SASRW.js} +3 -3
  24. package/dist/{observational-memory-SASGM6OW.js.map → observational-memory-B25SASRW.js.map} +1 -1
  25. package/dist/processors/index.cjs +24 -24
  26. package/dist/processors/index.js +1 -1
  27. package/dist/processors/observational-memory/observation-strategies/async-buffer.d.ts +1 -0
  28. package/dist/processors/observational-memory/observation-strategies/async-buffer.d.ts.map +1 -1
  29. package/dist/processors/observational-memory/observation-strategies/base.d.ts +7 -2
  30. package/dist/processors/observational-memory/observation-strategies/base.d.ts.map +1 -1
  31. package/dist/processors/observational-memory/observation-strategies/resource-scoped.d.ts +1 -0
  32. package/dist/processors/observational-memory/observation-strategies/resource-scoped.d.ts.map +1 -1
  33. package/dist/processors/observational-memory/observation-strategies/sync.d.ts +1 -0
  34. package/dist/processors/observational-memory/observation-strategies/sync.d.ts.map +1 -1
  35. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  36. package/dist/tools/om-tools.d.ts.map +1 -1
  37. package/package.json +7 -7
  38. package/dist/chunk-2NZR2XHO.js.map +0 -1
  39. package/dist/chunk-W2RTLXNQ.cjs.map +0 -1
@@ -3,7 +3,7 @@ name: mastra-memory
3
3
  description: Documentation for @mastra/memory. Use when working with @mastra/memory APIs, configuration, or implementation.
4
4
  metadata:
5
5
  package: "@mastra/memory"
6
- version: "1.11.1-alpha.0"
6
+ version: "1.12.0"
7
7
  ---
8
8
 
9
9
  ## When to use
@@ -1,119 +1,119 @@
1
1
  {
2
- "version": "1.11.1-alpha.0",
2
+ "version": "1.12.0",
3
3
  "package": "@mastra/memory",
4
4
  "exports": {
5
5
  "ModelByInputTokens": {
6
6
  "types": "dist/processors/index.d.ts",
7
- "implementation": "dist/chunk-2NZR2XHO.js",
7
+ "implementation": "dist/chunk-DDQHE4NV.js",
8
8
  "line": 666
9
9
  },
10
10
  "OBSERVER_SYSTEM_PROMPT": {
11
11
  "types": "dist/processors/index.d.ts",
12
- "implementation": "dist/chunk-2NZR2XHO.js"
12
+ "implementation": "dist/chunk-DDQHE4NV.js"
13
13
  },
14
14
  "ObservationalMemory": {
15
15
  "types": "dist/processors/index.d.ts",
16
- "implementation": "dist/chunk-2NZR2XHO.js",
17
- "line": 5705
16
+ "implementation": "dist/chunk-DDQHE4NV.js",
17
+ "line": 5724
18
18
  },
19
19
  "ObservationalMemoryProcessor": {
20
20
  "types": "dist/processors/index.d.ts",
21
- "implementation": "dist/chunk-2NZR2XHO.js",
22
- "line": 8227
21
+ "implementation": "dist/chunk-DDQHE4NV.js",
22
+ "line": 8245
23
23
  },
24
24
  "TokenCounter": {
25
25
  "types": "dist/processors/index.d.ts",
26
- "implementation": "dist/chunk-2NZR2XHO.js",
27
- "line": 5257
26
+ "implementation": "dist/chunk-DDQHE4NV.js",
27
+ "line": 5276
28
28
  },
29
29
  "buildObserverPrompt": {
30
30
  "types": "dist/processors/index.d.ts",
31
- "implementation": "dist/chunk-2NZR2XHO.js",
32
- "line": 3285
31
+ "implementation": "dist/chunk-DDQHE4NV.js",
32
+ "line": 3304
33
33
  },
34
34
  "buildObserverSystemPrompt": {
35
35
  "types": "dist/processors/index.d.ts",
36
- "implementation": "dist/chunk-2NZR2XHO.js",
37
- "line": 2759
36
+ "implementation": "dist/chunk-DDQHE4NV.js",
37
+ "line": 2778
38
38
  },
39
39
  "combineObservationGroupRanges": {
40
40
  "types": "dist/processors/index.d.ts",
41
- "implementation": "dist/chunk-2NZR2XHO.js",
41
+ "implementation": "dist/chunk-DDQHE4NV.js",
42
42
  "line": 758
43
43
  },
44
44
  "deriveObservationGroupProvenance": {
45
45
  "types": "dist/processors/index.d.ts",
46
- "implementation": "dist/chunk-2NZR2XHO.js",
46
+ "implementation": "dist/chunk-DDQHE4NV.js",
47
47
  "line": 792
48
48
  },
49
49
  "extractCurrentTask": {
50
50
  "types": "dist/processors/index.d.ts",
51
- "implementation": "dist/chunk-2NZR2XHO.js",
52
- "line": 3399
51
+ "implementation": "dist/chunk-DDQHE4NV.js",
52
+ "line": 3418
53
53
  },
54
54
  "formatMessagesForObserver": {
55
55
  "types": "dist/processors/index.d.ts",
56
- "implementation": "dist/chunk-2NZR2XHO.js",
57
- "line": 3025
56
+ "implementation": "dist/chunk-DDQHE4NV.js",
57
+ "line": 3044
58
58
  },
59
59
  "getObservationsAsOf": {
60
60
  "types": "dist/processors/index.d.ts",
61
- "implementation": "dist/chunk-2NZR2XHO.js",
62
- "line": 8405
61
+ "implementation": "dist/chunk-DDQHE4NV.js",
62
+ "line": 8423
63
63
  },
64
64
  "hasCurrentTaskSection": {
65
65
  "types": "dist/processors/index.d.ts",
66
- "implementation": "dist/chunk-2NZR2XHO.js",
67
- "line": 3387
66
+ "implementation": "dist/chunk-DDQHE4NV.js",
67
+ "line": 3406
68
68
  },
69
69
  "injectAnchorIds": {
70
70
  "types": "dist/processors/index.d.ts",
71
- "implementation": "dist/chunk-2NZR2XHO.js",
72
- "line": 2316
71
+ "implementation": "dist/chunk-DDQHE4NV.js",
72
+ "line": 2335
73
73
  },
74
74
  "optimizeObservationsForContext": {
75
75
  "types": "dist/processors/index.d.ts",
76
- "implementation": "dist/chunk-2NZR2XHO.js",
77
- "line": 3410
76
+ "implementation": "dist/chunk-DDQHE4NV.js",
77
+ "line": 3429
78
78
  },
79
79
  "parseAnchorId": {
80
80
  "types": "dist/processors/index.d.ts",
81
- "implementation": "dist/chunk-2NZR2XHO.js",
82
- "line": 2289
81
+ "implementation": "dist/chunk-DDQHE4NV.js",
82
+ "line": 2308
83
83
  },
84
84
  "parseObservationGroups": {
85
85
  "types": "dist/processors/index.d.ts",
86
- "implementation": "dist/chunk-2NZR2XHO.js",
86
+ "implementation": "dist/chunk-DDQHE4NV.js",
87
87
  "line": 727
88
88
  },
89
89
  "parseObserverOutput": {
90
90
  "types": "dist/processors/index.d.ts",
91
- "implementation": "dist/chunk-2NZR2XHO.js",
92
- "line": 3295
91
+ "implementation": "dist/chunk-DDQHE4NV.js",
92
+ "line": 3314
93
93
  },
94
94
  "reconcileObservationGroupsFromReflection": {
95
95
  "types": "dist/processors/index.d.ts",
96
- "implementation": "dist/chunk-2NZR2XHO.js",
96
+ "implementation": "dist/chunk-DDQHE4NV.js",
97
97
  "line": 816
98
98
  },
99
99
  "renderObservationGroupsForReflection": {
100
100
  "types": "dist/processors/index.d.ts",
101
- "implementation": "dist/chunk-2NZR2XHO.js",
101
+ "implementation": "dist/chunk-DDQHE4NV.js",
102
102
  "line": 772
103
103
  },
104
104
  "stripEphemeralAnchorIds": {
105
105
  "types": "dist/processors/index.d.ts",
106
- "implementation": "dist/chunk-2NZR2XHO.js",
107
- "line": 2346
106
+ "implementation": "dist/chunk-DDQHE4NV.js",
107
+ "line": 2365
108
108
  },
109
109
  "stripObservationGroups": {
110
110
  "types": "dist/processors/index.d.ts",
111
- "implementation": "dist/chunk-2NZR2XHO.js",
111
+ "implementation": "dist/chunk-DDQHE4NV.js",
112
112
  "line": 749
113
113
  },
114
114
  "wrapInObservationGroup": {
115
115
  "types": "dist/processors/index.d.ts",
116
- "implementation": "dist/chunk-2NZR2XHO.js",
116
+ "implementation": "dist/chunk-DDQHE4NV.js",
117
117
  "line": 720
118
118
  },
119
119
  "OBSERVATIONAL_MEMORY_DEFAULTS": {
@@ -161,7 +161,7 @@
161
161
  "processors": {
162
162
  "index": "dist/processors/index.js",
163
163
  "chunks": [
164
- "chunk-2NZR2XHO.js",
164
+ "chunk-DDQHE4NV.js",
165
165
  "chunk-LSJJAJAF.js"
166
166
  ]
167
167
  }
@@ -48,7 +48,7 @@ export const mastra = new Mastra({
48
48
  })
49
49
  ```
50
50
 
51
- Give your agent a `Memory`:
51
+ Create a [`Memory`](https://mastra.ai/reference/memory/memory-class) instance in your agent:
52
52
 
53
53
  ```typescript
54
54
  import { Memory } from '@mastra/memory'
@@ -66,7 +66,7 @@ export const agent = new Agent({
66
66
 
67
67
  When you call the agent, messages are automatically saved to the database. You can specify a `threadId`, `resourceId`, and optional `metadata`:
68
68
 
69
- **Generate**:
69
+ **.generate()**:
70
70
 
71
71
  ```typescript
72
72
  await agent.generate('Hello', {
@@ -81,7 +81,7 @@ await agent.generate('Hello', {
81
81
  })
82
82
  ```
83
83
 
84
- **Stream**:
84
+ **.stream()**:
85
85
 
86
86
  ```typescript
87
87
  await agent.stream('Hello', {
@@ -103,12 +103,14 @@ You can use this history in two ways:
103
103
  - **Automatic inclusion** - Mastra automatically fetches and includes recent messages in the context window. By default, it includes the last 10 messages, keeping agents grounded in the conversation. You can adjust this number with `lastMessages`, but in most cases you don't need to think about it.
104
104
  - [**Manual querying**](#querying) - For more control, use the `recall()` function to query threads and messages directly. This lets you choose exactly which memories are included in the context window, or fetch messages to render conversation history in your UI.
105
105
 
106
+ > **Tip:** When memory is enabled, [Studio](https://mastra.ai/docs/studio/overview) uses message history to display past conversations in the chat sidebar.
107
+
106
108
  ## Accessing memory
107
109
 
108
110
  To access memory functions for querying, cloning, or deleting threads and messages, call `getMemory()` on an agent:
109
111
 
110
112
  ```typescript
111
- const agent = mastra.getAgent('weatherAgent')
113
+ const agent = mastra.getAgentById('test-agent')
112
114
  const memory = await agent.getMemory()
113
115
  ```
114
116
 
@@ -42,7 +42,7 @@ See [configuration options](https://mastra.ai/reference/memory/observational-mem
42
42
 
43
43
  ## Benefits
44
44
 
45
- - **Prompt caching**: OM's context is stable observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
45
+ - **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
46
46
  - **Compression**: Raw message history and tool results get compressed into a dense observation log. Smaller context means faster responses and longer coherent conversations.
47
47
  - **Zero context rot**: The agent sees relevant information instead of noisy tool calls and irrelevant tokens, so the agent stays on task over long sessions.
48
48
 
@@ -50,7 +50,7 @@ See [configuration options](https://mastra.ai/reference/memory/observational-mem
50
50
 
51
51
  You don't remember every word of every conversation you've ever had. You observe what happened subconsciously, then your brain reflects — reorganizing, combining, and condensing into long-term memory. OM works the same way.
52
52
 
53
- Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite even models with large token limits perform worse when the window is full. This causes two problems:
53
+ Every time an agent responds, it sees a context window containing its system prompt, recent message history, and any injected context. The context window is finite; even models with large token limits perform worse when the window is full. This causes two problems:
54
54
 
55
55
  - **Context rot**: the more raw message history an agent carries, the worse it performs.
56
56
  - **Context waste**: most of that history contains tokens no longer needed to keep the agent on task.
@@ -59,14 +59,15 @@ OM solves both problems by compressing old context into dense observations.
59
59
 
60
60
  ### Observations
61
61
 
62
- When message history tokens exceed a threshold (default: 30,000), the Observer creates observations concise notes about what happened:
62
+ When message history tokens exceed a threshold (default: 30,000), the Observer creates observations, which are concise notes about what happened:
63
63
 
64
64
  OM uses fast local token estimation for this thresholding work. Text is estimated with `tokenx`, while image parts use provider-aware heuristics so multimodal conversations still trigger observation at the right time. The same applies to image-like `file` parts when a transport normalizes an uploaded image as a file instead of an image part. For example, OpenAI image detail settings can materially change when OM decides to observe.
65
65
 
66
66
  The Observer can also see attachments in the history it reviews. OM keeps readable placeholders like `[Image #1: reference-board.png]` or `[File #1: floorplan.pdf]` in the transcript for readability, and forwards the actual attachment parts alongside the text. Image-like `file` parts are upgraded to image inputs for the Observer when possible, while non-image attachments are forwarded as file parts with normalized token counting. This applies to both normal thread observation and batched resource-scope observation.
67
67
 
68
- ```text
68
+ ```md
69
69
  Date: 2026-01-15
70
+
70
71
  - 🔴 12:10 User is building a Next.js app with Supabase auth, due in 1 week (meaning January 22nd 2026)
71
72
  - 🔴 12:10 App uses server components with client-side hydration
72
73
  - 🟡 12:12 User asked about middleware configuration for protected routes
@@ -77,11 +78,11 @@ The compression is typically 5–40×. The Observer also tracks a **current task
77
78
 
78
79
  If you enable `observation.threadTitle`, the Observer can also suggest a short thread title when the conversation topic meaningfully changes. Thread title generation is opt-in and updates the thread metadata, so apps like Mastra Code can show the latest title in thread lists and status UI.
79
80
 
80
- Example: an agent using Playwright MCP might see 50,000+ tokens per page snapshot. With OM, the Observer watches the interaction and creates a few hundred tokens of observations about what was on the page and what actions were taken. The agent stays on task without carrying every raw snapshot.
81
+ Example: An agent using Playwright MCP might see 50,000+ tokens per page snapshot. With OM, the Observer watches the interaction and creates a few hundred tokens of observations about what was on the page and what actions were taken. The agent stays on task without carrying every raw snapshot.
81
82
 
82
83
  ### Reflections
83
84
 
84
- When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them combining related items and reflecting on patterns.
85
+ When observations exceed their threshold (default: 40,000 tokens), the Reflector condenses them, combines related items, and reflects on patterns.
85
86
 
86
87
  The result is a three-tier system:
87
88
 
@@ -93,7 +94,7 @@ The result is a three-tier system:
93
94
 
94
95
  > **Note:** Retrieval mode is experimental. The API may change in future releases.
95
96
 
96
- Normal OM compresses messages into observations, which is great for staying on task but the original wording is gone. Retrieval mode fixes this by keeping each observation group linked to the raw messages that produced it. When the agent needs exact wording, tool output, or chronology that the summary compressed away, it can call a `recall` tool to page through the source messages.
97
+ Normal OM compresses messages into observations, which is great for staying on task, but the original wording is gone. Retrieval mode fixes this by keeping each observation group linked to the raw messages that produced it. When the agent needs exact wording, tool output, or chronology that the summary compressed away, it can call a `recall` tool to page through the source messages.
97
98
 
98
99
  #### Browsing only
99
100
 
@@ -162,6 +163,16 @@ With retrieval mode enabled, OM:
162
163
 
163
164
  See the [recall tool reference](https://mastra.ai/reference/memory/observational-memory) for the full API (detail levels, part indexing, pagination, cross-thread browsing, and token limiting).
164
165
 
166
+ ## Studio
167
+
168
+ To see how OM works in practice, open [Studio](https://mastra.ai/docs/studio/overview) and navigate to an agent with OM enabled. The **Memory** tab displays:
169
+
170
+ - **Token progress bars**: Current token counts for messages and observations, showing how close each is to its threshold. Hover over the info icon to see the model and threshold for the Observer and Reflector.
171
+ - **Active observations**: The current observation log, rendered inline. When previous observation or reflection records exist, expand "Previous observations" to browse them.
172
+ - **Background processing**: During a conversation, buffered observation chunks and reflection status appear as the agent processes in the background.
173
+
174
+ The progress bars update live while the agent is observing or reflecting, showing elapsed time and a status badge.
175
+
165
176
  ## Models
166
177
 
167
178
  The Observer and Reflector run in the background. Any model that works with Mastra's [model routing](https://mastra.ai/models) (`provider/model`) can be used. When using `observationalMemory: true`, the default model is `google/gemini-2.5-flash`. When passing a config object, a `model` must be explicitly set.
@@ -184,6 +195,8 @@ See [model configuration](https://mastra.ai/reference/memory/observational-memor
184
195
 
185
196
  ### Token-tiered model selection
186
197
 
198
+ **Added in:** `@mastra/memory@1.10.0`
199
+
187
200
  You can use `ModelByInputTokens` to specify different Observer or Reflector models based on input token count. OM selects the matching model tier at runtime from the configured `upTo` thresholds.
188
201
 
189
202
  ```typescript
@@ -373,10 +386,6 @@ No manual migration needed. OM reads existing messages and observes them lazily
373
386
  - **Thread scope**: The first time a thread exceeds `observation.messageTokens`, the Observer processes the backlog.
374
387
  - **Resource scope**: All unobserved messages across all threads for a resource are processed together. For users with many existing threads, this could take significant time.
375
388
 
376
- ## Viewing in Mastra Studio
377
-
378
- Mastra Studio shows OM status in real time in the memory tab: token usage, which model is running, current observations, and reflection history.
379
-
380
389
  ## Comparing OM with other memory features
381
390
 
382
391
  - **[Message history](https://mastra.ai/docs/memory/message-history)**: High-fidelity record of the current conversation
@@ -107,7 +107,7 @@ Use memory when your agent needs to maintain multi-turn conversations that refer
107
107
 
108
108
  > **Note:** Visit [Memory Class](https://mastra.ai/reference/memory/memory-class) for a full list of configuration options.
109
109
 
110
- 5. Call your agent, for example in [Mastra Studio](https://mastra.ai/docs/getting-started/studio). Inside Studio, start a new chat with your agent and take a look at the right sidebar. It'll now display various memory-related information.
110
+ 5. Call your agent, for example in [Studio](https://mastra.ai/docs/studio/overview). Inside Studio, start a new chat with your agent and take a look at the right sidebar. It'll now display various memory-related information.
111
111
 
112
112
  ## Message history
113
113
 
@@ -165,7 +165,7 @@ export const memoryAgent = new Agent({
165
165
 
166
166
  ## Memory in multi-agent systems
167
167
 
168
- When a [supervisor agent](https://mastra.ai/docs/agents/supervisor-agents) delegates to a subagent, Mastra isolates subagent memory automatically. There is no flag to enable this as it happens on every delegation. Understanding how this scoping works lets you decide what stays private and what to share intentionally.
168
+ When a [supervisor agent](https://mastra.ai/docs/agents/supervisor-agents) delegates to a subagent, Mastra isolates subagent memory automatically. You don't need to set a flag to enable this; it happens on every delegation. Understanding how this scoping works lets you decide what stays private and what to share intentionally.
169
169
 
170
170
  ### How delegation scopes memory
171
171
 
@@ -175,7 +175,7 @@ Each delegation creates a fresh `threadId` and a deterministic `resourceId` for
175
175
  - **Resource ID**: Derived as `{parentResourceId}-{agentName}`. Because the resource ID is stable across delegations, resource-scoped memory persists between calls. A subagent remembers facts from previous delegations by the same user.
176
176
  - **Memory instance**: If a subagent has no memory configured, it inherits the supervisor's `Memory` instance. If the subagent defines its own, that takes precedence.
177
177
 
178
- The supervisor forwards its conversation context to the subagent so it has enough background to complete the task. Only the delegation prompt and the subagent's response are saved — the full parent conversation is not stored. You can control which messages reach the subagent with the [`messageFilter`](https://mastra.ai/docs/agents/supervisor-agents) callback.
178
+ The supervisor forwards its conversation context to the subagent so it has enough background to complete the task. Only the delegation prompt and the subagent's response are saved — the full parent conversation isn't stored. You can control which messages reach the subagent with the [`messageFilter`](https://mastra.ai/docs/agents/supervisor-agents) callback.
179
179
 
180
180
  > **Note:** Subagent resource IDs are always suffixed with the agent name (`{parentResourceId}-{agentName}`). Two different subagents under the same supervisor never share a resource ID through delegation.
181
181
 
@@ -206,7 +206,7 @@ Because both calls use `resource: 'project-42'`, the writer can access the resea
206
206
 
207
207
  Enable [Tracing](https://mastra.ai/docs/observability/tracing/overview) to monitor and debug memory in action. Traces show you exactly which messages and observations the agent included in its context for each request, helping you understand agent behavior and verify that memory retrieval is working as expected.
208
208
 
209
- Open [Mastra Studio](https://mastra.ai/docs/getting-started/studio) and select the **Observability** tab in the sidebar. Open the trace of a recent agent request, then look for spans of LLMs calls.
209
+ Open [Studio](https://mastra.ai/docs/studio/overview) and select the **Observability** tab in the sidebar. Open the trace of a recent agent request, then look for spans of LLM calls.
210
210
 
211
211
  ## Switch memory per request
212
212
 
@@ -18,18 +18,33 @@ After getting a response from the LLM, all new messages (user, assistant, and to
18
18
 
19
19
  ## Quickstart
20
20
 
21
- Semantic recall is enabled by default, so if you give your agent memory it will be included:
21
+ Semantic recall is disabled by default. To enable it, set `semanticRecall: true` in `options` and provide a `vector` store and `embedder`:
22
22
 
23
23
  ```typescript
24
24
  import { Agent } from '@mastra/core/agent'
25
25
  import { Memory } from '@mastra/memory'
26
+ import { LibSQLStore, LibSQLVector } from '@mastra/libsql'
27
+ import { ModelRouterEmbeddingModel } from '@mastra/core/llm'
26
28
 
27
29
  const agent = new Agent({
28
30
  id: 'support-agent',
29
31
  name: 'SupportAgent',
30
32
  instructions: 'You are a helpful support agent.',
31
33
  model: 'openai/gpt-5.4',
32
- memory: new Memory(),
34
+ memory: new Memory({
35
+ storage: new LibSQLStore({
36
+ id: 'agent-storage',
37
+ url: 'file:./local.db',
38
+ }),
39
+ vector: new LibSQLVector({
40
+ id: 'agent-vector',
41
+ url: 'file:./local.db',
42
+ }),
43
+ embedder: new ModelRouterEmbeddingModel('openai/text-embedding-3-small'),
44
+ options: {
45
+ semanticRecall: true,
46
+ },
47
+ }),
33
48
  })
34
49
  ```
35
50
 
@@ -77,6 +92,9 @@ const agent = new Agent({
77
92
  id: 'agent-vector',
78
93
  url: 'file:./local.db',
79
94
  }),
95
+ options: {
96
+ semanticRecall: true,
97
+ },
80
98
  }),
81
99
  })
82
100
  ```
@@ -139,6 +157,9 @@ import { ModelRouterEmbeddingModel } from '@mastra/core/llm'
139
157
  const agent = new Agent({
140
158
  memory: new Memory({
141
159
  embedder: new ModelRouterEmbeddingModel('openai/text-embedding-3-small'),
160
+ options: {
161
+ semanticRecall: true,
162
+ },
142
163
  }),
143
164
  })
144
165
  ```
@@ -262,26 +283,14 @@ const agent = new Agent({
262
283
 
263
284
  For detailed information about index configuration options and performance tuning, see the [PgVector configuration guide](https://mastra.ai/reference/vectors/pg).
264
285
 
265
- ## Disabling
286
+ ## Disable semantic recall
266
287
 
267
- Semantic recall has a performance impact. New messages are converted into embeddings and used to query a vector database before new messages are sent to the LLM.
268
-
269
- Semantic recall is enabled by default but can be disabled when not needed:
270
-
271
- ```typescript
272
- const agent = new Agent({
273
- memory: new Memory({
274
- options: {
275
- semanticRecall: false,
276
- },
277
- }),
278
- })
279
- ```
288
+ Semantic recall is disabled by default (`semanticRecall: false`). Each call adds latency because new messages are converted into embeddings and used to query a vector database before the LLM receives them.
280
289
 
281
- You might want to disable semantic recall in scenarios like:
290
+ Keep semantic recall disabled when:
282
291
 
283
- - When message history provides sufficient context for the current conversation.
284
- - In performance-sensitive applications, like realtime two-way audio, where the added latency of creating embeddings and running vector queries is noticeable.
292
+ - Message history provides sufficient context for the current conversation.
293
+ - You're building performance-sensitive applications, like realtime two-way audio, where embedding and vector query latency is noticeable.
285
294
 
286
295
  ## Viewing recalled messages
287
296
 
@@ -14,7 +14,7 @@ export const mastra = new Mastra({
14
14
  })
15
15
  ```
16
16
 
17
- > **Sharing the database with Mastra Studio:** When running `mastra dev` alongside your application (e.g., Next.js), use an absolute path to ensure both processes access the same database:
17
+ > **Sharing the database with Studio:** When running `mastra dev` alongside your application (e.g., Next.js), use an absolute path to ensure both processes access the same database:
18
18
  >
19
19
  > ```typescript
20
20
  > url: 'file:/absolute/path/to/your/project/mastra.db'
@@ -129,7 +129,7 @@ Mastra organizes conversations using two identifiers:
129
129
 
130
130
  Both identifiers are required for agents to store information:
131
131
 
132
- **Generate**:
132
+ **.generate()**:
133
133
 
134
134
  ```typescript
135
135
  const response = await agent.generate('hello', {
@@ -140,7 +140,7 @@ const response = await agent.generate('hello', {
140
140
  })
141
141
  ```
142
142
 
143
- **Stream**:
143
+ **.stream()**:
144
144
 
145
145
  ```typescript
146
146
  const stream = await agent.stream('hello', {
@@ -151,7 +151,7 @@ const stream = await agent.stream('hello', {
151
151
  })
152
152
  ```
153
153
 
154
- > **Note:** [Studio](https://mastra.ai/docs/getting-started/studio) automatically generates a thread and resource ID for you. When calling `stream()` or `generate()` yourself, remember to provide these identifiers explicitly.
154
+ > **Note:** [Studio](https://mastra.ai/docs/studio/overview) automatically generates a thread and resource ID for you. When calling `stream()` or `generate()` yourself, remember to provide these identifiers explicitly.
155
155
 
156
156
  ### Thread title generation
157
157
 
@@ -666,7 +666,7 @@ const selector = new ModelByInputTokens({
666
666
 
667
667
  #### Behavior
668
668
 
669
- - Thresholds are sorted internally, so the order in the config object does not matter.
669
+ - Thresholds are sorted internally, so the order in the config object doesn't matter.
670
670
  - `inputTokens ≤ smallest threshold` → uses that threshold's model
671
671
  - `inputTokens > largest threshold` → `resolve()` throws an error. If this happens during an OM Observer or Reflector run, OM aborts via TripWire, so callers receive an empty `text` result or streamed `tripwire` instead of a normal assistant response.
672
672
  - OM computes the input token count for the Observer or Reflector call and resolves the matching model tier directly
@@ -2,7 +2,7 @@
2
2
 
3
3
  The DynamoDB storage implementation provides a scalable and performant NoSQL database solution for Mastra, leveraging a single-table design pattern with [ElectroDB](https://electrodb.dev/).
4
4
 
5
- > **Observability Not Supported:** DynamoDB storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to DynamoDB, and Mastra Studio's observability features won't work with DynamoDB as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
5
+ > **Observability Not Supported:** DynamoDB storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to DynamoDB, and [Studio's](https://mastra.ai/docs/studio/overview) observability features won't work with DynamoDB as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
6
6
 
7
7
  > **Item Size Limit:** DynamoDB enforces a **400 KB maximum item size**. This limit can be exceeded when storing messages with base64-encoded attachments such as images. See [Handling large attachments](https://mastra.ai/docs/memory/storage) for workarounds including uploading attachments to external storage.
8
8
 
@@ -4,7 +4,7 @@ The Upstash storage implementation provides a serverless-friendly storage soluti
4
4
 
5
5
  > **Pricing:** When using Mastra with Upstash, the pay-as-you-go model can result in unexpectedly high costs due to the high volume of Redis commands generated during agent conversations. We strongly recommend using a **fixed pricing plan** for predictable costs. See [Upstash pricing](https://upstash.com/pricing/redis) for details and [GitHub issue #5850](https://github.com/mastra-ai/mastra/issues/5850) for context.
6
6
 
7
- > **Observability Not Supported:** Upstash storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to Upstash, and Mastra Studio's observability features won't work with Upstash as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
7
+ > **Observability Not Supported:** Upstash storage **doesn't support the observability domain**. Traces from the `DefaultExporter` can't be persisted to Upstash, and [Studio's](https://mastra.ai/docs/studio/overview) observability features won't work with Upstash as your only storage provider. To enable observability, use [composite storage](https://mastra.ai/reference/storage/composite) to route observability data to a supported provider like ClickHouse or PostgreSQL.
8
8
 
9
9
  ## Installation
10
10