@mastra/memory 1.18.3-alpha.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +94 -0
- package/dist/{chunk-BK3AYI7X.cjs → chunk-5IJQOXJM.cjs} +117 -24
- package/dist/chunk-5IJQOXJM.cjs.map +1 -0
- package/dist/{chunk-KLETR4RS.js → chunk-NZXH5WER.js} +117 -24
- package/dist/chunk-NZXH5WER.js.map +1 -0
- package/dist/docs/SKILL.md +3 -1
- package/dist/docs/assets/SOURCE_MAP.json +47 -47
- package/dist/docs/references/docs-agents-agent-approval.md +2 -0
- package/dist/docs/references/docs-agents-background-tasks.md +9 -6
- package/dist/docs/references/docs-evals-evals-with-memory.md +146 -0
- package/dist/docs/references/docs-memory-multi-user-threads.md +206 -0
- package/dist/docs/references/docs-memory-observational-memory.md +53 -17
- package/dist/docs/references/docs-memory-overview.md +1 -0
- package/dist/docs/references/docs-memory-working-memory.md +1 -1
- package/dist/docs/references/reference-memory-observational-memory.md +5 -3
- package/dist/index.cjs +13 -13
- package/dist/index.js +4 -4
- package/dist/{observational-memory-K5ES5KKQ.js → observational-memory-KFKHBTCB.js} +3 -3
- package/dist/{observational-memory-K5ES5KKQ.js.map → observational-memory-KFKHBTCB.js.map} +1 -1
- package/dist/{observational-memory-SRGNHILF.cjs → observational-memory-V2APY3TO.cjs} +26 -26
- package/dist/{observational-memory-SRGNHILF.cjs.map → observational-memory-V2APY3TO.cjs.map} +1 -1
- package/dist/processors/index.cjs +24 -24
- package/dist/processors/index.js +1 -1
- package/dist/processors/observational-memory/activation-ttl.d.ts +4 -0
- package/dist/processors/observational-memory/activation-ttl.d.ts.map +1 -0
- package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
- package/dist/processors/observational-memory/observer-agent.d.ts +13 -0
- package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
- package/dist/processors/observational-memory/observer-runner.d.ts.map +1 -1
- package/dist/processors/observational-memory/processor.d.ts.map +1 -1
- package/dist/processors/observational-memory/reflector-runner.d.ts.map +1 -1
- package/dist/processors/observational-memory/tracing.d.ts.map +1 -1
- package/dist/processors/observational-memory/types.d.ts +30 -6
- package/dist/processors/observational-memory/types.d.ts.map +1 -1
- package/package.json +7 -7
- package/dist/chunk-BK3AYI7X.cjs.map +0 -1
- package/dist/chunk-KLETR4RS.js.map +0 -1
package/dist/docs/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: mastra-memory
|
|
|
3
3
|
description: Documentation for @mastra/memory. Use when working with @mastra/memory APIs, configuration, or implementation.
|
|
4
4
|
metadata:
|
|
5
5
|
package: "@mastra/memory"
|
|
6
|
-
version: "1.18.3-alpha.0"
|
|
6
|
+
version: "1.19.0"
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
## When to use
|
|
@@ -20,8 +20,10 @@ Read the individual reference documents for detailed explanations and code examp
|
|
|
20
20
|
- [Background tasks](references/docs-agents-background-tasks.md) - Learn how to dispatch long-running tool calls in the background, keep the stream open until they complete, and orchestrate subagents asynchronously.
|
|
21
21
|
- [Agent networks](references/docs-agents-networks.md) - Coordinate multiple agents, workflows, and tools using agent networks for complex, non-deterministic task execution.
|
|
22
22
|
- [Supervisor agents](references/docs-agents-supervisor-agents.md) - Learn how to coordinate multiple agents with delegation hooks, iteration monitoring, message filtering, and task completion scoring.
|
|
23
|
+
- [Evals with memory](references/docs-evals-evals-with-memory.md) - Run scorers against memory-enabled agents — including observational memory in thread scope — using runEvals and dataset experiments.
|
|
23
24
|
- [Memory processors](references/docs-memory-memory-processors.md) - Learn how to use memory processors in Mastra to filter, trim, and transform messages before they're sent to the language model to manage context window limits.
|
|
24
25
|
- [Message history](references/docs-memory-message-history.md) - Learn how to configure message history in Mastra to store recent messages from the current conversation.
|
|
26
|
+
- [Multi-user threads](references/docs-memory-multi-user-threads.md) - Share one Mastra thread between multiple users by carrying speaker identity in the message body.
|
|
25
27
|
- [Observational Memory](references/docs-memory-observational-memory.md) - Learn how Observational Memory keeps your agent's context window small while preserving long-term memory across conversations.
|
|
26
28
|
- [Memory overview](references/docs-memory-overview.md) - Learn how Mastra's memory system works with working memory, message history, semantic recall, and observational memory.
|
|
27
29
|
- [Semantic recall](references/docs-memory-semantic-recall.md) - Learn how to use semantic recall in Mastra to retrieve relevant messages from past conversations using vector search and embeddings.
|
|
@@ -1,120 +1,120 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "1.18.3-alpha.0",
|
|
2
|
+
"version": "1.19.0",
|
|
3
3
|
"package": "@mastra/memory",
|
|
4
4
|
"exports": {
|
|
5
5
|
"ModelByInputTokens": {
|
|
6
6
|
"types": "dist/processors/index.d.ts",
|
|
7
|
-
"implementation": "dist/chunk-
|
|
8
|
-
"line":
|
|
7
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
8
|
+
"line": 786
|
|
9
9
|
},
|
|
10
10
|
"OBSERVER_SYSTEM_PROMPT": {
|
|
11
11
|
"types": "dist/processors/index.d.ts",
|
|
12
|
-
"implementation": "dist/chunk-
|
|
12
|
+
"implementation": "dist/chunk-NZXH5WER.js"
|
|
13
13
|
},
|
|
14
14
|
"ObservationalMemory": {
|
|
15
15
|
"types": "dist/processors/index.d.ts",
|
|
16
|
-
"implementation": "dist/chunk-
|
|
17
|
-
"line":
|
|
16
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
17
|
+
"line": 7009
|
|
18
18
|
},
|
|
19
19
|
"ObservationalMemoryProcessor": {
|
|
20
20
|
"types": "dist/processors/index.d.ts",
|
|
21
|
-
"implementation": "dist/chunk-
|
|
22
|
-
"line":
|
|
21
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
22
|
+
"line": 9589
|
|
23
23
|
},
|
|
24
24
|
"TokenCounter": {
|
|
25
25
|
"types": "dist/processors/index.d.ts",
|
|
26
|
-
"implementation": "dist/chunk-
|
|
27
|
-
"line":
|
|
26
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
27
|
+
"line": 6455
|
|
28
28
|
},
|
|
29
29
|
"buildObserverPrompt": {
|
|
30
30
|
"types": "dist/processors/index.d.ts",
|
|
31
|
-
"implementation": "dist/chunk-
|
|
32
|
-
"line":
|
|
31
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
32
|
+
"line": 3760
|
|
33
33
|
},
|
|
34
34
|
"buildObserverSystemPrompt": {
|
|
35
35
|
"types": "dist/processors/index.d.ts",
|
|
36
|
-
"implementation": "dist/chunk-
|
|
37
|
-
"line":
|
|
36
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
37
|
+
"line": 3031
|
|
38
38
|
},
|
|
39
39
|
"combineObservationGroupRanges": {
|
|
40
40
|
"types": "dist/processors/index.d.ts",
|
|
41
|
-
"implementation": "dist/chunk-
|
|
42
|
-
"line":
|
|
41
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
42
|
+
"line": 878
|
|
43
43
|
},
|
|
44
44
|
"deriveObservationGroupProvenance": {
|
|
45
45
|
"types": "dist/processors/index.d.ts",
|
|
46
|
-
"implementation": "dist/chunk-
|
|
47
|
-
"line":
|
|
46
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
47
|
+
"line": 912
|
|
48
48
|
},
|
|
49
49
|
"extractCurrentTask": {
|
|
50
50
|
"types": "dist/processors/index.d.ts",
|
|
51
|
-
"implementation": "dist/chunk-
|
|
52
|
-
"line":
|
|
51
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
52
|
+
"line": 3874
|
|
53
53
|
},
|
|
54
54
|
"formatMessagesForObserver": {
|
|
55
55
|
"types": "dist/processors/index.d.ts",
|
|
56
|
-
"implementation": "dist/chunk-
|
|
57
|
-
"line":
|
|
56
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
57
|
+
"line": 3486
|
|
58
58
|
},
|
|
59
59
|
"getObservationsAsOf": {
|
|
60
60
|
"types": "dist/processors/index.d.ts",
|
|
61
|
-
"implementation": "dist/chunk-
|
|
62
|
-
"line":
|
|
61
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
62
|
+
"line": 9801
|
|
63
63
|
},
|
|
64
64
|
"hasCurrentTaskSection": {
|
|
65
65
|
"types": "dist/processors/index.d.ts",
|
|
66
|
-
"implementation": "dist/chunk-
|
|
67
|
-
"line":
|
|
66
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
67
|
+
"line": 3862
|
|
68
68
|
},
|
|
69
69
|
"injectAnchorIds": {
|
|
70
70
|
"types": "dist/processors/index.d.ts",
|
|
71
|
-
"implementation": "dist/chunk-
|
|
72
|
-
"line":
|
|
71
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
72
|
+
"line": 2579
|
|
73
73
|
},
|
|
74
74
|
"optimizeObservationsForContext": {
|
|
75
75
|
"types": "dist/processors/index.d.ts",
|
|
76
|
-
"implementation": "dist/chunk-
|
|
77
|
-
"line":
|
|
76
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
77
|
+
"line": 3885
|
|
78
78
|
},
|
|
79
79
|
"parseAnchorId": {
|
|
80
80
|
"types": "dist/processors/index.d.ts",
|
|
81
|
-
"implementation": "dist/chunk-
|
|
82
|
-
"line":
|
|
81
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
82
|
+
"line": 2552
|
|
83
83
|
},
|
|
84
84
|
"parseObservationGroups": {
|
|
85
85
|
"types": "dist/processors/index.d.ts",
|
|
86
|
-
"implementation": "dist/chunk-
|
|
87
|
-
"line":
|
|
86
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
87
|
+
"line": 847
|
|
88
88
|
},
|
|
89
89
|
"parseObserverOutput": {
|
|
90
90
|
"types": "dist/processors/index.d.ts",
|
|
91
|
-
"implementation": "dist/chunk-
|
|
92
|
-
"line":
|
|
91
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
92
|
+
"line": 3770
|
|
93
93
|
},
|
|
94
94
|
"reconcileObservationGroupsFromReflection": {
|
|
95
95
|
"types": "dist/processors/index.d.ts",
|
|
96
|
-
"implementation": "dist/chunk-
|
|
97
|
-
"line":
|
|
96
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
97
|
+
"line": 936
|
|
98
98
|
},
|
|
99
99
|
"renderObservationGroupsForReflection": {
|
|
100
100
|
"types": "dist/processors/index.d.ts",
|
|
101
|
-
"implementation": "dist/chunk-
|
|
102
|
-
"line":
|
|
101
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
102
|
+
"line": 892
|
|
103
103
|
},
|
|
104
104
|
"stripEphemeralAnchorIds": {
|
|
105
105
|
"types": "dist/processors/index.d.ts",
|
|
106
|
-
"implementation": "dist/chunk-
|
|
107
|
-
"line":
|
|
106
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
107
|
+
"line": 2609
|
|
108
108
|
},
|
|
109
109
|
"stripObservationGroups": {
|
|
110
110
|
"types": "dist/processors/index.d.ts",
|
|
111
|
-
"implementation": "dist/chunk-
|
|
112
|
-
"line":
|
|
111
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
112
|
+
"line": 869
|
|
113
113
|
},
|
|
114
114
|
"wrapInObservationGroup": {
|
|
115
115
|
"types": "dist/processors/index.d.ts",
|
|
116
|
-
"implementation": "dist/chunk-
|
|
117
|
-
"line":
|
|
116
|
+
"implementation": "dist/chunk-NZXH5WER.js",
|
|
117
|
+
"line": 840
|
|
118
118
|
},
|
|
119
119
|
"OBSERVATIONAL_MEMORY_DEFAULTS": {
|
|
120
120
|
"types": "dist/processors/index.d.ts",
|
|
@@ -149,7 +149,7 @@
|
|
|
149
149
|
"processors": {
|
|
150
150
|
"index": "dist/processors/index.js",
|
|
151
151
|
"chunks": [
|
|
152
|
-
"chunk-
|
|
152
|
+
"chunk-NZXH5WER.js",
|
|
153
153
|
"chunk-LSJJAJAF.js"
|
|
154
154
|
]
|
|
155
155
|
}
|
|
@@ -92,6 +92,8 @@ A tool can also pause _during_ its `execute` function by calling `suspend()`. Th
|
|
|
92
92
|
|
|
93
93
|
The stream emits a `tool-call-suspended` chunk with a custom payload defined by the tool's `suspendSchema`. You resume by calling `resumeStream()` with data matching the tool's `resumeSchema`.
|
|
94
94
|
|
|
95
|
+
> **Note:** `suspend()` does not throw — return immediately after calling it (e.g. `return await suspend({ ... })`). Code after `await suspend(...)` still runs before the tool pauses.
|
|
96
|
+
|
|
95
97
|
## Tool approval with `generate()`
|
|
96
98
|
|
|
97
99
|
Tool approval also works with `generate()` for non-streaming use cases. When a tool requires approval, `generate()` returns immediately with `finishReason: 'suspended'`, a `suspendPayload` containing the tool call details (`toolCallId`, `toolName`, `args`), and a `runId`:
|
|
@@ -40,11 +40,12 @@ The full set of options is listed in the [backgroundTasks configuration referenc
|
|
|
40
40
|
|
|
41
41
|
## Run a tool in the background
|
|
42
42
|
|
|
43
|
-
Enabling the manager doesn't run anything in the background by itself as every tool defaults to foreground execution.
|
|
43
|
+
Enabling the manager doesn't run anything in the background by itself as every tool defaults to foreground execution. Tools opt in at one of two layers:
|
|
44
44
|
|
|
45
|
-
1. **
|
|
45
|
+
1. **Tool-level config**: the tool itself declares it as background-eligible.
|
|
46
46
|
2. **Agent-level config**: the agent declares which of its tools are background-eligible.
|
|
47
|
-
|
|
47
|
+
|
|
48
|
+
Once a tool has opted in, the LLM can optionally include a `_background` field in the tool arguments to override the resolved config for a specific call (timeout, retries, or to flip the call back to foreground).
|
|
48
49
|
|
|
49
50
|
### Tool-level
|
|
50
51
|
|
|
@@ -103,13 +104,15 @@ When a tool is registered on an agent that has background tasks enabled, the mod
|
|
|
103
104
|
}
|
|
104
105
|
```
|
|
105
106
|
|
|
107
|
+
The `_background` override is a _modifier_ on tools the developer has already opted in at the tool or agent layer — it is not a standalone opt-in. If a tool hasn't been opted in, `_background.enabled: true` from the model is ignored and the tool runs in the foreground. This keeps deterministic, foreground-only tools (calculators, lookups, schema validators) from being silently dispatched as tasks.
|
|
108
|
+
|
|
106
109
|
### Resolution order
|
|
107
110
|
|
|
108
111
|
When a tool call is dispatched, the resolved background config is computed in this priority order:
|
|
109
112
|
|
|
110
|
-
1.
|
|
111
|
-
2.
|
|
112
|
-
3.
|
|
113
|
+
1. Agent-level `backgroundTasks.tools` entry for the tool.
|
|
114
|
+
2. Tool-level `backgroundTasks` config.
|
|
115
|
+
3. LLM `_background.enabled` override (only used to enable background dispatch when the tool was opted in at one of the layers above).
|
|
113
116
|
4. Manager defaults (`defaultTimeoutMs`, `defaultRetries`).
|
|
114
117
|
|
|
115
118
|
If the agent has `backgroundTasks.disabled: true`, every tool call runs synchronously regardless of the layers above.
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Evals with memory
|
|
2
|
+
|
|
3
|
+
Agents that use memory in `thread` scope — including observational memory — require a thread ID at run time. When an eval invokes the agent without one, you'll see:
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
ObservationalMemory (scope: 'thread') requires a threadId, but none was found in RequestContext or MessageList.
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
This page covers the three working patterns for running Mastra evals against memory-enabled agents, what each path supports, and which one to pick. A complete runnable repro for all three approaches lives in [`examples/evals-with-memory`](https://github.com/mastra-ai/mastra/tree/main/examples/evals-with-memory).
|
|
10
|
+
|
|
11
|
+
## When to use which approach
|
|
12
|
+
|
|
13
|
+
| Goal | Approach |
|
|
14
|
+
| ----------------------------------------------- | ----------------------------------------------------------------------------------------- |
|
|
15
|
+
| One shared conversation across every item | [`runEvals` with global `targetOptions.memory`](#shared-thread-with-runevals) |
|
|
16
|
+
| One independent thread per item, simple CI loop | [`runEvals` per item](#per-item-threads-with-runevals) |
|
|
17
|
+
| Per-item threads driven by a stored `Dataset` | [`dataset.startExperiment` with an inline task](#dataset-experiments-with-an-inline-task) |
|
|
18
|
+
|
|
19
|
+
Pre-seeding `RequestContext` with `MastraMemory` is **not** a supported way to drive memory into an agent. Thread resolution reads `args.memory.thread` — `RequestContext.MastraMemory` is populated by `prepare-memory-step` after the agent has already resolved its thread.
|
|
20
|
+
|
|
21
|
+
## Shared thread with `runEvals`
|
|
22
|
+
|
|
23
|
+
`runEvals` accepts `targetOptions`, which is forwarded to `agent.generate()`. Passing `memory: { thread, resource }` runs every data item against the same thread — useful for testing recall across a multi-turn conversation.
|
|
24
|
+
|
|
25
|
+
```typescript
|
|
26
|
+
import { runEvals } from '@mastra/core/evals'
|
|
27
|
+
import { supportAgent } from './support-agent'
|
|
28
|
+
import { recallScorer } from '../scorers/recall-scorer'
|
|
29
|
+
|
|
30
|
+
const memory = await supportAgent.getMemory()
|
|
31
|
+
await memory!.createThread({ threadId: 'eval-thread', resourceId: 'ci-user' })
|
|
32
|
+
|
|
33
|
+
const result = await runEvals({
|
|
34
|
+
target: supportAgent,
|
|
35
|
+
scorers: [recallScorer],
|
|
36
|
+
targetOptions: {
|
|
37
|
+
memory: { thread: 'eval-thread', resource: 'ci-user' },
|
|
38
|
+
},
|
|
39
|
+
data: [
|
|
40
|
+
{ input: 'My order number is 12345' },
|
|
41
|
+
{ input: 'What is my order number?', groundTruth: '12345' },
|
|
42
|
+
],
|
|
43
|
+
})
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`targetOptions` is **global per call**. There is no per-item override on `RunEvalsDataItem` today.
|
|
47
|
+
|
|
48
|
+
## Per-item threads with `runEvals`
|
|
49
|
+
|
|
50
|
+
When each data item needs its own thread (the common CI shape), call `runEvals` once per item with a unique `targetOptions.memory` and aggregate the scores yourself.
|
|
51
|
+
|
|
52
|
+
```typescript
|
|
53
|
+
import { randomUUID } from 'node:crypto'
|
|
54
|
+
import { runEvals } from '@mastra/core/evals'
|
|
55
|
+
import { supportAgent } from './support-agent'
|
|
56
|
+
import { recallScorer } from '../scorers/recall-scorer'
|
|
57
|
+
|
|
58
|
+
const memory = await supportAgent.getMemory()
|
|
59
|
+
const resourceId = 'ci-user'
|
|
60
|
+
|
|
61
|
+
const items = [
|
|
62
|
+
{ input: 'Cats are mammals', groundTruth: 'mammals' },
|
|
63
|
+
{ input: 'Dogs are mammals too', groundTruth: 'mammals' },
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
// `runEvals` returns `{ scores: Record<string, number>; summary: { totalItems } }`.
|
|
67
|
+
const scores: number[] = []
|
|
68
|
+
for (const item of items) {
|
|
69
|
+
const threadId = `eval-${randomUUID()}`
|
|
70
|
+
await memory!.createThread({ threadId, resourceId, title: item.input })
|
|
71
|
+
|
|
72
|
+
const result = await runEvals({
|
|
73
|
+
target: supportAgent,
|
|
74
|
+
scorers: [recallScorer],
|
|
75
|
+
targetOptions: { memory: { thread: threadId, resource: resourceId } },
|
|
76
|
+
data: [item],
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
scores.push(result.scores[recallScorer.id])
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const average = scores.reduce((a, b) => a + b, 0) / scores.length
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
> **Note:** Create the thread before running the eval. Observational memory in `thread` scope reads from a record that must already exist.
|
|
86
|
+
|
|
87
|
+
## Dataset experiments with an inline task
|
|
88
|
+
|
|
89
|
+
`dataset.startExperiment({ target: agent })` does **not** forward a `memory` option to the agent — only `requestContext`. To run a stored dataset against a memory-enabled agent, use an inline `task` function and stash `{ threadId, resourceId }` in each item's `metadata`. The scorer pipeline still runs as normal.
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
import { randomUUID } from 'node:crypto'
|
|
93
|
+
import { mastra } from '../index'
|
|
94
|
+
import { supportAgent } from '../agents/support-agent'
|
|
95
|
+
import { recallScorer } from '../scorers/recall-scorer'
|
|
96
|
+
|
|
97
|
+
const memory = await supportAgent.getMemory()
|
|
98
|
+
const resourceId = 'ci-user'
|
|
99
|
+
|
|
100
|
+
const items = [
|
|
101
|
+
{ input: 'Cats are mammals', groundTruth: 'mammals', thread: `ds-${randomUUID()}` },
|
|
102
|
+
{ input: 'Dogs are mammals too', groundTruth: 'mammals', thread: `ds-${randomUUID()}` },
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
for (const it of items) {
|
|
106
|
+
await memory!.createThread({ threadId: it.thread, resourceId, title: it.input })
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const dataset = await mastra.datasets.create({
|
|
110
|
+
name: 'support-recall',
|
|
111
|
+
description: 'Per-item memory via inline task + item metadata',
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
await dataset.addItems({
|
|
115
|
+
items: items.map(it => ({
|
|
116
|
+
input: it.input,
|
|
117
|
+
groundTruth: it.groundTruth,
|
|
118
|
+
metadata: { threadId: it.thread, resourceId },
|
|
119
|
+
})),
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
const summary = await dataset.startExperiment({
|
|
123
|
+
scorers: [recallScorer],
|
|
124
|
+
task: async ({ input, metadata }) => {
|
|
125
|
+
const { threadId, resourceId: rid } = (metadata ?? {}) as {
|
|
126
|
+
threadId: string
|
|
127
|
+
resourceId: string
|
|
128
|
+
}
|
|
129
|
+
const result = await supportAgent.generate(input as string, {
|
|
130
|
+
memory: { thread: threadId, resource: rid },
|
|
131
|
+
})
|
|
132
|
+
return result.text
|
|
133
|
+
},
|
|
134
|
+
})
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The inline `task` receives the item's `metadata`, so each row can drive its own thread without changing the agent or any scorer.
|
|
138
|
+
|
|
139
|
+
> **Note:** See the [runEvals reference](https://mastra.ai/reference/evals/run-evals) and the [Dataset reference](https://mastra.ai/reference/datasets/dataset) for the full set of configuration options.
|
|
140
|
+
|
|
141
|
+
## Related
|
|
142
|
+
|
|
143
|
+
- [Running scorers in CI](https://mastra.ai/docs/evals/running-in-ci)
|
|
144
|
+
- [Running experiments](https://mastra.ai/docs/evals/datasets/running-experiments)
|
|
145
|
+
- [Observational memory](https://mastra.ai/docs/memory/observational-memory)
|
|
146
|
+
- [runEvals API reference](https://mastra.ai/reference/evals/run-evals)
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# Multi-user threads
|
|
2
|
+
|
|
3
|
+
A single Mastra thread can be shared by multiple users, each with their own name and functional role. You carry speaker identity in the message body so the agent can tell users apart while reading from a single shared thread.
|
|
4
|
+
|
|
5
|
+
## When to use multi-user threads
|
|
6
|
+
|
|
7
|
+
Use multi-user threads when several people collaborate on the same subject through one agent:
|
|
8
|
+
|
|
9
|
+
- Collaborative documents with editors, reviewers, and approvers
|
|
10
|
+
- Group chats where one assistant serves many participants
|
|
11
|
+
- Multi-stakeholder reviews where different roles have different authority
|
|
12
|
+
|
|
13
|
+
## Share one `resourceId` across all participants
|
|
14
|
+
|
|
15
|
+
A thread belongs to exactly one `resourceId`, so all participants on a shared thread need to pass the same value. Instead of using a user id (the default for single-user apps), key `resourceId` on the conversation itself — for example `doc_${docId}` for a shared document, or `room_${roomId}` for a group chat. With everyone pointing at the same `resourceId`, they read and write the same history.
|
|
16
|
+
|
|
17
|
+
## Tag each user message with the speaker's identity
|
|
18
|
+
|
|
19
|
+
The model needs to know who's talking on every turn. Since the message body is the one place that survives into history and back into context, wrap each user message in a small `<turn>` tag with the speaker's id, name, and role. The tag stays attached to the message, so when prior turns are recalled the model still sees who said what.
|
|
20
|
+
|
|
21
|
+
Build the tag with a small helper. The example below is one way to do it — copy it into your project and adapt it to your shape of user data:
|
|
22
|
+
|
|
23
|
+
```typescript
|
|
24
|
+
export type Speaker = {
|
|
25
|
+
id: string
|
|
26
|
+
name: string
|
|
27
|
+
role: string
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function escapeAttr(value: string) {
|
|
31
|
+
return value
|
|
32
|
+
.replace(/&/g, '&amp;')
|
|
33
|
+
.replace(/"/g, '&quot;')
|
|
34
|
+
.replace(/</g, '&lt;')
|
|
35
|
+
.replace(/>/g, '&gt;')
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export function asUserTurn(speaker: Speaker, text: string) {
|
|
39
|
+
const id = escapeAttr(speaker.id)
|
|
40
|
+
const name = escapeAttr(speaker.name)
|
|
41
|
+
const role = escapeAttr(speaker.role)
|
|
42
|
+
return {
|
|
43
|
+
role: 'user' as const,
|
|
44
|
+
content: `<turn author_id="${id}" author_name="${name}" functional_role="${role}">
|
|
45
|
+
${text}
|
|
46
|
+
</turn>`,
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Teach the agent how to read the `<turn>` tag in its instructions. The agent must have `memory` configured so it can be called with a `thread` and `resource`:
|
|
52
|
+
|
|
53
|
+
```typescript
|
|
54
|
+
import { Agent } from '@mastra/core/agent'
|
|
55
|
+
import { Memory } from '@mastra/memory'
|
|
56
|
+
import { LibSQLStore } from '@mastra/libsql'
|
|
57
|
+
|
|
58
|
+
const memory = new Memory({
|
|
59
|
+
storage: new LibSQLStore({ url: 'file:./collab.db' }),
|
|
60
|
+
options: {
|
|
61
|
+
lastMessages: 20,
|
|
62
|
+
},
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
export const collabAgent = new Agent({
|
|
66
|
+
id: 'collab',
|
|
67
|
+
name: 'CollabAgent',
|
|
68
|
+
model: 'openai/gpt-5.4-mini',
|
|
69
|
+
memory,
|
|
70
|
+
instructions: `
|
|
71
|
+
You are a collaborative document assistant. Multiple users talk to you in the SAME thread.
|
|
72
|
+
|
|
73
|
+
Every user message is wrapped in a <turn> tag carrying the user's identity:
|
|
74
|
+
|
|
75
|
+
<turn author_id="u_alice" author_name="Alice" functional_role="editor">
|
|
76
|
+
...message text...
|
|
77
|
+
</turn>
|
|
78
|
+
|
|
79
|
+
Rules:
|
|
80
|
+
1. Address users by their author_name.
|
|
81
|
+
2. Respect functional_role: editors propose changes, reviewers approve.
|
|
82
|
+
3. When attributing past statements, read author_name from the surrounding <turn> tag.
|
|
83
|
+
4. Do not echo the <turn> tags back at users.
|
|
84
|
+
`.trim(),
|
|
85
|
+
})
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Call the agent with the wrapped message. Every participant shares the same `thread` and `resource`:
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
import { asUserTurn } from './identity'
|
|
92
|
+
|
|
93
|
+
const docResourceId = 'doc_42'
|
|
94
|
+
const docThreadId = 'doc_42'
|
|
95
|
+
|
|
96
|
+
const alice = { id: 'u_alice', name: 'Alice', role: 'editor' }
|
|
97
|
+
const bob = { id: 'u_bob', name: 'Bob', role: 'reviewer' }
|
|
98
|
+
|
|
99
|
+
await collabAgent.generate([asUserTurn(alice, 'My favorite color is teal.')], {
|
|
100
|
+
memory: { thread: docThreadId, resource: docResourceId },
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
await collabAgent.generate([asUserTurn(bob, 'I want QA sign-off before publish.')], {
|
|
104
|
+
memory: { thread: docThreadId, resource: docResourceId },
|
|
105
|
+
})
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The `<turn>` tag persists in the message body, so when history is recalled on later turns the model still sees who said what.
|
|
109
|
+
|
|
110
|
+
## Combining with memory layers
|
|
111
|
+
|
|
112
|
+
The user-tagging pattern composes with every memory layer. Pick the layer based on how long the conversation needs to remember per-user facts:
|
|
113
|
+
|
|
114
|
+
- **Short conversations** (a single session, or a thread small enough to fit in `lastMessages`), or when you need a verbatim record of who said what: use [message history alone](#message-history-alone). The user tags in history are enough; no extra memory layer needed.
|
|
115
|
+
- **Long-running threads** (conversations that outgrow `lastMessages`, where you need per-user facts to survive history eviction): use [observational memory](#with-observational-memory-recommended).
|
|
116
|
+
- **Need a structured participants list, or your storage adapter doesn't support OM** (OM requires LibSQL, PG, or MongoDB): use [working memory](#with-working-memory).
|
|
117
|
+
|
|
118
|
+
We recommend using observational memory or working memory, not both — they cover overlapping needs, and running both at once adds latency and token cost without much benefit.
|
|
119
|
+
|
|
120
|
+
### Message history alone
|
|
121
|
+
|
|
122
|
+
For short conversations, or when you need a verbatim record of who said what, the user tags in history are enough. `lastMessages` brings prior turns back into context with their attribution intact:
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { Memory } from '@mastra/memory'
|
|
126
|
+
import { LibSQLStore } from '@mastra/libsql'
|
|
127
|
+
|
|
128
|
+
const memory = new Memory({
|
|
129
|
+
storage: new LibSQLStore({ url: 'file:./collab.db' }),
|
|
130
|
+
options: {
|
|
131
|
+
lastMessages: 20,
|
|
132
|
+
},
|
|
133
|
+
})
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
The model reads identity from the `<turn>` tag on the current message and from prior tagged messages brought back through `lastMessages`.
|
|
137
|
+
|
|
138
|
+
### With observational memory (recommended)
|
|
139
|
+
|
|
140
|
+
[Observational Memory](https://mastra.ai/docs/memory/observational-memory) (OM) extracts per-user facts into a background log without burning the agent's tool budget. The default Observer model reads `<turn>` tags natively and produces named attribution like `Alice stated her favorite color is teal.` and `Bob asked for QA sign-off before publish.`
|
|
141
|
+
|
|
142
|
+
Prefer OM over working memory for multi-user threads when your storage supports it. OM extracts facts automatically, scales to any number of participants, and doesn't need template upkeep. Enable it with no overrides:
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
import { Memory } from '@mastra/memory'
|
|
146
|
+
import { LibSQLStore } from '@mastra/libsql'
|
|
147
|
+
|
|
148
|
+
const memory = new Memory({
|
|
149
|
+
storage: new LibSQLStore({ url: 'file:./collab.db' }),
|
|
150
|
+
options: {
|
|
151
|
+
lastMessages: 20,
|
|
152
|
+
observationalMemory: true,
|
|
153
|
+
},
|
|
154
|
+
})
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
OM requires a storage adapter that supports it: `@mastra/libsql`, `@mastra/pg`, or `@mastra/mongodb`.
|
|
158
|
+
|
|
159
|
+
> **Note:** If you switch the Observer to a weaker model and see facts collapse to a generic `User`, use [`observation.instruction`](https://mastra.ai/reference/memory/observational-memory) to teach the Observer how to read the `<turn>` tag.
|
|
160
|
+
|
|
161
|
+
### With working memory
|
|
162
|
+
|
|
163
|
+
Use working memory when OM isn't an option — for example, when your storage adapter doesn't support OM, or when you need a structured, deterministic participants list the agent can read and write on every turn.
|
|
164
|
+
|
|
165
|
+
The default [working memory](https://mastra.ai/docs/memory/working-memory) template assumes one user per thread ("First Name", "Last Name", etc.). For multi-user threads, provide a template with a participants list:
|
|
166
|
+
|
|
167
|
+
```typescript
|
|
168
|
+
import { Memory } from '@mastra/memory'
|
|
169
|
+
import { LibSQLStore } from '@mastra/libsql'
|
|
170
|
+
|
|
171
|
+
const memory = new Memory({
|
|
172
|
+
storage: new LibSQLStore({ url: 'file:./collab.db' }),
|
|
173
|
+
options: {
|
|
174
|
+
lastMessages: 20,
|
|
175
|
+
workingMemory: {
|
|
176
|
+
enabled: true,
|
|
177
|
+
scope: 'thread',
|
|
178
|
+
template: `# Document Collaboration State
|
|
179
|
+
|
|
180
|
+
## Participants
|
|
181
|
+
<!-- One entry per known collaborator. Use author_id as the stable key. -->
|
|
182
|
+
<!-- - **<author_name>** (<author_id>, <functional_role>): <their position> -->
|
|
183
|
+
|
|
184
|
+
## Open Questions
|
|
185
|
+
|
|
186
|
+
## Decisions
|
|
187
|
+
`,
|
|
188
|
+
},
|
|
189
|
+
},
|
|
190
|
+
})
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Set `scope: 'thread'` so the participants list belongs to the document, not to any individual user. Add one instruction telling the agent to append new participants to the list whenever a new `author_id` shows up in a `<turn>`.
|
|
194
|
+
|
|
195
|
+
For more on templates, see [Custom templates](https://mastra.ai/docs/memory/working-memory).
|
|
196
|
+
|
|
197
|
+
## Security
|
|
198
|
+
|
|
199
|
+
Set the `speaker` from your authenticated request context, never from the request body. If a client can choose its own `author_id`, one user can impersonate another. Use [Request Context](https://mastra.ai/docs/server/request-context) to read the verified user from your auth layer and build the `<turn>` tag on the server before calling the agent.
|
|
200
|
+
|
|
201
|
+
## Related
|
|
202
|
+
|
|
203
|
+
- [Working memory](https://mastra.ai/docs/memory/working-memory)
|
|
204
|
+
- [Observational memory](https://mastra.ai/docs/memory/observational-memory)
|
|
205
|
+
- [Share memory between agents](https://mastra.ai/docs/memory/overview)
|
|
206
|
+
- [`Memory` reference](https://mastra.ai/reference/memory/memory-class)
|