@mastra/memory 1.18.3-alpha.0 → 1.19.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +45 -0
  2. package/dist/{chunk-BK3AYI7X.cjs → chunk-5IJQOXJM.cjs} +117 -24
  3. package/dist/chunk-5IJQOXJM.cjs.map +1 -0
  4. package/dist/{chunk-KLETR4RS.js → chunk-NZXH5WER.js} +117 -24
  5. package/dist/chunk-NZXH5WER.js.map +1 -0
  6. package/dist/docs/SKILL.md +2 -1
  7. package/dist/docs/assets/SOURCE_MAP.json +47 -47
  8. package/dist/docs/references/docs-evals-evals-with-memory.md +146 -0
  9. package/dist/docs/references/docs-memory-observational-memory.md +52 -17
  10. package/dist/docs/references/reference-memory-observational-memory.md +5 -3
  11. package/dist/index.cjs +13 -13
  12. package/dist/index.js +4 -4
  13. package/dist/{observational-memory-K5ES5KKQ.js → observational-memory-KFKHBTCB.js} +3 -3
  14. package/dist/{observational-memory-K5ES5KKQ.js.map → observational-memory-KFKHBTCB.js.map} +1 -1
  15. package/dist/{observational-memory-SRGNHILF.cjs → observational-memory-V2APY3TO.cjs} +26 -26
  16. package/dist/{observational-memory-SRGNHILF.cjs.map → observational-memory-V2APY3TO.cjs.map} +1 -1
  17. package/dist/processors/index.cjs +24 -24
  18. package/dist/processors/index.js +1 -1
  19. package/dist/processors/observational-memory/activation-ttl.d.ts +4 -0
  20. package/dist/processors/observational-memory/activation-ttl.d.ts.map +1 -0
  21. package/dist/processors/observational-memory/observational-memory.d.ts.map +1 -1
  22. package/dist/processors/observational-memory/observer-agent.d.ts +13 -0
  23. package/dist/processors/observational-memory/observer-agent.d.ts.map +1 -1
  24. package/dist/processors/observational-memory/observer-runner.d.ts.map +1 -1
  25. package/dist/processors/observational-memory/processor.d.ts.map +1 -1
  26. package/dist/processors/observational-memory/reflector-runner.d.ts.map +1 -1
  27. package/dist/processors/observational-memory/tracing.d.ts.map +1 -1
  28. package/dist/processors/observational-memory/types.d.ts +30 -6
  29. package/dist/processors/observational-memory/types.d.ts.map +1 -1
  30. package/package.json +4 -4
  31. package/dist/chunk-BK3AYI7X.cjs.map +0 -1
  32. package/dist/chunk-KLETR4RS.js.map +0 -1
@@ -3,7 +3,7 @@ name: mastra-memory
3
3
  description: Documentation for @mastra/memory. Use when working with @mastra/memory APIs, configuration, or implementation.
4
4
  metadata:
5
5
  package: "@mastra/memory"
6
- version: "1.18.3-alpha.0"
6
+ version: "1.19.0-alpha.1"
7
7
  ---
8
8
 
9
9
  ## When to use
@@ -20,6 +20,7 @@ Read the individual reference documents for detailed explanations and code examp
20
20
  - [Background tasks](references/docs-agents-background-tasks.md) - Learn how to dispatch long-running tool calls in the background, keep the stream open until they complete, and orchestrate subagents asynchronously.
21
21
  - [Agent networks](references/docs-agents-networks.md) - Coordinate multiple agents, workflows, and tools using agent networks for complex, non-deterministic task execution.
22
22
  - [Supervisor agents](references/docs-agents-supervisor-agents.md) - Learn how to coordinate multiple agents with delegation hooks, iteration monitoring, message filtering, and task completion scoring.
23
+ - [Evals with memory](references/docs-evals-evals-with-memory.md) - Run scorers against memory-enabled agents — including observational memory in thread scope — using runEvals and dataset experiments.
23
24
  - [Memory processors](references/docs-memory-memory-processors.md) - Learn how to use memory processors in Mastra to filter, trim, and transform messages before they're sent to the language model to manage context window limits.
24
25
  - [Message history](references/docs-memory-message-history.md) - Learn how to configure message history in Mastra to store recent messages from the current conversation.
25
26
  - [Observational Memory](references/docs-memory-observational-memory.md) - Learn how Observational Memory keeps your agent's context window small while preserving long-term memory across conversations.
@@ -1,120 +1,120 @@
1
1
  {
2
- "version": "1.18.3-alpha.0",
2
+ "version": "1.19.0-alpha.1",
3
3
  "package": "@mastra/memory",
4
4
  "exports": {
5
5
  "ModelByInputTokens": {
6
6
  "types": "dist/processors/index.d.ts",
7
- "implementation": "dist/chunk-KLETR4RS.js",
8
- "line": 745
7
+ "implementation": "dist/chunk-NZXH5WER.js",
8
+ "line": 786
9
9
  },
10
10
  "OBSERVER_SYSTEM_PROMPT": {
11
11
  "types": "dist/processors/index.d.ts",
12
- "implementation": "dist/chunk-KLETR4RS.js"
12
+ "implementation": "dist/chunk-NZXH5WER.js"
13
13
  },
14
14
  "ObservationalMemory": {
15
15
  "types": "dist/processors/index.d.ts",
16
- "implementation": "dist/chunk-KLETR4RS.js",
17
- "line": 6922
16
+ "implementation": "dist/chunk-NZXH5WER.js",
17
+ "line": 7009
18
18
  },
19
19
  "ObservationalMemoryProcessor": {
20
20
  "types": "dist/processors/index.d.ts",
21
- "implementation": "dist/chunk-KLETR4RS.js",
22
- "line": 9496
21
+ "implementation": "dist/chunk-NZXH5WER.js",
22
+ "line": 9589
23
23
  },
24
24
  "TokenCounter": {
25
25
  "types": "dist/processors/index.d.ts",
26
- "implementation": "dist/chunk-KLETR4RS.js",
27
- "line": 6371
26
+ "implementation": "dist/chunk-NZXH5WER.js",
27
+ "line": 6455
28
28
  },
29
29
  "buildObserverPrompt": {
30
30
  "types": "dist/processors/index.d.ts",
31
- "implementation": "dist/chunk-KLETR4RS.js",
32
- "line": 3682
31
+ "implementation": "dist/chunk-NZXH5WER.js",
32
+ "line": 3760
33
33
  },
34
34
  "buildObserverSystemPrompt": {
35
35
  "types": "dist/processors/index.d.ts",
36
- "implementation": "dist/chunk-KLETR4RS.js",
37
- "line": 2990
36
+ "implementation": "dist/chunk-NZXH5WER.js",
37
+ "line": 3031
38
38
  },
39
39
  "combineObservationGroupRanges": {
40
40
  "types": "dist/processors/index.d.ts",
41
- "implementation": "dist/chunk-KLETR4RS.js",
42
- "line": 837
41
+ "implementation": "dist/chunk-NZXH5WER.js",
42
+ "line": 878
43
43
  },
44
44
  "deriveObservationGroupProvenance": {
45
45
  "types": "dist/processors/index.d.ts",
46
- "implementation": "dist/chunk-KLETR4RS.js",
47
- "line": 871
46
+ "implementation": "dist/chunk-NZXH5WER.js",
47
+ "line": 912
48
48
  },
49
49
  "extractCurrentTask": {
50
50
  "types": "dist/processors/index.d.ts",
51
- "implementation": "dist/chunk-KLETR4RS.js",
52
- "line": 3796
51
+ "implementation": "dist/chunk-NZXH5WER.js",
52
+ "line": 3874
53
53
  },
54
54
  "formatMessagesForObserver": {
55
55
  "types": "dist/processors/index.d.ts",
56
- "implementation": "dist/chunk-KLETR4RS.js",
57
- "line": 3408
56
+ "implementation": "dist/chunk-NZXH5WER.js",
57
+ "line": 3486
58
58
  },
59
59
  "getObservationsAsOf": {
60
60
  "types": "dist/processors/index.d.ts",
61
- "implementation": "dist/chunk-KLETR4RS.js",
62
- "line": 9708
61
+ "implementation": "dist/chunk-NZXH5WER.js",
62
+ "line": 9801
63
63
  },
64
64
  "hasCurrentTaskSection": {
65
65
  "types": "dist/processors/index.d.ts",
66
- "implementation": "dist/chunk-KLETR4RS.js",
67
- "line": 3784
66
+ "implementation": "dist/chunk-NZXH5WER.js",
67
+ "line": 3862
68
68
  },
69
69
  "injectAnchorIds": {
70
70
  "types": "dist/processors/index.d.ts",
71
- "implementation": "dist/chunk-KLETR4RS.js",
72
- "line": 2538
71
+ "implementation": "dist/chunk-NZXH5WER.js",
72
+ "line": 2579
73
73
  },
74
74
  "optimizeObservationsForContext": {
75
75
  "types": "dist/processors/index.d.ts",
76
- "implementation": "dist/chunk-KLETR4RS.js",
77
- "line": 3807
76
+ "implementation": "dist/chunk-NZXH5WER.js",
77
+ "line": 3885
78
78
  },
79
79
  "parseAnchorId": {
80
80
  "types": "dist/processors/index.d.ts",
81
- "implementation": "dist/chunk-KLETR4RS.js",
82
- "line": 2511
81
+ "implementation": "dist/chunk-NZXH5WER.js",
82
+ "line": 2552
83
83
  },
84
84
  "parseObservationGroups": {
85
85
  "types": "dist/processors/index.d.ts",
86
- "implementation": "dist/chunk-KLETR4RS.js",
87
- "line": 806
86
+ "implementation": "dist/chunk-NZXH5WER.js",
87
+ "line": 847
88
88
  },
89
89
  "parseObserverOutput": {
90
90
  "types": "dist/processors/index.d.ts",
91
- "implementation": "dist/chunk-KLETR4RS.js",
92
- "line": 3692
91
+ "implementation": "dist/chunk-NZXH5WER.js",
92
+ "line": 3770
93
93
  },
94
94
  "reconcileObservationGroupsFromReflection": {
95
95
  "types": "dist/processors/index.d.ts",
96
- "implementation": "dist/chunk-KLETR4RS.js",
97
- "line": 895
96
+ "implementation": "dist/chunk-NZXH5WER.js",
97
+ "line": 936
98
98
  },
99
99
  "renderObservationGroupsForReflection": {
100
100
  "types": "dist/processors/index.d.ts",
101
- "implementation": "dist/chunk-KLETR4RS.js",
102
- "line": 851
101
+ "implementation": "dist/chunk-NZXH5WER.js",
102
+ "line": 892
103
103
  },
104
104
  "stripEphemeralAnchorIds": {
105
105
  "types": "dist/processors/index.d.ts",
106
- "implementation": "dist/chunk-KLETR4RS.js",
107
- "line": 2568
106
+ "implementation": "dist/chunk-NZXH5WER.js",
107
+ "line": 2609
108
108
  },
109
109
  "stripObservationGroups": {
110
110
  "types": "dist/processors/index.d.ts",
111
- "implementation": "dist/chunk-KLETR4RS.js",
112
- "line": 828
111
+ "implementation": "dist/chunk-NZXH5WER.js",
112
+ "line": 869
113
113
  },
114
114
  "wrapInObservationGroup": {
115
115
  "types": "dist/processors/index.d.ts",
116
- "implementation": "dist/chunk-KLETR4RS.js",
117
- "line": 799
116
+ "implementation": "dist/chunk-NZXH5WER.js",
117
+ "line": 840
118
118
  },
119
119
  "OBSERVATIONAL_MEMORY_DEFAULTS": {
120
120
  "types": "dist/processors/index.d.ts",
@@ -149,7 +149,7 @@
149
149
  "processors": {
150
150
  "index": "dist/processors/index.js",
151
151
  "chunks": [
152
- "chunk-KLETR4RS.js",
152
+ "chunk-NZXH5WER.js",
153
153
  "chunk-LSJJAJAF.js"
154
154
  ]
155
155
  }
@@ -0,0 +1,146 @@
1
+ # Evals with memory
2
+
3
+ Agents that use memory in `thread` scope — including observational memory — require a thread ID at run time. When an eval invokes the agent without one, you'll see:
4
+
5
+ ```text
6
+ ObservationalMemory (scope: 'thread') requires a threadId, but none was found in RequestContext or MessageList.
7
+ ```
8
+
9
+ This page covers the three working patterns for running Mastra evals against memory-enabled agents, what each path supports, and which one to pick. A complete runnable repro for all three approaches lives in [`examples/evals-with-memory`](https://github.com/mastra-ai/mastra/tree/main/examples/evals-with-memory).
10
+
11
+ ## When to use which approach
12
+
13
+ | Goal | Approach |
14
+ | ----------------------------------------------- | ----------------------------------------------------------------------------------------- |
15
+ | One shared conversation across every item | [`runEvals` with global `targetOptions.memory`](#shared-thread-with-runevals) |
16
+ | One independent thread per item, simple CI loop | [`runEvals` per item](#per-item-threads-with-runevals) |
17
+ | Per-item threads driven by a stored `Dataset` | [`dataset.startExperiment` with an inline task](#dataset-experiments-with-an-inline-task) |
18
+
19
+ Pre-seeding `RequestContext` with `MastraMemory` is **not** a supported way to drive memory into an agent. Thread resolution reads `args.memory.thread` — `RequestContext.MastraMemory` is populated by `prepare-memory-step` after the agent has already resolved its thread.
20
+
21
+ ## Shared thread with `runEvals`
22
+
23
+ `runEvals` accepts `targetOptions`, which is forwarded to `agent.generate()`. Passing `memory: { thread, resource }` runs every data item against the same thread — useful for testing recall across a multi-turn conversation.
24
+
25
+ ```typescript
26
+ import { runEvals } from '@mastra/core/evals'
27
+ import { supportAgent } from './support-agent'
28
+ import { recallScorer } from '../scorers/recall-scorer'
29
+
30
+ const memory = await supportAgent.getMemory()
31
+ await memory!.createThread({ threadId: 'eval-thread', resourceId: 'ci-user' })
32
+
33
+ const result = await runEvals({
34
+ target: supportAgent,
35
+ scorers: [recallScorer],
36
+ targetOptions: {
37
+ memory: { thread: 'eval-thread', resource: 'ci-user' },
38
+ },
39
+ data: [
40
+ { input: 'My order number is 12345' },
41
+ { input: 'What is my order number?', groundTruth: '12345' },
42
+ ],
43
+ })
44
+ ```
45
+
46
+ `targetOptions` is **global per call**. There is no per-item override on `RunEvalsDataItem` today.
47
+
48
+ ## Per-item threads with `runEvals`
49
+
50
+ When each data item needs its own thread (the common CI shape), call `runEvals` once per item with a unique `targetOptions.memory` and aggregate the scores yourself.
51
+
52
+ ```typescript
53
+ import { randomUUID } from 'node:crypto'
54
+ import { runEvals } from '@mastra/core/evals'
55
+ import { supportAgent } from './support-agent'
56
+ import { recallScorer } from '../scorers/recall-scorer'
57
+
58
+ const memory = await supportAgent.getMemory()
59
+ const resourceId = 'ci-user'
60
+
61
+ const items = [
62
+ { input: 'Cats are mammals', groundTruth: 'mammals' },
63
+ { input: 'Dogs are mammals too', groundTruth: 'mammals' },
64
+ ]
65
+
66
+ // `runEvals` returns `{ scores: Record<string, number>; summary: { totalItems } }`.
67
+ const scores: number[] = []
68
+ for (const item of items) {
69
+ const threadId = `eval-${randomUUID()}`
70
+ await memory!.createThread({ threadId, resourceId, title: item.input })
71
+
72
+ const result = await runEvals({
73
+ target: supportAgent,
74
+ scorers: [recallScorer],
75
+ targetOptions: { memory: { thread: threadId, resource: resourceId } },
76
+ data: [item],
77
+ })
78
+
79
+ scores.push(result.scores[recallScorer.id])
80
+ }
81
+
82
+ const average = scores.reduce((a, b) => a + b, 0) / scores.length
83
+ ```
84
+
85
+ > **Note:** Create the thread before running the eval. Observational memory in `thread` scope reads from a record that must already exist.
86
+
87
+ ## Dataset experiments with an inline task
88
+
89
+ `dataset.startExperiment({ target: agent })` does **not** forward a `memory` option to the agent — only `requestContext`. To run a stored dataset against a memory-enabled agent, use an inline `task` function and stash `{ threadId, resourceId }` in each item's `metadata`. The scorer pipeline still runs as normal.
90
+
91
+ ```typescript
92
+ import { randomUUID } from 'node:crypto'
93
+ import { mastra } from '../index'
94
+ import { supportAgent } from '../agents/support-agent'
95
+ import { recallScorer } from '../scorers/recall-scorer'
96
+
97
+ const memory = await supportAgent.getMemory()
98
+ const resourceId = 'ci-user'
99
+
100
+ const items = [
101
+ { input: 'Cats are mammals', groundTruth: 'mammals', thread: `ds-${randomUUID()}` },
102
+ { input: 'Dogs are mammals too', groundTruth: 'mammals', thread: `ds-${randomUUID()}` },
103
+ ]
104
+
105
+ for (const it of items) {
106
+ await memory!.createThread({ threadId: it.thread, resourceId, title: it.input })
107
+ }
108
+
109
+ const dataset = await mastra.datasets.create({
110
+ name: 'support-recall',
111
+ description: 'Per-item memory via inline task + item metadata',
112
+ })
113
+
114
+ await dataset.addItems({
115
+ items: items.map(it => ({
116
+ input: it.input,
117
+ groundTruth: it.groundTruth,
118
+ metadata: { threadId: it.thread, resourceId },
119
+ })),
120
+ })
121
+
122
+ const summary = await dataset.startExperiment({
123
+ scorers: [recallScorer],
124
+ task: async ({ input, metadata }) => {
125
+ const { threadId, resourceId: rid } = (metadata ?? {}) as {
126
+ threadId: string
127
+ resourceId: string
128
+ }
129
+ const result = await supportAgent.generate(input as string, {
130
+ memory: { thread: threadId, resource: rid },
131
+ })
132
+ return result.text
133
+ },
134
+ })
135
+ ```
136
+
137
+ The inline `task` receives the item's `metadata`, so each row can drive its own thread without changing the agent or any scorer.
138
+
139
+ > **Note:** See the [runEvals reference](https://mastra.ai/reference/evals/run-evals) and [Dataset reference](https://mastra.ai/reference/datasets/dataset) for the full set of configuration options.
140
+
141
+ ## Related
142
+
143
+ - [Running scorers in CI](https://mastra.ai/docs/evals/running-in-ci)
144
+ - [Running experiments](https://mastra.ai/docs/evals/datasets/running-experiments)
145
+ - [Observational memory](https://mastra.ai/docs/memory/observational-memory)
146
+ - [runEvals API reference](https://mastra.ai/reference/evals/run-evals)
@@ -88,7 +88,7 @@ const memory = new Memory({
88
88
  options: {
89
89
  observationalMemory: {
90
90
  model: 'google/gemini-2.5-flash',
91
- activateAfterIdle: '5m',
91
+ activateAfterIdle: 'auto',
92
92
  activateOnProviderChange: true,
93
93
  },
94
94
  },
@@ -144,6 +144,28 @@ OM uses fast local token estimation for this thresholding work. Text is estimate
144
144
 
145
145
  The Observer can also see attachments in the history it reviews. OM keeps readable placeholders like `[Image #1: reference-board.png]` or `[File #1: floorplan.pdf]` in the transcript for readability, and forwards the actual attachment parts alongside the text. Image-like `file` parts are upgraded to image inputs for the Observer when possible, while non-image attachments are forwarded as file parts with normalized token counting. This applies to both normal thread observation and batched resource-scope observation.
146
146
 
147
+ If your Observer model is text-only or its API rejects multimodal input, set `observation.observeAttachments` to `false` to drop attachments before they reach the Observer. The readable placeholders (`[Image #1: ...]`, `[File #1: ...]`) are kept in the transcript so the Observer can still reason about what was shared without receiving the binary payload. The same filter applies to tool results that contain image or file parts:
148
+
149
+ ```typescript
150
+ new Agent({
151
+ name: 'assistant',
152
+ instructions: 'You are a helpful assistant.',
153
+ model: 'openai/gpt-5-mini',
154
+ memory: new Memory({
155
+ options: {
156
+ observationalMemory: {
157
+ observation: {
158
+ model: 'deepseek/deepseek-reasoner',
159
+ observeAttachments: false,
160
+ },
161
+ },
162
+ },
163
+ }),
164
+ })
165
+ ```
166
+
167
+ You can also pass an allowlist of mimeType globs (for example `['image/*']`) to forward only the kinds the Observer can handle.
168
+
147
169
  ```md
148
170
  Date: 2026-01-15
149
171
 
@@ -444,35 +466,48 @@ Reflection works similarly — the Reflector runs in the background when observa
444
466
 
445
467
  ### Settings
446
468
 
447
- | Setting | Default | What it controls |
448
- | ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
449
- | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
450
- | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
451
- | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
452
- | `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
453
- | `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
454
- | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
455
- | `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
456
- | `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
457
- | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
458
-
459
- If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
469
+ | Setting | Default | What it controls |
470
+ | ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
471
+ | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
472
+ | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
473
+ | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
474
+ | `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, duration strings like `"5m"` or `"1hr"`, or `"auto"` for a provider-aware prompt cache TTL. |
475
+ | `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
476
+ | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
477
+ | `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
478
+ | `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
479
+ | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
480
+
481
+ If you're relying on prompt caching, set `activateAfterIdle` to `"auto"` or to a specific cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
482
+
483
+ With `"auto"`, Mastra chooses an idle activation TTL from the active model provider:
484
+
485
+ | Provider | Auto TTL |
486
+ | --------------------------------------------------------------------------------------- | --------- |
487
+ | Anthropic, OpenRouter, unknown providers, xAI | 5 minutes |
488
+ | DeepSeek | 1 hour |
489
+ | Google Gemini | 24 hours |
490
+ | Groq | 2 hours |
491
+ | OpenAI with `providerOptions.openai.promptCacheRetention: "24h"` | 1 hour |
492
+ | OpenAI with `providerOptions.openai.promptCacheRetention: "in_memory"` | 5 minutes |
493
+ | OpenAI `gpt-4*`, `gpt-5`, `gpt-5-*`, `gpt-5.1*`, `gpt-5.2*`, `gpt-5.3*`, and `gpt-5.4*` | 5 minutes |
494
+ | Other OpenAI models | 1 hour |
460
495
 
461
496
  ```typescript
462
497
  const memory = new Memory({
463
498
  options: {
464
499
  observationalMemory: {
465
500
  model: 'google/gemini-2.5-flash',
466
- activateAfterIdle: '5m',
501
+ activateAfterIdle: 'auto',
467
502
  activateOnProviderChange: true,
468
503
  },
469
504
  },
470
505
  })
471
506
  ```
472
507
 
473
- With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
508
+ With `"auto"`, this activates buffered observations based on the active provider's prompt cache behavior so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer a fixed 5-minute TTL, use `"5m"` or `300_000`.
474
509
 
475
- Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
510
+ Changing models or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
476
511
 
477
512
  ### Disabling
478
513
 
@@ -36,7 +36,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
36
36
 
37
37
  **scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
38
38
 
39
- **activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity, even before \`observation.messageTokens\` is reached. Accepts a numeric millisecond value such as \`300\_000\`, duration strings like \`"5m"\` or \`"1hr"\`, or \`false\` to disable inherited observation idle activation. Reflections do not inherit this setting. Use \`reflection.activateAfterIdle\` to opt reflections into idle activation.
39
+ **activateAfterIdle** (`number | string | false | "auto"`): Time before buffered observations are forced to activate after inactivity, even before \`observation.messageTokens\` is reached. Accepts a numeric millisecond value such as \`300\_000\`, duration strings like \`"5m"\` or \`"1hr"\`, \`"auto"\` for a provider-aware prompt cache TTL, or \`false\` to disable inherited observation idle activation. Reflections do not inherit this setting. Use \`reflection.activateAfterIdle\` to opt reflections into idle activation.
40
40
 
41
41
  **activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. Reflections do not inherit this setting. Use \`reflection.activateOnProviderChange\` to opt reflections into provider-change activation. (Default: `false`)
42
42
 
@@ -54,6 +54,8 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
54
54
 
55
55
  **observation.threadTitle** (`boolean`): When \`true\`, the Observer suggests short thread titles and updates the thread title when the conversation topic meaningfully changes. This is opt-in and defaults to disabled.
56
56
 
57
+ **observation.observeAttachments** (`boolean | string[]`): Controls which image/file attachments are forwarded to the Observer model alongside their placeholder text lines. \`true\` (default) forwards all attachments. \`false\` drops all attachments while keeping placeholders visible. An array is a case-insensitive mimeType allowlist supporting exact matches (\`'application/pdf'\`), wildcard subtypes (\`'image/\*'\`), and bare \`'\*'\` for everything. Useful when the Observer model is text-only (e.g. some DeepSeek endpoints) while the main agent uses a multimodal model. Tool-result attachments are filtered using the same rule.
58
+
57
59
  **observation.messageTokens** (`number`): Token count of unobserved messages that triggers observation. When unobserved message tokens exceed this threshold, the Observer agent is called. Text is estimated locally with \`tokenx\`. Image parts are included with model-aware heuristics when possible, with deterministic fallbacks when image metadata is incomplete. Image-like \`file\` parts are counted the same way when uploads are normalized as files.
58
60
 
59
61
  **observation.maxTokensPerBatch** (`number`): Maximum tokens per batch when observing multiple threads in resource scope. Threads are chunked into batches of this size and processed in parallel. Lower values mean more parallelism but more API calls.
@@ -68,7 +70,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
68
70
 
69
71
  **observation.bufferActivation** (`number`): Controls how much of the message window to retain after activation. Accepts a ratio (0-1) or an absolute token count (≥ 1000). For example, \`0.8\` means: activate enough buffers to remove 80% of \`messageTokens\` and leave 20% as active message history. An absolute token count like \`4000\` aims to keep \~4k message tokens remaining after activation. With a ratio, higher values remove more message history per activation; with a token count, higher values keep more message history.
70
72
 
71
- **observation.activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. If unset, the top-level \`activateAfterIdle\` value is used for observations. Set \`false\` to disable the top-level idle setting for observations.
73
+ **observation.activateAfterIdle** (`number | string | false | "auto"`): Time before buffered observations are forced to activate after inactivity. Accepts milliseconds, a duration string, \`"auto"\` for a provider-aware prompt cache TTL, or \`false\`. If unset, the top-level \`activateAfterIdle\` value is used for observations. Set \`false\` to disable the top-level idle setting for observations.
72
74
 
73
75
  **observation.activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. If unset, the top-level \`activateOnProviderChange\` value is used for observations.
74
76
 
@@ -92,7 +94,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
92
94
 
93
95
  **reflection.bufferActivation** (`number`): Ratio (0-1) controlling when async reflection buffering starts. When observation tokens reach \`observationTokens \* bufferActivation\`, reflection runs in the background. On activation at the full threshold, the buffered reflection replaces the observations it covers, preserving any new observations appended after that range.
94
96
 
95
- **reflection.activateAfterIdle** (`number | string | false`): Time before buffered reflections are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. Reflections do not inherit top-level \`activateAfterIdle\`; set this explicitly to opt reflections into idle activation.
97
+ **reflection.activateAfterIdle** (`number | string | false | "auto"`): Time before buffered reflections are forced to activate after inactivity. Accepts milliseconds, a duration string, \`"auto"\` for a provider-aware prompt cache TTL, or \`false\`. Reflections do not inherit top-level \`activateAfterIdle\`; set this explicitly to opt reflections into idle activation.
96
98
 
97
99
  **reflection.activateOnProviderChange** (`boolean`): Force buffered reflections to activate when the actor provider or model changes. Reflections do not inherit top-level \`activateOnProviderChange\`; set this explicitly to opt reflections into provider-change activation.
98
100
 
package/dist/index.cjs CHANGED
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var chunkBK3AYI7X_cjs = require('./chunk-BK3AYI7X.cjs');
3
+ var chunk5IJQOXJM_cjs = require('./chunk-5IJQOXJM.cjs');
4
4
  var v3 = require('zod/v3');
5
5
  var zod = require('zod');
6
6
  var z4 = require('zod/v4');
@@ -16056,7 +16056,7 @@ function formatTimestamp(date) {
16056
16056
  }
16057
16057
  function truncateByTokens(text4, maxTokens, hint) {
16058
16058
  if (tokenx.estimateTokenCount(text4) <= maxTokens) return { text: text4, wasTruncated: false };
16059
- const truncated = chunkBK3AYI7X_cjs.truncateStringByTokens(text4, maxTokens);
16059
+ const truncated = chunk5IJQOXJM_cjs.truncateStringByTokens(text4, maxTokens);
16060
16060
  const suffix = hint ? ` [${hint} for more]` : "";
16061
16061
  return { text: truncated + suffix, wasTruncated: true };
16062
16062
  }
@@ -16108,11 +16108,11 @@ ${JSON.stringify(inv.args, null, 2)}`;
16108
16108
  });
16109
16109
  }
16110
16110
  if (inv.state === "result") {
16111
- const { value: resultValue } = chunkBK3AYI7X_cjs.resolveToolResultValue(
16111
+ const { value: resultValue } = chunk5IJQOXJM_cjs.resolveToolResultValue(
16112
16112
  part,
16113
16113
  inv.result
16114
16114
  );
16115
- const resultStr = chunkBK3AYI7X_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16115
+ const resultStr = chunk5IJQOXJM_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16116
16116
  const fullText = `[Tool Result: ${inv.toolName}]
16117
16117
  ${resultStr}`;
16118
16118
  parts.push(makePart(msg, i, "tool-result", fullText, detail, inv.toolName));
@@ -16139,7 +16139,7 @@ ${typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs, null, 2)}`;
16139
16139
  const toolName = part.toolName;
16140
16140
  if (toolName) {
16141
16141
  const rawResult = part.output ?? part.result;
16142
- const resultStr = chunkBK3AYI7X_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16142
+ const resultStr = chunk5IJQOXJM_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
16143
16143
  const fullText = `[Tool Result: ${toolName}]
16144
16144
  ${resultStr}`;
16145
16145
  parts.push(makePart(msg, i, "tool-result", fullText, detail, toolName));
@@ -16218,7 +16218,7 @@ function renderFormattedParts(parts, timestamps, options) {
16218
16218
  const text4 = buildRenderedText(parts, timestamps);
16219
16219
  let totalTokens = tokenx.estimateTokenCount(text4);
16220
16220
  if (totalTokens > options.maxTokens) {
16221
- const truncated = chunkBK3AYI7X_cjs.truncateStringByTokens(text4, options.maxTokens);
16221
+ const truncated = chunk5IJQOXJM_cjs.truncateStringByTokens(text4, options.maxTokens);
16222
16222
  return { text: truncated, truncated: true, tokenOffset: totalTokens - options.maxTokens };
16223
16223
  }
16224
16224
  const truncatedIndices = parts.map((p, i) => ({ part: p, index: i })).filter(({ part }) => part.text !== part.fullText).sort((a, b) => expandPriority(a.part) - expandPriority(b.part));
@@ -16251,7 +16251,7 @@ function renderFormattedParts(parts, timestamps, options) {
16251
16251
  if (expandedTokens <= options.maxTokens) {
16252
16252
  return { text: expanded, truncated: false, tokenOffset: 0 };
16253
16253
  }
16254
- const hardTruncated = chunkBK3AYI7X_cjs.truncateStringByTokens(expanded, options.maxTokens);
16254
+ const hardTruncated = chunk5IJQOXJM_cjs.truncateStringByTokens(expanded, options.maxTokens);
16255
16255
  return { text: hardTruncated, truncated: true, tokenOffset: expandedTokens - options.maxTokens };
16256
16256
  }
16257
16257
  async function recallPart({
@@ -16302,7 +16302,7 @@ async function recallPart({
16302
16302
 
16303
16303
  `;
16304
16304
  const fallbackText = `${fallbackNote}${firstNextPart.text}`;
16305
- const truncatedText2 = chunkBK3AYI7X_cjs.truncateStringByTokens(fallbackText, maxTokens);
16305
+ const truncatedText2 = chunk5IJQOXJM_cjs.truncateStringByTokens(fallbackText, maxTokens);
16306
16306
  const wasTruncated2 = truncatedText2 !== fallbackText;
16307
16307
  return {
16308
16308
  text: truncatedText2,
@@ -16317,7 +16317,7 @@ async function recallPart({
16317
16317
  }
16318
16318
  throw new Error(`Part index ${partIndex} not found in message ${cursor}. Available indices: ${availableIndices}`);
16319
16319
  }
16320
- const truncatedText = chunkBK3AYI7X_cjs.truncateStringByTokens(target.text, maxTokens);
16320
+ const truncatedText = chunk5IJQOXJM_cjs.truncateStringByTokens(target.text, maxTokens);
16321
16321
  const wasTruncated = truncatedText !== target.text;
16322
16322
  return {
16323
16323
  text: truncatedText,
@@ -18079,7 +18079,7 @@ ${workingMemory}`;
18079
18079
  "Observational memory requires @mastra/core support for request-response-id-rotation. Please bump @mastra/core to a newer version."
18080
18080
  );
18081
18081
  }
18082
- const { ObservationalMemory: OMClass } = await import('./observational-memory-SRGNHILF.cjs');
18082
+ const { ObservationalMemory: OMClass } = await import('./observational-memory-V2APY3TO.cjs');
18083
18083
  const onIndexObservations = this.hasRetrievalSearch(omConfig.retrieval) ? async (observation) => {
18084
18084
  await this.indexObservation(observation);
18085
18085
  } : void 0;
@@ -19005,7 +19005,7 @@ Notes:
19005
19005
  if (!effectiveConfig) return null;
19006
19006
  const engine = await this.omEngine;
19007
19007
  if (!engine) return null;
19008
- const { ObservationalMemoryProcessor } = await import('./observational-memory-SRGNHILF.cjs');
19008
+ const { ObservationalMemoryProcessor } = await import('./observational-memory-V2APY3TO.cjs');
19009
19009
  return new ObservationalMemoryProcessor(engine, this, {
19010
19010
  temporalMarkers: effectiveConfig.temporalMarkers
19011
19011
  });
@@ -19014,11 +19014,11 @@ Notes:
19014
19014
 
19015
19015
  Object.defineProperty(exports, "ModelByInputTokens", {
19016
19016
  enumerable: true,
19017
- get: function () { return chunkBK3AYI7X_cjs.ModelByInputTokens; }
19017
+ get: function () { return chunk5IJQOXJM_cjs.ModelByInputTokens; }
19018
19018
  });
19019
19019
  Object.defineProperty(exports, "getObservationsAsOf", {
19020
19020
  enumerable: true,
19021
- get: function () { return chunkBK3AYI7X_cjs.getObservationsAsOf; }
19021
+ get: function () { return chunk5IJQOXJM_cjs.getObservationsAsOf; }
19022
19022
  });
19023
19023
  Object.defineProperty(exports, "MessageHistory", {
19024
19024
  enumerable: true,
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
- import { truncateStringByTokens, resolveToolResultValue, formatToolResultForObserver } from './chunk-KLETR4RS.js';
2
- export { ModelByInputTokens, getObservationsAsOf } from './chunk-KLETR4RS.js';
1
+ import { truncateStringByTokens, resolveToolResultValue, formatToolResultForObserver } from './chunk-NZXH5WER.js';
2
+ export { ModelByInputTokens, getObservationsAsOf } from './chunk-NZXH5WER.js';
3
3
  import { ZodFirstPartyTypeKind } from 'zod/v3';
4
4
  import { z } from 'zod';
5
5
  import * as z4 from 'zod/v4';
@@ -18056,7 +18056,7 @@ ${workingMemory}`;
18056
18056
  "Observational memory requires @mastra/core support for request-response-id-rotation. Please bump @mastra/core to a newer version."
18057
18057
  );
18058
18058
  }
18059
- const { ObservationalMemory: OMClass } = await import('./observational-memory-K5ES5KKQ.js');
18059
+ const { ObservationalMemory: OMClass } = await import('./observational-memory-KFKHBTCB.js');
18060
18060
  const onIndexObservations = this.hasRetrievalSearch(omConfig.retrieval) ? async (observation) => {
18061
18061
  await this.indexObservation(observation);
18062
18062
  } : void 0;
@@ -18982,7 +18982,7 @@ Notes:
18982
18982
  if (!effectiveConfig) return null;
18983
18983
  const engine = await this.omEngine;
18984
18984
  if (!engine) return null;
18985
- const { ObservationalMemoryProcessor } = await import('./observational-memory-K5ES5KKQ.js');
18985
+ const { ObservationalMemoryProcessor } = await import('./observational-memory-KFKHBTCB.js');
18986
18986
  return new ObservationalMemoryProcessor(engine, this, {
18987
18987
  temporalMarkers: effectiveConfig.temporalMarkers
18988
18988
  });
@@ -1,4 +1,4 @@
1
- export { ModelByInputTokens, OBSERVER_SYSTEM_PROMPT, ObservationalMemory, ObservationalMemoryProcessor, TokenCounter, buildObserverPrompt, buildObserverSystemPrompt, combineObservationGroupRanges, deriveObservationGroupProvenance, extractCurrentTask, formatMessagesForObserver, getObservationsAsOf, hasCurrentTaskSection, injectAnchorIds, optimizeObservationsForContext, parseAnchorId, parseObservationGroups, parseObserverOutput, reconcileObservationGroupsFromReflection, renderObservationGroupsForReflection, stripEphemeralAnchorIds, stripObservationGroups, wrapInObservationGroup } from './chunk-KLETR4RS.js';
1
+ export { ModelByInputTokens, OBSERVER_SYSTEM_PROMPT, ObservationalMemory, ObservationalMemoryProcessor, TokenCounter, buildObserverPrompt, buildObserverSystemPrompt, combineObservationGroupRanges, deriveObservationGroupProvenance, extractCurrentTask, formatMessagesForObserver, getObservationsAsOf, hasCurrentTaskSection, injectAnchorIds, optimizeObservationsForContext, parseAnchorId, parseObservationGroups, parseObserverOutput, reconcileObservationGroupsFromReflection, renderObservationGroupsForReflection, stripEphemeralAnchorIds, stripObservationGroups, wrapInObservationGroup } from './chunk-NZXH5WER.js';
2
2
  export { OBSERVATIONAL_MEMORY_DEFAULTS, OBSERVATION_CONTEXT_INSTRUCTIONS, OBSERVATION_CONTEXT_PROMPT, OBSERVATION_CONTINUATION_HINT } from './chunk-LSJJAJAF.js';
3
- //# sourceMappingURL=observational-memory-K5ES5KKQ.js.map
4
- //# sourceMappingURL=observational-memory-K5ES5KKQ.js.map
3
+ //# sourceMappingURL=observational-memory-KFKHBTCB.js.map
4
+ //# sourceMappingURL=observational-memory-KFKHBTCB.js.map