@mastra/mcp-docs-server 1.1.35-alpha.2 → 1.1.35-alpha.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/course/03-agent-memory/18-advanced-configuration-semantic-recall.md +48 -4
- package/.docs/docs/agents/background-tasks.md +62 -2
- package/.docs/docs/agents/processors.md +34 -2
- package/.docs/docs/agents/response-caching.md +148 -0
- package/.docs/docs/agents/using-tools.md +8 -0
- package/.docs/docs/editor/tools.md +1 -1
- package/.docs/docs/index.md +2 -2
- package/.docs/docs/mastra-platform/configuration.md +1 -1
- package/.docs/docs/mastra-platform/overview.md +1 -1
- package/.docs/docs/memory/observational-memory.md +63 -14
- package/.docs/docs/memory/overview.md +2 -1
- package/.docs/docs/memory/semantic-recall.md +68 -6
- package/.docs/docs/observability/logging.md +2 -2
- package/.docs/docs/observability/metrics/overview.md +4 -4
- package/.docs/docs/observability/overview.md +6 -6
- package/.docs/docs/observability/tracing/bridges/otel.md +25 -0
- package/.docs/docs/observability/tracing/exporters/arize.md +5 -5
- package/.docs/docs/observability/tracing/exporters/braintrust.md +37 -0
- package/.docs/docs/observability/tracing/exporters/langfuse.md +21 -0
- package/.docs/docs/observability/tracing/exporters/{cloud.md → mastra-platform.md} +28 -26
- package/.docs/docs/observability/tracing/exporters/{default.md → mastra-storage.md} +56 -19
- package/.docs/docs/observability/tracing/exporters/otel.md +79 -2
- package/.docs/docs/observability/tracing/overview.md +30 -29
- package/.docs/docs/observability/tracing/processors/sensitive-data-filter.md +6 -6
- package/.docs/docs/server/mastra-server.md +30 -19
- package/.docs/docs/studio/observability.md +4 -4
- package/.docs/docs/studio/overview.md +4 -0
- package/.docs/docs/workflows/suspend-and-resume.md +28 -1
- package/.docs/guides/deployment/inngest.md +29 -8
- package/.docs/guides/guide/web-search.md +7 -7
- package/.docs/guides/migrations/mastra-cloud.md +6 -6
- package/.docs/guides/migrations/upgrade-to-v1/tracing.md +19 -17
- package/.docs/models/gateways/azure-openai.md +94 -23
- package/.docs/models/gateways/netlify.md +3 -1
- package/.docs/models/gateways/openrouter.md +5 -1
- package/.docs/models/gateways/vercel.md +2 -1
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/deepinfra.md +2 -1
- package/.docs/models/providers/deepseek.md +3 -1
- package/.docs/models/providers/digitalocean.md +10 -2
- package/.docs/models/providers/firepass.md +71 -0
- package/.docs/models/providers/google.md +3 -2
- package/.docs/models/providers/kilo.md +5 -3
- package/.docs/models/providers/kiro.md +110 -0
- package/.docs/models/providers/llmgateway.md +8 -2
- package/.docs/models/providers/nebius.md +37 -55
- package/.docs/models/providers/openai.md +2 -0
- package/.docs/models/providers/opencode-go.md +2 -4
- package/.docs/models/providers/opencode.md +3 -3
- package/.docs/models/providers/poe.md +4 -1
- package/.docs/models/providers/qiniu-ai.md +2 -2
- package/.docs/models/providers/wafer.ai.md +2 -1
- package/.docs/models/providers/xiaomi-token-plan-ams.md +6 -5
- package/.docs/models/providers/xiaomi-token-plan-cn.md +6 -5
- package/.docs/models/providers/xiaomi-token-plan-sgp.md +6 -5
- package/.docs/models/providers/xiaomi.md +2 -2
- package/.docs/models/providers/zenmux.md +1 -1
- package/.docs/models/providers.md +1 -0
- package/.docs/reference/agents/agent.md +2 -0
- package/.docs/reference/cli/mastra.md +464 -0
- package/.docs/reference/client-js/agents.md +26 -1
- package/.docs/reference/client-js/responses.md +4 -0
- package/.docs/reference/configuration.md +6 -6
- package/.docs/reference/editor/tool-provider.md +3 -3
- package/.docs/reference/harness/harness-class.md +23 -8
- package/.docs/reference/index.md +3 -0
- package/.docs/reference/memory/observational-memory.md +11 -1
- package/.docs/reference/observability/metrics/automatic-metrics.md +2 -4
- package/.docs/reference/observability/tracing/bridges/datadog.md +2 -2
- package/.docs/reference/observability/tracing/bridges/otel.md +26 -4
- package/.docs/reference/observability/tracing/configuration.md +6 -3
- package/.docs/reference/observability/tracing/exporters/arize.md +1 -1
- package/.docs/reference/observability/tracing/exporters/braintrust.md +2 -0
- package/.docs/reference/observability/tracing/exporters/cloud-exporter.md +3 -1
- package/.docs/reference/observability/tracing/exporters/console-exporter.md +2 -2
- package/.docs/reference/observability/tracing/exporters/default-exporter.md +7 -1
- package/.docs/reference/observability/tracing/exporters/mastra-platform-exporter.md +263 -0
- package/.docs/reference/observability/tracing/exporters/mastra-storage-exporter.md +194 -0
- package/.docs/reference/observability/tracing/exporters/otel.md +12 -8
- package/.docs/reference/observability/tracing/instances.md +2 -2
- package/.docs/reference/observability/tracing/interfaces.md +37 -2
- package/.docs/reference/observability/tracing/processors/sensitive-data-filter.md +22 -0
- package/.docs/reference/observability/tracing/span-filtering.md +2 -2
- package/.docs/reference/processors/processor-interface.md +74 -12
- package/.docs/reference/processors/provider-history-compat.md +132 -0
- package/.docs/reference/processors/response-cache.md +114 -0
- package/.docs/reference/processors/tool-call-filter.md +28 -0
- package/.docs/reference/storage/clickhouse.md +8 -8
- package/.docs/reference/storage/cloudflare-d1.md +1 -1
- package/.docs/reference/storage/cloudflare.md +1 -1
- package/.docs/reference/storage/composite.md +1 -1
- package/.docs/reference/storage/convex.md +1 -1
- package/.docs/reference/storage/duckdb.md +3 -3
- package/.docs/reference/storage/dynamodb.md +1 -1
- package/.docs/reference/storage/lance.md +1 -1
- package/.docs/reference/storage/libsql.md +1 -1
- package/.docs/reference/storage/postgresql.md +1 -1
- package/.docs/reference/storage/upstash.md +1 -1
- package/.docs/reference/streaming/ChunkType.md +44 -0
- package/.docs/reference/streaming/agents/stream.md +18 -2
- package/.docs/reference/tools/create-tool.md +46 -0
- package/.docs/reference/tools/mcp-client.md +47 -0
- package/.docs/reference/workflows/workflow-state-reader.md +113 -0
- package/CHANGELOG.md +71 -0
- package/package.json +4 -4
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# Advanced
|
|
1
|
+
# Advanced configuration of semantic recall
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Configure semantic recall with the `semanticRecall` option:
|
|
4
4
|
|
|
5
5
|
```typescript
|
|
6
6
|
const memory = new Memory({
|
|
@@ -19,11 +19,55 @@ const memory = new Memory({
|
|
|
19
19
|
before: 2,
|
|
20
20
|
after: 1,
|
|
21
21
|
},
|
|
22
|
+
scope: 'resource', // Search all threads for this resource
|
|
23
|
+
filter: { projectId: { $eq: 'project-a' } },
|
|
22
24
|
},
|
|
23
25
|
},
|
|
24
26
|
})
|
|
25
27
|
```
|
|
26
28
|
|
|
27
|
-
The `topK` parameter controls how many
|
|
29
|
+
The `topK` parameter controls how many similar messages Mastra retrieves. A higher value retrieves more messages, which can help with complex topics but may include less relevant information. The default value is `4`.
|
|
28
30
|
|
|
29
|
-
The `messageRange` parameter controls how much context
|
|
31
|
+
The `messageRange` parameter controls how much context Mastra includes with each match. Messages before and after the match help the agent understand the matched message.
|
|
32
|
+
|
|
33
|
+
The `scope` parameter controls whether Mastra searches the current thread (`'thread'`) or all threads owned by a resource (`'resource'`). Use `scope: 'resource'` to let the agent recall information from past conversations for the same resource.
|
|
34
|
+
|
|
35
|
+
The `filter` parameter restricts semantic recall results to messages with matching thread metadata, such as a project ID or category.
|
|
36
|
+
|
|
37
|
+
Filters match metadata stored on message embeddings when messages are saved. If thread metadata changes later, existing embeddings keep their previous metadata until those messages are saved or indexed again.
|
|
38
|
+
|
|
39
|
+
Supported filter operators:
|
|
40
|
+
|
|
41
|
+
- `$and`: Logical AND
|
|
42
|
+
- `$eq`: Equal to
|
|
43
|
+
- `$gt`: Greater than
|
|
44
|
+
- `$gte`: Greater than or equal to
|
|
45
|
+
- `$in`: In array
|
|
46
|
+
- `$lt`: Less than
|
|
47
|
+
- `$lte`: Less than or equal to
|
|
48
|
+
- `$ne`: Not equal to
|
|
49
|
+
- `$nin`: Not in array
|
|
50
|
+
- `$or`: Logical OR
|
|
51
|
+
|
|
52
|
+
The following example demonstrates metadata filters for common use cases:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
// Filter by project
|
|
56
|
+
const options = {
|
|
57
|
+
semanticRecall: { filter: { projectId: { $eq: 'my-project' } } },
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Filter by multiple categories
|
|
61
|
+
const options = {
|
|
62
|
+
semanticRecall: { filter: { category: { $in: ['work', 'research'] } } },
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Filter by project and priority
|
|
66
|
+
const options = {
|
|
67
|
+
semanticRecall: {
|
|
68
|
+
filter: {
|
|
69
|
+
$and: [{ projectId: { $eq: 'project-a' } }, { priority: { $gte: 3 } }],
|
|
70
|
+
},
|
|
71
|
+
},
|
|
72
|
+
}
|
|
73
|
+
```
|
|
@@ -127,10 +127,12 @@ When a tool call dispatches as a background task, two streams may surface lifecy
|
|
|
127
127
|
| `background-task-completed` | The task finished successfully. The `payload.result` matches the eventual tool result. | Manager stream |
|
|
128
128
|
| `background-task-failed` | The task threw or timed out. | Manager stream |
|
|
129
129
|
| `background-task-cancelled` | The task was cancelled before completing. | Manager stream |
|
|
130
|
+
| `background-task-suspended` | The tool called `suspend()` from inside its execute. | Manager stream |
|
|
131
|
+
| `background-task-resumed` | A suspended task was resumed via `manager.resume(taskId, resumeData)`. | Manager stream |
|
|
130
132
|
|
|
131
|
-
`agent.stream().fullStream` only emits the agent-loop chunks (`background-task-started`, `background-task-progress`) on its own. `agent.streamUntilIdle()` emits the same two chunks and additionally subscribes to the manager pubsub for the run's memory scope and pipes the
|
|
133
|
+
`agent.stream().fullStream` only emits the agent-loop chunks (`background-task-started`, `background-task-progress`) on its own. `agent.streamUntilIdle()` emits the same two chunks and additionally subscribes to the manager pubsub for the run's memory scope and pipes the seven manager chunks (`background-task-running`, `background-task-output`, `background-task-completed`, `background-task-failed`, `background-task-cancelled`, `background-task-suspended`, `background-task-resumed`) into the same `fullStream`.
|
|
132
134
|
|
|
133
|
-
`backgroundTaskManager.stream()` only emits the
|
|
135
|
+
`backgroundTaskManager.stream()` only emits the seven manager chunks.
|
|
134
136
|
|
|
135
137
|
The full payload shapes are documented in the [background task chunks reference](https://mastra.ai/reference/streaming/ChunkType).
|
|
136
138
|
|
|
@@ -210,6 +212,64 @@ When this `researchAgent` is delegated to from a supervisor that has no backgrou
|
|
|
210
212
|
|
|
211
213
|
Use this pattern when you want a subagent to behave consistently in the background regardless of which supervisor invokes it. Use the supervisor-side opt-in (above) when you want to tune background behavior centrally per supervisor.
|
|
212
214
|
|
|
215
|
+
## Suspending and resuming
|
|
216
|
+
|
|
217
|
+
A background task can pause itself mid-execution and wait for an external signal before continuing. This is useful for human approvals, webhooks, or any flow where the next step depends on data that arrives later.
|
|
218
|
+
|
|
219
|
+
A tool calls `suspend(data)` from inside its `execute`, which:
|
|
220
|
+
|
|
221
|
+
- Persists `status: 'suspended'` and the `data` payload on the task record.
|
|
222
|
+
- Saves the workflow snapshot so the run survives process restarts.
|
|
223
|
+
- Emits a `background-task-suspended` chunk on the manager stream.
|
|
224
|
+
- Releases the concurrency slot so other tasks can run.
|
|
225
|
+
|
|
226
|
+
Resume the task with `mastra.backgroundTaskManager.resume(taskId, resumeData)`. The `resumeData` arrives in the tool's `execute` options on the resumed run, and the task transitions back to `running`.
|
|
227
|
+
|
|
228
|
+
```typescript
|
|
229
|
+
import { createTool } from '@mastra/core/tools'
|
|
230
|
+
import { z } from 'zod'
|
|
231
|
+
|
|
232
|
+
export const reviewTool = createTool({
|
|
233
|
+
id: 'review',
|
|
234
|
+
description: 'Submit a draft for human review.',
|
|
235
|
+
inputSchema: z.object({ draft: z.string() }),
|
|
236
|
+
outputSchema: z.object({ approvedBy: z.string(), edits: z.string().optional() }),
|
|
237
|
+
background: { enabled: true },
|
|
238
|
+
execute: async ({ draft }, context) => {
|
|
239
|
+
const { suspend, resumeData } = context.agent
|
|
240
|
+
if (!resumeData) {
|
|
241
|
+
await suspend?.({ awaiting: 'approval', draft })
|
|
242
|
+
return { approvedBy: '', edits: undefined }
|
|
243
|
+
}
|
|
244
|
+
const { reviewer, edits } = resumeData as { reviewer: string; edits?: string }
|
|
245
|
+
return { approvedBy: reviewer, edits }
|
|
246
|
+
},
|
|
247
|
+
})
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
The first invocation of `execute` sees `resumeData === undefined` and calls `suspend`. After the task is resumed, the runtime restarts the tool with `resumeData` populated, so the `if` block is skipped and the tool returns its real result.
|
|
251
|
+
|
|
252
|
+
To resume the task once an approval arrives:
|
|
253
|
+
|
|
254
|
+
```typescript
|
|
255
|
+
await mastra.backgroundTaskManager?.resume(taskId, {
|
|
256
|
+
reviewer: 'alice@example.com',
|
|
257
|
+
edits: 'Reworded paragraph 3.',
|
|
258
|
+
})
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### What happens to the agent loop
|
|
262
|
+
|
|
263
|
+
When a task suspends mid-`streamUntilIdle()`, the wrapper treats it as terminal for the current iteration and closes. To continue the agent immediately when the resume payload is in hand, call `agent.resumeStreamUntilIdle(resumeData, { runId, toolCallId, memory })`: the resumed background task runs to completion, its result lands in the message list, and the agent runs a follow-up turn — all on the same SSE connection. If you'd rather drive the resume out-of-band, call `mastra.backgroundTaskManager.resume(taskId, resumeData)` directly and the result still writes into the thread for the next user turn to pick up.
|
|
264
|
+
|
|
265
|
+
### Re-registering the executor on resume
|
|
266
|
+
|
|
267
|
+
The manager keeps tool executors in process memory. If the process restarts while a task is suspended, the executor closure is gone — the caller of `resume()` must re-register it first via `manager.registerTaskContext(taskId, ...)`. Tasks dispatched and resumed inside the same process don't need this.
|
|
268
|
+
|
|
269
|
+
### Cancelling a suspended task
|
|
270
|
+
|
|
271
|
+
`manager.cancel(taskId)` works against suspended tasks the same way it works for running ones: the row flips to `cancelled`, the workflow snapshot is cleaned up, and a `task.cancelled` event fires.
|
|
272
|
+
|
|
213
273
|
## Lifecycle callbacks
|
|
214
274
|
|
|
215
275
|
Each layer can register terminal-state callbacks. They don't replace one another, and success/failure hooks fire for their respective outcomes:
|
|
@@ -211,6 +211,22 @@ The method receives the current `stepNumber`, `model`, `tools`, `toolChoice`, `m
|
|
|
211
211
|
|
|
212
212
|
See the [`Processor` reference](https://mastra.ai/reference/processors/processor-interface) for all available arguments and return types.
|
|
213
213
|
|
|
214
|
+
### Rewrite the LLM request before the provider call
|
|
215
|
+
|
|
216
|
+
Use `processLLMRequest()` when you need to rewrite the final prompt that Mastra sends to the model. This hook runs after Mastra converts the `MessageList` into the provider-facing prompt format (`LanguageModelV2Prompt`) and immediately before the provider call.
|
|
217
|
+
|
|
218
|
+
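Use the message-based hooks (`processInput()` and `processInputStep()`) for conversation changes, and `processLLMRequest()` for changes to the outbound prompt only: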
|
|
219
|
+
|
|
220
|
+
- `processInput()`: Change the conversation once before the agentic loop starts.
|
|
221
|
+
- `processInputStep()`: Change messages or step configuration before each LLM call.
|
|
222
|
+
- `processLLMRequest()`: Change only the outbound prompt for the current provider call.
|
|
223
|
+
|
|
224
|
+
Changes returned from `processLLMRequest()` are transient. They don't persist back to `MessageList`, memory, UI history, or future provider calls. This makes the hook a good fit for provider compatibility rewrites, role/content normalization, or other model-specific prompt changes that shouldn't alter stored conversation history.
|
|
225
|
+
|
|
226
|
+
The method receives `prompt`, `model`, `stepNumber`, `steps`, `state`, and the shared processor context. Calling `abort()` from `processLLMRequest()` emits the normal tripwire response and stops the call.
|
|
227
|
+
|
|
228
|
+
See the [`Processor` reference](https://mastra.ai/reference/processors/processor-interface) for all available arguments and return types.
|
|
229
|
+
|
|
214
230
|
### Use the `prepareStep()` callback
|
|
215
231
|
|
|
216
232
|
The `prepareStep()` callback on `generate()` or `stream()` is a shorthand for `processInputStep()`. Internally, Mastra wraps it in a processor that calls your function at each step. It accepts the same arguments and return type as `processInputStep()`, but doesn't require creating a class:
|
|
@@ -317,7 +333,7 @@ For more on retry behavior, see [Retry mechanism](#retry-mechanism) in Advanced
|
|
|
317
333
|
|
|
318
334
|
### Persist data across chunks and steps
|
|
319
335
|
|
|
320
|
-
Output methods receive a `state` object that persists for the lifetime of one request. State is keyed by the processor's `id`, so each processor sees only its own data, and it
|
|
336
|
+
Output methods receive a `state` object that persists for the lifetime of one request. State is keyed by the processor's `id`, so each processor sees only its own data, and it's shared between `processOutputStream`, `processOutputStep`, and `processOutputResult`. A new state object is created for every new `agent.generate()` or `agent.stream()` call.
|
|
321
337
|
|
|
322
338
|
```typescript
|
|
323
339
|
import type { Processor } from '@mastra/core/processors'
|
|
@@ -375,6 +391,14 @@ new ToolCallFilter({
|
|
|
375
391
|
})
|
|
376
392
|
```
|
|
377
393
|
|
|
394
|
+
Set `preserveModelOutput: true` to keep compact `toModelOutput` history for filtered completed tool results. The filter keeps only the model-facing output and removes raw tool args and raw results.
|
|
395
|
+
|
|
396
|
+
```typescript
|
|
397
|
+
new ToolCallFilter({
|
|
398
|
+
preserveModelOutput: true,
|
|
399
|
+
})
|
|
400
|
+
```
|
|
401
|
+
|
|
378
402
|
See the [`ToolCallFilter` reference](https://mastra.ai/reference/processors/tool-call-filter) for configuration options and the [Memory Processors](https://mastra.ai/docs/memory/memory-processors) page for pre-memory filtering.
|
|
379
403
|
|
|
380
404
|
### `ToolSearchProcessor`
|
|
@@ -383,6 +407,14 @@ Enables dynamic tool discovery for agents with large tool libraries. Instead of
|
|
|
383
407
|
|
|
384
408
|
See the [`ToolSearchProcessor` reference](https://mastra.ai/reference/processors/tool-search-processor) for configuration options and usage examples.
|
|
385
409
|
|
|
410
|
+
### `ProviderHistoryCompat`
|
|
411
|
+
|
|
412
|
+
Handles provider-specific history incompatibilities when agents reuse messages across model providers. It can rewrite the outbound LLM request before the provider call, or recover from known provider API errors and retry.
|
|
413
|
+
|
|
414
|
+
Add `ProviderHistoryCompat` explicitly when you need provider history compatibility rules, reactive API error recovery, custom compatibility rules, or predictable processor ordering.
|
|
415
|
+
|
|
416
|
+
See the [`ProviderHistoryCompat` reference](https://mastra.ai/reference/processors/provider-history-compat) for setup, built-in rules, and custom rule options.
|
|
417
|
+
|
|
386
418
|
## Advanced patterns
|
|
387
419
|
|
|
388
420
|
### Ensure a final response with `maxSteps`
|
|
@@ -494,7 +526,7 @@ for await (const chunk of stream.fullStream) {
|
|
|
494
526
|
|
|
495
527
|
Custom chunk types must use the `data-` prefix (e.g., `data-moderation-update`, `data-status`).
|
|
496
528
|
|
|
497
|
-
By default, `processOutputStream()` skips `data-*` chunks so it
|
|
529
|
+
By default, `processOutputStream()` skips `data-*` chunks so it doesn't accidentally operate on tool telemetry or other processors' output. To inspect, modify, or block these chunks in a processor, set `processDataParts = true` on that processor:
|
|
498
530
|
|
|
499
531
|
```typescript
|
|
500
532
|
class ModerationCollector implements Processor {
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Response caching
|
|
2
|
+
|
|
3
|
+
Response caching skips the LLM call and replays a previously cached response when an agent receives an identical request. Use it to drop latency to single-digit milliseconds and avoid paying for repeated calls.
|
|
4
|
+
|
|
5
|
+
Caching is implemented as the [`ResponseCache`](https://mastra.ai/reference/processors/response-cache) input processor. There is no agent-level option — to enable caching, register the processor explicitly. This keeps the API surface small while we collect feedback; per-call overrides flow through `RequestContext`.
|
|
6
|
+
|
|
7
|
+
## When to use response caching
|
|
8
|
+
|
|
9
|
+
Reach for it when the same request shape repeats across users or sessions, for example prompt templates, suggested-prompt buttons, agentic search re-asks, or guardrail LLMs that classify the same input over and over. Skip it when calls trigger external side effects through tools, since cache hits replay tool calls without re-executing them.
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
Add a `ResponseCache` to the agent's `inputProcessors` and pass any `MastraServerCache` as the backend. For development, `InMemoryServerCache` works out of the box:
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { Agent } from '@mastra/core/agent'
|
|
17
|
+
import { InMemoryServerCache } from '@mastra/core/cache'
|
|
18
|
+
import { ResponseCache } from '@mastra/core/processors'
|
|
19
|
+
|
|
20
|
+
const cache = new InMemoryServerCache()
|
|
21
|
+
|
|
22
|
+
export const searchAgent = new Agent({
|
|
23
|
+
name: 'Search Agent',
|
|
24
|
+
instructions: 'You answer questions concisely.',
|
|
25
|
+
model: 'openai/gpt-5',
|
|
26
|
+
inputProcessors: [new ResponseCache({ cache, ttl: 600 })], // 10 minutes
|
|
27
|
+
})
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The first call runs the LLM normally and writes the response to the cache. Subsequent calls with an identical resolved prompt return the cached response without invoking the LLM.
|
|
31
|
+
|
|
32
|
+
## Per-call overrides via RequestContext
|
|
33
|
+
|
|
34
|
+
Per-call config flows through `RequestContext`. Use `ResponseCache.context()` to build a fresh context, or `ResponseCache.applyContext()` to merge into one you already have:
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
import { ResponseCache } from '@mastra/core/processors'
|
|
38
|
+
import { RequestContext } from '@mastra/core/request-context'
|
|
39
|
+
|
|
40
|
+
// Fresh context with the override
|
|
41
|
+
await agent.stream('hello', {
|
|
42
|
+
requestContext: ResponseCache.context({ key: 'custom-key', bust: true }),
|
|
43
|
+
})
|
|
44
|
+
|
|
45
|
+
// Or merge into an existing context
|
|
46
|
+
const ctx = new RequestContext()
|
|
47
|
+
ctx.set('caller-meta', { userId: 'u-123' })
|
|
48
|
+
ResponseCache.applyContext(ctx, { bust: true })
|
|
49
|
+
await agent.stream('hello', { requestContext: ctx })
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Three fields are overridable per call:
|
|
53
|
+
|
|
54
|
+
- `key` — string or function. Overrides the auto-derived cache key for this request only.
|
|
55
|
+
- `scope` — string or `null`. Overrides the tenant/user scope for this request only. `null` opts out of scoping.
|
|
56
|
+
- `bust` — boolean. Skips the cache read but still writes on completion (useful for "force refresh" buttons).
|
|
57
|
+
|
|
58
|
+
`cache`, `ttl`, and `agentId` stay on the constructor — they are instance-level concerns and not safe to vary per call.
|
|
59
|
+
|
|
60
|
+
## Tenant scoping
|
|
61
|
+
|
|
62
|
+
By default, `ResponseCache` looks up `MASTRA_RESOURCE_ID_KEY` on the request context and uses it as the cache scope. This means an agent that already populates the resource id (e.g. via memory) gets per-user isolation automatically — two users never see each other's cached responses.
|
|
63
|
+
|
|
64
|
+
Override explicitly when you need a different scope:
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
new Agent({
|
|
68
|
+
// ...
|
|
69
|
+
inputProcessors: [
|
|
70
|
+
new ResponseCache({
|
|
71
|
+
cache,
|
|
72
|
+
scope: 'org-123', // explicit tenant scope
|
|
73
|
+
}),
|
|
74
|
+
],
|
|
75
|
+
})
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Pass `scope: null` to deliberately share entries across all callers — only use this for known-public, non-personalized content.
|
|
79
|
+
|
|
80
|
+
## Custom cache backend
|
|
81
|
+
|
|
82
|
+
`ResponseCache` accepts any `MastraServerCache`. For production, use `RedisCache` from `@mastra/redis`:
|
|
83
|
+
|
|
84
|
+
```typescript
|
|
85
|
+
import { Agent } from '@mastra/core/agent'
|
|
86
|
+
import { ResponseCache } from '@mastra/core/processors'
|
|
87
|
+
import { RedisCache } from '@mastra/redis'
|
|
88
|
+
|
|
89
|
+
const cache = new RedisCache({ url: process.env.REDIS_URL })
|
|
90
|
+
|
|
91
|
+
export const agent = new Agent({
|
|
92
|
+
name: 'Cached Agent',
|
|
93
|
+
instructions: '...',
|
|
94
|
+
model: 'openai/gpt-5',
|
|
95
|
+
inputProcessors: [new ResponseCache({ cache })],
|
|
96
|
+
})
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
For a custom backend, extend `MastraServerCache` and implement its abstract methods (the processor only calls `get` and `set`).
|
|
100
|
+
|
|
101
|
+
## How caching is implemented
|
|
102
|
+
|
|
103
|
+
`ResponseCache` hooks into `processLLMRequest` (cache lookup, short-circuits on hit) and `processLLMResponse` (cache write on completion). Both run inside the agentic loop _after_ memory has loaded and earlier input processors have transformed the prompt.
|
|
104
|
+
|
|
105
|
+
As a result, the cache key is derived from the resolved `LanguageModelV2Prompt` that Mastra is about to send to the model, and each step in an agentic tool loop is cached independently.
|
|
106
|
+
|
|
107
|
+
## What's in the cache key
|
|
108
|
+
|
|
109
|
+
When you don't supply `key`, the processor derives one deterministically from the inputs that change the LLM's response at this step: `agentId`, `stepNumber` (so each step in a tool loop has its own cache entry), `scope`, model identity (`provider`, `modelId`, spec version), and the resolved `prompt` (post-memory + post-processors). Any change to these inputs automatically invalidates the cache.
|
|
110
|
+
|
|
111
|
+
### Customize the cache key
|
|
112
|
+
|
|
113
|
+
Pass `key` as a function on the constructor or per call to derive your own cache key from any subset of those inputs. The function receives the same inputs the deterministic hash would have consumed and returns a string (or a `Promise<string>`):
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
import { ResponseCache, buildResponseCacheKey } from '@mastra/core/processors'
|
|
117
|
+
|
|
118
|
+
await agent.stream(input, {
|
|
119
|
+
requestContext: ResponseCache.context({
|
|
120
|
+
// Cache only on the model id and the resolved prompt tail — ignore
|
|
121
|
+
// step number, scope, etc.
|
|
122
|
+
key: ({ model, prompt }) => `qa:${model.modelId}:${JSON.stringify(prompt).slice(-200)}`,
|
|
123
|
+
}),
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
// Or reuse the deterministic helper while overriding individual fields:
|
|
127
|
+
await agent.stream(input, {
|
|
128
|
+
requestContext: ResponseCache.context({
|
|
129
|
+
key: inputs => buildResponseCacheKey({ ...inputs, scope: 'global' }),
|
|
130
|
+
}),
|
|
131
|
+
})
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
If the function throws, the processor falls back to the default key derivation so the call still benefits from caching.
|
|
135
|
+
|
|
136
|
+
## How cache hits work
|
|
137
|
+
|
|
138
|
+
When the processor finds a cache hit, it short-circuits the LLM call by returning the cached chunks from `processLLMRequest`. The agentic loop synthesizes a stream from those chunks instead of calling the model. `agent.generate()` collects them into a `FullOutput`; `agent.stream()` returns a `MastraModelOutput` whose chunks come from the cached buffer, so consumers iterating `fullStream` or awaiting `text`, `usage`, and `finishReason` see the cached values.
|
|
139
|
+
|
|
140
|
+
Cache writes happen after the response completes. Failed runs (errors, tripwire activations) are not cached, so the next call retries cleanly.
|
|
141
|
+
|
|
142
|
+
## Related
|
|
143
|
+
|
|
144
|
+
- [`ResponseCache` reference](https://mastra.ai/reference/processors/response-cache)
|
|
145
|
+
- [Processors](https://mastra.ai/docs/agents/processors)
|
|
146
|
+
- [Guardrails](https://mastra.ai/docs/agents/guardrails)
|
|
147
|
+
- [Agent.stream()](https://mastra.ai/reference/streaming/agents/stream)
|
|
148
|
+
- [Agent.generate()](https://mastra.ai/reference/agents/generate)
|
|
@@ -224,6 +224,14 @@ export const weatherTool = createTool({
|
|
|
224
224
|
})
|
|
225
225
|
```
|
|
226
226
|
|
|
227
|
+
## Transform tool payloads for UI and transcripts
|
|
228
|
+
|
|
229
|
+
Use `transform` when a tool returns raw data your application needs, but browser-facing streams or user-visible transcript messages should receive a smaller or safer shape. `transform` is separate from `toModelOutput`: `toModelOutput` shapes the payload sent back to the model, while `transform` shapes tool input, output, errors, approval payloads, and suspension payloads for `display` and `transcript` targets.
|
|
230
|
+
|
|
231
|
+
If a transform is configured and it fails, Mastra does not fall back to the raw payload for display or transcript targets. Input deltas are suppressed when no safe `inputDelta` transform is available.
|
|
232
|
+
|
|
233
|
+
See the [`createTool()` reference](https://mastra.ai/reference/tools/create-tool) for a `transform` example. For shared rules across several tools, configure the agent-level `transform` policy in the [`Agent` constructor](https://mastra.ai/reference/agents/agent).
|
|
234
|
+
|
|
227
235
|
## Control tool selection
|
|
228
236
|
|
|
229
237
|
Pass `toolChoice` or `activeTools` to `.generate()` or `.stream()` to control which tools the agent uses at runtime.
|
|
@@ -73,7 +73,7 @@ Integration providers connect external tool platforms to the editor. Once regist
|
|
|
73
73
|
})
|
|
74
74
|
```
|
|
75
75
|
|
|
76
|
-
Composio tool slugs use a format like `GITHUB_CREATE_ISSUE`. Tool calls are scoped to
|
|
76
|
+
Composio tool slugs use a format like `GITHUB_CREATE_ISSUE`. Tool calls are scoped to the `resourceId` passed through request context for per-user authentication.
|
|
77
77
|
|
|
78
78
|
### Arcade
|
|
79
79
|
|
package/.docs/docs/index.md
CHANGED
|
@@ -126,8 +126,8 @@ Templates: [Customer Feedback Summarization](https://mastra.ai/templates/custome
|
|
|
126
126
|
|
|
127
127
|
Browse [templates](https://mastra.ai/templates) for working examples.
|
|
128
128
|
|
|
129
|
-
##
|
|
129
|
+
## Want to learn more?
|
|
130
130
|
|
|
131
|
-
|
|
131
|
+
Here's a quick introduction:
|
|
132
132
|
|
|
133
133
|
[YouTube video player](https://www.youtube-nocookie.com/embed/1qnmnRICX50)
|
|
@@ -55,4 +55,4 @@ MASTRA_PROJECT_ID="<staging-id>" mastra studio deploy --env-file .env.staging --
|
|
|
55
55
|
MASTRA_PROJECT_ID="<production-id>" mastra studio deploy --env-file .env.production --yes
|
|
56
56
|
```
|
|
57
57
|
|
|
58
|
-
Each project has its own Studio URL and its own observability data. When using [`
|
|
58
|
+
Each project has its own Studio URL and its own observability data. When using [`MastraPlatformExporter`](https://mastra.ai/docs/observability/tracing/exporters/mastra-platform), set `MASTRA_PROJECT_ID` and `MASTRA_CLOUD_ACCESS_TOKEN` per environment so traces route to the matching Studio project.
|
|
@@ -75,4 +75,4 @@ Once you're ready to deploy your application to production, use [`mastra studio
|
|
|
75
75
|
|
|
76
76
|
Follow the [Studio deployment guide](https://mastra.ai/docs/studio/deployment) and [Server deployment guide](https://mastra.ai/guides/deployment/mastra-platform) for step-by-step instructions.
|
|
77
77
|
|
|
78
|
-
If you host your Mastra application on your own infrastructure, you can still send observability data to Studio using the [
|
|
78
|
+
If you host your Mastra application on your own infrastructure, you can still send observability data to Studio using the [`MastraPlatformExporter`](https://mastra.ai/docs/observability/tracing/exporters/mastra-platform).
|
|
@@ -77,6 +77,48 @@ The observer also sees these markers when it processes the thread, so the observ
|
|
|
77
77
|
|
|
78
78
|
See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
|
|
79
79
|
|
|
80
|
+
## Early activation
|
|
81
|
+
|
|
82
|
+
OM can activate buffered observations before the token threshold is reached. This is useful when a prompt cache is likely to expire, or when the agent changes model providers.
|
|
83
|
+
|
|
84
|
+
Top-level early activation settings apply to observations by default:
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
const memory = new Memory({
|
|
88
|
+
options: {
|
|
89
|
+
observationalMemory: {
|
|
90
|
+
model: 'google/gemini-2.5-flash',
|
|
91
|
+
activateAfterIdle: '5m',
|
|
92
|
+
activateOnProviderChange: true,
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
})
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Use nested `observation` and `reflection` settings for per-phase control. Reflection early activation is opt-in, so top-level settings affect only observations.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
const memory = new Memory({
|
|
102
|
+
options: {
|
|
103
|
+
observationalMemory: {
|
|
104
|
+
model: 'google/gemini-2.5-flash',
|
|
105
|
+
activateAfterIdle: '5m',
|
|
106
|
+
observation: {
|
|
107
|
+
activateAfterIdle: false,
|
|
108
|
+
},
|
|
109
|
+
reflection: {
|
|
110
|
+
activateAfterIdle: '10m',
|
|
111
|
+
activateOnProviderChange: true,
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
})
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
In this example, the top-level idle setting is disabled for observations, while reflections opt into idle and provider-change activation.
|
|
119
|
+
|
|
120
|
+
See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
|
|
121
|
+
|
|
80
122
|
## Benefits
|
|
81
123
|
|
|
82
124
|
- **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
|
|
@@ -216,7 +258,7 @@ The Observer and Reflector run in the background. Any model that works with Mast
|
|
|
216
258
|
|
|
217
259
|
Generally speaking, we recommend using a model that has a large context window (128K+ tokens) and is fast enough to run in the background without slowing down your actions.
|
|
218
260
|
|
|
219
|
-
If you're unsure which model to use, start with the default `google/gemini-2.5-flash`. We've also successfully tested `openai/gpt-5-mini`, `anthropic/claude-haiku-4-5`, `deepseek/deepseek-reasoner`, `qwen3`, and `glm-4.7`.
|
|
261
|
+
If you're unsure which model to use, start with the default `google/gemini-2.5-flash`. We've also successfully tested `openai/gpt-5-mini`, `anthropic/claude-haiku-4-5`, `deepseek/deepseek-reasoner`, `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`, `xai/grok-4-1-fast`, `qwen3`, and `glm-4.7`.
|
|
220
262
|
|
|
221
263
|
```typescript
|
|
222
264
|
const memory = new Memory({
|
|
@@ -230,6 +272,10 @@ const memory = new Memory({
|
|
|
230
272
|
|
|
231
273
|
See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
|
|
232
274
|
|
|
275
|
+
> **Note:** `google/gemini-2.5-flash` is unusually good at preserving detail in long output. As a result, the Reflector can produce reflections that stay above the configured `reflection.observationTokens` threshold even after the maximum compression retry. When this happens, the Reflector returns the smallest non-degenerate candidate produced during retries so the loop terminates instead of running forever.
|
|
276
|
+
>
|
|
277
|
+
> If you'd rather have more aggressive compression on the Reflector, swap to a model that condenses more readily, such as `xai/grok-4-1-fast`, `deepseek/deepseek-v4-pro`, or `deepseek/deepseek-v4-flash`. You can keep `google/gemini-2.5-flash` for the Observer and use a different model for the Reflector — see [different models per agent](https://mastra.ai/reference/memory/observational-memory).
|
|
278
|
+
|
|
233
279
|
### Token-tiered model selection
|
|
234
280
|
|
|
235
281
|
**Added in:** `@mastra/memory@1.10.0`
|
|
@@ -364,17 +410,19 @@ Reflection works similarly — the Reflector runs in the background when observa
|
|
|
364
410
|
|
|
365
411
|
### Settings
|
|
366
412
|
|
|
367
|
-
| Setting
|
|
368
|
-
|
|
|
369
|
-
| `observation.bufferTokens`
|
|
370
|
-
| `observation.bufferActivation`
|
|
371
|
-
| `observation.blockAfter`
|
|
372
|
-
| `activateAfterIdle`
|
|
373
|
-
| `activateOnProviderChange`
|
|
374
|
-
| `reflection.bufferActivation`
|
|
375
|
-
| `reflection.
|
|
413
|
+
| Setting | Default | What it controls |
|
|
414
|
+
| ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
415
|
+
| `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
|
|
416
|
+
| `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
|
|
417
|
+
| `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
|
|
418
|
+
| `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
|
|
419
|
+
| `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
|
|
420
|
+
| `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
|
|
421
|
+
| `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
|
|
422
|
+
| `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
|
|
423
|
+
| `reflection.blockAfter` | `1.2` | Safety threshold for reflection. Works the same way as `observation.blockAfter`. |
|
|
376
424
|
|
|
377
|
-
If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations
|
|
425
|
+
If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
|
|
378
426
|
|
|
379
427
|
```typescript
|
|
380
428
|
const memory = new Memory({
|
|
@@ -388,9 +436,9 @@ const memory = new Memory({
|
|
|
388
436
|
})
|
|
389
437
|
```
|
|
390
438
|
|
|
391
|
-
With a 5-minute prompt cache TTL, this activates buffered
|
|
439
|
+
With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
|
|
392
440
|
|
|
393
|
-
Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered
|
|
441
|
+
Changing models or providers mid-thread invalidates the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
|
|
394
442
|
|
|
395
443
|
### Disabling
|
|
396
444
|
|
|
@@ -458,4 +506,5 @@ In practical terms, OM replaces both working memory and message history, and has
|
|
|
458
506
|
- [Observational Memory Reference](https://mastra.ai/reference/memory/observational-memory)
|
|
459
507
|
- [Memory Overview](https://mastra.ai/docs/memory/overview)
|
|
460
508
|
- [Message History](https://mastra.ai/docs/memory/message-history)
|
|
461
|
-
- [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
|
|
509
|
+
- [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
|
|
510
|
+
- [Mastra Code](https://code.mastra.ai/): A coding agent using Observational Memory
|
|
@@ -237,4 +237,5 @@ export const memoryAgent = new Agent({
|
|
|
237
237
|
|
|
238
238
|
- [`Memory` reference](https://mastra.ai/reference/memory/memory-class)
|
|
239
239
|
- [Tracing](https://mastra.ai/docs/observability/tracing/overview)
|
|
240
|
-
- [Request Context](https://mastra.ai/docs/server/request-context)
|
|
240
|
+
- [Request Context](https://mastra.ai/docs/server/request-context)
|
|
241
|
+
- [Mastra Code](https://code.mastra.ai/): A coding agent using Mastra's memory system
|